Repository 's_mart'
hg clone https://toolshed.g2.bx.psu.edu/repos/yufei-luo/s_mart

Changeset 31:0ab839023fe4 (2013-04-30)
Previous changeset 30:5677346472b5 (2013-04-29) Next changeset 32:3441fe98a2ba (2013-04-30)
Commit message:
Uploaded
modified:
SMART/Java/Python/CountLoci.py
SMART/Java/Python/cleaning/CleanerChooser.pyc
SMART/Java/Python/cleaning/DefaultCleaner.pyc
SMART/Java/Python/cleaning/GffCleaner.pyc
SMART/Java/Python/cleaning/GtfCleaner.pyc
SMART/Java/Python/cleaning/TranscriptListCleaner.pyc
SMART/Java/Python/cleaning/__init__.pyc
SMART/Java/Python/misc/Progress.pyc
SMART/Java/Python/misc/RPlotter.py
SMART/Java/Python/misc/RPlotter.pyc
SMART/Java/Python/misc/UnlimitedProgress.pyc
SMART/Java/Python/misc/Utils.pyc
SMART/Java/Python/misc/__init__.pyc
SMART/Java/Python/mySql/MySqlConnection.pyc
SMART/Java/Python/mySql/MySqlExonTable.pyc
SMART/Java/Python/mySql/MySqlQuery.pyc
SMART/Java/Python/mySql/MySqlTable.pyc
SMART/Java/Python/mySql/MySqlTranscriptTable.pyc
SMART/Java/Python/mySql/__init__.pyc
SMART/Java/Python/ncList/ConvertToNCList.pyc
SMART/Java/Python/ncList/FileSorter.pyc
SMART/Java/Python/ncList/NCIndex.pyc
SMART/Java/Python/ncList/NCList.pyc
SMART/Java/Python/ncList/NCListCursor.pyc
SMART/Java/Python/ncList/NCListFilePickle.pyc
SMART/Java/Python/ncList/NCListHandler.pyc
SMART/Java/Python/ncList/NCListMerger.pyc
SMART/Java/Python/ncList/NCListParser.pyc
SMART/Java/Python/ncList/__init__.pyc
SMART/Java/Python/removeEmptySequences.py
SMART/Java/Python/structure/Bins.pyc
SMART/Java/Python/structure/Interval.pyc
SMART/Java/Python/structure/Mapping.pyc
SMART/Java/Python/structure/Sequence.pyc
SMART/Java/Python/structure/SequenceList.pyc
SMART/Java/Python/structure/SubMapping.pyc
SMART/Java/Python/structure/Transcript.pyc
SMART/Java/Python/structure/TranscriptContainer.pyc
SMART/Java/Python/structure/TranscriptList.pyc
SMART/Java/Python/structure/__init__.pyc
SMART/Java/README.txt
SMART/galaxy/CleanTranscriptFile.xml
SMART/galaxy/Clusterize.xml
SMART/galaxy/CollapseReads.xml
SMART/galaxy/CompareOverlappingSmallQuery.xml
SMART/galaxy/CompareOverlappingSmallRef.xml
SMART/galaxy/ConvertTranscriptFile.xml
SMART/galaxy/CountReadGCPercent.xml
SMART/galaxy/GetDifferentialExpression.xml
SMART/galaxy/GetFlanking.xml
SMART/galaxy/SelectByTag.xml
SMART/galaxy/WrappGetLetterDistribution.xml
SMART/galaxy/changeGffFeatures.xml
SMART/galaxy/changeTagName.xml
SMART/galaxy/clusterizeBySlidingWindows.xml
SMART/galaxy/compareOverlapping.xml
SMART/galaxy/computeCoverage.xml
SMART/galaxy/coordinatesToSequence.xml
SMART/galaxy/getDifference.xml
SMART/galaxy/getDistance.xml
SMART/galaxy/getDistribution.xml
SMART/galaxy/getExons.xml
SMART/galaxy/getIntrons.xml
SMART/galaxy/getSizes.xml
SMART/galaxy/getWigData.xml
SMART/galaxy/getWigDistance.xml
SMART/galaxy/getWigProfile.xml
SMART/galaxy/mapperAnalyzer.xml
SMART/galaxy/mergeSlidingWindowsClusters.xml
SMART/galaxy/mergeTranscriptLists.xml
SMART/galaxy/modifyGenomicCoordinates.xml
SMART/galaxy/modifySequenceList.xml
SMART/galaxy/plotCoverage.xml
SMART/galaxy/plotTranscriptList.xml
SMART/galaxy/removeExonLines.xml
SMART/galaxy/restrictFromSize.xml
SMART/galaxy/restrictTranscriptList.xml
SMART/galaxy/trimSequences.xml
commons/core/parsing/AxtParser.py
commons/core/parsing/FastaParser.py
added:
SMART/DiffExpAnal/DESeqTools/HTseqClean.R
SMART/DiffExpAnal/DESeqTools/MAplotDE.R
SMART/DiffExpAnal/DESeqTools/RNAseqFunctions.R
SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R
SMART/DiffExpAnal/DESeqTools/barplotNul.R
SMART/DiffExpAnal/DESeqTools/barplotTC.R
SMART/DiffExpAnal/DESeqTools/boxplotCounts.R
SMART/DiffExpAnal/DESeqTools/clusterPlot.R
SMART/DiffExpAnal/DESeqTools/densityPlot.R
SMART/DiffExpAnal/DESeqTools/exportComplete.R
SMART/DiffExpAnal/DESeqTools/exportDiff.R
SMART/DiffExpAnal/DESeqTools/histoRawp.R
SMART/DiffExpAnal/DESeqTools/loadCountData.R
SMART/DiffExpAnal/DESeqTools/loadTargetFile.R
SMART/DiffExpAnal/DESeqTools/majSequence.R
SMART/DiffExpAnal/DESeqTools/pairwiseSERE.R
SMART/DiffExpAnal/DESeqTools/pairwiseScatterPlots.R
SMART/DiffExpAnal/DESeqTools/plotDispEstimates.R
SMART/DiffExpAnal/DESeqTools/raw/f1cond1.tsv
SMART/DiffExpAnal/DESeqTools/raw/f1cond2.tsv
SMART/DiffExpAnal/DESeqTools/raw/f2cond1.tsv
SMART/DiffExpAnal/DESeqTools/raw/f2cond2.tsv
SMART/DiffExpAnal/DESeqTools/raw2counts.R
SMART/DiffExpAnal/DESeqTools/removeNul.R
SMART/DiffExpAnal/__init__.py
SMART/DiffExpAnal/bam_to_sam_parallel.py
SMART/DiffExpAnal/bam_to_sam_parallel.xml
SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.py
SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.xml
SMART/DiffExpAnal/compareOverlapping_parallel.py
SMART/DiffExpAnal/compareOverlapping_parallel.xml
SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.py
SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.xml
SMART/DiffExpAnal/countNumber.pl
SMART/DiffExpAnal/countNumber.xml
SMART/DiffExpAnal/countNumber_parallel.py
SMART/DiffExpAnal/countNumber_parallel.xml
SMART/DiffExpAnal/countNumber_parallel_unSQL.py
SMART/DiffExpAnal/countNumber_parallel_unSQL.xml
SMART/DiffExpAnal/deseq.sh
SMART/DiffExpAnal/deseq.xml
SMART/DiffExpAnal/fastq_groomer_parallel.py
SMART/DiffExpAnal/fastq_groomer_parallel.xml
SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.py
SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.xml
SMART/DiffExpAnal/gsnap.xml
SMART/DiffExpAnal/gsnap_parallel_unSQL.py
SMART/DiffExpAnal/gsnap_parallel_unSQL.xml
SMART/DiffExpAnal/listInputs.pl
SMART/DiffExpAnal/listInputs.xml
SMART/DiffExpAnal/loadHTSeqResultFiles.py
SMART/DiffExpAnal/loadHTSeqResultFiles.xml
SMART/DiffExpAnal/loadMultiFastqFiles.py
SMART/DiffExpAnal/loadMultiFastqFiles.sh
SMART/DiffExpAnal/loadMultiFastqFiles.xml
SMART/DiffExpAnal/testR.R
SMART/DiffExpAnal/testR.sh
SMART/DiffExpAnal/tophat_parallel.py
SMART/DiffExpAnal/tophat_parallel.xml
SMART/DiffExpAnal/tophat_parallel_unSQL.py
SMART/DiffExpAnal/tophat_parallel_unSQL.xml
SMART/DiffExpAnal/wrappGSNAP.py
SMART/Java/File.java
SMART/Java/Files.java
SMART/Java/FormatType.java
SMART/Java/FormatsContainer.java
SMART/Java/FormatsReader.java
SMART/Java/Global.java
SMART/Java/Installer/Old/PasswordAsker.java
SMART/Java/Installer/Old/SmartInstaller.java
SMART/Java/Installer/Old/SmartInstallerTask.java
SMART/Java/Installer/PasswordAsker.java
SMART/Java/Installer/SmartInstaller.jar
SMART/Java/Installer/SmartInstaller.java
SMART/Java/Installer/SmartInstallerTask.java
SMART/Java/Installer/build.sh
SMART/Java/Installer/manifest.txt
SMART/Java/Installer/s-mart.zip
SMART/Java/Program.java
SMART/Java/ProgramFileReader.java
SMART/Java/ProgramLauncher.java
SMART/Java/ProgramOption.java
SMART/Java/Python/.RData
SMART/Java/Python/.gitignore
SMART/Java/Python/100%
SMART/Java/Python/CompareOverlapping.pyc
SMART/Java/Python/GetFlanking.pyc
SMART/Java/Python/GetUpDownStream.pyc
SMART/Java/Python/Helitrons.fasta
SMART/Java/Python/Rplots.pdf
SMART/Java/Python/S1_S3_blast.blast
SMART/Java/Python/TestFiles/SR1.fastq
SMART/Java/Python/TestFiles/Wig/chr1.wig
SMART/Java/Python/TestFiles/adress.txt
SMART/Java/Python/TestFiles/clusterize_default_expected.gff3
SMART/Java/Python/TestFiles/clusterize_default_expected.map
SMART/Java/Python/TestFiles/clusterize_normalize_expected.gff3
SMART/Java/Python/TestFiles/clusterize_output_tag_expected.gff3
SMART/Java/Python/TestFiles/clusterize_strands_expected.gff3
SMART/Java/Python/TestFiles/expOutputGff.gff3
SMART/Java/Python/TestFiles/expRef.fasta
SMART/Java/Python/TestFiles/inputCR.gff3
SMART/Java/Python/TestFiles/inputFileTest1.bed
SMART/Java/Python/TestFiles/inputFileTest2.bed
SMART/Java/Python/TestFiles/inputMSWC1.gff3
SMART/Java/Python/TestFiles/inputMSWC2.gff3
SMART/Java/Python/TestFiles/inputMTC.sam
SMART/Java/Python/TestFiles/inputMapping.map
SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3
SMART/Java/Python/TestFiles/mapperAnalyzerMappings.axt
SMART/Java/Python/TestFiles/mapperAnalyzerOutput.gff3
SMART/Java/Python/TestFiles/mapperAnalyzerSequences.mfq
SMART/Java/Python/TestFiles/sorted_file_oneline.gff3
SMART/Java/Python/TestFiles/sorted_query.gff3
SMART/Java/Python/TestFiles/sorted_query_wig.wig
SMART/Java/Python/TestFiles/sorted_ref.gff3
SMART/Java/Python/TestFiles/testBedParser1.bed
SMART/Java/Python/TestFiles/testC2S.fa
SMART/Java/Python/TestFiles/testC2S.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionExpected.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionOutput.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionReference.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionSample1.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionSample2.gff3
SMART/Java/Python/TestFiles/testGffParser1.gff3
SMART/Java/Python/TestFiles/testPlot.gff3
SMART/Java/Python/TestFiles/testSW.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1_modif.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMerge1.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeDifferentClusters1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeSense1.bed
SMART/Java/Python/TestFiles/testTranscriptNormalize.gff3
SMART/Java/Python/TestFiles/test_distance.bed
SMART/Java/Python/TestFiles/test_minoverlapp.bed
SMART/Java/Python/__init__.pyc
SMART/Java/Python/cleanGff.pyc
SMART/Java/Python/compare_TAIR10_Reiterative4th.gff3
SMART/Java/Python/fo.py
SMART/Java/Python/genes.gtf
SMART/Java/Python/genome.fasta
SMART/Java/Python/gf.py
SMART/Java/Python/misc/MultipleRPlotter.pyc
SMART/Java/Python/re_filter_ratio_5_NbReads_100_samples_all_norm_Window100overlap50.gff3
SMART/Java/Python/test.gff3
SMART/Java/Python/test.pdf
SMART/Java/Python/test.png
SMART/Java/Python/test/MockGetLetterDistribution.py
SMART/Java/Python/test/Test_F_Clusterize.py
SMART/Java/Python/test/Test_F_ClusterizeByTags.py
SMART/Java/Python/test/Test_F_CollapseReads.py
SMART/Java/Python/test/Test_F_CombineTags.py
SMART/Java/Python/test/Test_F_FindOverlapsOptim.py
SMART/Java/Python/test/Test_F_GetDifferentialExpression.py
SMART/Java/Python/test/Test_F_GetFlanking.py
SMART/Java/Python/test/Test_F_GetRandomSubset.py
SMART/Java/Python/test/Test_F_GetSizes.py
SMART/Java/Python/test/Test_F_RestrictFromCoverage.py
SMART/Java/Python/test/Test_F_clusterizeBySlidingWindows.py
SMART/Java/Python/test/Test_F_compareOverlapping.py
SMART/Java/Python/test/Test_F_convertTranscriptFile.py
SMART/Java/Python/test/Test_F_coordinatesToSequence.py
SMART/Java/Python/test/Test_F_findTss.py
SMART/Java/Python/test/Test_F_getExons.py
SMART/Java/Python/test/Test_F_getLetterDistribution.py
SMART/Java/Python/test/Test_F_getRandomRegions.py
SMART/Java/Python/test/Test_F_getReadDistribution.py
SMART/Java/Python/test/Test_F_getWigData.py
SMART/Java/Python/test/Test_F_getWigDistance.py
SMART/Java/Python/test/Test_F_getWigProfile.py
SMART/Java/Python/test/Test_F_mapperAnalyzer.py
SMART/Java/Python/test/Test_F_mappingToCoordinates.py
SMART/Java/Python/test/Test_F_mergeSlidingWindowsClusters.py
SMART/Java/Python/test/Test_F_mergeTranscriptLists.py
SMART/Java/Python/test/Test_F_plot.py
SMART/Java/Python/test/Test_F_plotCoverage.py
SMART/Java/Python/test/Test_F_qualToFastq.py
SMART/Java/Python/test/Test_F_restrictSequenceList.py
SMART/Java/Python/test/Test_F_selectByTag.py
SMART/Java/Python/test/Test_F_trimSequences.py
SMART/Java/Python/test/Test_FindOverlapsOptim.py
SMART/Java/Python/test/Test_FindOverlaps_optim.py
SMART/Java/Python/test/__init__.py
SMART/Java/Python/test/timeResults.R
SMART/Java/Python/test3.gff3
SMART/Java/Python/test3.png
SMART/Java/Python/test3.png_I.png
SMART/Java/Python/testInstall.py
SMART/Java/Python/testOut.gff3
SMART/Java/PythonHelperReader.java
SMART/Java/PythonProgramFinder.java
SMART/Java/Sav/File.java
SMART/Java/Sav/Files.java
SMART/Java/Sav/FormatType.java
SMART/Java/Sav/FormatsContainer.java
SMART/Java/Sav/FormatsReader.java
SMART/Java/Sav/Global.java
SMART/Java/Sav/Program.java
SMART/Java/Sav/ProgramFileReader.java
SMART/Java/Sav/ProgramLauncher.java
SMART/Java/Sav/ProgramOption.java
SMART/Java/Sav/PythonHelperReader.java
SMART/Java/Sav/PythonProgramFinder.java
SMART/Java/Sav/Smart.java
SMART/Java/Smart.jar
SMART/Java/Smart.java
SMART/Java/SmartInstaller.jar
SMART/Java/WindowsRegistry.java
SMART/Java/__init__.pyc
SMART/Java/formats.txt
SMART/Java/manifest.txt
SMART/__init__.pyc
SMART/bacteriaRegulatoryRegion_Detection/changeName.py
SMART/bacteriaRegulatoryRegion_Detection/changeName.xml
SMART/bacteriaRegulatoryRegion_Detection/colorGff.pl
SMART/bacteriaRegulatoryRegion_Detection/colorGff.xml
SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl
SMART/bacteriaRegulatoryRegion_Detection/coverageGff.xml
SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl
SMART/bacteriaRegulatoryRegion_Detection/interElementGff.xml
SMART/bacteriaRegulatoryRegion_Detection/listGff.sh
SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.sh
SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.xml
SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl
SMART/bacteriaRegulatoryRegion_Detection/seedGff.xml
SMART/bacteriaRegulatoryRegion_Detection/sortGff.pl
SMART/bacteriaRegulatoryRegion_Detection/sortGff.xml
SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.pl
SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.xml
SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl
SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.xml
SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.py
SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.xml
SMART/data/REF.fasta
SMART/data/REF.fasta.fai
SMART/data/SR1.fasta
SMART/data/SR1.fastq
SMART/data/SR2.fastq
SMART/data/bamFile.bam
SMART/data/dummy.fasta
SMART/data/expRef.fasta
SMART/data/expRef_withoutSplit.fasta
SMART/data/output.png
SMART/data/part0.bam
SMART/data/part0.sam
SMART/data/part1.bam
SMART/data/part1.sam
SMART/data/part2.bam
SMART/data/part2.sam
SMART/data/part3.bam
SMART/data/part3.sam
SMART/data/part4.bam
SMART/data/part4.sam
SMART/data/samFile.sam
SMART/data/sortedBamFile.bam
SMART/data/test.gff.gff3
SMART/data/test_clusterize.gff3
SMART/data/test_clusterize2.gff3
SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml
SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml
SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml
SMART/galaxy/ConvertTranscriptFile_BedToSam.xml
SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml
SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml
SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml
SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml
SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml
SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml
SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml
SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml
SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml
SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml
SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml
SMART/galaxy/DiffExpAnal.xml
SMART/galaxy/FindOverlaps_optim.xml
SMART/galaxy/cleanGff.xml
SMART/galaxy/clusterize.xml
SMART/galaxy/findTss.xml
SMART/galaxy/getNb.xml
SMART/galaxy/getReadDistribution.xml
SMART/galaxy/getSequence.xml
SMART/galaxy/mappingToCoordinates.xml
SMART/galaxy/modifyFasta.xml
SMART/galaxy/plot.xml
SMART/galaxy/plotGenomeCoverage.xml
SMART/galaxy/plotRepartition.xml
SMART/galaxy/qualToFastq.xml
SMART/galaxy/restrictSequenceList.xml
SMART/galaxy/test/CollapseReads.xml
SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
SMART/galaxy/test/__init__.py
SMART/galaxy/testArgum.xml
SMART/galaxy/testR.xml
SMART/galaxy/trimAdaptor.xml
commons/__init__.pyc
commons/core/LoggerFactory.pyc
commons/core/__init__.pyc
commons/core/checker/AbstractChecker.py
commons/core/checker/CheckerException.py
commons/core/checker/CheckerUtils.py
commons/core/checker/ConfigChecker.py
commons/core/checker/ConfigException.py
commons/core/checker/ConfigValue.py
commons/core/checker/IChecker.py
commons/core/checker/OldConfigChecker.py
commons/core/checker/RepetException.py
commons/core/checker/RepetException.pyc
commons/core/checker/__init__.py
commons/core/checker/__init__.pyc
commons/core/checker/test/TestSuite_Checker.py
commons/core/checker/test/Test_CheckerUtils.py
commons/core/checker/test/Test_ConfigChecker.py
commons/core/checker/test/Test_ConfigValue.py
commons/core/checker/test/Test_F_ConfigChecker.py
commons/core/checker/test/Test_OldConfigChecker.py
commons/core/checker/test/__init__.py
commons/core/coord/Align.pyc
commons/core/coord/Map.pyc
commons/core/coord/Range.pyc
commons/core/coord/__init__.pyc
commons/core/coord/test/TestSuite_coord.py
commons/core/coord/test/Test_Align.py
commons/core/coord/test/Test_AlignUtils.py
commons/core/coord/test/Test_ConvCoord.py
commons/core/coord/test/Test_F_ConvCoord.py
commons/core/coord/test/Test_Map.py
commons/core/coord/test/Test_MapUtils.py
commons/core/coord/test/Test_Match.py
commons/core/coord/test/Test_MatchUtils.py
commons/core/coord/test/Test_MergedRange.py
commons/core/coord/test/Test_Path.py
commons/core/coord/test/Test_PathUtils.py
commons/core/coord/test/Test_Range.py
commons/core/coord/test/Test_Set.py
commons/core/coord/test/Test_SetUtils.py
commons/core/coord/test/Test_SlidingWindow.py
commons/core/coord/test/__init__.py
commons/core/launcher/JobScriptTemplate.py
commons/core/launcher/JobScriptTemplateLight.py
commons/core/launcher/JobScriptWithFilesCopyTemplate.py
commons/core/launcher/Launcher.py
commons/core/launcher/Launcher2.py
commons/core/launcher/LauncherUtils.py
commons/core/launcher/WriteScript.py
commons/core/launcher/__init__.py
commons/core/launcher/test/Test_Launcher.py
commons/core/launcher/test/Test_Launcher2.py
commons/core/launcher/test/Test_LauncherUtils.py
commons/core/launcher/test/Test_WriteScript.py
commons/core/launcher/test/__init__.py
commons/core/launcher/test/expFiles/expJobScriptSQLiteWithFilesCopyTemplate.py
commons/core/launcher/test/expFiles/expJobScriptTemplate.py
commons/core/launcher/test/expFiles/expJobScriptTemplateLight.py
commons/core/launcher/test/expFiles/expJobScriptTemplate_cmdWith2Lines.py
commons/core/launcher/test/expFiles/expJobScriptWithFilesCopyTemplate.py
commons/core/parsing/AxtParser.pyc
commons/core/parsing/BamParser.pyc
commons/core/parsing/BedParser.pyc
commons/core/parsing/BlastParser.pyc
commons/core/parsing/BowtieParser.pyc
commons/core/parsing/CoordsParser.pyc
commons/core/parsing/ElandParser.pyc
commons/core/parsing/ExoParser.pyc
commons/core/parsing/FastaParser.pyc
commons/core/parsing/FastqParser.pyc
commons/core/parsing/GffParser.pyc
commons/core/parsing/GtfParser.pyc
commons/core/parsing/MapParser.pyc
commons/core/parsing/MapperParser.pyc
commons/core/parsing/MaqParser.pyc
commons/core/parsing/NCListParser.pyc
commons/core/parsing/ParserChooser.pyc
commons/core/parsing/PklParser.pyc
commons/core/parsing/PslParser.pyc
commons/core/parsing/RmapParser.pyc
commons/core/parsing/SamParser.pyc
commons/core/parsing/SeqmapParser.pyc
commons/core/parsing/SequenceListParser.pyc
commons/core/parsing/ShrimpParser.pyc
commons/core/parsing/Soap2Parser.pyc
commons/core/parsing/SoapParser.pyc
commons/core/parsing/TranscriptListParser.pyc
commons/core/parsing/WigParser.pyc
commons/core/parsing/__init__.pyc
commons/core/parsing/test/Test_BedParser.py
commons/core/parsing/test/Test_BlatFileParser.py
commons/core/parsing/test/Test_BlatParser.py
commons/core/parsing/test/Test_BlatToGff.py
commons/core/parsing/test/Test_BlatToGffForBesPaired.py
commons/core/parsing/test/Test_BowtieParser.py
commons/core/parsing/test/Test_CoordsParser.py
commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py
commons/core/parsing/test/Test_F_BlatToGff.py
commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py
commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py
commons/core/parsing/test/Test_F_VarscanToVCF.py
commons/core/parsing/test/Test_FastaParser.py
commons/core/parsing/test/Test_FindRep.py
commons/core/parsing/test/Test_GffParser.py
commons/core/parsing/test/Test_MapParser.py
commons/core/parsing/test/Test_MrepsToSet.py
commons/core/parsing/test/Test_Multifasta2SNPFile.py
commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py
commons/core/parsing/test/Test_PalsToAlign.py
commons/core/parsing/test/Test_PathNum2Id.py
commons/core/parsing/test/Test_PslParser.py
commons/core/parsing/test/Test_SsrParser.py
commons/core/parsing/test/Test_VarscanFile.py
commons/core/parsing/test/Test_VarscanFileForGnpSNP.py
commons/core/parsing/test/Test_VarscanHit.py
commons/core/parsing/test/Test_VarscanHitForGnpSNP.py
commons/core/parsing/test/Test_VarscanHit_WithTag.py
commons/core/parsing/test/Test_VarscanHit_v2_2_8.py
commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py
commons/core/parsing/test/Test_VarscanToVCF.py
commons/core/parsing/test/Test_WigParser.py
commons/core/parsing/test/Test_pilerTAToGrouperMap.py
commons/core/parsing/test/__init__.py
commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv
commons/core/parsing/test/data/Wig/chr1.wig
commons/core/parsing/test/data/realExpBatchLine.csv
commons/core/parsing/test/data/realExpIndividual.csv
commons/core/parsing/test/data/realExpSequences.fsa
commons/core/parsing/test/data/realExpSubSNP.csv
commons/core/parsing/test/data/real_multifasta_input.fasta
commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan
commons/core/parsing/test/data/test.wig
commons/core/parsing/test/data/test1.wig
commons/core/parsing/test/data/test2.wig
commons/core/parsing/test/data/testBedParser1.bed
commons/core/parsing/test/data/testCoordsParser.coords
commons/core/parsing/test/data/testCoordsParser_showcoord.coords
commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords
commons/core/parsing/test/data/testGffParser1.gff3
commons/core/seq/Bioseq.pyc
commons/core/seq/__init__.pyc
commons/core/seq/test/TestClusterConsensusCollection.py
commons/core/seq/test/TestSuite_seq.py
commons/core/seq/test/Test_AlignedBioseqDB.py
commons/core/seq/test/Test_Bioseq.py
commons/core/seq/test/Test_BioseqDB.py
commons/core/seq/test/Test_BioseqUtils.py
commons/core/seq/test/Test_FastaUtils.py
commons/core/seq/test/Utils_for_T_FastaUtils.py
commons/core/seq/test/__init__.py
commons/core/sql/DbFactory.py
commons/core/sql/DbMySql.py
commons/core/sql/DbSQLite.py
commons/core/sql/ITableMapAdaptator.py
commons/core/sql/ITableMatchAdaptator.py
commons/core/sql/ITablePathAdaptator.py
commons/core/sql/ITableSeqAdaptator.py
commons/core/sql/ITableSetAdaptator.py
commons/core/sql/Job.py
commons/core/sql/JobAdaptator.py
commons/core/sql/OldRepetDB.py
commons/core/sql/RepetJob.py
commons/core/sql/TableAdaptator.py
commons/core/sql/TableBinPathAdaptator.py
commons/core/sql/TableBinSetAdaptator.py
commons/core/sql/TableJobAdaptator.py
commons/core/sql/TableJobAdaptatorFactory.py
commons/core/sql/TableMapAdaptator.py
commons/core/sql/TableMatchAdaptator.py
commons/core/sql/TablePathAdaptator.py
commons/core/sql/TableSeqAdaptator.py
commons/core/sql/TableSetAdaptator.py
commons/core/sql/__init__.py
commons/core/sql/test/TestSuite_sql.py
commons/core/sql/test/Test_DbFactory.py
commons/core/sql/test/Test_DbMySql.py
commons/core/sql/test/Test_DbSQLite.py
commons/core/sql/test/Test_F_JobAdaptator.py
commons/core/sql/test/Test_F_TableJobAdaptator.py
commons/core/sql/test/Test_Job.py
commons/core/sql/test/Test_TableBinPathAdaptator.py
commons/core/sql/test/Test_TableBinSetAdaptator.py
commons/core/sql/test/Test_TableJobAdaptator.py
commons/core/sql/test/Test_TableJobAdaptatorFactory.py
commons/core/sql/test/Test_TableMapAdaptator.py
commons/core/sql/test/Test_TableMatchAdaptator.py
commons/core/sql/test/Test_TablePathAdaptator.py
commons/core/sql/test/Test_TableSeqAdaptator.py
commons/core/sql/test/Test_TableSetAdaptator.py
commons/core/sql/test/Tst_F_RepetJob.py
commons/core/sql/test/Tst_RepetJob.py
commons/core/sql/test/__init__.py
commons/core/stat/Stat.py
commons/core/stat/__init__.py
commons/core/stat/test/Test_F_Stat.py
commons/core/stat/test/Test_Stat.py
commons/core/stat/test/__init__.py
commons/core/test/Test_LoggerFactory.py
commons/core/test/__init__.py
commons/core/tree/Tree.py
commons/core/tree/__init__.py
commons/core/tree/test/Test_Tree.py
commons/core/tree/test/__init__.py
commons/core/tree/test/treeTestSuite.py
commons/core/utils/RepetOptionParser.pyc
commons/core/utils/__init__.pyc
commons/core/utils/test/TestSuite_utils.py
commons/core/utils/test/Test_FileUtils.py
commons/core/utils/test/__init__.py
commons/core/writer/BedWriter.pyc
commons/core/writer/CsvWriter.pyc
commons/core/writer/EmblWriter.pyc
commons/core/writer/FastaWriter.pyc
commons/core/writer/FastqWriter.pyc
commons/core/writer/GbWriter.pyc
commons/core/writer/Gff2Writer.pyc
commons/core/writer/Gff3Writer.pyc
commons/core/writer/GtfWriter.pyc
commons/core/writer/MapWriter.pyc
commons/core/writer/MySqlTranscriptWriter.pyc
commons/core/writer/SamWriter.pyc
commons/core/writer/SequenceListWriter.pyc
commons/core/writer/TranscriptListWriter.pyc
commons/core/writer/TranscriptWriter.pyc
commons/core/writer/UcscWriter.pyc
commons/core/writer/WigWriter.pyc
commons/core/writer/WriterChooser.pyc
commons/core/writer/__init__.pyc
commons/core/writer/test/Test_Gff3Writer.py
commons/core/writer/test/Test_MapWriter.py
commons/core/writer/test/__init__.py
commons/launcher/BlatClusterLauncher.py
commons/launcher/BlatProgramLauncher.py
commons/launcher/LaunchBlastclust.py
commons/launcher/LaunchLastZ.py
commons/launcher/LaunchMCL.py
commons/launcher/LaunchMap.py
commons/launcher/LaunchMatcher.py
commons/launcher/LaunchMummerPlot.py
commons/launcher/LaunchNucmer.py
commons/launcher/LaunchPhyML.py
commons/launcher/LaunchPromer.py
commons/launcher/LaunchRefAlign.py
commons/launcher/LaunchRefalign_old.py
commons/launcher/LaunchRepeatMasker.py
commons/launcher/LaunchTRF.py
commons/launcher/LaunchTallymer.py
commons/launcher/MafftClusterLauncher.py
commons/launcher/MafftProgramLauncher.py
commons/launcher/MapClusterLauncher.py
commons/launcher/MapProgramLauncher.py
commons/launcher/NWalignProgramLauncher.py
commons/launcher/RepeatMaskerClusterLauncher.py
commons/launcher/RepeatMaskerProgramLauncher.py
commons/launcher/YassClusterLauncher.py
commons/launcher/YassProgramLauncher.py
commons/launcher/__init__.py
commons/launcher/launchBlasterMatcherPerQuery.py
commons/launcher/launchMafft.py
commons/launcher/launchMreps.py
commons/launcher/launchPhyML.py
commons/launcher/launchPrank.py
commons/launcher/launchTCoffee.py
commons/launcher/launchTEclass.py
commons/launcher/tests/MockDataBankForBlat.py
commons/launcher/tests/MockESTBankForBlat.py
commons/launcher/tests/MockOutputForBlat.py
commons/launcher/tests/Test_BlatClusterLauncher.py
commons/launcher/tests/Test_BlatProgramLauncher.py
commons/launcher/tests/Test_F_BlatProgramLauncher.py
commons/launcher/tests/Test_F_LaunchBlastclust.py
commons/launcher/tests/Test_F_LaunchLastZ.py
commons/launcher/tests/Test_F_LaunchMCL.py
commons/launcher/tests/Test_F_LaunchMap.py
commons/launcher/tests/Test_F_LaunchMatcher.py
commons/launcher/tests/Test_F_LaunchMummerPlot.py
commons/launcher/tests/Test_F_LaunchNucmer.py
commons/launcher/tests/Test_F_LaunchPhyML.py
commons/launcher/tests/Test_F_LaunchPromer.py
commons/launcher/tests/Test_F_LaunchRefAlign.py
commons/launcher/tests/Test_F_LaunchRefalign.py
commons/launcher/tests/Test_F_LaunchRepeatMasker.py
commons/launcher/tests/Test_F_LaunchTRF.py
commons/launcher/tests/Test_F_LaunchTallymer.py
commons/launcher/tests/Test_LaunchBlastclust.py
commons/launcher/tests/Test_LaunchTallymer.py
commons/launcher/tests/Test_MafftClusterLauncher.py
commons/launcher/tests/Test_MafftProgramLauncher.py
commons/launcher/tests/Test_MapClusterLauncher.py
commons/launcher/tests/Test_MapProgramLauncher.py
commons/launcher/tests/Test_NWalignProgramLauncher.py
commons/launcher/tests/Test_RepeatMaskerClusterLauncher.py
commons/launcher/tests/Test_RepeatMaskerProgramLauncher.py
commons/launcher/tests/Test_YassClusterLauncher.py
commons/launcher/tests/Test_YassProgramLauncher.py
commons/launcher/tests/Test_launchTEclass.py
commons/launcher/tests/__init__.py
commons/pyRepetUnit/__init__.py
commons/pyRepetUnit/align/AlignList.py
commons/pyRepetUnit/align/AlignListUtils.py
commons/pyRepetUnit/align/__init__.py
commons/pyRepetUnit/align/hmmOutputParsing/HmmpfamOutput2align.py
commons/pyRepetUnit/align/hmmOutputParsing/HmmscanOutput2align.py
commons/pyRepetUnit/align/hmmOutputParsing/__init__.py
commons/pyRepetUnit/align/hmmOutputParsing/tests/HmmpfamOutput2AlignTestSuite.py
commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_Hmmerpfam2align.py
commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmpfamOutput2align.py
commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmscanOutput2align.py
commons/pyRepetUnit/align/hmmOutputParsing/tests/__init__.py
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/ConsensusTestFile_nt.fsa
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/OutputHmmpfamTest
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/PostPostProcessTestFiltered.align
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmscanTransformedExpected.align
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/repetHmmscan.fa
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output.align
commons/pyRepetUnit/align/tests/Test_AlignListUtils.py
commons/pyRepetUnit/align/tests/__init__.py
commons/pyRepetUnit/align/tests/hmmpfamOutputParsingTestSuite.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/__init__.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_F_TransformAACoordIntoNtCoordAndScoreFiltering.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/TransformAACoordIntoNtCoordTestSuite.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/__init__.py
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/ConsensusTestFile_nt.fsa
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/OutputHmmpfamTest.align
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTest.align
commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTestFiltered.align
commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py
commons/pyRepetUnit/blastnForClassifierStep1/__init__.py
commons/pyRepetUnit/blastnForClassifierStep1/tests/Test_RepbaseBLRnForClassifierStep1.py
commons/pyRepetUnit/blastnForClassifierStep1/tests/__init__.py
commons/pyRepetUnit/components/AbstractClusterLauncher.py
commons/pyRepetUnit/components/AbstractProgramLauncher.py
commons/pyRepetUnit/components/IClusterLauncher.py
commons/pyRepetUnit/components/__init__.py
commons/pyRepetUnit/components/blastx2GFF/__init__.py
commons/pyRepetUnit/components/blastx2GFF/tests/__init__.py
commons/pyRepetUnit/components/blastx2GFF/tests/blastx2GFFTestSuite.py
commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.path
commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.tab
commons/pyRepetUnit/components/blastx2GFF/tests/datas/exp_dummy.gff
commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align
commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.map
commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.path
commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.tab
commons/pyRepetUnit/convCoord/ConvMapChr2Chunk.py
commons/pyRepetUnit/convCoord/ConvPathChr2Chunk.py
commons/pyRepetUnit/convCoord/ConvSetChr2Chunk.py
commons/pyRepetUnit/convCoord/PathChunkConnector.py
commons/pyRepetUnit/convCoord/__init__.py
commons/pyRepetUnit/convCoord/test/TestConvCoordWithOverlapps.py
commons/pyRepetUnit/convCoord/test/Test_ConvMapChr2Chunk.py
commons/pyRepetUnit/convCoord/test/Test_ConvPathChr2Chunk.py
commons/pyRepetUnit/convCoord/test/Test_ConvSetChr2Chunk.py
commons/pyRepetUnit/convCoord/test/Test_PathChunkConnector.py
commons/pyRepetUnit/convCoord/test/__init__.py
commons/pyRepetUnit/convCoord/test/convCoordTestSuite.py
commons/pyRepetUnit/doc/__init__.py
commons/pyRepetUnit/doc/api-objects.txt
commons/pyRepetUnit/doc/class-tree.html
commons/pyRepetUnit/doc/commons.Checker-module.html
commons/pyRepetUnit/doc/commons.Checker-pysrc.html
commons/pyRepetUnit/doc/commons.Checker.Checker-class.html
commons/pyRepetUnit/doc/commons.Checker.CheckerException-class.html
commons/pyRepetUnit/doc/commons.Checker.ConfigChecker-class.html
commons/pyRepetUnit/doc/commons.Checker.ConfigException-class.html
commons/pyRepetUnit/doc/commons.Checker.IChecker-class.html
commons/pyRepetUnit/doc/commons.Checker._Logger-class.html
commons/pyRepetUnit/doc/commons.IComponentWrapper-module.html
commons/pyRepetUnit/doc/commons.IComponentWrapper-pysrc.html
commons/pyRepetUnit/doc/commons.IComponentWrapper.IComponentWrapper-class.html
commons/pyRepetUnit/doc/commons.IDataProcessor-module.html
commons/pyRepetUnit/doc/commons.IDataProcessor-pysrc.html
commons/pyRepetUnit/doc/commons.IDataProcessor.IDataProcessor-class.html
commons/pyRepetUnit/doc/crarr.png
commons/pyRepetUnit/doc/epydoc.css
commons/pyRepetUnit/doc/epydoc.js
commons/pyRepetUnit/doc/frames.html
commons/pyRepetUnit/doc/help.html
commons/pyRepetUnit/doc/identifier-index.html
commons/pyRepetUnit/doc/index.html
commons/pyRepetUnit/doc/module-tree.html
commons/pyRepetUnit/doc/redirect.html
commons/pyRepetUnit/doc/toc-commons.Checker-module.html
commons/pyRepetUnit/doc/toc-commons.IComponentWrapper-module.html
commons/pyRepetUnit/doc/toc-commons.IDataProcessor-module.html
commons/pyRepetUnit/doc/toc-everything.html
commons/pyRepetUnit/doc/toc.html
commons/pyRepetUnit/fastaTranslation/__init__.py
commons/pyRepetUnit/fastaTranslation/allFrames/TranslateInAllFramesAndReplaceStopByX.py
commons/pyRepetUnit/fastaTranslation/allFrames/__init__.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateInAllFramesAndReplaceStopByX.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateInAllFramesAndReplaceStopByX.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/__init__.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_aaWithoutStop.fsa
commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_nt.fsa
commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/__init__.py
commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_aa.fa
commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_nt.fa
commons/pyRepetUnit/fastaTranslation/allFrames/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py
commons/pyRepetUnit/hmmer/HmmpfamClusterComponent.py
commons/pyRepetUnit/hmmer/LaunchPreProcessHmmpfamPostProcessNotInParallel.py
commons/pyRepetUnit/hmmer/__init__.py
commons/pyRepetUnit/hmmer/check/OldDetectFeatureConfigChecker.py
commons/pyRepetUnit/hmmer/check/__init__.py
commons/pyRepetUnit/hmmer/check/test/Test_OldDetectFeaturesConfigChecker.py
commons/pyRepetUnit/hmmer/check/test/__init__.py
commons/pyRepetUnit/hmmer/check/test/detectFeatureConfigCheckerTestSuite.py
commons/pyRepetUnit/hmmer/hmmOutput/HmmOutput.py
commons/pyRepetUnit/hmmer/hmmOutput/HmmOutputProcessing.py
commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py
commons/pyRepetUnit/hmmer/hmmOutput/HmmscanOutputProcessing.py
commons/pyRepetUnit/hmmer/hmmOutput/__init__.py
commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmOutputProcessing.py
commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmpfamOutputProcessing.py
commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmscanOutputProcessing.py
commons/pyRepetUnit/hmmer/hmmOutput/tests/Test_HmmOutput.py
commons/pyRepetUnit/hmmer/hmmOutput/tests/__init__.py
commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/Outputhmmpfam
commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput
commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput.align
commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutputTab.txt
commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/test_hmmpfam_output
commons/pyRepetUnit/hmmer/hmmOutput/tests/hmmOutputTestSuite.py
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/__init__.py
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/Test_ProfilesSearch.py
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/__init__.py
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/ConsensusFile_test.fa
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_aa.fa
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_nt.fa
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmbank_test
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmpfamOut.align.clean_match.path
commons/pyRepetUnit/hmmer/tests/TestAcceptanceHmmpfamAndParse2alignInparallel.py
commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamAndParse2alignLauncherInParallel.py
commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamClusterComponent.py
commons/pyRepetUnit/hmmer/tests/TestHmmpfamAndParse2alignLauncher.py
commons/pyRepetUnit/hmmer/tests/TestHmmpfamClusterComponent.py
commons/pyRepetUnit/hmmer/tests/TestHmmpfamLauncher.py
commons/pyRepetUnit/hmmer/tests/TestLaunchPreProcessHmmpfamPostProcessNotInParallel.py
commons/pyRepetUnit/hmmer/tests/TestProgramLauncher.py
commons/pyRepetUnit/hmmer/tests/__init__.py
commons/pyRepetUnit/hmmer/tests/datas/Outputhmmpfam
commons/pyRepetUnit/hmmer/tests/datas/config.cfg
commons/pyRepetUnit/hmmer/tests/datas/configTestAcceptanceHmmpfamAndParse2alignLauncherInparallel.cfg
commons/pyRepetUnit/hmmer/tests/datas/configTestFunctionalHmmpfamLauncherInparallel.cfg
commons/pyRepetUnit/hmmer/tests/datas/configTestLaunchPreProcessHmmpfamPostProcessNotInParallel.cfg
commons/pyRepetUnit/hmmer/tests/datas/myhmms
commons/pyRepetUnit/hmmer/tests/datas/test_input_aa.fa
commons/pyRepetUnit/hmmer/tests/datas/test_input_nt.fa
commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py
commons/pyRepetUnit/profilesDB/InsertProfilesMapFileInDB.py
commons/pyRepetUnit/profilesDB/Profiles.py
commons/pyRepetUnit/profilesDB/ProfilesDB2Map.py
commons/pyRepetUnit/profilesDB/ProfilesDB4Repet.py
commons/pyRepetUnit/profilesDB/ProfilesDatabank.py
commons/pyRepetUnit/profilesDB/ProfilesDatabankUtils.py
commons/pyRepetUnit/profilesDB/__init__.py
commons/pyRepetUnit/profilesDB/tests/InsertProfilesMapFileInDBTestRessources.py
commons/pyRepetUnit/profilesDB/tests/TestCompleteProfilesDBFromProfilesNameListOrAccNumber.py
commons/pyRepetUnit/profilesDB/tests/TestInsertProfilesMapFileInDB.py
commons/pyRepetUnit/profilesDB/tests/TestProfiles.py
commons/pyRepetUnit/profilesDB/tests/TestProfilesDB2Map.py
commons/pyRepetUnit/profilesDB/tests/TestProfilesDatabankUtils.py
commons/pyRepetUnit/profilesDB/tests/Test_F_CompleteProfilDB.py
commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB2Map.py
commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB4Repet.py
commons/pyRepetUnit/profilesDB/tests/Test_ProfilesDB4Repet.py
commons/pyRepetUnit/profilesDB/tests/__init__.py
commons/pyRepetUnit/profilesDB/tests/completeProfilesDBFromAFileWithProfilesList_script.py
commons/pyRepetUnit/profilesDB/tests/datas/ListPfamProfilsInRepbase.txt
commons/pyRepetUnit/profilesDB/tests/datas/ListpfamAccNumber.txt
commons/pyRepetUnit/profilesDB/tests/datas/myhmms
commons/pyRepetUnit/profilesDB/tests/datas/profilesDBTest.hmm
commons/pyRepetUnit/profilesDB/tests/profilesDBTestSuite.py
commons/tools/AlignTEOnGenomeAccordingToAnnotation.py
commons/tools/AnnotationStats.py
commons/tools/BenchmarkTEconsensus.py
commons/tools/CalcCoordCumulLength.py
commons/tools/ChangeSequenceHeaders.py
commons/tools/CheckMysqlConnect.py
commons/tools/CleanClusterNodesAfterRepet.py
commons/tools/CorrelateTEageWithGCcontent.py
commons/tools/FilterAlign.py
commons/tools/GFF3Maker.py
commons/tools/GameXmlMaker.py
commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py
commons/tools/GetSpecificTELibAccordingToAnnotation.py
commons/tools/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
commons/tools/LaunchBlaster.py
commons/tools/LaunchBlasterInParallel.py
commons/tools/LaunchMatcherInParallel.py
commons/tools/ListAndDropTables.py
commons/tools/MergeMatchsFiles.py
commons/tools/MysqlConnect.py
commons/tools/OrientSequences.py
commons/tools/PostAnalyzeTELib.py
commons/tools/PrepareBatches.py
commons/tools/RetrieveInitHeaders.py
commons/tools/RmvPairAlignInChunkOverlaps.py
commons/tools/SpliceTEsFromGenome.py
commons/tools/SplicerFromAnnotation.py
commons/tools/TEclassifierPE.py
commons/tools/TEclassifierPE_parallelized.py
commons/tools/__init__.py
commons/tools/blast2align.py
commons/tools/dbBestLength.py
commons/tools/dbConsensus.py
commons/tools/dbShuffle.py
commons/tools/dbSplit.py
commons/tools/filterOutMatcher.py
commons/tools/getCumulLengthFromTEannot.py
commons/tools/pathnum2id.py
commons/tools/refalign2fasta.py
commons/tools/removeDescriptionInFastaHeaderProgramLauncher.py
commons/tools/replaceGreaterThanSymbolInFastaHeaderProgramLauncher.py
commons/tools/setnum2id.py
commons/tools/srptBlasterMatcher.py
commons/tools/srptCreateTable.py
commons/tools/srptExportTable.py
commons/tools/srptGameXmlMaker.py
commons/tools/srptPhyML.py
commons/tools/srptTableOverlap.py
commons/tools/tabFileReader.py
commons/tools/tests/MockFastaForReplaceGreaterThanSymbolInFastaHeader.py
commons/tools/tests/Test_AlignTEOnGenomeAccordingToAnnotation.py
commons/tools/tests/Test_CalcCoordCumulLength.py
commons/tools/tests/Test_ChangeSequenceHeaders.py
commons/tools/tests/Test_CorrelateTEageWithGCcontent.py
commons/tools/tests/Test_F_AlignTEOnGenomeAccordingToAnnotation.py
commons/tools/tests/Test_F_CheckMysqlConnect.py
commons/tools/tests/Test_F_FilterAlign.py
commons/tools/tests/Test_F_GFF3Maker.py
commons/tools/tests/Test_F_GameXmlMaker.py
commons/tools/tests/Test_F_GetMultiAlignAndPhylogenyPerTErefSeq.py
commons/tools/tests/Test_F_GetSpecificTELibAccordingToAnnotation.py
commons/tools/tests/Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
commons/tools/tests/Test_F_LaunchBlaster.py
commons/tools/tests/Test_F_LaunchBlasterInParallel.py
commons/tools/tests/Test_F_LaunchMatcherInParallel.py
commons/tools/tests/Test_F_MergeMatchsFiles.py
commons/tools/tests/Test_F_PostAnalyzeTELib.py
commons/tools/tests/Test_F_ReplaceGreaterThanSymbolInFastaHeader.py
commons/tools/tests/Test_F_RetrieveInitHeaders.py
commons/tools/tests/Test_F_SplicerFromAnnotation.py
commons/tools/tests/Test_F_TEclassifierPE.py
commons/tools/tests/Test_GetMultAlignAndPhylogenyPerTErefSeq.py
commons/tools/tests/Test_GetSpecificTELibAccordingToAnnotation.py
commons/tools/tests/Test_OrientSequences.py
commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py
commons/tools/tests/Test_SpliceTEsFromGenome.py
commons/tools/tests/Test_getCumulLengthFromTEannot.py
commons/tools/tests/Test_pathnum2id.py
commons/tools/tests/Test_refalign2fasta.py
commons/tools/tests/Test_srptTableOverlap.py
commons/tools/tests/__init__.py
removed:
SMART/Java/Python/GetIntersection.py
SMART/Java/Python/ncList/.NCList.py.swp
SMART/Java/Python/ncList/.NCListCursor.py.swp
SMART/Java/Python/ncList/Benchmark.py
SMART/Java/Python/script.Rout
SMART/Java/PythonProgramFinder$1.class
SMART/Java/Smart$1.class
SMART/Java/Smart$2.class
SMART/galaxy/tool_conf.xml
SMART/galaxy/tool_dependencies.xml
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/HTseqClean.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/HTseqClean.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,19 @@
+# HTseqClean
+# remove the extra counter rows that are not genes
+# from HTSeq-count output
+
+# input : rawCounts
+# output : cleaned rawCounts
+
+# created Feb 6th, 2012
+# Modified Feb 16th, 2012
+# Marie-Agnes Dillies
+
+
+HTseqClean <- function( rawCounts ){
+
+  row2remove <- c("alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual")
+  rawCounts <- rawCounts[!rawCounts$Id %in% row2remove,]
+  rawCounts[is.na(rawCounts)] <- 0
+  return(rawCounts)
+}
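
Note: HTseqClean keeps only the rows whose Id is not one of the HTSeq-count summary counters and replaces NA counts by 0. A minimal usage sketch; the toy data frame below is hypothetical, only the Id column name is taken from the code above:

rawCounts <- data.frame(Id      = c("gene1", "gene2", "no_feature", "ambiguous"),
                        sample1 = c(10, NA, 250, 12),
                        sample2 = c(8, 3, 300, 9))
cleaned <- HTseqClean(rawCounts)   # keeps gene1 and gene2; the NA becomes 0
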
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/MAplotDE.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/MAplotDE.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,16 @@
+# MAplotDE
+# MAplot of DE genes
+
+# input : res, alpha, OUT_MAplotDEName
+# output : MAplot (png)
+
+MAplotDE <- function( res, alpha, OUT_MAplotDEName, out = TRUE ){
+
+  if (out) png( file=OUT_MAplotDEName )
+  
+  plot( res$baseMean, res$log2FoldChange, pch=".", xlab="Mean expression", ylab="log2FC", main="",
+        log="x", col=ifelse(res$padj < alpha, "red", "black") )
+  abline(h=0, col="red")
+
+  if (out) dev.off()
+}
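
Note: res is expected to carry the baseMean, log2FoldChange and padj columns, i.e. the data frame returned by DESeq's nbinomTest (that is how anadiffGenes2conds.R below calls it). A hedged usage sketch with a hypothetical output name:

# res <- nbinomTest(cds, cond1, cond2)                           # produced earlier in the pipeline
MAplotDE(res, alpha = 0.05, OUT_MAplotDEName = "MAplotDE.png")   # write the png
MAplotDE(res, 0.05, "", out = FALSE)                             # or draw on the current device
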
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/RNAseqFunctions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/RNAseqFunctions.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,38 @@
+# RNAseqFunctions
+# when sourced, sources all R functions associated with RNAseq data analysis
+
+RNAseqFunctions <- function( RfuncDir ){
+  
+  source(paste(RfuncDir, "loadTargetFile.R", sep=""))
+  source(paste(RfuncDir, "loadCountData.R", sep=""))
+# source(paste(RfuncDir, "loadStrandData.R", sep=""))
+  source(paste(RfuncDir, "HTseqClean.R", sep=""))
+  source(paste(RfuncDir, "raw2counts.R", sep=""))
+  source(paste(RfuncDir, "barplotTC.R", sep=""))
+  source(paste(RfuncDir, "barplotNul.R", sep=""))
+  source(paste(RfuncDir, "removeNul.R", sep=""))
+  source(paste(RfuncDir, "densityPlot.R", sep=""))
+  source(paste(RfuncDir, "boxplotCounts.R", sep=""))
+  source(paste(RfuncDir, "majSequence.R", sep=""))
+  source(paste(RfuncDir, "clusterPlot.R", sep=""))
+  source(paste(RfuncDir, "pairwiseSERE.R", sep=""))
+  source(paste(RfuncDir, "pairwiseScatterPlots.R", sep=""))
+#  source(paste(RfuncDir, "pairwiseScatterPlotsAll.R", sep=""))
+  source(paste(RfuncDir, "plotDispEstimates.R", sep=""))
+#  source(paste(RfuncDir, "deseqByCond.R", sep="")) 
+#  source(paste(RfuncDir, "edgeRByCond.R", sep=""))  
+#  source(paste(RfuncDir, "fisher.R", sep=""))
+  source(paste(RfuncDir, "histoRawp.R", sep=""))
+#  source(paste(RfuncDir, "histoRawpMconds.R", sep=""))
+  source(paste(RfuncDir, "MAplotDE.R", sep=""))
+#  source(paste(RfuncDir, "MAplotDEMconds.R", sep=""))
+  source(paste(RfuncDir, "exportComplete.R", sep=""))
+#  source(paste(RfuncDir, "exportCompleteEdgeR.R", sep=""))
+#  source(paste(RfuncDir, "exportCompleteFisher.R", sep=""))
+#  source(paste(RfuncDir, "exportCompleteMconds.R", sep=""))
+#  source(paste(RfuncDir, "exportCompleteByCond.R", sep=""))
+#  source(paste(RfuncDir, "exportCompletePaired.R", sep=""))
+  source(paste(RfuncDir, "exportDiff.R", sep=""))
+#  source(paste(RfuncDir, "synthese.R", sep=""))
+#  source(paste(RfuncDir, "exportDiffByCond.R", sep=""))
+}
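
Note: each helper is sourced with paste(RfuncDir, "file.R", sep=""), so RfuncDir must end with a path separator; anadiffGenes2conds.R builds it that way. A minimal sketch, assuming the scripts sit in a local DESeqTools/ directory:

source("DESeqTools/RNAseqFunctions.R")
RNAseqFunctions("DESeqTools/")   # note the trailing slash; all helper functions are now defined
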
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,191 @@
+# Differential analysis of per-gene expression data
+# with DESeq
+# 2 conditions
+
+args <- commandArgs()
+#print(args[1])
+#print(args[2])
+#print(args[3])
+#print(args[4])
+#print(args[5])
+#print(args[6])
+#output file names
+#print(args[7]) # HTML file name
+#print(args[8]) # directory for the HTML report images
+#print(args[9]) # complete xls file name
+#print(args[10])# UP xls file name
+#print(args[11]) #Down xls file name
+#print(args[12]) # the executable script (for getting the path)
+
+library(R2HTML)
+library(R.utils)
+
+#run example: 
+projectName <- "DESeqAnalysis"
+analysisVersion <- "V1"    # fitType=local, sharingMode=fit-only, method=blind 
+rawDir <- "raw"
+targetFile <- args[4]
+header <- as.integer(args[5]) # whether there is a header or not: header=1 if yes, header=0 otherwise
+withOutReplicates <- as.integer(args[6])
+
+#get the directory to write the results
+tab <- splitByPattern(args[7], pattern="/")
+res_dir <- ""
+for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")}
+#get the html output file name
+OUT_HTMLname <- args[7]
+#get the images directory to write to
+OUT_imgDir <- args[8]
+# if the directory doesn't exist, we should create it first
+
+alpha <- 0.05
+adjMethod <- "BH"
+outfile <- T
+runningScriptTab <-  splitByPattern(args[12], pattern="/")
+RfuncDir <- ""
+for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script  
+RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path
+# Directory containing the helper functions
+print(RfuncDir)
+source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") )
+
+# Load packages and functions
+library(DESeq)
+RNAseqFunctions(RfuncDir)
+# Load the target file
+target <- loadTargetFile( targetFile, header )
+# Load the data and build a per-gene count table
+#have changed
+rawCounts <- loadCountData( target, header )
+conds <- unique(target$group)
+cond1 <- as.character(conds[1])
+cond2 <- as.character(conds[!conds == conds[1]])
+rawCounts <- HTseqClean( rawCounts )
+
+# Convert to a count matrix
+counts <- raw2counts( rawCounts )[[1]]
+
+# Number of reads per sample
+OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/")
+barplotTC( counts, target$group, OUT_barplotTCName, out=outfile )
+
+# Proportion of null counts
+OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/")
+barplotNul( counts, target$group, OUT_barplotNulName, out=outfile )
+
+# Remove null counts
+counts <- removeNul( counts )[[1]]
+
+# Density plot
+OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/")
+densityPlot( counts, target$group, OUT_densityPlotName, out=outfile )
+
+# Boxplot
+OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/")
+boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile )
+# Most expressed sequence
+OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/")
+majSequence( counts, target$group, OUT_majSequenceName, out=outfile )
+
+# ScatterPlot between two samples
+OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/")
+pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE)
+
+# SERE coefficient calculation (Poisson hypothesis for technical replicates), to assess whether the variability between replicates or between conditions is high.
+coef <- pairwiseSERE(counts)
+print(coef)
+coef
+# Create the cds data structure; we use newCountDataSet because the first column is not numeric, and DESeq does not accept non-numeric values.
+cds <- newCountDataSet( counts, target$group )
+
+# Diagnostic for clustering of non-normalized samples
+OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/")
+clusterPlot(cds, OUT_clusterPlot_before, out=outfile)
+
+
+# Normalization (compute the library size factors)
+cds <- estimateSizeFactors( cds )
+
+# Dispersion estimation
+# parameters:
+ # method: how samples are pooled to estimate dispersion. If no replicates use "blind"
+ # sharingMode: how variance estimate is computed with respect to the fitted line. 
+ #  "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value
+ # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value.
+# first branch: no replicates available
+if(withOutReplicates!=0){
+ cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind")
+} else if(withOutReplicates==0){
+ #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")}
+ cds <- estimateDispersions( cds)}
+# Differential analysis, BH adjustment by default
+res <- nbinomTest( cds, cond1, cond2)  
+
+# Diagnostic for clustering of normalized samples
+OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/")
+clusterPlot(cds, OUT_clusterPlot, out=outfile)
+
+# Control plot of dispersion estimates
+OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/")
+plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile )
+
+# Distribution of raw p-values
+OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/")
+histoRawp( res, OUT_histoRawpName, out=outfile )
+
+# MAplot showing DE genes
+OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/")
+MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile )
+
+# export complete data
+OUT_completeName <- args[9]
+complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile )
+
+# export significant genes
+OUT_upName <- args[10]
+OUT_downName <- args[11]
+diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile )
+
+# write all image results into an HTML file
+prefixHTMLname <- tab[length(tab)]
+#HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML")
+HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white")
+HTML.title("<center>Differential Expression DESeq analysis.", HR=1) 
+HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2)
+     HTMLInsertGraph("barplotTC.png")
+
+HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2)
+ HTMLInsertGraph("barplotNul.png")
+
+HTML.title("<center>DensityPlot: density of each sample.", HR=2)
+ HTMLInsertGraph("densityPlot.png")
+
+HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2)
+ HTMLInsertGraph("boxplotCounts.png")
+
+HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2)
+ HTMLInsertGraph("majSequence.png")
+
+HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2)
+ HTMLInsertGraph("scatterPlot.png")
+
+HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2)
+ HTMLInsertGraph("clusteringOfSamplesBefore.png")
+
+HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2)
+ HTMLInsertGraph("clusteringOfSamples.png")
+
+HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2)
+ HTMLInsertGraph("disperssionEstimates.png")
+
+HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2)
+ HTMLInsertGraph("histoRawPvalue.png")
+
+HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2)
+ HTMLInsertGraph("MAplotDE.png")
+HTMLEndFile()
+absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="")
+outName <- paste(absoluPrefixHTMLname, ".html", sep="")
+# rename the report so that Galaxy picks it up under the expected output name
+file.rename(outName, OUT_HTMLname)
+
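For readers new to the DESeq calls used by the analysis script above, here is a minimal sketch of the same workflow on synthetic data (the object names countTable and conds are illustrative only and not part of the changeset; it assumes the DESeq R package is installed):

  library(DESeq)
  # toy 100-gene x 4-sample integer count matrix, two samples per condition
  countTable <- matrix(rpois(400, lambda=10), ncol=4,
                       dimnames=list(paste("gene", 1:100, sep=""), c("s1", "s2", "s3", "s4")))
  conds <- factor(c("cond1", "cond1", "cond2", "cond2"))
  cds <- newCountDataSet(countTable, conds)
  cds <- estimateSizeFactors(cds)
  # with replicates the default dispersion estimation is used; without replicates the
  # script above falls back to method="blind", sharingMode="fit-only"
  cds <- estimateDispersions(cds)
  res <- nbinomTest(cds, "cond1", "cond2")   # returns raw pval and BH-adjusted padj per gene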
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/barplotNul.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/barplotNul.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,19 @@
+# barplotNul
+# barplot representing null counts per sample
+
+# input : counts, target, projectName
+# output : barplotNul (png)
+
+# created Feb 7th, 2012
+# modified April 30th, 2012 (target$group instead of target)
+
+barplotNul <- function( counts, group,  OUT_barplotNulName, out = TRUE ){
+
+  if (out) png( file=OUT_barplotNulName )
+
+  N <- apply(counts, 2, function(x){sum(x == 0)})/nrow(counts)
+  barplot(N, col=as.integer(group)+1, main = "Proportion of null counts per Sample", ylim = c(0,1))
+  legend("topright", as.character(unique(group)), lty=1, col=as.integer(unique(group))+1)
+
+  if (out) dev.off()
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/barplotTC.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/barplotTC.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,20 @@
+# barplotTC
+# barplot representing total count per sample
+
+# input : counts, target, projectName
+# output : barplotTC (png)
+
+# created Feb 7th, 2012
+# modified April 30th, 2012 (group instead of target$group)
+
+barplotTC <- function( counts, group, OUT_barplotTCName, out = TRUE ){
+
+  if (out) png( file=OUT_barplotTCName )
+
+  ylim <- c(0, max(colSums(counts))*1.2)
+  barplot( colSums(counts), col=as.integer(group)+1, main = "Total Read Count per Sample",  ylim=ylim )
+  legend( "topright", as.character(unique(group)), lty=1,
+         col=as.integer(unique(group))+1 )
+
+  if (out) dev.off()
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/boxplotCounts.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/boxplotCounts.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18 @@
+# boxplotCounts
+# boxplots representing counts distribution per sample
+
+# input : counts, target, projectName, type of data (raw or norm)
+# output : boxplot (png)
+
+# created Feb 7th, 2012
+# modified April 30th, 2012
+
+boxplotCounts <- function( counts, group, type = c("raw", "norm"), OUT_boxplotCountsName, out = TRUE ){
+
+  if (out) png( file=OUT_boxplotCountsName )
+
+  boxplot( log2(counts+1), col=as.integer(group)+1, main = paste(type[1], " counts distribution", sep="" ) )
+  legend( "topright", as.character(unique(group)), lty=1, col=as.integer(unique(group))+1 )
+
+  if (out) dev.off()
+}
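The three QC helpers above are typically called back to back on the raw count matrix. A possible invocation, assuming counts is the matrix held in the $counts element returned by raw2counts() and target the sample table from loadTargetFile() (both later in this changeset); the output names follow the images referenced by the HTML report:

  group <- factor(target$group)          # factor so that as.integer(group) yields colour codes
  barplotTC( counts, group, "barplotTC.png" )
  barplotNul( counts, group, "barplotNul.png" )
  boxplotCounts( counts, group, "raw", "boxplotCounts.png" )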
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/clusterPlot.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/clusterPlot.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,27 @@
+# clusterPlot
+# dendrogram of sample clustering
+
+# input : counts, outputName, type of data (raw or norm)
+# output : dendrogram (png)
+
+# created Sept 13th, 2012
+# modified Oct 30th, 2012
+# Marie-Agnes Dillies
+
+
+clusterPlot <- function( cds, OUT_clusterPlot, type = "raw", out = TRUE ){
+
+  if (out) png( file=OUT_clusterPlot )
+
+  if (type == "norm"){
+    cdsblind <- estimateDispersions( cds, method="blind" )
+    vsd <- getVarianceStabilizedData( cdsblind )
+  }
+  else {
+    vsd <- counts(cds)
+  }
+  hc <- hclust( dist(t(vsd)), method="ward" )
+  plot( hc, xlab = "Euclidean distance, Ward criterion", main=paste("Cluster Dendrogram, ", type, " data", sep="") )
+
+  if (out) dev.off()
+}
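A possible pairing of raw and normalized calls, matching the two dendrograms referenced by the HTML report (this assumes cds is a DESeq CountDataSet whose size factors have already been estimated, which the type="norm" branch needs for the blind dispersion fit):

  clusterPlot( cds, "clusteringOfSamplesBefore.png", type="raw" )
  clusterPlot( cds, "clusteringOfSamples.png", type="norm" )   # clusters variance-stabilized data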
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/densityPlot.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/densityPlot.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,23 @@
+# densityPlot
+# density plot of all samples
+
+# input : counts, target, projectName
+# output : densplot (png)
+
+# created Feb 7th, 2012
+# modified April 30th, 2012
+
+
+densityPlot <- function( counts, group, OUT_densityPlotName, out = TRUE ){
+
+  if (out) png( file=OUT_densityPlotName )
+
+  couleurs <- as.integer( group ) + 1
+  # use log2(x+1), as in boxplotCounts, so that zero counts do not produce -Inf
+  ylim <- c(0, max(density(log2(counts+1))$y)*1.5)
+  plot( density(log2(counts[,1]+1)), main="Density of counts distribution", col=couleurs[1], ylim = ylim )
+  for (i in 2:ncol(counts))
+   lines( density(log2(counts[,i]+1)), col=couleurs[i] )
+  legend( "topright", as.character(unique(group)), lty=1, col=as.integer(unique(group))+1 )
+
+  if (out) dev.off()
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/exportComplete.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/exportComplete.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,20 @@
+# exportComplete
+# export complete data and results
+
+# input : counts, res, target, adjMethod, cond1, cond2
+# output : complete data and xls file (in text format)
+
+# created Feb 14th, 2012
+# modified March 9th, 2012 (order of cond1 and cond2)
+
+
+exportComplete <- function( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out = T ){
+
+ complete <- data.frame( res$id, counts, res[,3:ncol(res)] )
+ colnames(complete) <- c( "id", as.character(target$label), cond2, cond1, "FC", "log2FC", "rawp", 
+ paste("adjp",adjMethod,sep="") )
+
+  if (out)
+   write.table( complete, file=OUT_completeName, sep="\t", row.names=F )
+  return( complete )
+}
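A sketch of how this export step sits downstream of nbinomTest (argument values are illustrative; adjMethod="BH" matches the default adjustment used in the main script):

  complete <- exportComplete( counts, res, target, adjMethod="BH",
                              cond1="cond1", cond2="cond2",
                              OUT_completeName="complete.txt" )
  colnames(complete)   # "id", the sample labels, the two condition means, FC, log2FC, rawp, adjpBH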
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/exportDiff.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/exportDiff.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,42 @@
+# exportDiff
+# export differentially expressed genes
+
+# input : complete, alpha, adjMethod, projectName
+# output : diff genes, up and down in xls files
+
+# created Feb 14th, 2012
+
+
+exportDiff <- function( complete, alpha, adjMethod, OUT_upName, OUT_downName, out = T ){
+
+ diff <- complete[which(complete[,grep("adjp",colnames(complete))] < alpha),]
+
+ gup <- up( diff )
+ gdown <- down( diff )
+
+  if (out){
+    gup[,(ncol(gup)-4):ncol(gup)] <- format( gup[,(ncol(gup)-4):ncol(gup)], digits=3, dec=",")
+    gdown[,(ncol(gdown)-4):ncol(gdown)] <- format( gdown[,(ncol(gdown)-4):ncol(gdown)], digits=3, dec=",")
+ write.table(gup, file=OUT_upName, row.names=F, sep="\t")
+ write.table(gdown, file=OUT_downName, row.names=F, sep="\t")
+  }
+  return( diff )
+}
+
+
+up <- function( diff ){
+
+ up <- diff[diff$log2FC > 0,]
+ up <- up[order(up[,grep("adjp",colnames(up))]),]
+
+ return( up )
+}
+
+
+down <- function( diff ){
+
+ down <- diff[diff$log2FC < 0,]
+ down <- down[order(down[,grep("adjp",colnames(down))]),]
+
+ return( down )
+}
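A usage sketch, assuming complete is the data frame returned by exportComplete() above and alpha is the significance threshold passed to the main script:

  deGenes <- exportDiff( complete, alpha=0.05, adjMethod="BH",
                         OUT_upName="up.txt", OUT_downName="down.txt" )
  nrow(deGenes)   # genes whose adjusted p-value is below alpha, written to up.txt / down.txt by sign of log2FC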
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/histoRawp.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/histoRawp.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18 @@
+# histoRawp
+# histogram of raw p-values
+
+# input : res, OUT_histoRawpName
+# output : histogram (png)
+
+
+histoRawp <- function( res, OUT_histoRawpName, out = TRUE ){
+
+  if (out) png( file=OUT_histoRawpName )
+  
+  ind <- grep("val", colnames(res))
+  hist( res[,ind], nclass=50, xlab="Raw p-values", main="", col="skyblue" )
+
+  if (out) dev.off()
+}
+
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/loadCountData.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/loadCountData.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+# loadCountData
+# loads counts, one file per lane
+# file names from target file
+
+# input : target
+# output : raw count table
+
+# created Feb 6th, 2012
+# modified May 2nd, 2012 (colnames -> target$label)
+# Marie-Agnes Dillies
+
+
+loadCountData <- function(target, header){
+
+  require(DESeq)
+  fileNames <- target$files
+
+ #rawCounts <- read.table(as.character(paste(rawDir,target$files[1],sep="/")), sep="\t", header=TRUE)
+ # header comes from the wrapper as 0/1: non-zero means the count files contain a header row
+ rawCounts <- read.table(as.character(target$files[1]), sep="\t", header=(header!=0))
+  
+  colnames(rawCounts) <- c("Id", as.character(target$label[1]))
+
+  for (i in 2:length(fileNames)){
+   tmp <- read.table(as.character(target$files[i]), sep="\t", header=(header!=0))
+   colnames(tmp) <- c("Id", as.character(target$label[i]))
+   rawCounts <- merge(rawCounts, tmp, by="Id", all=T)
+  }
+  rawCounts[is.na(rawCounts)] <- 0
+  return(rawCounts)
+}
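A usage sketch, assuming target is the data frame returned by loadTargetFile() (next file in this changeset) and that each file listed in target$files is a two-column, tab-separated table of gene identifier and read count, like the raw/f*.tsv files added below:

  rawCounts <- loadCountData( target, header=1 )
  dim(rawCounts)   # one "Id" column plus one count column per sample, zero-filled where a gene is absent from a file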
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/loadTargetFile.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/loadTargetFile.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,17 @@
+# loadTargetFile
+# loads file containing sample info
+
+# input : targetFile Name
+# output : target
+
+# created Feb 6th, 2012
+# Marie-Agnes Dillies
+
+
+loadTargetFile <- function(targetFile, header){
+  # header comes from the wrapper as 0/1: non-zero means the target file contains a header row
+  return(read.table(targetFile, header=(header!=0), sep="\t"))
+}
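The layout of the target file is implied rather than documented; from the way the other scripts index it (target$files, target$label, target$group), a plausible tab-separated example is shown below (the column names and the condition assignments are assumptions, not taken from the changeset):

  # label    files              group
  # G144     raw/f1cond1.tsv    cond1
  # CB541    raw/f1cond2.tsv    cond2
  # G166     raw/f2cond1.tsv    cond1
  # CB660    raw/f2cond2.tsv    cond2
  target <- loadTargetFile( "target.txt", header=1 )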
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/majSequence.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/majSequence.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,26 @@
+# majSequence
+# compute proportion of reads associated with most expressed sequence
+
+# input : counts, target, projectName
+# output : barplot, % associated with majority gene
+
+# created Feb 7th, 2012
+# modified Feb 20th, 2012
+# modified April 30th, 2012
+# Marie-Agnes Dillies
+
+
+majSequence <- function( counts, group, OUT_majSequenceName, out = T, position = "topright" ){
+
+  if (out) png( file=OUT_majSequenceName )
+
+  maj <- apply(counts, 2, function(x){x <- x[order(x, decreasing=T)]; x[1]*100/sum(x)})
+  seqname <- apply(counts, 2, function(x){x <- x[order(x, decreasing=T)]; names(x)[1]})
+
+  x <- barplot( maj, col=as.integer(group)+1, main = "Proportion of reads from most expressed gene", 
+ ylim = c(0, max(maj)*1.2), cex.main=0.8 )
+  for (i in 1:length(seqname)) text( x[i], maj[i]/2, seqname[i], cex=0.8, srt=90, adj=0)
+  legend( position, as.character(unique(group)), lty=1, col=as.integer(unique(group))+1 )
+
+  if (out) dev.off()
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/pairwiseSERE.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/pairwiseSERE.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,41 @@
+# pairwiseSERE
+# compute pairwise SERE statistics
+
+# input : counts
+# output : matrix of SERE values
+
+# created october 19th, 2012
+# Marie-Agnes Dillies
+
+
+pairwiseSERE <- function( counts ){
+  
+  sere <- matrix( NA, ncol=ncol(counts), nrow=ncol(counts) )
+  for (i in 1:ncol(counts)){
+    for (j in 1:ncol(counts)){
+      sere[i,j] <- sigfun_Pearson( counts[,c(i,j)] )
+    }
+  }
+  colnames(sere) <- rownames(sere) <- colnames(counts)
+  return( formatC(sere, format="f", digits=2) )
+}
+
+sigfun_Pearson <- function(observed) {
+  #calculate lambda and expected values
+  laneTotals<- colSums(observed);
+  total <- sum(laneTotals)
+  fullObserved <- observed[rowSums(observed)>0,];
+  fullLambda <- rowSums(fullObserved)/total;
+  fullLhat <- fullLambda > 0;
+  fullExpected<- outer(fullLambda, laneTotals);
+
+  #keep values
+  fullKeep <- which(fullExpected > 0);
+  
+  # degrees of freedom: number of kept cells minus the number of estimated parameters
+  # (one lambda per row, hence the subtraction below)
+  # Pearson chi-square contributions for all kept values
+  oeFull <- (fullObserved[fullKeep] - fullExpected[fullKeep])^2/ fullExpected[fullKeep] # pearson chisq test
+  dfFull <- length(fullKeep) - sum(fullLhat!=0);
+  
+  return(c(sqrt(sum(oeFull)/dfFull)));
+}
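A toy illustration of the statistic (synthetic Poisson counts, so the off-diagonal values should land close to 1; clearly larger values would indicate variation beyond what a shared-rate Poisson model explains):

  set.seed(1)
  m <- matrix( rpois(4000, lambda=20), ncol=4,
               dimnames=list(NULL, c("G144", "CB541", "G166", "CB660")) )
  pairwiseSERE( m )   # 4x4 character matrix of SERE values formatted to two decimals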
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/pairwiseScatterPlots.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/pairwiseScatterPlots.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,31 @@
+# pairwiseScatterPlots
+# scatter plots for pairwise comparisons of log counts
+
+# input : counts, target, outputName
+# output : scatter plots (pdf: allows multiple figures in one file)
+
+# created Feb 21th, 2012
+# modified Sept 27th, 2012 (pdf output file)
+# modified Oct 30th, 2012 (png)
+# Marie-Agnes Dillies
+
+
+pairwiseScatterPlots <- function( counts, target, OUT_scatterPlot, out = TRUE, pdffile = FALSE ){
+
+  if (out & !pdffile) png( OUT_scatterPlot )
+  if (pdffile) pdf( OUT_scatterPlot )
+  
+  conds <- unique(target$group)
+  # colnames(counts) <- target$label
+  
+  for (i in 1:(length(conds)-1)){
+   for (j in (i+1):length(conds)){
+   cond1 <- conds[i]; cond2 <- conds[j]
+ pairs( log2(counts[, which(target$group %in% c(as.character(cond1), as.character(cond2)))]+1), 
+ pch=".", cex=0.5, main = paste(cond1, cond2, sep=" vs ") )
+   }
+  }
+
+  if (out || pdffile) dev.off()
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/plotDispEstimates.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/plotDispEstimates.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,21 @@
+# plotDispEstimates
+# scatter plots representing dispersion estimates vs mean expression
+
+# input : cds, OUT_plotDispEstimatesName
+# output : scatterplot (png)
+
+plotDispEstimates <- function( cds, OUT_plotDispEstimatesName, out = TRUE ){
+
+  if (out) png( file=OUT_plotDispEstimatesName )
+  
+  plot(
+   rowMeans( counts(cds, normalized=T) ),
+   fitInfo(cds)$perGeneDispEsts,
+   pch=".", log="xy",
+   xlab = "Mean expression strength", ylab = "Dispersion estimate" )
+  
+  xg <- 10^seq(-.5, 5, length.out=300)
+  lines( xg, fitInfo(cds)$dispFun(xg), col="red" )
+
+  if (out) dev.off()
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/raw/f1cond1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/raw/f1cond1.tsv Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18761 @@
[ 18,761-line tab-separated count file; first line "GliNS1<TAB>G144", then one gene identifier and read count per line; full contents omitted ]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/raw/f1cond2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/raw/f1cond2.tsv Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18761 @@
[ 18,761-line tab-separated count file; first line "GliNS1<TAB>CB541", then one gene identifier and read count per line; full contents omitted ]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/raw/f2cond1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/raw/f2cond1.tsv Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18761 @@
[ 18,761-line tab-separated count file; first line "GliNS1<TAB>G166", then one gene identifier and read count per line; full contents omitted ]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/raw/f2cond2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/raw/f2cond2.tsv Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18761 @@
[ 18,761-line tab-separated count file; first line "GliNS1<TAB>CB660", then one gene identifier and read count per line; full contents omitted ]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/raw2counts.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/raw2counts.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,20 @@
+# raw2counts
+# extract counts only from rawCounts
+# and add rownames to counts
+
+# input : rawCounts
+# output : counts
+
+# created Feb 6th, 2012
+# modified April 12, 2012
+# Marie-Agnes Dillies
+
+
+raw2counts <- function( rawCounts, annot=1 ){
+
+  ex <- 1:annot
+  counts <- as.matrix( rawCounts[,-ex] )
+  rownames(counts) <- rawCounts[,1]
+  infoCounts <- rawCounts[,ex]
+  return( list("counts"=counts, "infoCounts"= infoCounts) )
+}
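A short usage sketch, assuming rawCounts is the merged table produced by loadCountData() with the gene identifier in its first column (annot=1):

  tmp <- raw2counts( rawCounts, annot=1 )
  counts <- tmp$counts            # numeric matrix with the gene identifiers as rownames
  infoCounts <- tmp$infoCounts    # the annotation column(s) that were split off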
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/DESeqTools/removeNul.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/DESeqTools/removeNul.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,14 @@
+# removeNul
+# remove genes with null counts in all samples
+
+# input : counts
+# output : counts
+
+# created Feb 7th, 2012
+# Marie-Agnes Dillies
+
+
+removeNul <- function( counts, info = NULL ){
+
+  return( list(counts[rowSums(counts) > 0,], info[rowSums(counts) > 0,]) )
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/bam_to_sam_parallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/bam_to_sam_parallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+"""
+Converts BAM data to sorted SAM data.
+usage: bam_to_sam.py [options]
+   --input1: SAM file to be converted
+   --output1: output dataset in bam format
+"""
+
+import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
+#from galaxy import eggs
+#import pkg_resources; pkg_resources.require( "bx-python" )
+#from bx.cookbook import doc_optparse
+#from galaxy import util
+
+def stop_err( msg ):
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit()
+    
+def toTar(tarFileName, samOutputNames):
+    dir = os.path.dirname(tarFileName)    
+    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
+    currentPath = os.getcwd()
+    os.chdir(dir)
+    for file in samOutputNames:
+        relativeFileName = os.path.basename(file)
+        tfile.add(relativeFileName)
+    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
+    tfile.close()
+    os.chdir(currentPath)    
+
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-t', '--tar', dest='outputTar', default=None, help='output all SAM results in a tar file.' )
+    parser.add_option( '', '--input1', dest='input1', help='The input list of BAM datasets in txt format.' )
+    #parser.add_option( '', '--input1', dest='input1', help='The input BAM dataset' )
+    parser.add_option( '', '--output1', dest='output1', help='The output list of SAM datasets in txt format.' )
+    #parser.add_option( '', '--output1', dest='output1', help='The output SAM dataset' )
+    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
+    ( options, args ) = parser.parse_args()
+
+
+    #Parse the input txt file and read a list of BAM files.
+    file = open(options.input1, "r")
+    lines = file.readlines()
+    inputFileNames = []
+    samOutputNames = []
+    outputName = options.output1
+    resDirName = os.path.dirname(outputName) + '/'
+    #Write output txt file and define all output sam file names.
+    out = open(outputName, "w")
+    for line in lines:
+        tab = line.split()
+        inputFileNames.append(tab[1])
+        samOutName = resDirName + tab[0] + '_samOutput_%s.sam' % random.randrange(0, 10000)
+        samOutputNames.append(samOutName)
+        out.write(tab[0] + '\t' + samOutName  + '\n')
+    file.close()
+    out.close()
+
+    # output version # of tool
+    try:
+        tmp_files = []
+        tmp = tempfile.NamedTemporaryFile().name
+        tmp_files.append(tmp)
+        tmp_stdout = open( tmp, 'wb' )
+        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
+        tmp_stdout.close()
+        returncode = proc.wait()
+        stdout = None
+        for line in open( tmp_stdout.name, 'rb' ):
+            if line.lower().find( 'version' ) >= 0:
+                stdout = line.strip()
+                break
+        if stdout:
+            sys.stdout.write( 'Samtools %s\n' % stdout )
+        else:
+            raise Exception
+    except:
+        sys.stdout.write( 'Could not determine Samtools version\n' )
+
+
+
+    tmp_dirs = []
+    for i in range(len(inputFileNames)):
+        try:
+            # exit if input file empty
+            if os.path.getsize( inputFileNames[i] ) == 0:
+                raise Exception, 'Input BAM file is empty.'
+            # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command
+            # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted
+            # into memory ( controlled by option -m ).
+            tmp_dir = tempfile.mkdtemp()
+            tmp_dirs.append(tmp_dir)
+            tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
+            tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
+            tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
+            tmp_files.append(tmp_sorted_aligns_file_name)
+            tmp_sorted_aligns_file.close()
+            
+            command = 'samtools sort %s %s' % ( inputFileNames[i], tmp_sorted_aligns_file_base )
+            tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
+            tmp_stderr = open( tmp, 'wb' )
+            proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
+            returncode = proc.wait()
+            tmp_stderr.close()
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+            # exit if sorted BAM file empty
+            if os.path.getsize( tmp_sorted_aligns_file_name) == 0:
+                raise Exception, 'Intermediate sorted BAM file empty'
+        except Exception, e:
+            stop_err( 'Error sorting alignments from (%s), %s' % ( inputFileNames[i], str( e ) ) )
+            
+        try:
+            # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ).
+            if options.header:
+                view_options = "-h"
+            else:
+                view_options = ""
+            command = 'samtools view %s -o %s %s' % ( view_options, samOutputNames[i], tmp_sorted_aligns_file_name )
+            tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
+            tmp_stderr = open( tmp, 'wb' )
+            proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
+            returncode = proc.wait()
+            tmp_stderr.close()
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+        except Exception, e:
+            stop_err( 'Error extracting alignments from (%s), %s' % ( inputFileNames[i], str( e ) ) )
+        if os.path.getsize( samOutputNames[i] ) > 0:
+            sys.stdout.write( 'BAM file converted to SAM' )
+        else:
+            stop_err( 'The output file is empty, there may be an error with your input file.' )
+     
+    if options.outputTar != None:
+        toTar(options.outputTar, samOutputNames)       
+    #clean up temp files
+    for tmp_dir in tmp_dirs:
+        if os.path.exists( tmp_dir ):
+            shutil.rmtree( tmp_dir )
+    #print tmp_files
+    #for tmp in tmp_files:
+    #    os.remove(tmp)            
+    
+
+if __name__=="__main__": __main__()
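For clarity, --input1 and --output1 are not BAM/SAM datasets themselves but plain-text manifests: each input line holds a sample label and a BAM path, and the wrapper writes one "label <TAB> SAM path" line per sample before converting. A minimal sketch of that convention (hypothetical paths and helper, not part of the tool):

# --input1 manifest, whitespace-separated:
#   sample1   /data/sample1.bam
#   sample2   /data/sample2.bam
import os

def write_sam_manifest(input_manifest, output_manifest):
    # mirror the wrapper: one output SAM path per input BAM, placed next to the output manifest
    res_dir = os.path.dirname(output_manifest)
    with open(input_manifest) as src, open(output_manifest, "w") as dst:
        for line in src:
            if not line.strip():
                continue
            label, bam_path = line.split()[:2]   # bam_path is what the wrapper later feeds to samtools
            dst.write("%s\t%s/%s_samOutput.sam\n" % (label, res_dir, label))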
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/bam_to_sam_parallel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/bam_to_sam_parallel.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,32 @@
+<tool id="bam_to_sam_parallel" name="BAM to SAM (for DEA)" version="1.0.0">
+  <description>converts a list of BAM format files to SAM format.</description>
+  <requirements>
+ <requirement type="package">samtools</requirement>
+  </requirements>
+  <command interpreter="python"> bam_to_sam_parallel.py
+      --input1=$input1
+      --output1=$output1
+      $header
+      $tar $outputTarFile
+  </command>
+  <inputs>
+    <param name="input1" type="data" format="txt" label="BAM File LIST to Convert" />
+    <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" />
+    <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />
+  </inputs>
+  <outputs>
+   <data format="txt" name="output1" label="converted SAM LIST files " />
+   <data name="outputTarFile" format="tar">
+   <filter>tar</filter>
+   </data>
+  </outputs>
+  <help>
+
+**What it does**
+
+This tool uses the SAMTools_ toolkit to produce a SAM file from each BAM file in the input list.
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+  </help>
+</tool>
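With both checkboxes ticked, Galaxy expands the command template above roughly into the following call (dataset paths are placeholders, not real files):

python bam_to_sam_parallel.py --input1=/galaxy/files/bam_list.txt --output1=/galaxy/files/sam_list.txt --header -t /galaxy/files/sam_archive.tar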
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+"""
+Converts a list of BAM datasets to sorted SAM datasets.
+usage: bam_to_sam_parallel_unSQL.py [options]
+   --input1: txt file listing the BAM datasets to convert (one "label path" pair per line)
+   --output1: txt file listing the resulting SAM datasets
+"""
+
+import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
+from commons.core.launcher.Launcher import Launcher
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+#from galaxy import eggs
+#import pkg_resources; pkg_resources.require( "bx-python" )
+#from bx.cookbook import doc_optparse
+#from galaxy import util
+
+def stop_err( msg ):
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit()
+    
+def toTar(tarFileName, samOutputNames):
+    dir = os.path.dirname(tarFileName)    
+    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
+    currentPath = os.getcwd()
+    os.chdir(dir)
+    for file in samOutputNames:
+        relativeFileName = os.path.basename(file)
+        tfile.add(relativeFileName)
+    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
+    tfile.close()
+    os.chdir(currentPath)    
+    
+def _map(iLauncher, cmd, cmdStart, cmdFinish ):
+    lCmds = []
+    lCmds.extend(cmd)
+    lCmdStart = []
+    lCmdStart.extend(cmdStart)
+    lCmdFinish = []
+    lCmdFinish.extend(cmdFinish)
+    return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
+
+def _createSamToolsViewCmd(iLauncher, inputFile, tmp_sorted_aligns_file_name, header):
+        lArgs = []
+        lArgs.append("-o %s" %  inputFile)
+        lArgs.append("%s" % tmp_sorted_aligns_file_name)
+        if header:
+            lArgs.append("-h")
+        return iLauncher.getSystemCommand("samtools view", lArgs)
+
+def _createSamToolsSortCmd(iLauncher, inputFile, tmp_sorted_aligns_file_base):
+        lArgs = []
+        lArgs.append("%s" % inputFile)
+        lArgs.append("%s" %  tmp_sorted_aligns_file_base)
+        return iLauncher.getSystemCommand("samtools sort", lArgs)
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-t', '--tar', dest='outputTar', default=None, help='output all SAM results in a tar file.' )
+    parser.add_option( '', '--input1', dest='input1', help='The input list of BAM datasets in txt format.' )
+    #parser.add_option( '', '--input1', dest='input1', help='The input BAM dataset' )
+    parser.add_option( '', '--output1', dest='output1', help='The output list of SAM datasets in txt format.' )
+    #parser.add_option( '', '--output1', dest='output1', help='The output SAM dataset' )
+    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
+    ( options, args ) = parser.parse_args()
+
+
+    #Parse the input txt file and read a list of BAM files.
+    file = open(options.input1, "r")
+    lines = file.readlines()
+    inputFileNames = []
+    samOutputNames = []
+    outputName = options.output1
+    resDirName = os.path.dirname(outputName) + '/'
+    #Write output txt file and define all output sam file names.
+    out = open(outputName, "w")
+    for line in lines:
+        tab = line.split()
+        inputFileNames.append(tab[1])
+        samOutName = resDirName + tab[0] + '_samOutput_%s.sam' % random.randrange(0, 10000)
+        samOutputNames.append(samOutName)
+        out.write(tab[0] + '\t' + samOutName  + '\n')
+    file.close()
+    out.close()
+
+    # output version # of tool
+    try:
+        tmp_files = []
+        tmp = tempfile.NamedTemporaryFile().name
+        tmp_files.append(tmp)
+        tmp_stdout = open( tmp, 'wb' )
+        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
+        tmp_stdout.close()
+        returncode = proc.wait()
+        stdout = None
+        for line in open( tmp_stdout.name, 'rb' ):
+            if line.lower().find( 'version' ) >= 0:
+                stdout = line.strip()
+                break
+        if stdout:
+            sys.stdout.write( 'Samtools %s\n' % stdout )
+        else:
+            raise Exception
+    except:
+        sys.stdout.write( 'Could not determine Samtools version\n' )
+
+    tmp_dirs = []
+    acronym = "bam_to_sam"
+    jobdb = TableJobAdaptatorFactory.createJobInstance()
+    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
+    lCmdsTuples = []
+    for i in range(len(inputFileNames)):    #Construct the lines commands
+        if os.path.getsize( inputFileNames[i] ) == 0:
+            raise Exception, 'Input BAM file is empty.'
+        tmp_dir = tempfile.mkdtemp(dir="%s" % os.getcwd())
+        tmp_dirs.append(tmp_dir)
+        tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
+        tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
+        tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
+        tmp_files.append(tmp_sorted_aligns_file_name)
+        tmp_sorted_aligns_file.close()
+        
+        inputFile = inputFileNames[i]
+        outputFile = samOutputNames[i]
+        cmd2Launch = []
+        cmd2Launch.append(_createSamToolsSortCmd(iLauncher, inputFile, tmp_sorted_aligns_file_base))
+        cmd2Launch.append(_createSamToolsViewCmd(iLauncher, outputFile, tmp_sorted_aligns_file_name, options.header))
+        cmdStart = []
+        cmdFinish = []
+        lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))    
+
+    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)
+    
+    if options.outputTar != None:
+        toTar(options.outputTar, samOutputNames)       
+    #clean up temp files
+    for tmp_dir in tmp_dirs:
+        if os.path.exists( tmp_dir ):
+            shutil.rmtree( tmp_dir )
+    #print tmp_files
+    #for tmp in tmp_files:
+    #    os.remove(tmp)            
+    
+
+if __name__=="__main__": __main__()
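Each job handed to the launcher chains the same two samtools calls as the serial wrapper above, one sort followed by one view per sample. A minimal sketch of the command pair that gets built (hypothetical helper; uses the old two-argument "samtools sort in.bam out.prefix" syntax assumed throughout these scripts):

def per_sample_commands(bam_path, sort_prefix, sam_out, header=False):
    # 'samtools sort <in.bam> <out.prefix>' writes <out.prefix>.bam
    cmds = ["samtools sort %s %s" % (bam_path, sort_prefix)]
    view = "samtools view -o %s %s.bam" % (sam_out, sort_prefix)
    if header:
        view += " -h"   # keep the SAM header, as the --header option requests
    cmds.append(view)
    return cmds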
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,32 @@
+<tool id="bam_to_sam_parallel_unSQL" name="BAM to SAM (for DEA in parallel)" version="1.0.0">
+  <description>converts a list of BAM format files to SAM format (parallelized).</description>
+  <requirements>
+ <requirement type="package">samtools</requirement>
+  </requirements>
+  <command interpreter="python"> bam_to_sam_parallel_unSQL.py
+      --input1=$input1
+      --output1=$output1
+      $header
+      $tar $outputTarFile
+  </command>
+  <inputs>
+    <param name="input1" type="data" format="txt" label="BAM File LIST to Convert" />
+    <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" />
+    <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />
+  </inputs>
+  <outputs>
+   <data format="txt" name="output1" label="converted SAM LIST files " />
+   <data name="outputTarFile" format="tar">
+   <filter>tar</filter>
+   </data>
+  </outputs>
+  <help>
+
+**What it does**
+
+This tool uses the SAMTools_ toolkit to produce a SAM file from each BAM file in the input list.
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/compareOverlapping_parallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/compareOverlapping_parallel.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,175 @@\n+#! /usr/bin/env python\n+#This program is a wrapp for CompareOverlapping.py.\n+import optparse, os, sys, subprocess, tempfile, shutil, tarfile, glob\n+import os, struct, time, random\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.CompareOverlapping import CompareOverlapping\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.ncList.NCList import NCList\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n+from SMART.Java.Python.ncList.FileSorter import FileSorter\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.misc import Utils\n+\n+\n+\n+def stop_err( msg ):\n+\tsys.stderr.write( "%s\\n" % msg )\n+\tsys.exit()\n+\n+def toTar(tarFileName, overlapOutputNames):\n+\tdir = os.path.dirname(tarFileName)\t\n+\ttfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n+\tcurrentPath = os.getcwd()\n+\tos.chdir(dir)\n+\tfor file in overlapOutputNames:\n+\t\trelativeFileName = os.path.basename(file)\n+\t\ttfile.add(relativeFileName)\n+\tos.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n+\ttfile.close()\n+\tos.chdir(currentPath)\n+\n+def __main__():\n+\tdescription = "Compare Overlapping wrapp script: Get the a list of data which overlap with a reference set. [Category: Data Comparison]"\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input1",\t\t   dest="inputFileName1", action="store",\t\t\t\t\t type="string", help="input file 1 (for annotation) [compulsory] [format: file in transcript format given by -f]")\n+\tparser.add_option("-f", "--format1",\t\t  dest="format1",\t\taction="store",\t\t\t\t\t type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n+\tparser.add_option("", "--inputTxt", \t\tdest="inputTxt", \t\taction="store", \t\t\t\ttype="string", \thelp="input, a txt file for a list of input reads files. 
Should identify all reads files format, given by -g [compulsory]")\n+\t#parser.add_option("-j", "--input2",\t\t   dest="inputFileName2", action="store",\tdefault="inputRead",\t type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+\tparser.add_option("-g", "--format2",\t\t  dest="format2",\t\taction="store",\t\t\t\t type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n+\t#parser.add_option("-o", "--output",\t\t   dest="output",\t\t action="store",\t  default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+\tparser.add_option("-S", "--start1",\t\t   dest="start1",\t\t action="store",\t  default=None,  type="int",\thelp="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")\n+\tparser.add_option("-s", "--start2",\t\t   dest="start2",\t\t action="store",\t  default=None,  type="int",\thelp="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")\n+\tparser.add_option("-U", "--end1",\t\t\t dest="end1",\t\t   action="store",\t  default=None,  type="int",\thelp="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")\n+\tparser.add_option("-u", "--end2",\t\t\t dest="end2",\t\t   action="store",\t  default=None,  type="int",\thelp="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")\n+\tparser.add_option("-t", "--intron",\t\t   dest="introns",\t\taction="store_true", default=False,\t\t\t\thelp="also report introns [format: bool] [default: false]")\n+\tparser.add_option("-E", "--5primeExtension1", dest="fivePrime1",\t action="store",\t  default=None,  type="int",\thelp="extension towards 5\' in file 1 [format: int]")\n+\tparser.add_option("-e"'..b'verlapping data [format: bool] [default: false]")\n+\tparser.add_option("-x", "--exclude",\t\t  dest="exclude",\t\taction="store_true", default=False,\t\t\t\thelp="invert the match [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity",\t\tdest="verbosity",\t  action="store",\t  default=1,\t type="int",\thelp="trace level [format: int]")\n+\tparser.add_option(\'\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all SAM results in a tar file.\' )\n+\tparser.add_option( \'\', \'--outTxt\', dest=\'outTxtFile\', help=\'The output list of results files on txt format.[compulsory]\' )\n+\t(options, args) = parser.parse_args()\n+\t\n+\t\n+\t#Parse the input txt file and read a list of BAM files.\n+\tfile = open(options.inputTxt, "r")\n+\tlines = file.readlines()\n+\tinputFileNames = []\n+\toverlapOutputNames = []\n+\toutputName = options.outTxtFile\n+\tresDirName = os.path.dirname(outputName) + "/"\n+\t#Write output txt file and define all output sam file names.\n+\tout = open(outputName, "w")\n+\tfor line in lines:\n+\t\ttab = line.split()\n+\t\tinputFileNames.append(tab[1])\n+\t\toverlapOutName = resDirName + tab[0] + \'_overlapOut_%s.gff3\' % random.randrange(0, 10000)\n+\t\toverlapOutputNames.append(overlapOutName)\n+\t\tout.write(tab[0] + \'\\t\' + overlapOutName  + \'\\n\')\n+\tfile.close()\n+\tout.close()\n+\t\n+\t#construction the commandes for each input file\n+\tcmds = []\n+\tfor i in range(len(inputFileNames)):\n+\t\tabsFile = sys.argv[0]\n+\t\tabsDir = os.path.dirname(absFile)\n+\t\tparentDir = os.path.abspath(os.path.join(absDir, os.path.pardir))\n+\t\tcmd = "python 
%s/Java/Python/CompareOverlappingSmallQuery.py " % parentDir\n+\t\topts = "-i %s -f %s -j %s -g %s -o %s " % (options.inputFileName1, options.format1, inputFileNames[i], options.format2, overlapOutputNames[i])\n+\t\t#if options.start1 != None:\n+\t\t#\topts += "-S %s " % options.start1\n+\t\t#if options.start2 != None:\n+\t\t#\topts += "-s %s " % options.start2\n+\t\t#if options.end1 != None:\n+\t\t#\topts += "-U %s " % options.end1\n+\t\t#if options.end2 != None:\n+\t\t#\topts += "-u %s " % options.end2\n+\t\t#if options.fivePrime1 != None:\n+\t\t#\topts += "-E %s " % options.fivePrime1\n+\t\t#if options.fivePrime2 != None:\n+\t\t#\topts += "-e %s " % options.fivePrime2\n+\t\t#if options.threePrime1 != None:\n+\t\t#\topts += "-N %s " % options.threePrime1\n+\t\t#if options.threePrime2 != None:\n+\t\t#\topts += "-n %s " % options.threePrime2\n+\t\t#if options.colinear:\n+\t\t#\topts += "-c "\n+\t\t#if options.antisense:\n+\t\t#\topts +="-a "\n+\t\t#if options.included:\n+\t\t#\topts += "-k "\n+\t\t#if options.including:\n+\t\t#\topts += "-K "\n+\t\t#if options.pcOverlap != None:\n+\t\t#\topts += "-p %s " % options.pcOverlap\n+\t\tif options.notOverlapping:\n+\t\t\topts += "-O "\n+\t\tif options.exclude:\n+\t\t\topts += "-x "\n+\t\tif options.distance != None:\n+\t\t\topts += "-d %s " % options.distance\n+\t\t#if options.minOverlap != None:\n+\t\t#\topts += "-m %s " % options.minOverlap\n+\t\tcmd += opts\n+\t\tcmds.append(cmd)\n+\n+\n+\tprint "les commandes sont %s \\n" % cmds\n+\n+\ttmp_files = []\t\n+\tfor i in range(len(cmds)):\n+\t\ttry:\n+\t\t\ttmp_out = tempfile.NamedTemporaryFile().name\n+\t\t\ttmp_files.append(tmp_out)\n+\t\t\ttmp_stdout = open( tmp_out, \'wb\' )\n+\t\t\ttmp_err = tempfile.NamedTemporaryFile().name\n+\t\t\ttmp_files.append(tmp_err)\n+\t\t\ttmp_stderr = open( tmp_err, \'wb\' )\n+\t\t\tproc = subprocess.Popen( args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )\n+\t\t\treturncode = proc.wait()\n+\t\t\ttmp_stderr.close()\n+\t\t\t# get stderr, allowing for case where it\'s very large\n+\t\t\ttmp_stderr = open( tmp_err, \'rb\' )\n+\t\t\tstderr = \'\'\n+\t\t\tbuffsize = 1048576\n+\t\t\ttry:\n+\t\t\t\twhile True:\n+\t\t\t\t\tstderr += tmp_stderr.read( buffsize )\n+\t\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t\t\tbreak\n+\t\t\texcept OverflowError:\n+\t\t\t\tpass\n+\t\t\ttmp_stdout.close()\n+\t\t\ttmp_stderr.close()\n+\t\t\tif returncode != 0:\n+\t\t\t\traise Exception, stderr\n+\t\texcept Exception, e:\n+\t\t\tstop_err( \'Error in :\\n\' + str( e ) )\n+\n+\tif options.outputTar != None:\n+\t\ttoTar(options.outputTar, overlapOutputNames)\t\n+\t\n+\tfor tmp_file in tmp_files:\n+\t\tos.remove(tmp_file)\n+\n+\n+if __name__=="__main__": __main__()\t\t\n+\t\t\n+\t\t\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/compareOverlapping_parallel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/compareOverlapping_parallel.xml Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,251 @@\n+<tool id="CompareOverlapping_parallel" name="CompareOverlapping (for DEA)">\n+\t<description>Shrink or extend the sets of genomic coordinates to get the information between starts of reads and starts of genes.</description>\n+\t<command interpreter="python">\n+\t\tcompareOverlapping_parallel.py -i $formatType.inputFileName1\n+\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n+\t\t\t-f bed\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n+\t\t\t-f gff\t\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n+\t\t\t-f gff2\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n+\t\t\t-f gff3\n+\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n+\t\t\t-f sam\n+\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n+\t\t\t-f gtf\n+\t\t#end if\n+\t\t\t\n+\t\t--inputTxt $inputTxt \n+\t\t\n+\t\t-g $format2\n+\n+\t\t--outTxt $outTxtFile\n+\n+\t\t#if $optionNFirstFile1.NFirstForFile1 == \'Yes\':\n+\t\t\t-S $optionNFirstFile1.firstNtFile1\n+\t\t#end if\n+\t\t#if $optionNFirstFile2.NFirstForFile2 == \'Yes\':\n+\t\t\t-s $optionNFirstFile2.firstNtFile2\n+\t\t#end if\n+\t\t#if $optionNLastFile1.NLastForFile1 == \'Yes\':\n+\t\t\t-U $optionNLastFile1.lastNtFile1\n+\t\t#end if\n+\t\t#if $optionNLastFile2.NLastForFile2 == \'Yes\':\n+\t\t\t-u $optionNLastFile2.lastNtFile2\n+\t\t#end if\n+\t\n+\t\t#if $optionExtentionCinqFile1.extentionFile1 == \'Yes\':\n+\t\t\t-E $optionExtentionCinqFile1.extention51\n+\t\t#end if\n+\t\t#if $optionExtentionCinqFile2.extentionFile2 == \'Yes\':\n+\t\t\t-e $optionExtentionCinqFile2.extention52\n+\t\t#end if\n+\n+\t\t#if $optionExtentionTroisFile1.extentionFile1 == \'Yes\':\n+\t\t\t-N $optionExtentionTroisFile1.extention31\n+\t\t#end if\n+\t\t#if $optionExtentionTroisFile2.extentionFile2 == \'Yes\':\n+\t\t\t-n $optionExtentionTroisFile2.extention32\n+\t\t#end if\t\n+\n+\t\t#if $OptionColinearOrAntiSens.OptionCA == \'Colinear\':\n+\t\t\t-c \n+\t\t#elif $OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n+\t\t\t-a\n+\t\t#end if\t\n+\n+\t\t#if $OptionDistance.Dist == \'Yes\':\n+\t\t\t-d $OptionDistance.distance\n+\t\t#end if\n+\n+\t\t#if $OptionMinOverlap.MO == \'Yes\':\n+\t\t\t-m $OptionMinOverlap.minOverlap\n+\t\t#end if\n+\n+\t\t$InvertMatch\n+\t\t$ReportIntron\n+\t\t$NotOverlapping\n+\t\t$tar $outputTarFile\n+\t</command>\n+\n+\t<inputs>\n+\n+\t\t<conditional name="formatType">\n+\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff3">\n+\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gtf">\n+\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n+            
</when>\n+\t\t</conditional>\n+\t\t\n+\t\t<param name="inputTxt" type="data" format="txt" label="A txt file contains a list of several input transcripts files." />\n+\t\t\n+\t\t<param name="format2" type="text" value="bed" label="format for  File 2, you can choose [bed, gff, gff2, gff3, sam, gtf]"/>\n+\t\t\n+\t\t<conditional name="optionNFirstFile1">\n+\t\t\t<param name="NFirstForFile1" type="select" label="NFirst for file 1" help="only consider the n first nucleotides of the transcripts in file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="firstNtFile1" type="integer" value="1" label="n first nucleotides for input file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<conditional name="optionNFirstFile2">\n+\t\t\t<param name="NFirs'..b'tForFile2" type="select" label="NLast for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionCinqFile1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention51" type="integer" value="1" label="in file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionCinqFile2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention52" type="integer" value="1" label="in file 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionTroisFile1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extension towards 3 for file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention31" type="integer" value="1" label="in file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionTroisFile2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention32" type="integer" value="1" label="in file 2" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionColinearOrAntiSens">\n+\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n+\t\t\t\t<option value="Colinear">Colinear</option>\n+\t\t\t\t<option value="AntiSens">AntiSens</option>\n+\t\t\t\t<option value="NONE" selected="true">NONE</option>\n+\t\t\t</param>\n+\t\t\t<when 
value="Colinear">\n+\t\t\t</when>\n+\t\t\t<when value="AntiSens">\n+\t\t\t</when>\n+\t\t\t<when value="NONE">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionDistance">\n+\t\t\t<param name="Dist" type="select" label="Maximum Distance between two reads">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="distance" type="integer" value="0"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionMinOverlap">\n+\t\t\t<param name="MO" type="select" label="Minimum number of overlapping between two reads">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="minOverlap" type="integer" value="1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n+\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n+\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n+\t\t<param name="tar" type="boolean" truevalue="--tar" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />\n+\t</inputs>\n+\n+\t<outputs>\n+\t\t<data name="outTxtFile" format="txt" label="overlapping output files "/>\n+\t\t<data name="outputTarFile" format="tar">\n+\t\t  <filter>tar</filter>\n+\t  </data>\n+\t</outputs> \n+\t\n+</tool>\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,205 @@\n+#! /usr/bin/env python\n+#This program is a wrapp for CompareOverlapping.py.\n+import os, sys, tarfile, optparse\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from optparse import OptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from SMART.Java.Python.structure.TranscriptList import TranscriptList\n+from commons.core.writer.WriterChooser import WriterChooser\n+\n+def stop_err( msg ):\n+\tsys.stderr.write( "%s\\n" % msg )\n+\tsys.exit()\n+\n+def toTar(tarFileName, overlapOutputNames):\n+\tdir = os.path.dirname(tarFileName)\t\n+\ttfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n+\tcurrentPath = os.getcwd()\n+\tos.chdir(dir)\n+\tfor file in overlapOutputNames:\n+\t\trelativeFileName = os.path.basename(file)\n+\t\ttfile.add(relativeFileName)\n+\tos.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n+\ttfile.close()\n+\tos.chdir(currentPath)\n+\n+def _createCompareOverlappingCmd(iLauncher, options, inputFileName, annotationFile, overlapOutputName):\n+\tlArgs = []\n+\tlArgs.append("-i %s" % annotationFile)\n+\tlArgs.append("-f %s" % options.format1)\n+\tlArgs.append("-j %s" % inputFileName)\n+\tlArgs.append("-g %s" % options.format2)\n+\tlArgs.append("-o %s" % overlapOutputName)\n+\tif options.notOverlapping:\n+\t\tlArgs.append("-O")\n+\tif options.exclude:\n+\t\tlArgs.append("-x")\n+\tif options.distance != None:\n+\t\tlArgs.append("-d %s" % options.distance)\n+\treturn(iLauncher.getSystemCommand("python %s/SMART/Java/Python/CompareOverlappingSmallQuery.py"  %  os.environ["REPET_PATH"], lArgs))\n+\n+def _map(iLauncher, cmd, cmdStart, cmdFinish ):\n+\tlCmds = []\n+\tlCmds.append(cmd)\n+\tlCmdStart = []\n+\tlCmdStart.append(cmdStart)\n+\tlCmdFinish = []\n+\tlCmdFinish.append(cmdFinish)\n+\treturn(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))\n+\n+def split(fileName, nbOfSeqPerBatch):\n+\tfilePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n+\tresDir = os.path.dirname(fileName)\n+\tlInputName = []\n+\tfileNb = 1\n+\tSeqNb = 0\n+\toutFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n+\tlInputName.append(outFileName)\n+\toutFile = open(outFileName, "w")\n+\tf = open(fileName, "r")\n+\tline = f.readline()\n+\tpreviousRefName = ""\n+\twhile line != "":\n+\t\tif not line.startswith(\'@SQ\'):\n+\t\t\tif SeqNb == nbOfSeqPerBatch:\n+\t\t\t\tSeqNb = 0\n+\t\t\t\tfileNb += 1\n+\t\t\t\toutFile.close()\n+\t\t\t\toutFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n+\t\t\t\tlInputName.append(outFileName)\n+\t\t\t\toutFile = open(outFileName, "w")\n+\t\t\trefName = line.split("\\t")[2]\n+\t\t\tif previousRefName != refName:\n+\t\t\t\tSeqNb += 1\n+\t\t\t\toutFile.write(line)\n+\t\t\telse:\n+\t\t\t\tpreviousRefName = refName\n+\t\t\t\toutFile.write(line)\n+\t\tline = f.readline()\n+\treturn lInputName\t\t\n+\n+def join(dCutOut2Out, options):\n+\tchooser = ParserChooser()\n+\tchooser.findFormat("gtf")\n+\tgtfParser = chooser.getParser(options.inputFileName1)\n+\tref = {}\n+\tfor transcript in gtfParser.getIterator():\n+\t\tref[transcript.getTagValue("ID")] = transcript\n+\tfor key in dCutOut2Out.keys():\n+\t\twriterChooser = WriterChooser()\n+\t\twriterChooser.findFormat("gff3")\n+\t\tfor inputFile in dCutOut2Out[key]:\n+\t\t\tchooser = ParserChooser()\n+\t\t\tchooser.findFormat("gff")\n+\t\t\tgffParser = 
chooser.getParser(inputFile)\n+\t\t\tfor transcript in gffParser.getIterator():\n+\t\t\t\t\tfinalTranscript = ref[transcript.getTagValue("ID")]\n+\t\t\t\t\tif finalTranscript.getTagValue("nbOverlaps"):\n+\t\t\t\t\t\tnbOverlap = int(finalTranscript.getTagValue("nbOverlaps")) + int(transcript.getTagValue("nbOverlaps"))\n+\t\t\t\t\t\tfinalTranscript.setTagValue("nbOverlaps", nbOverlap)\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tfinalTranscript.setTagValue("nbOverlaps", transcript.getTagValue("nbOverlaps"))\n+\t\t\t\t\t\n+\t\t\t\t\tif finalTranscript.getTagValue("overlapsWith") and transcript.getTagValue("overlapsWith") != None:\n+\t\t\t\t\t\toverlapName = "--".join([finalTranscript.getTagValue("overlapsWith"), transcript.getTagValue("overlapsWith")])\n+\t\t\t\t\t\tfinalTranscript.set'..b'pe="int",\thelp="extension towards 3\' in file 2 [format: int]")\n+\tparser.add_option("-c", "--colinear",\t\t dest="colinear",\t   action="store_true", default=False,\t\t\t\thelp="colinear only [format: bool] [default: false]")\n+\tparser.add_option("-a", "--antisense",\t\tdest="antisense",\t  action="store_true", default=False,\t\t\t\thelp="antisense only [format: bool] [default: false]")\n+\tparser.add_option("-d", "--distance",\t\t dest="distance",\t   action="store",\t  default=None,\t type="int",\thelp="accept some distance between query and reference [format: int]")\n+\tparser.add_option("-k", "--included",\t\t dest="included",\t   action="store_true", default=False,\t\t\t\thelp="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")\n+\tparser.add_option("-K", "--including",\t\tdest="including",\t  action="store_true", default=False,\t\t\t\thelp="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")\n+\tparser.add_option("-m", "--minOverlap",\t   dest="minOverlap",\t action="store",\t  default=None,\t type="int",\thelp="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")\n+\tparser.add_option("-p", "--pcOverlap",\t\tdest="pcOverlap",\t  action="store",\t  default=None,  type="int",\thelp="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")\n+\tparser.add_option("-O", "--notOverlapping",   dest="notOverlapping", action="store_true", default=False,\t\t\t\thelp="also output not overlapping data [format: bool] [default: false]")\n+\tparser.add_option("-x", "--exclude",\t\t  dest="exclude",\t\taction="store_true", default=False,\t\t\t\thelp="invert the match [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity",\t\tdest="verbosity",\t  action="store",\t  default=1,\t type="int",\thelp="trace level [format: int]")\n+\tparser.add_option(\'\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all SAM results in a tar file.\' )\n+\tparser.add_option( \'\', \'--outTxt\', dest=\'outTxtFile\', help=\'The output list of results files on txt format.[compulsory]\' )\n+\t(options, args) = parser.parse_args()\n+\t\n+\t\n+\t#Parse the input txt file and read a list of BAM files.\n+\tfile = open(options.inputTxt, "r")\n+\tlines = file.readlines()\n+\tinputFileNames = []\n+\toverlapOutputNames = []\n+\toutputName = options.outTxtFile\n+\tresDirName = os.path.dirname(outputName) + "/"\n+\t#Write output txt file and define all output sam file names.\n+\tout = open(outputName, "w")\n+\tfor line in lines:\n+\t\ttab = line.split()\n+\t\tinputFileNames.append(tab[1])\n+\t\toverlapOutName = resDirName + tab[0] + 
\'_overlapOut.gff3\'\n+\t\toverlapOutputNames.append(overlapOutName)\n+\t\tout.write(tab[0] + \'\\t\' + overlapOutName  + \'\\n\')\n+\tfile.close()\n+\tout.close()\n+\t\n+\t#Launch on nodes\n+\tacronym = "compareOverlapping"\n+\tjobdb = TableJobAdaptatorFactory.createJobInstance()\n+\tiLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "test", acronym, acronym, False, True)\n+\n+\n+\t\n+\n+\t#construction the commandes for each input file\n+\tlCmdsTuples = []\n+\tdCutOut2Out = {}\n+\tlAllFile2remove = []\n+\tfor i in range(len(inputFileNames)):\n+\t\tlCutInputFile = split(inputFileNames[i], 20000)\n+\t\tlAllFile2remove.extend(lCutInputFile)\n+\t\tlCutOutput = []\n+\t\tfor cutInput in lCutInputFile:\n+\t\t\tcutOutput = "%s_out" % cutInput\n+\t\t\tlCutOutput.append(cutOutput)\n+\t\t\tlAllFile2remove.extend(lCutOutput)\n+\t\t\tcmd2Launch = _createCompareOverlappingCmd(iLauncher, options, cutInput, options.inputFileName1, cutOutput)\n+\t\t\tlCmdsTuples.append(_map(iLauncher, cmd2Launch, "", ""))\n+\t\tchooser = ParserChooser()\n+\t\tchooser.findFormat(options.format2)\n+\t\tdCutOut2Out[overlapOutputNames[i]] = lCutOutput\n+\tiLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n+\t\n+\tjoin(dCutOut2Out, options)\n+\tFileUtils.removeFilesFromListIfExist(lAllFile2remove)\n+\n+\tif options.outputTar != None:\n+\t\ttoTar(options.outputTar, overlapOutputNames)\t\n+\n+if __name__=="__main__": __main__()\t\t\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.xml Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,251 @@\n+<tool id="CompareOverlapping_parallel_unSQL" name="CompareOverlapping (for DEA in parallel)">\n+\t<description>Shrink or extend the sets of genomic coordinates to get the information between starts of reads and starts of genes (parallelized).</description>\n+\t<command interpreter="python">\n+\t\tcompareOverlapping_parallel_unSQL.py -i $formatType.inputFileName1\n+\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n+\t\t\t-f bed\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n+\t\t\t-f gff\t\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n+\t\t\t-f gff2\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n+\t\t\t-f gff3\n+\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n+\t\t\t-f sam\n+\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n+\t\t\t-f gtf\n+\t\t#end if\n+\t\t\t\n+\t\t--inputTxt $inputTxt \n+\t\t\n+\t\t-g $format2\n+\n+\t\t--outTxt $outTxtFile\n+\n+\t\t#if $optionNFirstFile1.NFirstForFile1 == \'Yes\':\n+\t\t\t-S $optionNFirstFile1.firstNtFile1\n+\t\t#end if\n+\t\t#if $optionNFirstFile2.NFirstForFile2 == \'Yes\':\n+\t\t\t-s $optionNFirstFile2.firstNtFile2\n+\t\t#end if\n+\t\t#if $optionNLastFile1.NLastForFile1 == \'Yes\':\n+\t\t\t-U $optionNLastFile1.lastNtFile1\n+\t\t#end if\n+\t\t#if $optionNLastFile2.NLastForFile2 == \'Yes\':\n+\t\t\t-u $optionNLastFile2.lastNtFile2\n+\t\t#end if\n+\t\n+\t\t#if $optionExtentionCinqFile1.extentionFile1 == \'Yes\':\n+\t\t\t-E $optionExtentionCinqFile1.extention51\n+\t\t#end if\n+\t\t#if $optionExtentionCinqFile2.extentionFile2 == \'Yes\':\n+\t\t\t-e $optionExtentionCinqFile2.extention52\n+\t\t#end if\n+\n+\t\t#if $optionExtentionTroisFile1.extentionFile1 == \'Yes\':\n+\t\t\t-N $optionExtentionTroisFile1.extention31\n+\t\t#end if\n+\t\t#if $optionExtentionTroisFile2.extentionFile2 == \'Yes\':\n+\t\t\t-n $optionExtentionTroisFile2.extention32\n+\t\t#end if\t\n+\n+\t\t#if $OptionColinearOrAntiSens.OptionCA == \'Colinear\':\n+\t\t\t-c \n+\t\t#elif $OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n+\t\t\t-a\n+\t\t#end if\t\n+\n+\t\t#if $OptionDistance.Dist == \'Yes\':\n+\t\t\t-d $OptionDistance.distance\n+\t\t#end if\n+\n+\t\t#if $OptionMinOverlap.MO == \'Yes\':\n+\t\t\t-m $OptionMinOverlap.minOverlap\n+\t\t#end if\n+\n+\t\t$InvertMatch\n+\t\t$ReportIntron\n+\t\t$NotOverlapping\n+\t\t$tar $outputTarFile\n+\t</command>\n+\n+\t<inputs>\n+\n+\t\t<conditional name="formatType">\n+\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff3">\n+\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gtf">\n+\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 
1"/>\n+            </when>\n+\t\t</conditional>\n+\t\t\n+\t\t<param name="inputTxt" type="data" format="txt" label="A txt file contains a list of several input transcripts files." />\n+\t\t\n+\t\t<param name="format2" type="text" value="bed" label="format for  File 2, you can choose [bed, gff, gff2, gff3, sam, gtf]"/>\n+\t\t\n+\t\t<conditional name="optionNFirstFile1">\n+\t\t\t<param name="NFirstForFile1" type="select" label="NFirst for file 1" help="only consider the n first nucleotides of the transcripts in file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="firstNtFile1" type="integer" value="1" label="n first nucleotides for input file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<conditional name="opt'..b'tForFile2" type="select" label="NLast for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionCinqFile1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention51" type="integer" value="1" label="in file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionCinqFile2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention52" type="integer" value="1" label="in file 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionTroisFile1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extension towards 3 for file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention31" type="integer" value="1" label="in file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionTroisFile2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention32" type="integer" value="1" label="in file 2" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionColinearOrAntiSens">\n+\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n+\t\t\t\t<option value="Colinear">Colinear</option>\n+\t\t\t\t<option value="AntiSens">AntiSens</option>\n+\t\t\t\t<option value="NONE" selected="true">NONE</option>\n+\t\t\t</param>\n+\t\t\t<when value="Colinear">\n+\t\t\t</when>\n+\t\t\t<when 
value="AntiSens">\n+\t\t\t</when>\n+\t\t\t<when value="NONE">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionDistance">\n+\t\t\t<param name="Dist" type="select" label="Maximum Distance between two reads">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="distance" type="integer" value="0"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionMinOverlap">\n+\t\t\t<param name="MO" type="select" label="Minimum number of overlapping between two reads">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="minOverlap" type="integer" value="1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n+\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n+\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n+\t\t<param name="tar" type="boolean" truevalue="--tar" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />\n+\t</inputs>\n+\n+\t<outputs>\n+\t\t<data name="outTxtFile" format="txt" label="overlapping output files "/>\n+\t\t<data name="outputTarFile" format="tar">\n+\t\t  <filter>tar</filter>\n+\t  </data>\n+\t</outputs> \n+\t\n+</tool>\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/countNumber.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/countNumber.pl Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $in_file = $ARGV[0];
+my $out_file = $ARGV[1];
+my $sort_type = $ARGV[2]; # n(umeric) or a(lphanumeric)
+my ($line,$ID,$nbOverlaps,%hash);
+
+open(IN, $in_file);
+while ($line = <IN>){
+    chomp($line);
+    $line=~s/\t/|/g;
+    my @part=split(/\|/,$line);
+    my @split=split(";",$part[$#part]);
+    $split[0] =~ m/^(\w+).+$/;
+
+    foreach my $i (@split){
+        if ($i=~m/nbOverlaps=(.+)/){
+            $nbOverlaps=$1;
+        }
+        if ($i=~m/gene_id=(.+)/){
+            $ID=$1;
+            $hash{$ID}=$nbOverlaps;
+        }
+    }
+}
+close(IN);
+
+open(OUT, ">$out_file");
+foreach my $key ( sort keys %hash) {
+    print OUT "$key\t$hash{$key}\n";
+}
+close(OUT);
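countNumber.pl boils a GFF3 overlap file down to a two-column "gene_id <TAB> nbOverlaps" table by scanning the attribute column; the third argument ($sort_type) is read but not used in the code above. A rough Python equivalent, assuming well-formed key=value attributes (illustrative only, not part of the repository):

def count_overlaps(gff3_path, out_path):
    counts = {}
    for line in open(gff3_path):
        attributes = line.rstrip("\n").split("\t")[-1]   # last (attribute) column
        fields = dict(f.split("=", 1) for f in attributes.split(";") if "=" in f)
        if "gene_id" in fields:
            counts[fields["gene_id"]] = fields.get("nbOverlaps", "0")
    out = open(out_path, "w")
    for gene in sorted(counts):
        out.write("%s\t%s\n" % (gene, counts[gene]))
    out.close()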
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/countNumber.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/countNumber.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,16 @@
+<tool id="countNumber" name="countNumber">
+ <description>Calculate the number of reads(annotations) overlapping for each transcript.</description>
+ <command interpreter="perl"> countNumber.pl $input $outputCSV
+ </command>
+
+ <inputs>
+ <param name="input" type="data" format="gff3" label="Please choose your gff3 format file (which contains the number of overlaps)."/>
+ </inputs>
+
+ <outputs>
+ <data format="csv" name="outputCSV" label="countNumber Output"/>
+ </outputs>
+
+ <help>
+ </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/countNumber_parallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/countNumber_parallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,96 @@
+#! /usr/bin/env python
+
+
+import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
+from optparse import OptionParser
+
+def stop_err(msg):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit()
+
+def toTar(tarFileName, outCountNames):
+    dir = os.path.dirname(tarFileName)
+    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
+    currentPath = os.getcwd()
+    os.chdir(dir)
+    for file in outCountNames:
+        relativeFileName = os.path.basename(file)
+        tfile.add(relativeFileName)
+    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
+    tfile.close()
+    os.chdir(currentPath)
+
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
+    parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
+    parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
+    (options, args) = parser.parse_args()
+
+    #Parse the input txt file and read a list of transcripts files.
+    file = open(options.inputFile, "r")
+    lines = file.readlines()
+    inputFileNames = []
+    outCountNames = []
+    outputName = options.outputFile
+    resDirName = os.path.dirname(outputName) + '/'
+
+    #Write output txt file and define all output count file names
+    out = open(outputName, "w")
+    out.write("label\tfiles\tgroup\n")
+    for line in lines:
+        tab = line.split()
+        inputFileNames.append(tab[1])
+        outCountName = resDirName + tab[0] + "_outCount_%s.csv" % random.randrange(0, 10000)
+        outCountNames.append(outCountName)
+        out.write(tab[0] + '\t' + outCountName + '\t' + tab[0][5] + '\n')
+    file.close()
+    out.close()
+
+    #Construct the lines commands
+    cmds = []
+    for i in range(len(inputFileNames)):
+        cmd = "perl %s/SMART/DiffExpAnal/countNumber.pl " %  os.environ["REPET_PATH"]
+        opts = "%s %s " % (inputFileNames[i], outCountNames[i])
+        cmd += opts
+        cmds.append(cmd)
+
+    tmp_files = []
+    for i in range(len(cmds)):
+        try:
+            tmp_out = tempfile.NamedTemporaryFile().name
+            tmp_files.append(tmp_out)
+            tmp_stdout = open(tmp_out, 'wb')
+            tmp_err = tempfile.NamedTemporaryFile().name
+            tmp_files.append(tmp_err)
+            tmp_stderr = open(tmp_err, 'wb')
+            proc = subprocess.Popen(args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr)
+            returncode = proc.wait()
+            tmp_stderr.close()
+            #get stderr, allowing for case where it's very large
+            tmp_stderr = open(tmp_err, 'rb')
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read(buffsize)
+                    if not stderr or len(stderr) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stdout.close()
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+        except Exception, e:
+            stop_err('Error in :\n' + str(e))
+
+    if options.outputTar != None:
+        toTar(options.outputTar, outCountNames)
+
+    for tmp_file in tmp_files:
+        os.remove(tmp_file)
+
+if __name__=="__main__":__main__()
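The manifest written above (header label/files/group) is presumably the target table that deseq.sh later takes as its targetFile argument; the group column is taken from the sixth character of each sample label. A hypothetical two-condition example (tab-separated, made-up paths):

label	files	group
group1_rep1	/results/group1_rep1_outCount_42.csv	1
group2_rep1	/results/group2_rep1_outCount_17.csv	2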
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/countNumber_parallel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/countNumber_parallel.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,19 @@
+<tool id="countNumber_parallel" name="countNumber (for DEA)">
+
+ <description>Calculate the number of reads(annotations) overlapping for each transcript.</description>
+ <command interpreter="python"> countNumber_parallel.py -i $inputTxt -o $outputTxt $tar $outputTarFile
+ </command>
+
+ <inputs>
+ <param name="inputTxt" type="data" format="txt" label="Please choose your txt format file (which contains a list of gff3 overlapping results files)."/>
+ <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="False" label="tar option" help="This option creates a tar file for all out results" />
+ </inputs>
+
+ <outputs>
+ <data format="txt" name="outputTxt" label="countNumber Output"/>
+ <data name="outputTarFile" format="tar">
+ <filter>tar</filter>
+ </data>
+ </outputs>
+
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/countNumber_parallel_unSQL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/countNumber_parallel_unSQL.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,91 @@
+#! /usr/bin/env python
+
+
+import optparse, os, sys, tarfile, random
+from optparse import OptionParser
+from commons.core.launcher.Launcher import Launcher
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+
+def stop_err(msg):
+ sys.stderr.write('%s\n' % msg)
+ sys.exit()
+
+def toTar(tarFileName, outCountNames):
+ dir = os.path.dirname(tarFileName)
+ tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
+ currentPath = os.getcwd()
+ os.chdir(dir)
+ for file in outCountNames:
+ relativeFileName = os.path.basename(file)
+ tfile.add(relativeFileName)
+ os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
+ tfile.close()
+ os.chdir(currentPath)
+
+def _map(iLauncher, cmd, cmdStart, cmdFinish ):
+ lCmds = []
+ lCmds.append(cmd)
+ lCmdStart = []
+ lCmdStart.append(cmdStart)
+ lCmdFinish = []
+ lCmdFinish.append(cmdFinish)
+ return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
+
+def _createCountNumberCommand(iLauncher, inputFile, outputFile):
+ lArgs = []
+ lArgs.append("%s" % inputFile)
+ lArgs.append("%s" %  outputFile)
+ return iLauncher.getSystemCommand("perl %s/SMART/DiffExpAnal/countNumber.pl " %  os.environ["REPET_PATH"], lArgs)
+
+def __main__():
+ #Parse Command Line
+ parser = optparse.OptionParser()
+ parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
+ parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
+ parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
+ (options, args) = parser.parse_args()
+
+ #Parse the input txt file and read a list of transcripts files.
+ file = open(options.inputFile, "r")
+ lines = file.readlines()
+ inputFileNames = []
+ outCountNames = []
+ outputName = options.outputFile
+ resDirName = os.path.dirname(outputName) + '/'
+
+ #Write output txt file and define all output count file names
+ out = open(outputName, "w")
+ out.write("label\tfiles\tgroup\n")
+ for line in lines:
+ tab = line.split()
+ inputFileNames.append(tab[1])
+ outCountName = resDirName + tab[0] + "_outCount_%s.csv" % random.randrange(0, 10000)
+ outCountNames.append(outCountName)
+ out.write(tab[0] + '\t' + outCountName + '\t' + tab[0][5] + '\n')
+ file.close()
+ out.close()
+
+ #Launch on nodes
+ acronym = "countNumber"
+ jobdb = TableJobAdaptatorFactory.createJobInstance()
+ iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
+ lCmdsTuples = []
+ for i in range(len(inputFileNames)): #Construct the lines commands
+ inputFile = inputFileNames[i]
+ outputFile = outCountNames[i]
+ cmd2Launch = _createCountNumberCommand(iLauncher, inputFile, outputFile)
+ cmdStart = ""
+ cmdFinish = ""
+ lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
+
+
+
+ iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)
+
+
+
+ if options.outputTar != None:
+ toTar(options.outputTar, outCountNames)
+
+
+if __name__=="__main__":__main__()
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/countNumber_parallel_unSQL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/countNumber_parallel_unSQL.xml Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,19 @@
+<tool id="countNumber_parallel_unSQL" name="countNumber (for DEA in parallel)">
+
+ <description>Count the number of reads (annotations) overlapping each transcript (parallelized).</description>
+ <command interpreter="python"> countNumber_parallel_unSQL.py -i $inputTxt -o $outputTxt $tar $outputTarFile
+ </command>
+
+ <inputs>
+ <param name="inputTxt" type="data" format="txt" label="Please choose your txt format file (which contains a list of gff3 overlapping results files)."/>
+ <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="False" label="tar option" help="This option creates a tar file for all out results" />
+ </inputs>
+
+ <outputs>
+ <data format="txt" name="outputTxt" label="countNumber Output"/>
+ <data name="outputTarFile" format="tar">
+ <filter>tar</filter>
+ </data>
+ </outputs>
+
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/deseq.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/deseq.sh Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,21 @@
+#! /bin/sh
+
+#Arguments :
+#$1=targetFile(the list of files) 
+#$2=with or without header
+#$3=with or without replicates
+#$4=OUT_HTML.html
+#$5=OUT_HTML images directory
+#$6=OUT_complete.xls
+#$7=OUT_up.xls
+#$8=OUT_down.xls
+
+#run example: 
+#bash deseq.sh DESeqTools/targetTest.txt 1 1 testOUT_HTML.html /tmp/ testOUT_complet.xls testOUT_up.xls testOUT_down.xls
+
+#echo $5
+#mkdir -p $5 #First, create the images tmp directory given by Galaxy; the -p option also creates any parent directory which doesn't exist.
+
+mkdir -p $5
+MY_PATH=`dirname $0` 
+R --slave --args $1 $2 $3 $4 $5 $6 $7 $8 $0 < $MY_PATH/DESeqTools/anadiffGenes2conds.R
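For reference, a hedged sketch of driving deseq.sh outside Galaxy; the target file ($1) uses the label/files/group layout written by listInputs.pl and loadHTSeqResultFiles.py elsewhere in this changeset, and all file names here are hypothetical:

    import subprocess
    # hypothetical count files; $2 (header) and $3 (replicates) are passed as 1 or 0, as in the example above
    with open("target.txt", "w") as f:
        f.write("label\tfiles\tgroup\n")
        f.write("fileID=1\tcond1_counts.tabular\tgroup1\n")
        f.write("fileID=2\tcond2_counts.tabular\tgroup2\n")
    subprocess.call(["bash", "deseq.sh", "target.txt", "1", "1",
                     "out.html", "/tmp/deseq_imgs", "complete.xls", "up.xls", "down.xls"])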
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/deseq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/deseq.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,22 @@
+<tool id="DESEQ" name="DESEQ for differential expression analysis">
+  <description>Differential expression analysis for read count data</description>
+  <command interpreter="bash"> deseq.sh $inputFile $header $withOutReplicates $outHTML $outHTML.files_path $outComplete $outUP $outDown 2> $log </command>
+
+  <inputs>
+      <param name="inputFile" type="data" label="Input File list" format="txt"/>
+      <param name="header" type="boolean" truevalue="1" falsevalue="0" checked="false" label="If there is a header for your count files, please choose this case."/>
+      <param name="withOutReplicates" type="boolean" truevalue="1" falsevalue="0" checked="false" label="If your data has not replicates, please choose this case."/>
+
+  </inputs>
+
+  <outputs>
+      <data format="HTML" name="outHTML" label="[DESEQ] Output HTML File" help="This output file shows all results images by DESeq analysis"/> 
+      <data format="tabular" name="outComplete" label="[DESEQ] Output complete File"/> 
+      <data format="tabular" name="outUP" label="[DESEQ] Output up File" help="This output file shows the genes of group1 which are overexpressed than those of group2"/> 
+      <data format="tabular" name="outDown" label="[DESEQ] Output down File" help="This output file shows the  genes of group1 which are less expressed than those of group2"/>
+      <data format="txt" name="log" label="[DESEQ] Output log File"/> 
+  </outputs>
+
+  <help>
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/fastq_groomer_parallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/fastq_groomer_parallel.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,115 @@
+import sys, os, optparse, random
+from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter
+
+def stop_err(msg):
+ sys.stderr.write("%s\n" % msg)
+ sys.exit()
+
+def main():
+
+    input_filename = sys.argv[1]  #a txt file
+    input_type = sys.argv[2]
+    output_filename = sys.argv[3] #a txt file
+    output_type = sys.argv[4]
+    force_quality_encoding = sys.argv[5]
+    summarize_input = sys.argv[6] == 'summarize_input'
+    pairedEnd_input = sys.argv[7] 
+    if pairedEnd_input == 'None':
+        pairedEnd_input = None
+    else:
+        output_pairedEndFileName = sys.argv[8]
+
+    if force_quality_encoding == 'None':
+        force_quality_encoding = None
+
+    #Parse the input txt file and read a list of fastq files
+    file = open(input_filename, "r")
+    lines = file.readlines()
+    inputFileNames = []
+    outGroomerNames = []
+    resDirName = os.path.dirname(output_filename) + "/"
+    #Write output txt file and define all output groomer file names
+    outFile = open(output_filename, "w")
+    for line in lines:
+        tab = line.split()
+        inputFileNames.append(tab[1])
+        outGroomerName = resDirName + tab[0] + '_outGroomer_%s.fastq' % random.randrange(0, 10000)
+        outGroomerNames.append(outGroomerName)
+        outFile.write(tab[0] + '\t' + outGroomerName + '\n')
+    outFile.close()
+    file.close()
+
+    if pairedEnd_input != None:
+        inPairedFile = open(pairedEnd_input, "r")
+        lines = inPairedFile.readlines()
+        inputPairedEndFileNames = []
+        outGroomerPairedEndNames = []
+        outPairedEndFile = open(output_pairedEndFileName, "w")
+        for line in lines:
+            tab = line.split()
+            inputPairedEndFileNames.append(tab[1])
+            outGroomerPairedEndName = resDirName + tab[0] + '_outGroomer_pairedEnd_%s.fastq' % random.randrange(0, 10000)
+            outGroomerPairedEndNames.append(outGroomerPairedEndName)
+            outPairedEndFile.write(tab[0] + '\t' + outGroomerPairedEndName + '\n')
+        outPairedEndFile.close()
+        inPairedFile.close()
+    
+    # Write output file
+    aggregator = fastqAggregator()
+    for i in range(len(outGroomerNames)):
+        out = fastqWriter( open( outGroomerNames[i], 'wb' ), format = output_type, force_quality_encoding = force_quality_encoding )
+        read_count = None
+        if summarize_input:
+            reader = fastqVerboseErrorReader
+        else:
+            reader = fastqReader
+        for read_count, fastq_read in enumerate( reader( open( inputFileNames[i] ), format = input_type, apply_galaxy_conventions = True ) ):
+            if summarize_input:
+                aggregator.consume_read( fastq_read )
+            out.write( fastq_read )
+        out.close()
+
+        if read_count is not None:
+            print "Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type )
+            if input_type != output_type and 'solexa' in [ input_type, output_type ]:
+                print "Converted between Solexa and PHRED scores."
+            if summarize_input:
+                print "Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() )  or "None" )
+                ascii_range = aggregator.get_ascii_range()
+                decimal_range = aggregator.get_decimal_range()
+                print "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) #print using repr, since \x00 (null) causes info truncation in galaxy when printed
+                print "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] )
+        else:
+            print "No valid FASTQ reads were provided."
+
+
+    # Write output pairedEnd file
+    if pairedEnd_input != None:
+        aggregator = fastqAggregator()
+        for i in range(len(outGroomerPairedEndNames)):
+            outPair = fastqWriter(open(outGroomerPairedEndNames[i], 'wb'), format = output_type, force_quality_encoding = force_quality_encoding)
+            read_count = None
+            if summarize_input:
+                reader = fastqVerboseErrorReader
+            else:
+                reader = fastqReader
+            for read_count, fastq_read in enumerate(reader(open(inputPairedEndFileNames[i]), format=input_type, apply_galaxy_conventions=True)):
+                if summarize_input:
+                    aggregator.consume_read(fastq_read)
+                outPair.write(fastq_read)
+            outPair.close()
+
+            if read_count is not None:
+                print "Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type )
+                if input_type != output_type and 'solexa' in [ input_type, output_type ]:
+                    print "Converted between Solexa and PHRED scores."
+                if summarize_input:
+                    print "Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() )  or "None" )
+                    ascii_range = aggregator.get_ascii_range()
+                    decimal_range = aggregator.get_decimal_range()
+                    print "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) #print using repr, since \x00 (null) causes info truncation in galaxy when printed
+                    print "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] )
+            else:
+                print "No valid paired-end FASTQ reads were provided."
+
+if __name__ == "__main__": main()
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/fastq_groomer_parallel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/fastq_groomer_parallel.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,122 @@
+<tool id="fastq_groomer_parallel" name="FASTQ Groomer (for DEA)" version="1.0.0">
+  <description>convert between various FASTQ quality formats for a list of inputs.</description>
+  <command interpreter="python">fastq_groomer_parallel.py '$input_file' '$input_type' '$output_file'
+#if str( $options_type['options_type_selector'] ) == 'basic':
+#if str( $input_type ) == 'cssanger':
+'cssanger'
+#else:
+'sanger'
+#end if
+'ascii' 'summarize_input'
+#else:
+'${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
+#end if
+#if $OptionPairedEnd.pairedEnd == "Yes":
+'$OptionPairedEnd.pairedEnd_input' '$output_pairedEndFile'
+#else:
+'None' 'None'
+#end if
+</command>
+  <inputs>
+    <param name="input_file" type="data" format="txt" label="The File list to groom" />
+    <param name="input_type" type="select" label="Input FASTQ quality scores type">
+      <option value="solexa">Solexa</option>
+      <option value="illumina">Illumina 1.3-1.7</option>
+      <option value="sanger" selected="True">Sanger</option>
+      <option value="cssanger">Color Space Sanger</option>
+    </param>
+    <conditional name="options_type">
+    <param name="options_type_selector" type="select" label="Advanced Options">
+      <option value="basic" selected="True">Hide Advanced Options</option>
+      <option value="advanced">Show Advanced Options</option>
+    </param>
+    <when value="basic">
+      <!-- no options -->
+    </when>
+    <when value="advanced">
+      <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
+        <option value="solexa">Solexa</option>
+        <option value="illumina">Illumina 1.3+</option>
+        <option value="sanger" selected="True">Sanger (recommended)</option>
+        <option value="cssanger">Color Space Sanger</option>
+      </param>
+      <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
+        <option value="None">Use Source Encoding</option>
+        <option value="ascii" selected="True">ASCII</option>
+        <option value="decimal">Decimal</option>
+      </param>
+      <param name="summarize_input" type="select" label="Summarize input data">
+        <option value="summarize_input" selected="True">Summarize Input</option>
+        <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
+      </param>
+    </when>
+  </conditional>
+
+  <conditional name="OptionPairedEnd">
+   <param name="pairedEnd" type="select" label="For paired-end analysis.">
+   <option value="Yes">Yes</option>
+   <option value="No" selected="true">No</option>
+   </param>
+   <when value="Yes">
+   <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
+   </when>
+   <when value="No">
+   </when>
+  </conditional>
+
+  </inputs>
+
+  <outputs>
+    <data name="output_file" format="txt">
+    </data>
+    <data format="txt" name="output_pairedEndFile" label="output Paired-end fastq files">
+     <filter>(OptionPairedEnd['pairedEnd']=='Yes')</filter>
+    </data>
+  </outputs>
+  <help>
+**What it does**
+
+This tool offers several conversion options relating to the FASTQ format.
+
+When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
+
+When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). 
+
+When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
+
+When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
+
+-----
+
+**Quality Score Comparison**
+
+::
+
+    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
+    ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+    ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+    |                         |    |        |                              |                     |
+   33                        59   64       73                            104                   126
+  
+   S - Sanger       Phred+33,  93 values  (0, 93) (0 to 60 expected in raw reads)
+   I - Illumina 1.3 Phred+64,  62 values  (0, 62) (0 to 40 expected in raw reads)
+   X - Solexa       Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads)
+
+Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
+
+.. class:: infomark
+
+Output from Illumina 1.8+ pipelines is Sanger encoded.
+
+------
+
+**Citation**
+
+If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
+
+
+.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
+
+  </help>
+</tool>
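For completeness, the Solexa/PHRED mapping referenced in the help above (Cock et al. 2009) is the usual log-odds interconversion; a small sketch, not part of the wrapper itself:

    import math

    def solexa_to_phred(q_solexa):
        # Q_phred = 10 * log10(10**(Q_solexa / 10) + 1)
        return 10.0 * math.log10(10.0 ** (q_solexa / 10.0) + 1.0)

    def phred_to_solexa(q_phred):
        # Q_solexa = 10 * log10(10**(Q_phred / 10) - 1)
        return 10.0 * math.log10(10.0 ** (q_phred / 10.0) - 1.0)

    print(round(solexa_to_phred(-5.0), 2))   # ~1.19: the lowest Solexa score maps to a small positive PHRED score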
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,168 @@
+import sys, os, optparse,shutil, random
+from commons.core.launcher.Launcher import Launcher
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+from commons.core.utils.FileUtils import FileUtils
+
+def _map(iLauncher, cmd, cmdStart, cmdFinish ):
+ lCmds = []
+ lCmds.extend(cmd)
+ lCmdStart = []
+ lCmdStart.extend(cmdStart)
+ lCmdFinish = []
+ lCmdFinish.extend(cmdFinish)
+ return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
+
+def splitFastQ(fileName, nbOfSeqPerBatch):
+ nbOfLinesPerFile = nbOfSeqPerBatch * 4
+ lOutput = []
+ filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
+ resDir = os.path.dirname(fileName)
+ with open(fileName) as inF:
+ fileNb = 1
+ line = inF.readline()
+ if not line or nbOfLinesPerFile == 0:
+ outFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)
+ lOutput.append(outFileName)
+ f = open(outFileName, "wb")
+ shutil.copyfileobj(open(fileName, "rb"), f)
+ f.close()
+ else:
+ while line:
+ outFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)
+ lOutput.append(outFileName)
+ with open(outFileName, "w") as outF:
+ lineNb = 1
+ while lineNb <= nbOfLinesPerFile and line:
+ outF.write(line)
+ line = inF.readline()
+ lineNb += 1
+ fileNb += 1
+ return lOutput
+
+def joinFastQ(dCutOut2Out):
+ for key in dCutOut2Out.keys():
+ FileUtils.catFilesFromList(dCutOut2Out[key],key, False)
+
+def _createFastqGroomerCode(outGroomerNames, inputFileNames, input_type, output_type, force_quality_encoding, summarize_input):
+ cmd2Launch = []
+ cmd2Launch.append("log = 0")
+ cmd2Launch.append("from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter")
+ cmd2Launch.append("aggregator = fastqAggregator()")
+ cmd2Launch.append("out = fastqWriter( open( '%s', 'wb' ), format = '%s', force_quality_encoding = '%s')" % (outGroomerNames,output_type,force_quality_encoding))
+ cmd2Launch.append("read_count = None")
+ if summarize_input:
+ cmd2Launch.append("reader = fastqVerboseErrorReader")
+ else:
+ cmd2Launch.append("reader = fastqReader")
+ cmd2Launch.append("for read_count, fastq_read in enumerate( reader( open( '%s' ), format = '%s', apply_galaxy_conventions = True ) ):" % (inputFileNames, input_type))
+ if summarize_input:
+ cmd2Launch.append("\taggregator.consume_read( fastq_read )")
+ cmd2Launch.append("\tout.write( fastq_read )")
+ cmd2Launch.append("out.close()")
+ cmd2Launch.append("if read_count is not None:")
+ #cmd2Launch.append("\tprint 'Groomed %s %s reads into %s reads.' % ( read_count + 1, %s, %s )" % ('%i', '%s', '%s', input_type,output_type))
+ cmd2Launch.append("\tif '%s' != '%s' and 'solexa' in [ '%s', '%s' ]:" % (input_type, output_type, input_type, output_type))
+ cmd2Launch.append("\t\tprint 'Converted between Solexa and PHRED scores.'")
+ if summarize_input:
+ cmd2Launch.append("\tprint 'Based upon quality and sequence, the input data is valid for: %s' % ( ', '.join( aggregator.get_valid_formats() )  or 'None' )")
+ cmd2Launch.append("\tascii_range = aggregator.get_ascii_range()")
+ cmd2Launch.append("\tdecimal_range =  aggregator.get_decimal_range()")
+ cmd2Launch.append("\tprint 'Input ASCII range: %s(%i) - %s(%i)' % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) )")
+ cmd2Launch.append("\tprint 'Input decimal range: %i - %i' % ( decimal_range[0], decimal_range[1] ) ")
+ cmd2Launch.append("else:")
+ cmd2Launch.append("\tprint 'No valid FASTQ reads were provided.'")
+ cmd2Launch.append("\tlog = 255")
+ return cmd2Launch
+
+def stop_err(msg):
+ sys.stderr.write("%s\n" % msg)
+ sys.exit()
+
+def main():
+
+ input_filename = sys.argv[1]  #a txt file
+ input_type = sys.argv[2]
+ output_filename = sys.argv[3] #a txt file
+ output_type = sys.argv[4]
+ force_quality_encoding = sys.argv[5]
+ summarize_input = sys.argv[6] == 'summarize_input'
+ pairedEnd_input = sys.argv[7]
+ if pairedEnd_input == 'None':
+ pairedEnd_input = None
+ else:
+ output_pairedEndFileName = sys.argv[8]
+
+ if force_quality_encoding == 'None':
+ force_quality_encoding = None
+
+ #Parse the input txt file and read a list of fastq files
+ file = open(input_filename, "r")
+ lines = file.readlines()
+ inputFileNames = []
+ outGroomerNames = []
+ resDirName = os.path.dirname(output_filename) + "/"
+ #Write output txt file and define all output groomer file names
+ outFile = open(output_filename, "w")
+ for line in lines:
+ tab = line.split()
+ inputFileNames.append(tab[1])
+ outGroomerName = resDirName + tab[0] + '_outGroomer_%s.fastq' % random.randrange(0, 10000)
+ outGroomerNames.append(outGroomerName)
+ outFile.write(tab[0] + '\t' + outGroomerName + '\n')
+ outFile.close()
+ file.close()
+
+ if pairedEnd_input != None:
+ inPairedFile = open(pairedEnd_input, "r")
+ lines = inPairedFile.readlines()
+ inputPairedEndFileNames = []
+ outGroomerPairedEndNames = []
+ outPairedEndFile = open(output_pairedEndFileName, "w")
+ for line in lines:
+ tab = line.split()
+ inputPairedEndFileNames.append(tab[1])
+ outGroomerPairedEndName = resDirName + tab[0] + '_outGroomer_pairedEnd_%s.fastq' % random.randrange(0, 10000)
+ outGroomerPairedEndNames.append(outGroomerPairedEndName)
+ outPairedEndFile.write(tab[0] + '\t' + outGroomerPairedEndName + '\n')
+ outPairedEndFile.close()
+ inPairedFile.close()
+
+ acronym = "fastqGroomer"
+ jobdb = TableJobAdaptatorFactory.createJobInstance()
+ iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
+ lCmdsTuples = []
+ dCutOut2Out = {}
+ lAllFile2remove = []
+ # Write output file
+ for i in range(len(outGroomerNames)):
+ lCutInputFile = splitFastQ(inputFileNames[i], 20000)
+ lAllFile2remove.extend(lCutInputFile)
+ lCutOutput = []
+ for cutInput in lCutInputFile:
+ cutOutput = "%s_out" % cutInput
+ lCutOutput.append(cutOutput)
+ lAllFile2remove.extend(lCutOutput)
+ cmd2Launch = _createFastqGroomerCode(cutOutput, cutInput, input_type, output_type, force_quality_encoding, summarize_input)
+ cmdStart = []
+ cmdFinish = []
+ lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
+ dCutOut2Out[outGroomerNames[i]] = lCutOutput
+ if pairedEnd_input != None:
+ lCutInputFile = splitFastQ(inputPairedEndFileNames[i], 20000)
+ lAllFile2remove.extend(lCutInputFile)
+ lCutOutput = []
+ for cutInput in lCutInputFile:
+ cutOutput = "%s_out" % cutInput
+ lCutOutput.append(cutOutput)
+ lAllFile2remove.extend(lCutOutput)
+ cmd2Launch = _createFastqGroomerCode(cutOutput, cutInput, input_type, output_type, force_quality_encoding, summarize_input)
+ cmdStart = []
+ cmdFinish = []
+ lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
+ dCutOut2Out[outGroomerPairedEndNames[i]] =  lCutOutput
+ iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, False)
+
+ joinFastQ(dCutOut2Out)
+ FileUtils.removeFilesFromListIfExist(lAllFile2remove)
+
+if __name__ == "__main__": main()
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,122 @@
+<tool id="fastq_groomer_parallel_unSQL" name="FASTQ Groomer (for DEA in parallel)" version="1.0.0">
+  <description>convert between various FASTQ quality formats for a list of inputs (parallelized).</description>
+  <command interpreter="python">fastq_groomer_parallel_unSQL.py '$input_file' '$input_type' '$output_file'
+#if str( $options_type['options_type_selector'] ) == 'basic':
+#if str( $input_type ) == 'cssanger':
+'cssanger'
+#else:
+'sanger'
+#end if
+'ascii' 'summarize_input'
+#else:
+'${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
+#end if
+#if $OptionPairedEnd.pairedEnd == "Yes":
+'$OptionPairedEnd.pairedEnd_input' '$output_pairedEndFile'
+#else:
+'None' 'None'
+#end if
+</command>
+  <inputs>
+    <param name="input_file" type="data" format="txt" label="The File list to groom" />
+    <param name="input_type" type="select" label="Input FASTQ quality scores type">
+      <option value="solexa">Solexa</option>
+      <option value="illumina">Illumina 1.3-1.7</option>
+      <option value="sanger" selected="True">Sanger</option>
+      <option value="cssanger">Color Space Sanger</option>
+    </param>
+    <conditional name="options_type">
+    <param name="options_type_selector" type="select" label="Advanced Options">
+      <option value="basic" selected="True">Hide Advanced Options</option>
+      <option value="advanced">Show Advanced Options</option>
+    </param>
+    <when value="basic">
+      <!-- no options -->
+    </when>
+    <when value="advanced">
+      <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
+        <option value="solexa">Solexa</option>
+        <option value="illumina">Illumina 1.3+</option>
+        <option value="sanger" selected="True">Sanger (recommended)</option>
+        <option value="cssanger">Color Space Sanger</option>
+      </param>
+      <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
+        <option value="None">Use Source Encoding</option>
+        <option value="ascii" selected="True">ASCII</option>
+        <option value="decimal">Decimal</option>
+      </param>
+      <param name="summarize_input" type="select" label="Summarize input data">
+        <option value="summarize_input" selected="True">Summarize Input</option>
+        <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
+      </param>
+    </when>
+  </conditional>
+
+  <conditional name="OptionPairedEnd">
+   <param name="pairedEnd" type="select" label="For paired-end analysis.">
+   <option value="Yes">Yes</option>
+   <option value="No" selected="true">No</option>
+   </param>
+   <when value="Yes">
+   <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
+   </when>
+   <when value="No">
+   </when>
+  </conditional>
+
+  </inputs>
+
+  <outputs>
+    <data name="output_file" format="txt">
+    </data>
+    <data format="txt" name="output_pairedEndFile" label="output Paired-end fastq files">
+     <filter>(OptionPairedEnd['pairedEnd']=='Yes')</filter>
+    </data>
+  </outputs>
+  <help>
+**What it does**
+
+This tool offers several conversion options relating to the FASTQ format.
+
+When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
+
+When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). 
+
+When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
+
+When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
+
+-----
+
+**Quality Score Comparison**
+
+::
+
+    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
+    ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+    ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
+    |                         |    |        |                              |                     |
+   33                        59   64       73                            104                   126
+  
+   S - Sanger       Phred+33,  93 values  (0, 93) (0 to 60 expected in raw reads)
+   I - Illumina 1.3 Phred+64,  62 values  (0, 62) (0 to 40 expected in raw reads)
+   X - Solexa       Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads)
+
+Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
+
+.. class:: infomark
+
+Output from Illumina 1.8+ pipelines is Sanger encoded.
+
+------
+
+**Citation**
+
+If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
+
+
+.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
+
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/gsnap.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/gsnap.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,42 @@
+<tool id="gsnap" name="gsnap">
+
+ <description>GSNAP version 2012-12-20. 
+              GMAP: A Genomic Mapping and Alignment Program for mRNA and EST Sequences, and
+                 GSNAP: Genomic Short-read Nucleotide Alignment Program 
+    </description>
+    
+ <command interpreter="python"> wrappGSNAP.py 
+ -d $genomeName -i $inputFasta -k $kmer -q $inputFastq -A $outputFormat -o $outputSam
+
+ #if $optionPairedEnd.paire == 'Yes':
+ -p $optionPairedEnd.pairedEndFile
+ #end if
+
+ </command>
+
+ <inputs>
+ <param name="inputFasta" type="data" format="fasta" label="Reference genome file, fasta format."/>
+ <param name="genomeName" type="text" value="Arabidopsis_Thaliana" label="Please give the reference genome a name! (Ex. Arabidopsis_Thaliana)"/>
+ <param name="kmer" type="integer" value="12" label="Choose kmer value (superior or egal at 16), a big kmer value can take more RAM(4Go)." />
+ <param name="inputFastq" type="data" format="fastq" label="Input fastq file."/>
+ <param name="outputFormat" type="text" format="sam" label="Choose an output format [sam, goby (need to re-compile with appropriate options)]."/>
+
+ <conditional name="optionPairedEnd">
+ <param name="paire" type="select" label="pairedEnd fastq file">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="pairedEndFile" type="data" format="fastq"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data format="sam" name="outputSam" label="gsnap Output"/>
+ </outputs>
+
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/gsnap_parallel_unSQL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/gsnap_parallel_unSQL.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,195 @@
[hunk not rendered: the 195 added lines of gsnap_parallel_unSQL.py were captured here as a raw, partially truncated byte string. Recoverable structure: imports of Launcher, TableJobAdaptatorFactory and FileUtils from commons.core; helpers stop_err, toTar, joinSAM, _map, _createGsnapSplicingOptions, _createGsnapPairedEndOptions and _createGsnapCommand; and a __main__ that reads the input txt list of FASTQ files (plus an optional paired-end list), builds the GMAP/GSNAP index via gmap_setup and "make coords/gmapdb/install" jobs, launches gsnap on each sample split into numberOfBatch = 20 batches ("-q batch/numberOfBatch ... --no-sam-headers"), concatenates the per-batch SAM outputs with joinSAM, removes the temporary files and optionally tars the results.]
b
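As far as it can be recovered from the dump above, gsnap_parallel_unSQL.py parallelizes each sample by letting every job process one read slice out of numberOfBatch = 20 via GSNAP's -q (--part) option, then concatenating the per-batch SAM files. A hedged sketch of the command construction (genome name, index path and file names are hypothetical):

    # assumption: "gsnap -q i/n" processes only the i-th of every n input reads,
    # so the 20 jobs split one FASTQ without physically cutting it
    numberOfBatch = 20
    for j in range(numberOfBatch):
        cmd = ("gsnap -d myGenome -k 12 -D /path/to/index -A sam "
               "-q %d/%d --no-sam-headers sample_1.fastq > sample_1.fastq_out_%d"
               % (j, numberOfBatch, j))
        print(cmd)
    # the per-batch SAM outputs are later concatenated (joinSAM) into one SAM per sample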
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/gsnap_parallel_unSQL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/gsnap_parallel_unSQL.xml Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,46 @@
+<tool id="gsnap_parallel_unSQL" name="GSNAP (for DEA in parallel)" version="1.0.0">
+  <description>Genomic Short-read Nucleotide Alignment Program in parallel for Differential Expression Analysis (DEA)</description>
+  <command interpreter="python">gsnap_parallel_unSQL.py 
+ -i $genome_file
+ -q $fastq_file_list  
+ -o $output_file_list
+ -d $genome_prefix 
+ -k $kmer_size
+ #if $OptionPairedEnd.pairedEnd == 'Yes':
+  -p $pairedEnd_input
+ #end if
+
+</command>
+  <inputs>
+    <param name="genome_file" type="data" format="fasta" label="Genome fasta file" />
+    <param name="fastq_file_list" type="data" format="txt" label="Fastq file list" />
+    <param name="genome_prefix" type="text" format="txt" label="Prefix used to name genome index " />
+    <param name="kmer_size" type="integer" value="12" label="Kmer size"/>
+    
+  <conditional name="OptionPairedEnd">
+   <param name="pairedEnd" type="select" label="For paired-end analysis.">
+   <option value="Yes">Yes</option>
+   <option value="No" selected="true">No</option>
+   </param>
+   <when value="Yes">
+   <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
+   </when>
+   <when value="No">
+   </when>
+  </conditional>
+
+  </inputs>
+
+  <outputs>
+    <data name="output_file_list" format="txt"/>
+  </outputs>
+  <help>
+**What it does**
+
+To complete
+
+**Citation**
+
+If you use this tool, please cite "Thomas D. Wu and Serban Nacu, Fast and SNP-tolerant detection of complex variants and splicing in short reads, Bioinformatics 2010 26:873-881"
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/listInputs.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/listInputs.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,13 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $in_file1 = $ARGV[0];
+my $in_file2 = $ARGV[1];
+my $out_file = $ARGV[2];
+
+open(OUT, ">$out_file");
+print OUT "label\tfiles\tgroup\n";
+print OUT "fileID=1\t$in_file1\tgroup1\n";
+print OUT "fileID=2\t$in_file2\tgroup2\n";
+close(OUT);
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/listInputs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/listInputs.xml Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,25 @@
+<tool id="listInputs" name="listInputs">
+ <description>Give a list of input files from different conditions/groups for DESeq analysis; DESeq can then load these input files from the given list.</description>
+ <command interpreter="perl"> listInputs.pl $inputFromGroup1 $inputFromGroup2 $output
+ </command>
+
+ <inputs>
+ <param name="inputFromGroup1" type="data" format="tabular" label="Please choose your file from group1."/>
+ <param name="inputFromGroup2" type="data" format="tabular" label="Please choose your file from group2."/>
+ </inputs>
+
+ <outputs>
+ <data format="txt" name="output" label="listInputs Output"/>
+ </outputs>
+
+ <help>
+ This tool facilitates loading the input files for the DESeq tool.
+ Example:
+ From group1, we have input1.
+ From group2, we have input2.
+ This tool will give us a list like:
+ fileID=1 input1 group1
+ fileID=2 input2 group2
+ Where the value of fileID is unique for each input file. 
+ </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/loadHTSeqResultFiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/loadHTSeqResultFiles.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import optparse, sys
+
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-i', '--inputs', dest='inputFiles', default=None, help='several input files (separated by @ or @@).' )
+    parser.add_option( '-o', '--output', dest='outputFile', default=None, help='The output list of HTSeq result files (.tabular) in txt format.' )
+    ( options, args ) = parser.parse_args()
+
+    
+    out = open(options.outputFile, 'w')
+    out.write("label\tfiles\tgroup\n")
+    if options.inputFiles == None:
+        raise Exception, 'input file name is not defined!'
+    
+    groupCount = 1
+    fileCount = 0        
+    
+    inputFiles = sys.argv[4:]  #everything after "-o <output> -i": the tabular files and the "@" group separators
+    print '\n\nthe length of inputfiles is : %s \n' % len(inputFiles)
+    i = 0
+    while i < (len(inputFiles)-1):
+        if inputFiles[i] == "@":
+            i += 1
+            fileCount = 1
+            groupCount += 1
+            out.write("Group%s_%s\t%s\t%s\n" % (groupCount, fileCount, inputFiles[i], groupCount))
+        else:
+            fileCount += 1
+            out.write("Group%s_%s\t%s\t%s\n" % (groupCount, fileCount, inputFiles[i], groupCount))
+        i += 1
+           
+    out.close()   
+    
+        
+
+if __name__=="__main__": __main__()
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/loadHTSeqResultFiles.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/loadHTSeqResultFiles.xml Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,29 @@
+<tool id="load_HTSeqResultFiles" name="load HTSeqResultFiles" >
+  <description>To load several HTSeq result files from different conditions.</description>
+  <command interpreter="python"> loadHTSeqResultFiles.py -o $htseqRes_out
+ -i
+ #for $i in $condition_groups
+ #for $j in $i.replicates
+ $j.tabular_file
+ #end for
+ @
+ #end for
+
+</command>
+  <inputs>
+   <repeat name="condition_groups" title="Condition group" min="2">
+      <repeat name="replicates" title="Replicate">
+     <param name="tabular_file" format="tabular" type="data" label="TABULAR file."/>
+          </repeat>
+        </repeat>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="htseqRes_out" label="HTSeq result files" help="This program gives you a list of files you choose for the following data analysis."/>
+
+</outputs>
+<help>
+</help>
+
+</tool>
+
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/loadMultiFastqFiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/loadMultiFastqFiles.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+import optparse, sys
+
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('-i', '--inputs', dest='inputFiles', default=None, help='several input files (separated by @ or @@).' )
+    parser.add_option( '-o', '--output', dest='outputSingleFile', default=None, help='The output list of fastq files in txt format.' )
+    parser.add_option( '', '--pairedEnd', dest='outputPaireFile', default=None, help='the paired-end option uploads the corresponding paired-end complementary fastq files' )
+    ( options, args ) = parser.parse_args()
+
+    
+
+    if options.outputSingleFile == None: 
+        raise Exception, 'OutSingleFile txt file name is not defined!'
+    else:
+        outSingle = open(options.outputSingleFile, 'w')
+    
+    if options.inputFiles == None:
+        raise Exception, 'input file name is not defined!'
+    
+    groupCount = 1
+    fileCount = 0        
+    
+    if options.outputPaireFile == None:
+        inputFiles = sys.argv[4:]
+        i = 0
+        while i < (len(inputFiles)-1):
+            if inputFiles[i] == "@":
+                i += 1
+                fileCount = 1
+                groupCount += 1
+                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
+                
+            else:
+                fileCount += 1
+                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
+                
+            i += 1
+    else:
+        inputFiles = sys.argv[6:]
+        print '\n\nthe length of inputfiles is : %s \n' % len(inputFiles)
+        outPaire = open(options.outputPaireFile, 'w')
+        i = 0
+        while i < (len(inputFiles)-1):
+            if inputFiles[i] == "@@":
+                i += 1
+                outPaire.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
+            elif inputFiles[i] == "@":
+                i += 1
+                fileCount = 1
+                groupCount += 1
+                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
+            else:
+                fileCount += 1
+                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
+                
+            i += 1
+                
+        
+        
+        outPaire.close()
+           
+    outSingle.close()   
+    
+        
+
+if __name__=="__main__": __main__()
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/loadMultiFastqFiles.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/loadMultiFastqFiles.sh Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+OUTFile=${1}
+shift
+groupCount=1
+replicateNumber=1
+
+arrayZ=( $@ )
+#remove the last '@' symbol given by the command line
+unset arrayZ[${#arrayZ[@]}-1]
+
+for FILE in ${arrayZ[@]}
+do
+ #if a new group of fastq, re-count the replicateNumber
+ if echo $FILE | grep -q "@" 
+ then 
+ groupCount=$(($groupCount + 1))
+ replicateNumber=1
+ else
+ echo -e "Group${groupCount}_${replicateNumber}\t${FILE}" >>${OUTFile}
+ replicateNumber=$(($replicateNumber + 1))
+   fi
+done
+
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/loadMultiFastqFiles.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/loadMultiFastqFiles.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,75 @@
+<tool id="load_multiFASTQFiles" name="load_multiFASTQfiles" >
+  <description>To load several FASTQ files from different conditions.</description>
+  <command interpreter="python"> loadMultiFastqFiles.py -o $multiFASTQfiles_out
+#if $single_end_paired_end.mapping_mode == 'single':
+ -i
+ #for $i in $single_end_paired_end.condition_groups
+ #for $j in $i.replicates
+ $j.fastq_file
+ #end for
+ @
+ #end for
+
+#elif $single_end_paired_end.mapping_mode == 'paired':
+
+ --pairedEnd $multiFASTQfiles_paired_end_out
+ -i
+ #for $i in $single_end_paired_end.condition_groups
+ #for $j in $i.replicates
+ $j.fastq_file
+ @@
+ $j.fastq_paired_end_file
+ #end for
+ @
+ #end for
+#end if
+
+</command>
+  <inputs>
+   <conditional name="single_end_paired_end">
+    <param name="mapping_mode" type="select" label="The uploading fastq files for single-end or paired-end mapping mode.">
+ <option value="single">Single-End</option>
+ <option value="paired">Paire-End</option>
+    </param>
+            <when value="single">
+    <repeat name="condition_groups" title="Condition group" min="2">
+        <repeat name="replicates" title="Replicate">
+         <param name="fastq_file" format="fastq" type="data" label="FASTQ file. Can show the sequences quality."/>
+         </repeat>
+         </repeat>
+    </when>
+    <when value="paired">
+    <repeat name="condition_groups" title="Condition group" min="2">
+           <repeat name="replicates" title="Replicate">
+         <param name="fastq_file" format="fastq" type="data" label="FASTQ file. Can show the sequences quality."/>
+         <param name="fastq_paired_end_file" format="fastq" type="data" label="fastq paired end complementary file" help="Add the corresponding paired end file for paired end mapping"/>
+         </repeat>
+         </repeat>
+    </when>
+
+     </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="multiFASTQfiles_out" label="loadMultiFASTQFiles result" help="This program gives you a list of files you choose for the following data analysis."/>
+    <data format="txt" name="multiFASTQfiles_paired_end_out" label="loadMultiFASTQFiles for paired end result" help="This program gives you a list of files you choose for the following data analysis.">
+     <filter>(single_end_paired_end['mapping_mode']=='paired')</filter>
+
+    </data>
+</outputs>
+<help>
+ **This tool helps upload several datasets for the differential expression pipeline. Before clicking 'Execute', you should** Ctrl-click here_ **first to open the pipeline in a new page.**
+
+ .. _here: http://127.0.0.1:8085/u/yufei-luo/w/differentialexpressiondeseq-with-replicates 
+</help>
+
+</tool>
+
+
+
+
+
+
+
+
+
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/testR.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/testR.R Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,93 @@
+#!/usr/bin/env Rscript
+
+library(DESeq)
+library(hexbin)
+library(latticeExtra)
+library(gplots)
+library(geneplotter)
+library(Biobase)
+
+##In a file called test_args.R
+args <- commandArgs()
+
+
+fileName <- args[4]
+colNames <- as.integer(unlist(strsplit(args[5], ",")))
+colCond1 <- as.integer(unlist(strsplit(args[6], ",")))
+colCond2 <- as.integer(unlist(strsplit(args[7], ",")))
+OUTPUTCSV <- args[8]
+OUTPUTPNG <- args[9]
+
+if(colNames[1]!=0){
+ countsTable <- read.delim(fileName, row.names=1)
+ conditions <- c((colNames[length(colNames)]+1):ncol(countsTable))
+} else if(colNames[1]==0){
+ countsTable <- read.delim(fileName)
+ conditions <- c(1:ncol(countsTable))
+ rownames(countsTable) <- paste( "Gene", 1:nrow(countsTable), sep="_" )}
+
+for(i in colCond1){conditions[i] = "A"}
+for(i in colCond2){conditions[i] = "B"}
+conditions
+#analysis with DESeq
+cds <- newCountDataSet( countsTable, conditions )
+cds <- estimateSizeFactors( cds )
+cds <- estimateVarianceFunctions( cds )
+result <- nbinomTest( cds, "A", "B" )
+#store the result in a .tsv output file
+write.table(result, OUTPUTCSV, sep = " ", quote = FALSE, col.names = NA)
+
+#figures for DE analysis
+#pdf( OUTPUTPNG, width=4, height=4 )
+png( filename=OUTPUTPNG, width=700, height=700 )
+#png format is not as clear as pdf format!!!!!!!!!!!!!!!!!!!!!!!!!
+print(xyplot(
+ log2FoldChange ~ I(baseMean),
+ result,
+ pch=16, cex=.3,
+ col=ifelse(result$padj < .1, "#FF000040","#00000040" ),
+ panel = function( x, y, col, ...) {
+ above <- (y > 5.8)
+ below <- (y < -5.8)
+ inside <- !( above | below )
+ panel.xyplot( x=x[inside], y=y[inside], col=col[inside], ...)
+ panel.arrows( x[above], 5.8, x[above], 5.95, col=col[above],length=".1", unit="native" )
+ panel.arrows( x[below], -5.8, x[below], -5.95, col=col[below],length=".1", unit="native" ) },
+ axis = function(side, ...) {
+ if( side=="left") {
+ panel.axis( side, outside=TRUE, at=seq(-14,14,by=1), labels=FALSE )
+ panel.axis( side, outside=TRUE, at=seq(-10,10,by=5), labels=TRUE )
+ }
+ if( side=="bottom") {
+ panel.axis( side, outside=TRUE, at=seq(-2,10,by=1), rot=0,
+ labels = do.call( expression,
+ lapply( seq(-2,10,by=1), function(a)
+ substitute( 10^b, list(b=a) ) ) ) )
+ } },
+ xlab = "mean", ylab = "log2 fold change",
+ scales = list(x = list( log=TRUE ),y = list( log=FALSE, limits=c( -6, 6 ) ) ) ))
+dev.off()
+
+#The volcano plot 
+#pdf( "vulcano_fly.pdf", width=4, height=4 )
+#print(xyplot( -log10( pval ) ~ log2FoldChange,
+# result,
+# pch=20, cex=.2,
+# col=ifelse( result$padj<.1, "#FF000050", "#00000050" ),
+# axis = function( side, ... ) {
+# if( side=="bottom") {
+# panel.axis( side, outside=TRUE, at=seq(-14,14,by=1), labels=FALSE )
+# panel.axis( side, outside=TRUE, at=seq(-10,10,by=5), labels=TRUE )
+# }
+# if( side=="left") {
+# panel.axis( side, outside=TRUE, at=seq(0,25,by=1), labels=FALSE )
+# panel.axis( side, outside=TRUE, at=seq(0,25,by=5),
+# labels = do.call( expression,
+# lapply( seq(0,25,by=5), function(a)
+# substitute( 10^-b, list(b=a) ) ) ) )
+# } },
+# xlab = "log2 fold change", ylab = "p value",
+# scales = list(
+# x = list( limits=c( -6, 6 ) ),
+# y = list( limits=c( 0, 25 ) ) ) ))
+#dev.off()
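The column arguments consumed above (args[6]/args[7] as comma-separated column lists) assign count-table columns to the two conditions; a worked example of that mapping, in Python only for illustration of the 1-based R indexing:

    # mirrors the R loops "for(i in colCond1){conditions[i] = 'A'}" for a call like
    # "testR.R <counts> 0 1,3 2,4 out.csv out.png" from the testR.sh examples below
    colCond1 = [int(c) for c in "1,3".split(",")]   # columns for condition A
    colCond2 = [int(c) for c in "2,4".split(",")]   # columns for condition B
    conditions = [None] * 4
    for i in colCond1:
        conditions[i - 1] = "A"                     # R is 1-based, Python 0-based
    for i in colCond2:
        conditions[i - 1] = "B"
    print(conditions)   # ['A', 'B', 'A', 'B'] -> the factor passed to newCountDataSet()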
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/testR.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/testR.sh Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,11 @@
+#! /bin/sh
+
+#cat testR.R | R --slave --args $1 $2 $3 $4 $5 $6 < DiffExpAnal/testR.R
+
+#ex1. sh testR.sh fly_RNA_counts.tsv 0 1,3 2,4 output_fly.csv output_fly.png
+#ex2. sh testR.sh NeuralStemCellData.tab 1 2,3,4 5,6 output_modif.csv output_modif.png
+
+#cat /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/testR.R | R --slave --args $1 $2 $3 $4 $5 $6 < /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/testR.R
+
+#$1=targetFile(the list of files) $2=with or without replicate
+cat /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R| R --slave --args $1 $2 < /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/tophat_parallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/tophat_parallel.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,325 @@\n+\n+#!/usr/bin/env python\n+\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile,random\n+\n+def stop_err( msg ):\n+    sys.stderr.write( "%s\\n" % msg )\n+    sys.exit()\n+\n+def toTar(tarFileName, accepted_hits_outputNames):\n+    fileName = os.path.splitext(tarFileName)[0]\n+    fileNameBaseName = os.path.basename(fileName)\n+    dir = os.path.dirname(tarFileName)    \n+    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n+    currentPath = os.getcwd()\n+    os.chdir(dir)\n+    for file in accepted_hits_outputNames:\n+        relativeFileName = os.path.basename(file)\n+        tfile.add(relativeFileName)\n+    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n+    tfile.close()\n+    os.chdir(currentPath)\n+    \n+\n+def __main__():\n+    #Parse Command Line\n+    parser = optparse.OptionParser()\n+    parser.add_option(\'-o\', \'--outputTxtFile\', dest=\'outputTxtFile\', help=\'for Differential expression analysis pipeline, new output option gives a txt output containing the list of mapping results.\')\n+    parser.add_option(\'-t\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all accepted hits results in a tar file.\' )\n+    parser.add_option( \'-p\', \'--num-threads\', dest=\'num_threads\', help=\'Use this many threads to align reads. The default is 1.\' )\n+    parser.add_option( \'-C\', \'--color-space\', dest=\'color_space\', action=\'store_true\', help=\'This indicates color-space data\' )\n+    parser.add_option( \'-J\', \'--junctions-output\', dest=\'junctions_output_file\', default=\'junctions_output.bed\', help=\'Junctions output file; formate is BED.\' )\n+    parser.add_option( \'-H\', \'--hits-output\', dest=\'accepted_hits_output_file\', default=\'hits_output_%s.bam\' % random.randrange(0, 10000), help=\'Accepted hits output file; formate is BAM.\' )\n+    parser.add_option( \'\', \'--own-file\', dest=\'own_file\', help=\'\' )\n+    parser.add_option( \'-D\', \'--indexes-path\', dest=\'index_path\', help=\'Indexes directory; location of .ebwt and .fa files.\' )\n+    parser.add_option( \'-r\', \'--mate-inner-dist\', dest=\'mate_inner_dist\', help=\'This is the expected (mean) inner distance between mate pairs. \\\n+                                                                                For, example, for paired end runs with fragments selected at 300bp, \\\n+                                                                                where each end is 50bp, you should set -r to be 200. There is no default, \\\n+                                                                                and this parameter is required for paired end runs.\')\n+    parser.add_option( \'\', \'--mate-std-dev\', dest=\'mate_std_dev\', help=\'Standard deviation of distribution on inner distances between male pairs.\' )\n+    parser.add_option( \'-a\', \'--min-anchor-length\', dest=\'min_anchor_length\', \n+                        help=\'The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.\' )\n+    parser.add_option( \'-m\', \'--splice-mismatches\', dest=\'splice_mismatches\', help=\'The maximum number of mismatches that can appear in the anchor region of a spliced alignment.\' )\n+    parser.add_option( \'-i\', \'--min-intron-length\', dest=\'min_intron_length\', \n+                        help=\'The minimum intron length. 
TopHat will ignore donor/acceptor pairs closer than this many bases apart.\' )\n+    parser.add_option( \'-I\', \'--max-intron-length\', dest=\'max_intron_length\', \n+                        help=\'The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.\' )\n+    parser.add_option( \'-F\', \'--junction_filter\', dest=\'junction_filter\', help=\'Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)\' )\n+    parser.add_option( \'-g\', \'--max_multihits\', dest=\'max_multihits\', help=\'Maximum number of alignmen'..b'overage-search --min-coverage-intron %s --max-coverage-intron %s \' % ( options.min_coverage_intron, options.max_coverage_intron )\n+                else:\n+                    opts += \'--no-coverage-search \'\n+                if options.closure_search:\n+                    opts += \'--closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s \'  % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron ) \n+                else:\n+                    opts += \'--no-closure-search \'\n+                if options.microexon_search:\n+                    opts += \'--microexon-search \'\n+                if options.single_paired == \'paired\':\n+                    opts += \'--mate-std-dev %s \' % options.mate_std_dev\n+                if options.initial_read_mismatches:\n+                    opts += \'--initial-read-mismatches %d \' % int( options.initial_read_mismatches )\n+                if options.seg_mismatches:\n+                    opts += \'--segment-mismatches %d \' % int( options.seg_mismatches )\n+                if options.seg_length:\n+                    opts += \'--segment-length %d \' % int( options.seg_length )\n+                if options.min_segment_intron:\n+                    opts += \'--min-segment-intron %d \' % int( options.min_segment_intron )\n+                if options.max_segment_intron:\n+                    opts += \'--max-segment-intron %d \' % int( options.max_segment_intron )\n+                if options.own_file:\n+                    cmd = cmd % ( opts, index_paths[0], input_files ) #here to add paired end file\n+                else:\n+                    cmd = cmd % ( opts, index_paths[i], input_files ) #here to add paired end file\n+            except Exception, e:\n+                # Clean up temp dirs\n+                if os.path.exists( tmp_index_dir ):\n+                    shutil.rmtree( tmp_index_dir )\n+                stop_err( \'Something is wrong with the alignment parameters and the alignment could not be run\\n\' + str( e ) )\n+                \n+        cmds.append(cmd)\n+\n+    # Run the command line for each file.\n+    for i in range(len(cmds)):\n+        try:\n+            tmp_out = tempfile.NamedTemporaryFile().name\n+            tmp_files.append(tmp_out)\n+            tmp_stdout = open( tmp_out, \'wb\' )\n+            tmp_err = tempfile.NamedTemporaryFile().name\n+            tmp_files.append(tmp_err)\n+            tmp_stderr = open( tmp_err, \'wb\' )\n+            proc = subprocess.Popen( args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )\n+            returncode = proc.wait()\n+            tmp_stderr.close()\n+            # get stderr, allowing for case where it\'s very large\n+            tmp_stderr = open( tmp_err, \'rb\' 
)\n+            stderr = \'\'\n+            buffsize = 1048576\n+            try:\n+                while True:\n+                    stderr += tmp_stderr.read( buffsize )\n+                    if not stderr or len( stderr ) % buffsize != 0:\n+                        break\n+            except OverflowError:\n+                pass\n+            tmp_stdout.close()\n+            tmp_stderr.close()\n+            if returncode != 0:\n+                raise Exception, stderr\n+                \n+            # Copy output files from tmp directory to specified files.\n+            #shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), junctions_outputNames[i] )\n+            shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), accepted_hits_outputNames[i] )\n+            # TODO: look for errors in program output.\n+        except Exception, e:\n+            stop_err( \'Error in tophat:\\n\' + str( e ) ) \n+\n+    if options.outputTar != None:\n+        toTar(options.outputTar, accepted_hits_outputNames)\n+\n+    \n+    # Clean up temp dirs\n+    for tmp_index_dir in tmp_index_dirs:\n+        if os.path.exists( tmp_index_dir ):\n+            shutil.rmtree( tmp_index_dir )\n+\n+    for tmp in tmp_files:\n+        os.remove(tmp)\n+\n+\n+if __name__=="__main__": __main__()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/tophat_parallel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/tophat_parallel.xml Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,577 @@\n+<tool id="tophat_parallel" name="Tophat for Illumina (for DEA)" version="1.0.0">\n+    <description>Find splice junctions using RNA-seq data, can have several input RNA-seq data.</description>\n+    <version_command>tophat --version</version_command>\n+    <requirements>\n+        <requirement type="package">tophat</requirement>\n+    </requirements>\n+    <command interpreter="python">\n+\t    tophat_parallel.py\n+            ## Change this to accommodate the number of threads you have available.\n+            --num-threads="4"\n+\n+            ## Provide outputs.\n+            -o $outputFileName\n+            ##--junctions-output=$junctions\n+            ##--hits-output=$accepted_hits\n+\n+            ## Handle reference file.\n+            #if $refGenomeSource.genomeSource == "history":\n+                --own-file=$refGenomeSource.ownFile\n+            #else:\n+                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ \'tophat_indexes\' ].get_fields() )[0][-1] }"\n+            #end if\n+\n+            ## Are reads single-end or paired?\n+            --single-paired=$singlePaired.sPaired\n+\n+            ## First input file always required.\n+            --input1=$input1\n+\n+            ## Set params based on whether reads are single-end or paired.\n+            #if $singlePaired.sPaired == "single":\n+                --settings=$singlePaired.sParams.sSettingsType\n+                #if $singlePaired.sParams.sSettingsType == "full":\n+                    -a $singlePaired.sParams.anchor_length\n+                    -m $singlePaired.sParams.splice_mismatches\n+                    -i $singlePaired.sParams.min_intron_length\n+                    -I $singlePaired.sParams.max_intron_length\n+                    -F $singlePaired.sParams.junction_filter\n+                    -g $singlePaired.sParams.max_multihits\n+                    --min-segment-intron $singlePaired.sParams.min_segment_intron\n+                    --max-segment-intron $singlePaired.sParams.max_segment_intron\n+                    --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches\n+                    --seg-mismatches=$singlePaired.sParams.seg_mismatches\n+                    --seg-length=$singlePaired.sParams.seg_length\n+                    --library-type=$singlePaired.sParams.library_type\n+                    \n+                    ## Indel search.\n+                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":\n+                        ## --allow-indels\n+                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length\n+                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length\n+                    #else:\n+                        --no-novel-indels\n+                    #end if\n+\n+                    ## Supplying junctions parameters.\n+                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":\n+                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":\n+                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model\n+                        #end if\n+                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":\n+                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs\n+                        #end if\n+                        
## TODO: No idea why a string cast is necessary, but it is:\n+                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":\n+                            --no-novel-juncs\n+                        #end if\n+                    #end if\n+\n+                    #if $singlePaired.sParams.closure_search.use_search == "Yes":\n+                        --closure-search\n+                        --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon\n+                        --'..b'hen such a pair is supported by a split segment alignment of a long read. The default is 500000.\n+  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, is supported by S reads. Let the average depth of coverage of \n+                                    exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the \n+                                    filter. The default is 0.15.\n+  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many \n+                                    alignments. The default is 40.\n+  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.\n+  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive.\n+  -no-novel-juncs                   Only look for junctions indicated in the supplied GFF file. (ignored without -G)\n+  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.\n+  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)\n+  --no-coverage-search              Disables the coverage based search for junctions.\n+  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.\n+  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.\n+  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.\n+  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.\n+  --segment-length                  Each read is cut up into segments, each at least this long. 
These segments are mapped independently. The default is 25.\n+  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.\n+  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.\n+  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.\n+  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.\n+  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.\n+  --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.\n+  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.\n+    </help>\n+</tool>\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/tophat_parallel_unSQL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/tophat_parallel_unSQL.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,353 @@\n+\n+#!/usr/bin/env python\n+\n+import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile, glob\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.utils.FileUtils import FileUtils\n+\n+def stop_err( msg ):\n+    sys.stderr.write( "%s\\n" % msg )\n+    sys.exit()\n+\n+def toTar(tarFileName, accepted_hits_outputNames):\n+    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n+    currentPath = os.getcwd()\n+    os.chdir(dir)\n+    for file in accepted_hits_outputNames:\n+        relativeFileName = os.path.basename(file)\n+        tfile.add(relativeFileName)\n+    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n+    tfile.close()\n+    os.chdir(currentPath)\n+    \n+def splitFastQ(fileName, nbOfSeqPerBatch):\n+    nbOfLinesPerFile = nbOfSeqPerBatch * 4\n+    lOutput = []\n+    filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n+    resDir = os.path.dirname(fileName)\n+    with open(fileName) as inF:\n+        fileNb = 1\n+        line = inF.readline()\n+        if not line or nbOfLinesPerFile == 0:\n+            outFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n+            lOutput.append(outFileName)\n+            f = open(outFileName, "wb")\n+            shutil.copyfileobj(open(fileName, "rb"), f)\n+            f.close()\n+        else:\n+            while line:\n+                outFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n+                lOutput.append(outFileName)\n+                with open(outFileName, "w") as outF:\n+                    lineNb = 1\n+                    while lineNb <= nbOfLinesPerFile and line:\n+                        outF.write(line)\n+                        line = inF.readline()\n+                        lineNb += 1\n+                fileNb += 1\n+    return lOutput\n+\n+def joinBAM(dCutOut2Out):\n+    for key in dCutOut2Out.keys():\n+        fh = open(key, "w") \n+        fh.close()\n+        nbFile = 0\n+        cmd = "samtools merge -f %s" % key\n+        for fileName in dCutOut2Out[key]:\n+            nbFile = nbFile + 1\n+            if nbFile < 225:\n+                cmd += " %s" % fileName\n+            else:\n+                nbFile = 0\n+                cmd += ";mv %s tmpBAM;" % (key)\n+                cmd += "samtools merge -f %s tmpBAM %s" %  (key, fileName)\n+        proc = subprocess.Popen( args=cmd , shell=True)\n+        returncode = proc.wait()\n+\n+        \n+def _map(iLauncher, cmd, cmdStart, cmdFinish ):\n+    lCmds = []\n+    lCmds.extend(cmd)\n+    lCmdStart = []\n+    lCmdStart.extend(cmdStart)\n+    lCmdFinish = []\n+    lCmdFinish.extend(cmdFinish)\n+    return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))\n+\n+def _createTopHatCommand(iLauncher, options, index_paths, inputFileNames, inputRevFilesNames, space):\n+    lArgs = []\n+    lArgs.append(\'-p %s %s\' % ( options.num_threads, space ))\n+    if options.single_paired == \'paired\':\n+        lArgs.append(\'-r %s \' % options.mate_inner_dist)\n+    if options.settings == \'preSet\':\n+        lArgs.append(index_paths)\n+        lArgs.append(inputFileNames)\n+        if options.input2:\n+            lArgs.append(inputRevFilesNames)\n+        return iLauncher.getSystemCommand("tophat", lArgs)\n+    else:\n+        if int( options.min_anchor_length ) >= 3:\n+            lArgs.append(\'-a %s \' % options.min_anchor_length)\n+        else:\n+      
      raise Exception, \'Minimum anchor length must be 3 or greater\'\n+        lArgs.append(\'-m %s \' % options.splice_mismatches)\n+        lArgs.append(\'-i %s \' % options.min_intron_length)\n+        lArgs.append(\'-I %s \' % options.max_intron_length)\n+        if float( options.junction_filter ) != 0.0:\n+            lArgs.append(\'-F %s \' % options.junction_filter)\n+        lArgs.append(\'-g %s \' % options.max_multihits)\n+        # Custom junctions options.\n+        if options.gene_model_annotations:\n+            lArgs.append(\'-G %s \' % options.gene_'..b'ame + \'\\n\')\n+    file.close()\n+    out.close()\n+    \n+    if options.input2:\n+        revFile = open(options.input2,"r")\n+        lines = revFile.readlines()\n+        inputRevFileNames = []\n+        for line in lines:\n+            revTab = line.split()\n+            inputRevFileNames.append(revTab[1])\n+        revFile.close()\n+\n+    \n+    # Creat bowtie index if necessary.\n+    tmp_index_dirs = []\n+    index_paths = []\n+    tmp_index_dir = tempfile.mkdtemp(dir="%s" % os.getcwd())\n+    tmp_index_dirs.append(tmp_index_dir)\n+    if options.own_file:\n+        index_path = os.path.join( tmp_index_dir, \'.\'.join( os.path.split( options.own_file )[1].split( \'.\' )[:-1] ) )\n+        index_paths.append(index_path)\n+        try:\n+            os.link( options.own_file, index_path + \'.fa\' )\n+        except:\n+            # Tophat prefers (but doesn\'t require) fasta file to be in same directory, with .fa extension\n+            pass\n+        lCmdsTuples =[]\n+        acronym = "tophat_index"\n+        jobdb = TableJobAdaptatorFactory.createJobInstance()\n+        iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)\n+        cmd_index = iLauncher.getSystemCommand("bowtie-build", [space, "-f %s" % options.own_file, index_path])\n+        cmd2Launch = []\n+        cmdStart = []\n+        cmdFinish = []\n+        cmd2Launch.append(cmd_index)\n+        lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish)) \n+        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n+    else:\n+        for file in inputFileNames:\n+            tmp_index_dir = tempfile.mkdtemp()\n+            index_path = tmp_index_dir + \'/\' + os.path.basename(file).split(\'.\')[0]\n+            index_paths.append(index_path)\n+            tmp_index_dirs.append(tmp_index_dir)\n+\n+    \n+    \n+    acronym = "tophat"\n+    jobdb = TableJobAdaptatorFactory.createJobInstance()\n+    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)\n+    lCmdsTuples = []\n+    dCutOut2Out = {}\n+    lAllFile2remove = []\n+    # for inputFileName in inputFileNames:\n+    for i in range(len(inputFileNames)):\n+        lCutOutput = []\n+        lCutInputFile = splitFastQ(inputFileNames[i], 20000)\n+        lAllFile2remove.extend(lCutInputFile)\n+        if options.input2:\n+            lCutPairInputFile = splitFastQ(inputRevFileNames[i], 20000)\n+            lAllFile2remove.extend(lCutPairInputFile)\n+        for j in range(len(lCutInputFile)):\n+            cutOutput = "%s_out" %  lCutInputFile[j]\n+            lCutOutput.append(cutOutput)\n+            lAllFile2remove.extend(lCutOutput)\n+            cmd2Launch = []\n+            if options.input2: \n+                inputRevFile = lCutPairInputFile[j]\n+            else:\n+                inputRevFile = ""\n+            if options.own_file:\n+      
          cmd2Launch.append(_createTopHatCommand(iLauncher, options, index_paths[0], lCutInputFile[j], inputRevFile, space))\n+            else:\n+                cmd2Launch.append(_createTopHatCommand(iLauncher, options, index_paths[i], lCutInputFile[j], inputRevFile, space))\n+            cmdStart = []\n+            cmdFinish = ["shutil.copyfile( os.path.join( \'tophat_out\', \'accepted_hits.bam\' ), \'%s\')" % cutOutput]\n+            lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))    \n+        dCutOut2Out[accepted_hits_outputNames[i]] = lCutOutput\n+    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n+    \n+    joinBAM(dCutOut2Out) \n+    FileUtils.removeFilesFromListIfExist(lAllFile2remove)   \n+                 \n+    if options.outputTar != None:\n+        toTar(options.outputTar, accepted_hits_outputNames)\n+\n+    \n+    # Clean up temp dirs\n+    for tmp_index_dir in tmp_index_dirs:\n+        if os.path.exists( tmp_index_dir ):\n+            shutil.rmtree( tmp_index_dir )\n+\n+    for tmp in tmp_files:\n+        os.remove(tmp)\n+\n+\n+if __name__=="__main__": __main__()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/tophat_parallel_unSQL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/tophat_parallel_unSQL.xml Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,577 @@\n+<tool id="tophat_parallel_unSQL" name="Tophat for Illumina (for DEA in parallel)" version="1.0.0">\n+    <description>Find splice junctions using RNA-seq data, can have several input RNA-seq data (parallelized).</description>\n+    <version_command>tophat --version</version_command>\n+    <requirements>\n+        <requirement type="package">tophat</requirement>\n+    </requirements>\n+    <command interpreter="python">\n+\t    tophat_parallel_unSQL.py\n+            ## Change this to accommodate the number of threads you have available.\n+            --num-threads="4"\n+\n+            ## Provide outputs.\n+            -o $outputFileName\n+            ##--junctions-output=$junctions\n+            ##--hits-output=$accepted_hits\n+\n+            ## Handle reference file.\n+            #if $refGenomeSource.genomeSource == "history":\n+                --own-file=$refGenomeSource.ownFile\n+            #else:\n+                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ \'tophat_indexes\' ].get_fields() )[0][-1] }"\n+            #end if\n+\n+            ## Are reads single-end or paired?\n+            --single-paired=$singlePaired.sPaired\n+\n+            ## First input file always required.\n+            --input1=$input1\n+\n+            ## Set params based on whether reads are single-end or paired.\n+            #if $singlePaired.sPaired == "single":\n+                --settings=$singlePaired.sParams.sSettingsType\n+                #if $singlePaired.sParams.sSettingsType == "full":\n+                    -a $singlePaired.sParams.anchor_length\n+                    -m $singlePaired.sParams.splice_mismatches\n+                    -i $singlePaired.sParams.min_intron_length\n+                    -I $singlePaired.sParams.max_intron_length\n+                    -F $singlePaired.sParams.junction_filter\n+                    -g $singlePaired.sParams.max_multihits\n+                    --min-segment-intron $singlePaired.sParams.min_segment_intron\n+                    --max-segment-intron $singlePaired.sParams.max_segment_intron\n+                    --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches\n+                    --seg-mismatches=$singlePaired.sParams.seg_mismatches\n+                    --seg-length=$singlePaired.sParams.seg_length\n+                    --library-type=$singlePaired.sParams.library_type\n+                    \n+                    ## Indel search.\n+                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":\n+                        ## --allow-indels\n+                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length\n+                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length\n+                    #else:\n+                        --no-novel-indels\n+                    #end if\n+\n+                    ## Supplying junctions parameters.\n+                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":\n+                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":\n+                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model\n+                        #end if\n+                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":\n+                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs\n+                     
   #end if\n+                        ## TODO: No idea why a string cast is necessary, but it is:\n+                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":\n+                            --no-novel-juncs\n+                        #end if\n+                    #end if\n+\n+                    #if $singlePaired.sParams.closure_search.use_search == "Yes":\n+                        --closure-search\n+                        --min-closure-exon $singlePaired.sParams.closure_search.min_c'..b'hen such a pair is supported by a split segment alignment of a long read. The default is 500000.\n+  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, is supported by S reads. Let the average depth of coverage of \n+                                    exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the \n+                                    filter. The default is 0.15.\n+  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many \n+                                    alignments. The default is 40.\n+  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.\n+  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive.\n+  -no-novel-juncs                   Only look for junctions indicated in the supplied GFF file. (ignored without -G)\n+  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.\n+  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)\n+  --no-coverage-search              Disables the coverage based search for junctions.\n+  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.\n+  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.\n+  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.\n+  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.\n+  --segment-length                  Each read is cut up into segments, each at least this long. 
These segments are mapped independently. The default is 25.\n+  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.\n+  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.\n+  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.\n+  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.\n+  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.\n+  --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.\n+  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.\n+    </help>\n+</tool>\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/DiffExpAnal/wrappGSNAP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/DiffExpAnal/wrappGSNAP.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,76 @@
+#! /usr/bin/env python
+
+import os, sys, subprocess,tempfile
+from optparse import OptionParser
+
+def stop_err(msg):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit()
+
+def __main__():
+    #Parse Command Line
+    description = "GMAP/GSNAP version:2012-12-20."
+    parser = OptionParser(description = description)
+    parser.add_option("-d", "--genomeName", dest="genomeName", help="Define the reference genome name.[compulsory]")
+    parser.add_option("-o", "--outputFile", dest="outputfile", help="output[compulsory]")
+    #parser.add_option("-D", "--workingDir", dest="workingdir", help="Define the directory of writing reference genome index.[compulsory]")
+    parser.add_option("-k", "--kmer", dest="kmer", default=12, help="Choose kmer value (<=16), a big kmer value can take more RAM(4Go).[compulsory]")
+    parser.add_option("-i", "--inputFasta", dest="inputFastaFile", help="Reference genome file, fasta format.[compulsory]")
+    parser.add_option("-q", "--inputFastq", dest="inputFastqFile", help="Input fastq file.")
+    parser.add_option("-p", "--pairedEnd", dest="pairedEndFile", default=None, help="Input paired-end fastq file.")
+    parser.add_option("-A", "--outputFormat", dest="outputFormat", default="sam", help="Choose an output format [sam, goby (need to re-compile with appropriate options)].")
+    (options, args) = parser.parse_args()    
+
+    #If workingDir does not exist, it should be created before running the job.
+    
+    workingDir = os.path.dirname(options.inputFastaFile)
+    
+    cmds = []
+    cmd_setup = "gmap_setup -d %s -D %s -k %s %s" % (options.genomeName, workingDir, options.kmer, options.inputFastaFile)
+    cmds.append(cmd_setup)
+    cmd_make_coords = "make -f Makefile.%s coords" % options.genomeName 
+    cmds.append(cmd_make_coords)
+    cmd_make_gmapdb = "make -f Makefile.%s gmapdb" % options.genomeName
+    cmds.append(cmd_make_gmapdb)
+    cmd_make_install = "make -f Makefile.%s install" % options.genomeName
+    cmds.append(cmd_make_install)
+    cmd_run = "gsnap -d %s -D %s -A %s %s " % (options.genomeName, workingDir, options.outputFormat, options.inputFastqFile)
+    if options.pairedEndFile != None:
+        cmd_run += "%s" % options.pairedEndFile
+    cmd_run += " > %s" % options.outputfile
+    cmds.append(cmd_run)
+    
+    tmp_files = []
+    for i in range(len(cmds)):
+        try:
+            tmp_out = tempfile.NamedTemporaryFile().name
+            tmp_files.append(tmp_out)
+            tmp_stdout = open(tmp_out, 'wb')
+            tmp_err = tempfile.NamedTemporaryFile().name
+            tmp_files.append(tmp_err)
+            tmp_stderr = open(tmp_err, 'wb')
+            proc = subprocess.Popen(args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr)
+            returncode = proc.wait()
+            tmp_stderr.close()
+            #get stderr, allowing for case where it's very large
+            tmp_stderr = open(tmp_err, 'rb')
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read(buffsize)
+                    if not stderr or len(stderr) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stdout.close()
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+        except Exception, e:
+            stop_err('Error while running command:\n' + str(e))
+    
+    for tmp_file in tmp_files:
+        os.remove(tmp_file)
+    
+if __name__=="__main__":__main__()
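
The wrapper above first builds a GMAP/GSNAP genome index and then aligns the reads. Below is a minimal sketch, using hypothetical file names, of the command strings it assembles and then executes one by one through subprocess.Popen (Python 2, matching the wrapper's style):

#!/usr/bin/env python
# Sketch only: rebuild the command strings wrappGSNAP.py generates,
# using hypothetical inputs (genome "myGenome", reads "reads.fastq").
genomeName, kmer = "myGenome", 12
workingDir, fastaFile = "/tmp/gsnap_index", "genome.fa"
fastqFile, outputFormat, outputFile = "reads.fastq", "sam", "hits.sam"

cmds = []
cmds.append("gmap_setup -d %s -D %s -k %s %s" % (genomeName, workingDir, kmer, fastaFile))
cmds.append("make -f Makefile.%s coords" % genomeName)   # genome coordinates
cmds.append("make -f Makefile.%s gmapdb" % genomeName)    # build the GMAP database
cmds.append("make -f Makefile.%s install" % genomeName)   # install the index
cmds.append("gsnap -d %s -D %s -A %s %s > %s" % (genomeName, workingDir, outputFormat, fastqFile, outputFile))
for cmd in cmds:
    print cmd  # the real wrapper runs each of these with subprocess.Popen(shell=True)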
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/File.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/File.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,55 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+public class File {
+  String name;
+  String formatType;
+  String format;
+
+
+  public File(String name, String type, String format) {
+    this.name       = name;
+    this.formatType = type;
+    this.format     = format;
+  }
+
+  public String getName() {
+    return this.name;
+  }
+
+  public String getFormatType() {
+    return this.formatType;
+  }
+
+  public String getFormat() {
+    return this.format;
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Files.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Files.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,75 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class Files {
+  HashMap <String, File> files;  
+
+  public Files () {
+    files = new HashMap < String, File> ();
+  }
+
+  public void addFile(String fileName, String type, String format) {
+    this.addFile(new File(fileName, type, format));
+  }
+
+  public void addFile(File file) {
+    files.put(file.name, file);
+  }
+
+  public void clear() {
+    files.clear();
+  }
+
+  public String getType(String fileName) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+    }
+    if (! files.containsKey(fileName)) {
+      System.out.println("Oops! Format type of file " + fileName + " is not found!");
+      return null;
+    }
+    return files.get(fileName).formatType;
+  }
+
+  public String getFormat(String fileName) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+    }
+    if (! files.containsKey(fileName)) {
+      System.out.println("Oops! Format of file " + fileName + " is not found!");
+      return null;
+    }
+    return files.get(fileName).format;
+  }
+}
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/FormatType.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/FormatType.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,64 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class FormatType {
+  String type;
+  Vector < String > formats;
+
+  public FormatType (String type) {
+    this.type    = type;
+    this.formats = new Vector < String > ();
+  }
+
+  public String getType () {
+    return this.type;
+  }
+
+  public void addFormat (String format) {
+    formats.add(format);
+  }
+
+  public boolean containsFormat (String format) {
+    for (int i = 0; i < formats.size(); i++) {
+      if (((String) formats.get(i)).compareToIgnoreCase(format) == 0) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  public Vector < String > getFormats () {
+    return formats;
+  }
+}
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/FormatsContainer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/FormatsContainer.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,90 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class FormatsContainer {
+
+  HashMap < String, FormatType > formatTypes;
+
+
+  public FormatsContainer() {
+    this.formatTypes = new HashMap < String, FormatType > ();
+  }
+
+
+  public void addFormat(String type, String format) {
+    FormatType formatType;
+    if (formatTypes.containsKey(type)) {
+      formatType = this.formatTypes.get(type);
+    }
+    else {
+      formatType = new FormatType(type);
+      this.formatTypes.put(type, formatType);
+    }
+    formatType.addFormat(format);
+  }
+
+
+  public Vector < String > getFormatTypes () {
+    Vector < String > v = new Vector < String > ();
+    v.addAll(this.formatTypes.keySet());
+    return v;
+  }
+
+
+  public FormatType getFormats (String type) {
+    if (! formatTypes.containsKey(type)) {
+      System.out.print("Format type " + type + " is unavailable. Got: ");
+      Iterator it = formatTypes.entrySet().iterator();
+      while (it.hasNext()) {
+        Map.Entry pairs = (Map.Entry) it.next();
+        System.out.print(pairs.getKey() + " ");
+      }
+      System.out.println();
+    }
+    return formatTypes.get(type);
+  }
+
+
+  public String getFormatType (String format) {
+    for (Iterator it = formatTypes.keySet().iterator(); it.hasNext(); ) {
+      Object type       =  it.next();
+      Object formatType = formatTypes.get(type);
+      if (((FormatType) formatType).containsFormat(format)) {
+        return (String) type;
+      }
+    }
+    return null;
+  }
+}
+
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/FormatsReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/FormatsReader.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,83 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.File;
+import java.io.*;
+
+
+public class FormatsReader {
+
+  String fileName;
+  Vector < FormatType > formatTypes;
+  Vector < String > typeNames;
+
+
+  public FormatsReader(String fileName) {
+    this.fileName    = fileName;  
+    this.formatTypes = new Vector < FormatType > ();
+  }
+
+
+  public boolean read() {
+    File file = new File(this.fileName);
+
+    try {
+      BufferedReader reader = new BufferedReader(new FileReader(file));
+      String     line = null;
+      String[]   lineElements;
+      String[]   formats;
+      String     typeName;
+
+      while ((line = reader.readLine()) != null) {
+        if (line.length() > 0) {
+          lineElements = line.split(":");
+          typeName     = lineElements[0].trim();
+          formats      = lineElements[1].split(",");
+          for (int i = 0; i < formats.length; i++) {
+            Global.formats.addFormat(typeName, formats[i].trim());
+          }
+        }
+      }
+
+      reader.close();
+    }
+    catch (FileNotFoundException e) {
+      return false;
+    }
+    catch (IOException e) {
+      return false;
+    }
+
+    return true;
+  }
+}
+
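
FormatsReader.read() splits every non-empty line on ":" and then on ",", registering each format under its type through Global.formats.addFormat(). A hypothetical formats.txt could therefore look like the lines below; the file actually shipped with S-MART may list other types and formats:

sequence: fasta, fastq
transcript: bed, gff, gff2, gff3, gtf
mapping: sam, bam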
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Global.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Global.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,70 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.Vector;
+import java.util.HashMap;
+import javax.swing.DefaultListModel;
+import javax.swing.JButton;
+import javax.swing.JTextField;
+
+public class Global {
+
+  public static int logAreaSize = 100;
+
+  public static String smartConfFileName = "smart.conf";
+
+  public static String smartProgramsFileName = "programs.txt";
+
+  public static String smartFormatsFileName = "formats.txt";
+
+  public static String pythonPath = new String();
+
+  public static String pythonCommand = "python";
+
+  public static String mysqlCommand = "mysql";
+
+  public static String rCommand = "R";
+
+  public static Files files = new Files();
+
+  public static Vector < String > fileNames = new Vector < String >();
+
+  public static FormatsContainer formats = new FormatsContainer();
+
+  public static boolean programRunning = false;
+
+  public static HashMap < JButton, JTextField > otherFilesChooser = new HashMap < JButton, JTextField >();
+
+  public static HashMap < JButton, JTextField > otherDirectoriesChooser = new HashMap < JButton, JTextField >();
+
+  public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >();
+
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/Old/PasswordAsker.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/Old/PasswordAsker.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,87 @@
+import java.awt.*;
+import java.awt.event.*;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+public class PasswordAsker {
+
+  static String password;
+  static JFrame frame;
+  static CountDownLatch latch;
+
+
+  public PasswordAsker() {
+    password = null;
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+    latch = new CountDownLatch(1);
+  }
+
+
+  private static void createAndShowGUI() {
+    //Create and set up the window.
+    frame = new JFrame("Password");
+    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+    frame.setContentPane(setMainPane());
+
+    //Display the window.
+    frame.pack();
+    frame.setVisible(true);
+  }
+
+
+  private static JPanel setMainPane() {
+    JPanel rootPanel = new JPanel(false);
+    rootPanel.setLayout(new GridLayout(0, 1));
+
+    JPanel infoPanel = new JPanel(false);
+    JLabel infoLabel = new JLabel("Please enter here the password that you entered for the MySQL root account.\r\nNo information is stored or sent. I promise.");
+    infoPanel.add(infoLabel);
+
+    JPanel passPanel = new JPanel(false);
+    passPanel.setLayout(new GridLayout(1, 0));
+    JLabel passLabel = new JLabel("password");
+    final JTextField passText = new JTextField(20);
+    passLabel.setLabelFor(passText);
+    passPanel.add(passLabel);
+    passPanel.add(passText);
+
+    JPanel  okPanel  = new JPanel(false);
+    JButton okButton = new JButton("OK");
+    okPanel.add(okButton);
+
+    okButton.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        password = passText.getText();
+        frame.setVisible(false);
+        frame.dispose();
+        latch.countDown();
+      }
+    });
+
+    rootPanel.add(infoPanel);
+    rootPanel.add(passPanel);
+    rootPanel.add(okPanel);
+
+    return rootPanel;
+  }
+
+
+  public boolean waitForPassword() {
+    try {
+      latch.await();
+    }
+    catch (InterruptedException e) {
+      return false;
+    }
+    return true;
+  }
+
+
+  public String getPassword() {
+    return password;
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/Old/SmartInstaller.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/Old/SmartInstaller.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,167 @@
+import java.util.*;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.*;
+import javax.swing.*;
+import javax.swing.filechooser.*;
+import javax.swing.border.*;
+import javax.swing.SwingUtilities;
+import java.net.*;
+
+public class SmartInstaller extends JPanel implements ActionListener {
+  int       BUFFER = 1024;
+
+  JFrame    mainFrame;
+  JTextArea logArea;
+
+  // configuration chooser buttons
+  String       configurations[] = {"32 bits", "64 bits"};
+  JRadioButton configurationButtons[];
+
+  // program chooser buttons
+  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"};
+  JCheckBox programChooserButtons[];
+
+  JButton   goButton;
+
+  // install directory
+  JButton    installDirectoryChooserButton;
+  JTextField installDirectoryChooserTextField;
+
+
+  public SmartInstaller() {
+    super();
+
+    Box box = Box.createVerticalBox();
+
+    // Header
+    JPanel       headerPanel = new JPanel(false);
+    JTextArea    headerArea  = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the required software, as well as S-MART itself.\r\nYou can deselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.\r\nPlease remember the root password if you install MySQL!");
+    TitledBorder headerBorder = BorderFactory.createTitledBorder("Welcome to the S-MART installer!");
+    headerArea.setEditable(false);
+    headerArea.setBackground(headerPanel.getBackground());
+    headerPanel.add(headerArea);
+    headerPanel.setBorder(headerBorder);
+
+
+    // Configuration
+    JPanel configurationPanel = new JPanel(false);
+    configurationPanel.setLayout(new GridLayout(1, 0));
+    configurationButtons = new JRadioButton[configurations.length];
+    ButtonGroup configurationGroup = new ButtonGroup();
+    for (int i = 0; i < configurations.length; i++) {
+      JRadioButton button = new JRadioButton(configurations[i]);
+      configurationPanel.add(button);
+      configurationButtons[i] = button;
+      configurationGroup.add(button);
+    }
+    configurationButtons[0].setSelected(true);
+    TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration");
+    configurationPanel.setBorder(configurationBorder);
+
+
+    // Program chooser panel
+    JPanel programPanel = new JPanel(false);
+    programPanel.setLayout(new GridLayout(0, 1));
+
+    JLabel programLabel = new JLabel("Choose which programs to install:");
+    programPanel.add(programLabel);
+    programChooserButtons = new JCheckBox[programChoosers.length];
+    for (int i = 0; i < programChoosers.length; i++) {
+      JCheckBox button = new JCheckBox(programChoosers[i]);
+      button.setSelected(true);
+      programPanel.add(button);
+      programChooserButtons[i] = button;
+    }
+    TitledBorder programBorder = BorderFactory.createTitledBorder("Programs");
+    programPanel.setBorder(programBorder);
+
+    // Install directory chooser
+    JPanel installDirectoryChooserPanel = new JPanel(false);
+    installDirectoryChooserPanel.setLayout(new GridLayout(1, 0));
+    JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: ");
+    installDirectoryChooserTextField = new JTextField();
+    installDirectoryChooserButton = new JButton("Open...");
+    installDirectoryChooserButton.addActionListener(this);
+
+    installDirectoryChooserPanel.add(installDirectoryChooserLabel);
+    installDirectoryChooserPanel.add(installDirectoryChooserTextField);
+    installDirectoryChooserPanel.add(installDirectoryChooserButton);
+    TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory");
+    installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder);
+
+    // GO!
+    JPanel goPanel = new JPanel(false);
+    goButton = new JButton("GO!");
+    goButton.addActionListener(this);
+    goButton.setSelected(true);
+    goPanel.add(goButton);
+    TitledBorder goBorder = BorderFactory.createTitledBorder("Start install");
+    goPanel.setBorder(goBorder);
+
+    // Log
+    logArea = new JTextArea(10, 120);
+    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));
+    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");
+    logScroll.setBorder(logBorder);
+
+    GridLayout horizontalLayout = new GridLayout(1, 0);
+
+    box.add(headerPanel);
+    box.add(configurationPanel);
+    box.add(programPanel);
+    box.add(installDirectoryChooserPanel);
+    box.add(goPanel);
+    box.add(logScroll);
+
+    add(box);
+  }
+
+
+  public void actionPerformed(ActionEvent e) {
+
+    // GO button: start the installation
+    if (e.getSource() == goButton) {
+      boolean[] selectedPrograms = new boolean[programChoosers.length];
+      for (int i = 0; i < programChoosers.length; i++) {
+        selectedPrograms[i] = programChooserButtons[i].isSelected();
+      }
+      SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1);
+      task.execute();
+    }
+    // Install directories chooser
+    else if (e.getSource() == installDirectoryChooserButton) {
+      JFileChooser chooser = new JFileChooser();
+      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {
+        installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath());
+      }
+    }
+  }
+
+  private static void createAndShowGUI() {
+    // Create and set up the window.
+    JFrame mainFrame = new JFrame("S-Mart Installer");
+    mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+    //Create and set up the content pane.
+    JComponent newContentPane = new SmartInstaller();
+    newContentPane.setOpaque(true);
+    mainFrame.setContentPane(newContentPane);
+
+    // Display the window.
+    mainFrame.pack();
+    mainFrame.setVisible(true);
+  }
+
+
+  public static void main(String[] args) {
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+  }
+}
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/Old/SmartInstallerTask.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/Old/SmartInstallerTask.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,455 @@
[Hunk body truncated in this listing: 455 added lines defining the old SmartInstallerTask, a SwingWorker<Boolean, String> that downloads the selected packages (R 2.11.0, MySQL 5.1, Python 2.6.5, the MySQL-python driver and s-mart.zip), runs their installers, unzips the S-MART archive, creates the MySQL account through a PasswordAsker prompt, and registers PYTHONPATH with "REG ADD HKCU\Environment".]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/PasswordAsker.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/PasswordAsker.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,87 @@
+import java.awt.*;
+import java.awt.event.*;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+public class PasswordAsker {
+
+  static String password;
+  static JFrame frame;
+  static CountDownLatch latch;
+
+
+  public PasswordAsker() {
+    password = null;
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+    latch = new CountDownLatch(1);
+  }
+
+
+  private static void createAndShowGUI() {
+    //Create and set up the window.
+    frame = new JFrame("Password");
+    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+    frame.setContentPane(setMainPane());
+
+    //Display the window.
+    frame.pack();
+    frame.setVisible(true);
+  }
+
+
+  private static JPanel setMainPane() {
+    JPanel rootPanel = new JPanel(false);
+    rootPanel.setLayout(new GridLayout(0, 1));
+
+    JPanel infoPanel = new JPanel(false);
+    JLabel infoLabel = new JLabel("Please enter the password that you set for the MySQL root account.\r\nNo information is stored or sent. I promise.");
+    infoPanel.add(infoLabel);
+
+    JPanel passPanel = new JPanel(false);
+    passPanel.setLayout(new GridLayout(1, 0));
+    JLabel passLabel = new JLabel("password");
+    final JTextField passText = new JTextField(20);
+    passLabel.setLabelFor(passText);
+    passPanel.add(passLabel);
+    passPanel.add(passText);
+
+    JPanel  okPanel  = new JPanel(false);
+    JButton okButton = new JButton("OK");
+    okPanel.add(okButton);
+
+    okButton.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        password = passText.getText();
+        frame.setVisible(false);
+        frame.dispose();
+        latch.countDown();
+      }
+    });
+
+    rootPanel.add(infoPanel);
+    rootPanel.add(passPanel);
+    rootPanel.add(okPanel);
+
+    return rootPanel;
+  }
+
+
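+  // Blocks the calling thread until the OK listener (on the event dispatch thread) has stored the password and counted the latch down.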
+  public boolean waitForPassword() {
+    try {
+      latch.await();
+    }
+    catch (InterruptedException e) {
+      return false;
+    }
+    return true;
+  }
+
+
+  public String getPassword() {
+    return password;
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/SmartInstaller.jar
Binary file SMART/Java/Installer/SmartInstaller.jar has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/SmartInstaller.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/SmartInstaller.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,167 @@
+import java.util.*;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.*;
+import javax.swing.*;
+import javax.swing.filechooser.*;
+import javax.swing.border.*;
+import javax.swing.SwingUtilities;
+import java.net.*;
+
+public class SmartInstaller extends JPanel implements ActionListener {
+  int       BUFFER = 1024;
+
+  JFrame    mainFrame;
+  JTextArea logArea;
+
+  // configuration chooser buttons
+  String       configurations[] = {"32 bits", "64 bits"};
+  JRadioButton configurationButtons[];
+
+  // program chooser buttons
+  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"};
+  JCheckBox programChooserButtons[];
+
+  JButton   goButton;
+
+  // install directory
+  JButton    installDirectoryChooserButton;
+  JTextField installDirectoryChooserTextField;
+
+
+  public SmartInstaller() {
+    super();
+
+    Box box = Box.createVerticalBox();
+
+    // Header
+    JPanel       headerPanel = new JPanel(false);
+    JTextArea    headerArea  = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the required software, as well as S-MART itself.\r\nYou can deselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.");
+    TitledBorder headerBorder = BorderFactory.createTitledBorder("Welcome to the S-MART installer!");
+    headerArea.setEditable(false);
+    headerArea.setBackground(headerPanel.getBackground());
+    headerPanel.add(headerArea);
+    headerPanel.setBorder(headerBorder);
+
+
+    // Configuration
+    JPanel configurationPanel = new JPanel(false);
+    configurationPanel.setLayout(new GridLayout(1, 0));
+    configurationButtons = new JRadioButton[configurations.length];
+    ButtonGroup configurationGroup = new ButtonGroup();
+    for (int i = 0; i < configurations.length; i++) {
+      JRadioButton button = new JRadioButton(configurations[i]);
+      configurationPanel.add(button);
+      configurationButtons[i] = button;
+      configurationGroup.add(button);
+    }
+    configurationButtons[0].setSelected(true);
+    TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration");
+    configurationPanel.setBorder(configurationBorder);
+
+
+    // Program chooser panel
+    JPanel programPanel = new JPanel(false);
+    programPanel.setLayout(new GridLayout(0, 1));
+
+    JLabel programLabel = new JLabel("Choose which programs to install:");
+    programPanel.add(programLabel);
+    programChooserButtons = new JCheckBox[programChoosers.length];
+    for (int i = 0; i < programChoosers.length; i++) {
+      JCheckBox button = new JCheckBox(programChoosers[i]);
+      button.setSelected(true);
+      programPanel.add(button);
+      programChooserButtons[i] = button;
+    }
+    TitledBorder programBorder = BorderFactory.createTitledBorder("Programs");
+    programPanel.setBorder(programBorder);
+
+    // Install directory chooser
+    JPanel installDirectoryChooserPanel = new JPanel(false);
+    installDirectoryChooserPanel.setLayout(new GridLayout(1, 0));
+    JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: ");
+    installDirectoryChooserTextField = new JTextField();
+    installDirectoryChooserButton = new JButton("Open...");
+    installDirectoryChooserButton.addActionListener(this);
+
+    installDirectoryChooserPanel.add(installDirectoryChooserLabel);
+    installDirectoryChooserPanel.add(installDirectoryChooserTextField);
+    installDirectoryChooserPanel.add(installDirectoryChooserButton);
+    TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory");
+    installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder);
+
+    // GO!
+    JPanel goPanel = new JPanel(false);
+    goButton = new JButton("GO!");
+    goButton.addActionListener(this);
+    goButton.setSelected(true);
+    goPanel.add(goButton);
+    TitledBorder goBorder = BorderFactory.createTitledBorder("Start install");
+    goPanel.setBorder(goBorder);
+
+    // Log
+    logArea = new JTextArea(10, 120);
+    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));
+    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");
+    logScroll.setBorder(logBorder);
+
+    GridLayout horizontalLayout = new GridLayout(1, 0);
+
+    box.add(headerPanel);
+    box.add(configurationPanel);
+    box.add(programPanel);
+    box.add(installDirectoryChooserPanel);
+    box.add(goPanel);
+    box.add(logScroll);
+
+    add(box);
+  }
+
+
+  public void actionPerformed(ActionEvent e) {
+
+    // GO button: start the installation
+    if (e.getSource() == goButton) {
+      boolean[] selectedPrograms = new boolean[programChoosers.length];
+      for (int i = 0; i < programChoosers.length; i++) {
+        selectedPrograms[i] = programChooserButtons[i].isSelected();
+      }
+      SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1);
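+      // execute() runs the SwingWorker task on a background thread; its progress messages are appended to the log area.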
+      task.execute();
+    }
+    // Install directories chooser
+    else if (e.getSource() == installDirectoryChooserButton) {
+      JFileChooser chooser = new JFileChooser();
+      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {
+        installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath());
+      }
+    }
+  }
+
+  private static void createAndShowGUI() {
+    // Create and set up the window.
+    JFrame mainFrame = new JFrame("S-Mart Installer");
+    mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+    //Create and set up the content pane.
+    JComponent newContentPane = new SmartInstaller();
+    newContentPane.setOpaque(true);
+    mainFrame.setContentPane(newContentPane);
+
+    // Display the window.
+    mainFrame.pack();
+    mainFrame.setVisible(true);
+  }
+
+
+  public static void main(String[] args) {
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/SmartInstallerTask.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/SmartInstallerTask.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,419 @@
[Hunk body truncated in this listing: 419 added lines defining the current SmartInstallerTask, a SwingWorker<Boolean, String> that downloads R 2.11.0, Python 2.6.5 and s-mart-1.0.15.zip for the chosen architecture, runs the installers, unzips the archive into the installation directory, moves the extracted S-Mart files up one level, and registers PYTHONPATH with "REG ADD HKCU\Environment".]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/build.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/build.sh Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+#! /bin/sh
+
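+# Rebuild SmartInstaller.jar: compile every Java source, then bundle the classes using manifest.txt (Main-Class: SmartInstaller).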
+rm -rf SmartInstaller.jar
+javac *.java
+jar cvfm SmartInstaller.jar manifest.txt *.class
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/manifest.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/manifest.txt Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Created-By: Matthias Zytnicki
+Main-Class: SmartInstaller
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Installer/s-mart.zip
Binary file SMART/Java/Installer/s-mart.zip has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Program.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Program.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,175 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.awt.*;
+import javax.swing.*;
+
+
+public class Program {
+  String                 shortName;
+  String                 name;
+  String                 section;
+  String                 description;
+  Vector <ProgramOption> options;
+  JPanel                 panel;
+  JButton                button;
+
+
+  public Program() {
+    this.shortName = null;  
+    this.name      = null;  
+    this.options   = new Vector <ProgramOption> ();  
+  }
+
+
+  public void setShortName(String shortName) {
+    this.shortName = shortName;
+  }
+
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+
+  public void setSection(String section) {
+    this.section = section;
+  }
+
+  public void setDescription(String description) {
+    this.description = description;
+  }
+
+
+  public void addOption(ProgramOption option) {
+    options.add(option);
+  }
+
+
+  public String getShortName() {
+    return this.shortName;
+  }
+
+
+  public String getName() {
+    return this.name;
+  }
+
+
+  public String getSection() {
+    return this.section;
+  }
+
+  public String getDescription() {
+    return this.description;
+  }
+
+
+  public String checkValues() {
+    for (int i = 0; i < options.size(); i++) {
+      String comment = options.get(i).checkValue();
+      if (comment != null) {
+        return comment;
+      }
+    }
+    return null;
+  }
+
+
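+  // Assembles the command line: the configured Python interpreter, the tool's script under Python/, then each option's flag and value.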
+  public LinkedList<String> getCommand() {
+    LinkedList<String> parameterList = new LinkedList<String>();
+    parameterList.add(Global.pythonCommand);
+    parameterList.add("Python" + java.io.File.separator + this.shortName);
+    for (int i = 0; i < options.size(); i++) {
+      ProgramOption option = options.get(i);
+      parameterList.addAll(option.getCommand());
+    }
+    return parameterList;
+  }
+
+
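+  // Lazily builds (and caches) the Swing panel for this tool: a description label, one sub-panel per option, then the GO! button.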
+  public JPanel getPanel() {
+    if (this.panel != null) {
+      return this.panel;
+    }
+    
+    this.panel = new JPanel(false);
+    this.panel.setLayout(new FlowLayout());
+    Box box = Box.createVerticalBox();
+
+    JPanel descriptionPanel = new JPanel(false);
+    JLabel descriptionLabel = new JLabel(this.description);
+    descriptionPanel.add(descriptionLabel);
+    box.add(descriptionPanel);
+
+    for (int i = 0; i < options.size(); i++) {
+      ProgramOption option = options.get(i);
+      JPanel        panel  = option.getPanel();
+      if (panel == null) {
+        System.out.println("Problem with Python program '" + this.shortName + "'.");
+        return null;
+      }
+      box.add(option.getPanel());
+    }
+
+    JPanel buttonPanel = new JPanel(false);
+    this.button = new JButton("GO!");
+
+    buttonPanel.add(button);
+
+    box.add(buttonPanel);
+
+    this.panel.add(box);
+
+    return this.panel;
+  }
+
+
+  public JButton getButton() {
+    if (this.button == null) {
+      this.getPanel();
+    }
+    return this.button;
+  }
+
+  
+  public Vector < File > getOutputFiles() {
+    Vector < File > files = new Vector < File > ();
+    for (int i = 0; i < options.size(); i++) {
+      ProgramOption option = options.get(i);
+      if (! option.isInput()) {
+        files.add(option.getOutputFile());
+      }
+    }
+    return files;
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/ProgramFileReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/ProgramFileReader.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,174 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.File;
+import java.io.*;
+
+
+public class ProgramFileReader {
+  String fileName;
+  Vector <Program> programs;
+
+
+  public ProgramFileReader(String fileName) {
+    this.fileName = fileName;  
+    this.programs = new Vector <Program> ();
+  }
+
+
+  public boolean read() {
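+    // The original file parser below is commented out; read() currently returns true without loading any program.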
+//  File    file    = new File(this.fileName);
+//  Program program = null;
+//  int     step    = 0;
+//  TreeMap <String, ProgramOption> options = new TreeMap <String, ProgramOption> ();
+
+//  try {
+//    BufferedReader reader = new BufferedReader(new FileReader(file));
+//    String line    = null;
+//    String section = null;
+
+//    while ((line = reader.readLine()) != null) {
+
+//      line = line.trim();
+
+//      if (line.length() == 0) {
+//        if (program != null) {
+//          programs.add(program);
+//        }
+//        program = null;
+//        step = 0;
+//        continue;
+//      }
+
+//      if ((line.charAt(0) == '[') && (line.charAt(line.length() - 1) == ']')) {
+//        section = line.substring(1, line.length() - 1).trim();
+//        continue;
+//      }
+//      switch (step) {
+//        case 0:
+//        program = new Program();
+//          program.setName(line);
+//          if (section == null) {
+//            System.out.println("Error! Section of program '" + line + "' is not set!");
+//          }
+//          program.setSection(section);
+//          step = 1;
+//          break;
+//        case 1:
+//          program.setShortName(line);
+//          step = 2;
+//          break;
+//        case 2:
+//          ProgramOption option = new ProgramOption();
+
+//          String[] elements    = line.split(":");
+//          boolean  input       = elements[0].trim().equalsIgnoreCase("input")? true: false;
+//          String[] subElements = elements[1].split(";");
+//          String   identifier = subElements[0].trim();
+
+//          option.setInput(input);
+
+//          if (input) {
+
+//            if (subElements.length < 4) {
+//              System.out.println("Line '" + line + "' is weird...");
+//            }
+
+//            String   type       = subElements[1].trim();
+//            String   comment    = subElements[2].trim();
+//            boolean  compulsory = subElements[3].trim().equalsIgnoreCase("0")? false: true;
+
+//            option.setIdentifier(identifier);
+//            option.setType(type);
+//            option.setComment(comment);
+//            option.setCompulsory(compulsory);
+
+//            if ("file".compareToIgnoreCase(type) == 0) {
+//              if (subElements.length < 5) {
+//                System.out.println("Line '" + line + "' is weird...");
+//              }
+
+//              String formatIdentifier = subElements[4].trim();
+//              option.setFormatIdentifier(formatIdentifier);
+//            }
+//            else if ("choice".compareToIgnoreCase(type) == 0) {
+//              if (subElements.length < 5) {
+//                System.out.println("Line '" + line + "' is weird...");
+//              }
+
+//              String[] choices = subElements[4].trim().split(",");
+//              for (int i = 0; i < choices.length; i++) {
+//                choices[i] = choices[i].trim();
+//              }
+//              option.setChoices(choices);
+//            }
+//            options.put(identifier, option);
+//          }
+//          else {
+//            String format = subElements[1].trim();
+
+//            option.setFormat(format);
+//            option.setAssociatedOption(options.get(identifier));
+//          }
+
+//          program.addOption(option);
+
+//          break;
+//        default:
+//          return false;
+//      }
+//    }
+
+//    reader.close();
+//  }
+//  catch (FileNotFoundException e) {
+//    return false;
+//  }
+//  catch (IOException e) {
+//    return false;
+//  }
+
+//  if (program != null) {
+//    programs.add(program);
+//  }
+
+    return true;
+  }
+
+  public int getNbPrograms() {
+    return programs.size();
+  }
+
+  public Program getProgram(int i) {
+    return programs.get(i);
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/ProgramLauncher.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/ProgramLauncher.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,209 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.*;
+import javax.swing.SwingUtilities;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+public class ProgramLauncher extends SwingWorker<Boolean, String>  {
+
+  String[]     command;
+  JTextArea    logArea;
+  JLabel       messageField;
+  JProgressBar progressBar;
+  JLabel       etaField;
+  int          exitValue;
+  CountDownLatch latch;
+
+  
+
+  public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = new String[c.size()];
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+    c.toArray(command);
+    latch = new CountDownLatch(1);
+  }
+
+
+  public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = c;
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+    latch = new CountDownLatch(1);
+  }
+
+
+  @Override
+  public Boolean doInBackground() {
+    ProcessBuilder pb           = new ProcessBuilder(command);
+    Process process             = null;
+    BufferedReader outputReader = null;
+    pb                          = pb.redirectErrorStream(true);
+    Map<String, String> env     = pb.environment();
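+    // Expose the S-MART layout and the external tools (MySQL, R) to the child process through its environment.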
+    env.put("PYTHONPATH", System.getProperty("user.dir"));
+    env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "SMART" + java.io.File.separator + "Java" + java.io.File.separator + "Python");
+    env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+    env.put("SMARTRPATH", Global.rCommand);
+    String commandJoined = Arrays.toString(command);
+
+    try {
+      publish("=== Starting command '" + commandJoined.trim() + "' ===\n");
+      process = pb.start();
+
+      BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream());
+      InputStream is                   = process.getInputStream();
+      InputStreamReader isr            = new InputStreamReader(is);
+      outputReader                     = new BufferedReader(isr);
+    }
+    catch (Exception exception) {
+      publish("!Process cannot be started (command is '" + commandJoined + "')!\n");
+      exception.printStackTrace();
+      latch.countDown();
+      return Boolean.FALSE;
+    }
+    if (outputReader == null) {
+      publish("!Problem in the output of the command!\n");
+      latch.countDown();
+      return Boolean.FALSE;
+    }
+    else {
+      try {
+        String line;
+        while ((line = outputReader.readLine()) != null) {
+          publish(line + "\n");
+        }
+      }
+      catch (IOException e) {
+        e.printStackTrace();
+        publish("!Cannot get the output of the command!\n");
+        latch.countDown();
+        return Boolean.FALSE;
+      }
+    }
+    try {
+      process.waitFor();
+    }
+    catch (InterruptedException e) {
+      e.printStackTrace();
+      publish("!Cannot wait for the end of the command!\n");
+      latch.countDown();
+      return Boolean.FALSE;
+    }
+    try {
+      exitValue = process.exitValue();
+    }
+    catch (IllegalThreadStateException e) {
+      e.printStackTrace();
+      publish("!Cannot get the exit value of the command!\n");
+      latch.countDown();
+      return Boolean.FALSE;
+    }
+    if (exitValue != 0) {
+      publish("!Problem during the execution of the command '" + commandJoined + "'!\n");
+      latch.countDown();
+      return Boolean.FALSE;
+    }
+    publish("=== Ending command '" + commandJoined.trim() + "' ===\n");
+    latch.countDown();
+    return Boolean.TRUE;
+  }
+
+
+  @Override
+  protected void process(List<String> chunks) {
+    String message = "";
+    String text    = logArea.getText();
+    for (String chunk: chunks) {
+      text += chunk;
+    }
+    for (String lineSeparatedByCarriageReturn: text.split("\n")) {
+      for (String line: lineSeparatedByCarriageReturn.split("\r")) {
+        boolean progressLine = false;
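+        // Lines shaped like "label [===   ] current/total" (optionally with an ETA, or a "completed in" summary) drive the progress bar instead of the log.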
+        if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*")) {
+          String[] ratioElements = line.split("\\]")[1].trim().split("/");
+          int      current       = Integer.parseInt(ratioElements[0].trim());
+          int      aim           = Integer.parseInt(ratioElements[1].trim());
+          messageField.setText(line.split("\\[")[0].trim());
+          progressBar.setValue(current * 100 / aim);
+          etaField.setText("");
+          progressLine = true;
+        }
+        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*")) {
+          String[] ratioElements = line.split("\\]")[1].split("E")[0].trim().split("/");
+          int      current       = Integer.parseInt(ratioElements[0].trim());
+          int      aim           = Integer.parseInt(ratioElements[1].trim());
+          String   eta           = line.split("ETA:")[1].trim();
+          messageField.setText(line.split("\\[")[0].trim());
+          progressBar.setValue(current * 100 / aim);
+          etaField.setText("ETA: " + eta);
+          progressLine = true;
+        }
+        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*")) {
+          String nbElements = line.split("\\]")[1].split("completed")[0].trim();
+          String timeSpent  = line.split("completed in")[1].trim();
+          message          += line.split("\\[")[0].trim() + ": " + nbElements + " elements completed in " + timeSpent + "\n";
+          messageField.setText(line.split("\\[")[0].trim());
+          progressLine = true;
+        }
+        if (! progressLine) {
+          message += line + "\n";
+        }
+      }
+    }
+    String lines[]     = message.split("\n");
+    String toBeWritten = "";
+    for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) {
+      toBeWritten += lines[i] + "\n";
+    }
+    logArea.setText(toBeWritten);
+  }
+
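+  // Waits on the latch until doInBackground() has finished, then returns the process exit status (-1 if it could not be obtained).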
+  public int getExitValue() {
+    try {
+      latch.await();
+    }
+    catch (InterruptedException e) {
+      logArea.append("Cannot wait for the end of the process!\n");
+      e.printStackTrace();
+      return -1;
+    }
+    return exitValue;
+  }
+}
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/ProgramOption.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/ProgramOption.java Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,358 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.awt.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+\n+\n+public class ProgramOption {\n+  boolean       input;\n+  String        identifier;\n+  String        type;\n+  String        comment;\n+  boolean       compulsory;\n+  String[]      format;\n+  String        formatIdentifier;\n+  ProgramOption associatedOption;\n+  String        defaultValue;\n+  String[]      choices;\n+  JComponent    component;\n+  JPanel        panel;\n+\n+\n+  public ProgramOption() {\n+    this.input            = true;\n+    this.identifier       = null;\n+    this.type             = null;\n+    this.comment          = null;\n+    this.compulsory       = false;\n+    this.format           = null;\n+    this.formatIdentifier = null;\n+    this.associatedOption = null;\n+    this.defaultValue     = "";\n+    this.choices          = null;\n+    this.component        = null;\n+    this.panel            = null;\n+  }\n+\n+\n+  public void setInput(boolean input) {\n+    this.input = input;\n+  }\n+\n+\n+  public void setIdentifier(String identifier) {\n+    this.identifier = identifier;\n+  }\n+\n+\n+  public void setType(String type) {\n+    this.type = type;\n+  }\n+\n+\n+  public void setComment(String comment) {\n+    this.comment = comment;\n+  }\n+\n+\n+  public void setCompulsory(boolean compulsory) {\n+    this.compulsory = compulsory;\n+  }\n+\n+\n+  public void setFormat(String[] format) {\n+    this.format = format;\n+  }\n+\n+\n+  public void setFormat(String format) {\n+    this.format    = new String[1];\n+    this.format[0] = format;\n+  }\n+\n+\n+  public void setFormatIdentifier(String formatIdentifier) {\n+    this.formatIdentifier = formatIdentifier;\n+  }\n+\n+\n+  public void setAssociatedOption(ProgramOption 
option) {\n+    this.associatedOption = option;\n+  }\n+\n+\n+  public void setChoices(String[] choices) {\n+    this.choices = new String[choices.length+1];\n+    this.choices[0] = "---";\n+    for (int i = 0; i < choices.length; i++) {\n+      this.choices[i+1] = choices[i];\n+    }\n+  }\n+\n+\n+  public void setDefault(String defaultValue) {\n+    this.defaultValue = defaultValue;\n+  }\n+\n+\n+  public boolean isInput() {\n+    return this.input;\n+  }\n+\n+\n+  public boolean checkSettings() {\n+    if (this.identifier == null) {\n+      return false;\n+    }\n+    if (this.type == nul'..b'{\n+      this.component = new JComboBox(this.choices);\n+      label.setLabelFor(this.component);\n+      this.panel.add(label);\n+      this.panel.add(this.component);\n+    }\n+    else {\n+      System.out.println("Do not know how to read type " + this.type);\n+    }\n+\n+    return this.panel;\n+  }\n+\n+\n+  public JComponent getComponent() {\n+    if (component == null) {\n+      this.getPanel();\n+    }\n+    return this.component;\n+  }\n+\n+\n+  private String getValue() {\n+    if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! this.input)) || ("directory".equals(this.type)) || ("files".equals(this.type)))  {\n+      String s = ((JTextField) this.component).getText();\n+      if ("None".equals(s)) {\n+        return "";\n+      }\n+      return s;\n+    }\n+    if ("file".equals(this.type)) {\n+      return (String) ((JComboBox) this.component).getSelectedItem();\n+    }\n+    if ("boolean".equals(this.type)) {\n+      return ((JCheckBox) this.component).isSelected()? "true": "false";\n+    }\n+    if ("format".equals(this.type)) {\n+      return (String) ((JComboBox) this.component).getSelectedItem();\n+    }\n+    if ("choice".equals(this.type)) {\n+      String s = (String) ((JComboBox) this.component).getSelectedItem();\n+      if ("---".equals(s)) {\n+        return "";\n+      }\n+      return s;\n+    }\n+    System.out.println("Do not know how to get value of \'" + this.type + "\' (" + this.identifier + ").");\n+    return null;\n+  }\n+\n+\n+  public String checkValue() {\n+    String value = this.getValue();\n+    if ((this.compulsory) && ((value == null) || ("".equals(value)))) {\n+      return "Option \'" + this.comment + "\' has no value... Please specify it.\\n";\n+    }\n+    if ("int".equals(this.type)) {\n+      if ((value != null) && (! "".equals(value)) && (! "None".equals(value))) {\n+        try {\n+          int i = Integer.parseInt(value);\n+        }\n+        catch (NumberFormatException e) {\n+          return "Option \'" + this.comment + "\' should be an integer... Please correct it.\\n";\n+        }\n+      }\n+    }\n+    else if ("float".equals(this.type)) {\n+      if ((value != null) && (! "".equals(value))) {\n+        try {\n+          float i = Float.parseFloat(value);\n+        }\n+        catch (NumberFormatException e) {\n+          return "Option \'" + this.comment + "\' should be a float... Please correct it.\\n";\n+        }\n+      }\n+    }\n+    return null;\n+  }\n+\n+\n+  public LinkedList <String> getCommand() {\n+    LinkedList <String> list = new LinkedList <String> ();\n+\n+    if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("format".equals(this.type)) || ("directory".equals(this.type)) || ("files".equals(this.type)) || ("choice".equals(this.type))) {\n+      String value = this.getValue();\n+      if (value.length() == 0) {\n+        return list;\n+      }\n+      list.add(this.identifier);\n+      list.add(value);\n+      return list;\n+    }\n+    if ("file".equals(this.type)) {\n+      String fileName = (String) ((JComboBox) this.component).getSelectedItem();\n+      if (fileName == null) {\n+        return list;\n+      }\n+      list.add(this.identifier);\n+      list.add(this.getValue());\n+      return list;\n+    }\n+    if (("boolean".equals(this.type)) || ("bool".equals(this.type))) {\n+      if ("true".equals(this.getValue())) {\n+        list.add(this.identifier);\n+      }\n+      return list;\n+    }\n+    System.out.println("Cannot get type of option " + this.type + " (" + this.identifier + "): " + this.getValue());\n+    return null;\n+  }\n+\n+\n+  public File getOutputFile() {\n+    if (this.input) return null;\n+    String format = "";\n+    if (this.format != null) {\n+      format = this.format[0];\n+    }\n+    if (this.associatedOption != null) {\n+      format = this.associatedOption.getValue();\n+    }\n+    return new File(this.getValue(), Global.formats.getFormatType(format), format);\n+  }\n+}\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/.RData
Binary file SMART/Java/Python/.RData has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/.gitignore Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,1 @@
+/CleanTranscriptFile.py
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/100%
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/100% Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,152 @@
+
+R version 2.15.1 (2012-06-22) -- "Roasted Marshmallows"
+Copyright (C) 2012 The R Foundation for Statistical Computing
+ISBN 3-900051-07-0
+Platform: x86_64-pc-linux-gnu (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+  Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+[Previously saved workspace restored]
+
+> library(extrafont)
+Registering fonts with R
+There were 50 or more warnings (use warnings() to see the first 50)
+> loadfonts()
+Padauk already registered with pdfFonts().
+Padauk Book already registered with pdfFonts().
+Abyssinica SIL already registered with pdfFonts().
+cmex10 already registered with pdfFonts().
+cmmi10 already registered with pdfFonts().
+cmr10 already registered with pdfFonts().
+cmsy10 already registered with pdfFonts().
+DejaVu Sans already registered with pdfFonts().
+DejaVu Sans Condensed already registered with pdfFonts().
+DejaVu Sans Light already registered with pdfFonts().
+DejaVu Sans Mono already registered with pdfFonts().
+DejaVu Serif already registered with pdfFonts().
+DejaVu Serif Condensed already registered with pdfFonts().
+Droid Arabic Naskh already registered with pdfFonts().
+Droid Sans already registered with pdfFonts().
+Droid Sans Armenian already registered with pdfFonts().
+Droid Sans Ethiopic already registered with pdfFonts().
+Droid Sans Fallback already registered with pdfFonts().
+Droid Sans Georgian already registered with pdfFonts().
+Droid Sans Hebrew already registered with pdfFonts().
+Droid Sans Mono already registered with pdfFonts().
+Droid Sans Thai already registered with pdfFonts().
+Droid Serif already registered with pdfFonts().
+esint10 already registered with pdfFonts().
+eufm10 already registered with pdfFonts().
+FreeMono already registered with pdfFonts().
+FreeSans already registered with pdfFonts().
+FreeSerif already registered with pdfFonts().
+gargi already registered with pdfFonts().
+Garuda already registered with pdfFonts().
+Gentium already registered with pdfFonts().
+GentiumAlt already registered with pdfFonts().
+Gentium Basic already registered with pdfFonts().
+Gentium Book Basic already registered with pdfFonts().
+Junicode already registered with pdfFonts().
+KacstArt already registered with pdfFonts().
+KacstBook already registered with pdfFonts().
+KacstDecorative already registered with pdfFonts().
+KacstDigital already registered with pdfFonts().
+KacstFarsi already registered with pdfFonts().
+KacstLetter already registered with pdfFonts().
+KacstNaskh already registered with pdfFonts().
+KacstOffice already registered with pdfFonts().
+KacstOne already registered with pdfFonts().
+KacstPen already registered with pdfFonts().
+KacstPoster already registered with pdfFonts().
+KacstQurn already registered with pdfFonts().
+KacstScreen already registered with pdfFonts().
+KacstTitle already registered with pdfFonts().
+KacstTitleL already registered with pdfFonts().
+Kedage already registered with pdfFonts().
+Khmer OS already registered with pdfFonts().
+Khmer OS System already registered with pdfFonts().
+LKLUG already registered with pdfFonts().
+Lohit Bengali already registered with pdfFonts().
+Lohit Gujarati already registered with pdfFonts().
+Lohit Hindi already registered with pdfFonts().
+Lohit Punjabi already registered with pdfFonts().
+Lohit Tamil already registered with pdfFonts().
+Loma already registered with pdfFonts().
+Mallige already registered with pdfFonts().
+MarVoSym already registered with pdfFonts().
+Meera already registered with pdfFonts().
+mry_KacstQurn already registered with pdfFonts().
+msam10 already registered with pdfFonts().
+msbm10 already registered with pdfFonts().
+Mukti Narrow already registered with pdfFonts().
+NanumGothic already registered with pdfFonts().
+NanumMyeongjo already registered with pdfFonts().
+OpenSymbol already registered with pdfFonts().
+Phetsarath OT already registered with pdfFonts().
+Pothana2000 already registered with pdfFonts().
+Purisa already registered with pdfFonts().
+Rachana already registered with pdfFonts().
+Rekha already registered with pdfFonts().
+rsfs10 already registered with pdfFonts().
+Saab already registered with pdfFonts().
+Sawasdee already registered with pdfFonts().
+Tibetan Machine Uni already registered with pdfFonts().
+TlwgMono already registered with pdfFonts().
+TlwgTypewriter already registered with pdfFonts().
+Tlwg Typist already registered with pdfFonts().
+Tlwg Typo already registered with pdfFonts().
+Ubuntu already registered with pdfFonts().
+Ubuntu Condensed already registered with pdfFonts().
+Ubuntu Light already registered with pdfFonts().
+Ubuntu Mono already registered with pdfFonts().
+Ume Gothic already registered with pdfFonts().
+Ume Gothic C4 already registered with pdfFonts().
+Ume Gothic C5 already registered with pdfFonts().
+Ume Gothic O5 already registered with pdfFonts().
+Ume Gothic S4 already registered with pdfFonts().
+Ume Gothic S5 already registered with pdfFonts().
+Ume P Gothic already registered with pdfFonts().
+Ume P Gothic C4 already registered with pdfFonts().
+Ume P Gothic C5 already registered with pdfFonts().
+Ume P Gothic O5 already registered with pdfFonts().
+Ume P Gothic S4 already registered with pdfFonts().
+Ume P Gothic S5 already registered with pdfFonts().
+Ume UI Gothic already registered with pdfFonts().
+Ume UI Gothic O5 already registered with pdfFonts().
+UnBatang already registered with pdfFonts().
+UnDotum already registered with pdfFonts().
+UnGraphic already registered with pdfFonts().
+UnGungseo already registered with pdfFonts().
+UnPilgi already registered with pdfFonts().
+utkal already registered with pdfFonts().
+Vemana2000 already registered with pdfFonts().
+VL Gothic already registered with pdfFonts().
+VL PGothic already registered with pdfFonts().
+Waree already registered with pdfFonts().
+wasy10 already registered with pdfFonts().
+There were 26 warnings (use warnings() to see them)
+> library(ggplot2)
+> data <- read.table("tmpFile36240.dat", header = T)
+> data$Sample <- factor(data$Sample, levels=c("Col", "rtl2", "35S::RTL2#1", "35S::RTL2#2", "35S::RTL1#1", "dcl2dcl3dcl4"))
+> data$Size <- factor(data$Size, levels=c(21, 22, 23, 24))
+> png("/home/mzytnick/Desktop/Projects/Vaucheret/RTrna/MiRna/genomicSizes.png", width = 1000, height = 200)
+> ggplot(data, aes(x = Size, y = Count, fill = Size))  + geom_bar(stat = "identity") + facet_grid(. ~ Sample, space="free_x") + xlab("Sizes") + ylab("Count") + scale_fill_manual(values = c("blue", "green", "pink", "red")) + theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())
+> dev.off()
+null device 
+          1 
+> 
+> proc.time()
+   user  system elapsed 
+  1.884   0.088   3.163 
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/CompareOverlapping.pyc
Binary file SMART/Java/Python/CompareOverlapping.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/CountLoci.py
--- a/SMART/Java/Python/CountLoci.py Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/Java/Python/CountLoci.py Tue Apr 30 14:33:21 2013 -0400
@@ -37,7 +37,7 @@
 from SMART.Java.Python.misc.Progress import Progress
 from SMART.Java.Python.misc.RPlotter import RPlotter
 from SMART.Java.Python.cleanGff import CleanGff
-from SMART.Java.Python.CompareOverlappingSmallRef import CompareOverlappingSmallRef
+from SMART.Java.Python.CompareOverlapping import CompareOverlapping
 from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
 from SMART.Java.Python.GetUpDownStream import GetUpDownStream
 
@@ -85,7 +85,7 @@
                                "five_prime_UTR":            "%sfive.gff3"     % (self.outputBase), \
                                "three_prime_UTR":           "%sthree.gff3"    % (self.outputBase), \
                                "mRNA":                      "%smrna.gff3"     % (self.outputBase), \
-                               "ncRNA":                     "%sncRNA.gff3"    % (self.outputBase), \
+                               "ncRNA":                     "%sncRNA.gff3"     % (self.outputBase), \
                                "transposable_element_gene": "%sTE.gff3"       % (self.outputBase), \
                                "vic":                       "%svicinity.gff3" % (self.outputBase)}
         self.tmpFileNames.extend(self.referenceFiles.values())
@@ -98,14 +98,14 @@
                 self._writeTmpRef([tag], fileName)
 
     def _compare(self, queryFileName, queryFormat, referenceFileName, referenceFormat, outputFileName, exclusion = False):
-        co = CompareOverlappingSmallRef(self.verbosity-1)
-        co.setQueryFile(queryFileName, queryFormat)
-        co.setReferenceFile(referenceFileName, referenceFormat)
-        co.setOutputFile(outputFileName)
+        co = CompareOverlapping(self.verbosity-1)
+        co.setInput(queryFileName, queryFormat, QUERY)
+        co.setInput(referenceFileName, referenceFormat, REFERENCE)
+        co.setOutput(outputFileName)
         if exclusion:
-            co.setInvert(True)
+            co.getInvert(True)
         co.run()
-        return co.nbWritten
+        return co._nbOverlappingQueries
 
     def _copy(self, inputFile, tag):
         parser = GffParser(inputFile, self.verbosity-1)
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/GetFlanking.pyc
Binary file SMART/Java/Python/GetFlanking.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/GetIntersection.py
--- a/SMART/Java/Python/GetIntersection.py Mon Apr 29 03:45:52 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-MINBIN = 3
-MAXBIN = 7
-REFERENCE = 0
-QUERY = 1
-
-def getBin(start, end):
-    for i in range(MINBIN, MAXBIN + 1):
-        binLevel = 10 ** i
-        if int(start / binLevel) == int(end / binLevel):
-            return int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))
-    return int((MAXBIN + 1) * 10 ** (MAXBIN + 1))
-
-def getOverlappingBins(start, end):
-    array = []
-    bigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))
-    for i in range(MINBIN, MAXBIN + 1):
-        binLevel = 10 ** i
-        array.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))
-    array.append((bigBin, bigBin))
-    return array
-
-
-class GetIntersection(object):
-
-    def __init__(self, verbosity):
-        self.verbosity              = verbosity
-        self.nbQueries              = 0
-        self.nbRefs                 = 0
-        self.nbWritten              = 0
-        self.bins                 = {}
-
-    def setReferenceFile(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.refParser = chooser.getParser(fileName)
-
-    def setQueryFile(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.queryParser = chooser.getParser(fileName)
-
-    def setOutputFile(self, fileName):
-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
-
-    def loadRef(self):
-        progress = UnlimitedProgress(10000, "Reading references", self.verbosity)
-        for transcript in self.refParser.getIterator():
-            if transcript.__class__.__name__ == "Mapping":
-                transcript = transcript.getTranscript()
-            chromosome = transcript.getChromosome()
-            bin    = getBin(transcript.getStart(), transcript.getEnd())
-            if chromosome not in self.bins:
-                self.bins[chromosome] = {}
-            if bin not in self.bins[chromosome]:
-                self.bins[chromosome][bin] = []
-            self.bins[chromosome][bin].append(transcript)
-            self.nbRefs += 1
-            progress.inc()
-        progress.done()
-
-    def _compareTranscript(self, queryTranscript):
-        queryChromosome = queryTranscript.getChromosome()
-        if queryChromosome not in self.bins:
-            return None
-        queryStart = queryTranscript.getStart()
-        queryEnd   = queryTranscript.getEnd()
-        bins    = getOverlappingBins(queryStart, queryEnd)
-        overlaps   = []
-        for binRange in bins:
-            for bin in range(binRange[0], binRange[1]+1):
-                if bin not in self.bins[queryChromosome]:
-                    continue
-                for refTranscript in self.bins[queryChromosome][bin]:
-                    newTranscript = queryTranscript.getIntersection(refTranscript)
-                    if newTranscript != None:
-                        overlaps.append(newTranscript)
-        if not overlaps:
-            return None
-        newTranscript = overlaps[0]
-        for transcript in overlaps[1:]:
-            newTranscript.merge(transcript)
-        return newTranscript
-
-    def compare(self):
-        progress = UnlimitedProgress(10000, "Comparing queries", self.verbosity)
-        for queryTranscript in self.queryParser.getIterator():
-            if queryTranscript.__class__.__name__ == "Mapping":
-                queryTranscript = queryTranscript.getTranscript()
-            progress.inc()
-            self.nbQueries += 1
-            newTranscript = self._compareTranscript(queryTranscript)
-            if newTranscript != None:
-                self.writer.addTranscript(queryTranscript)
-                self.nbWritten += 1
-        progress.done()
-        self.writer.close()
-
-    def displayResults(self):
-        print "# queries:  %d" % (self.nbQueries)
-        print "# refs:     %d" % (self.nbRefs)
-        print "# written:  %d" % (self.nbWritten)
-
-    def run(self):
-        self.loadRef()
-        self.compare()
-        self.displayResults()
-
-if __name__ == "__main__":
-
-    description = "Get Intersection v1.0.0: Shrink the first data set so that all bases covered by the first data set is also covered by the second data set. [Category: Data Comparison]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input1",         dest="inputFileName1", action="store",            type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format1",        dest="format1",   action="store",            type="string", help="format of previous file [compulsory] [format: transcript file format]")
-    parser.add_option("-j", "--input2",         dest="inputFileName2", action="store",            type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
-    parser.add_option("-g", "--format2",        dest="format2",   action="store",            type="string", help="format of previous file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",         dest="outputFileName", action="store",            type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity",      dest="verbosity",    action="store",      default=1,     type="int",  help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    gi = GetIntersection(options.verbosity)
-    gi.setQueryFile(options.inputFileName1, options.format1)
-    gi.setReferenceFile(options.inputFileName2, options.format2)
-    gi.setOutputFile(options.outputFileName)
-    gi.run()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/GetUpDownStream.pyc
Binary file SMART/Java/Python/GetUpDownStream.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/Helitrons.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/Helitrons.fasta Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,2378 @@\n+>HELITRON1\n+TCTACATATACATTTTTGGGAACGATTTTGAGTTGAAAAATCATTTATCAATTTCTGATA\n+TTGTATGTTTAGTCCCTACAAAATCAATCAGACAAGATAGTCTTCTGCGTTAGGACAATG\n+ACCAAATTTTTGATGAACTATTTACGTTTTGCACTAATTACTTACCTAAACATATAAATT\n+GTTTTGCACTATCCTCATAAATTGCACACATTGACTCCCTTAAAACTCTACGAATTCAAT\n+ATTACACGAAATAATATTTCCTAATTAGCAAAAATCCGATACAACCAATCACTTAACTAA\n+CCATATCTATAAAATTACTTGATTCGAAAATCTCCGATATTTTTGCTACAATCTTACCAA\n+ATATTCATATTCTCTATATCACATTTCTATAAGTTAAATCAGTTATTAAAGTTGCTTTAA\n+TTCCGTTTACCTCAAACAAACACTTAATATATCATATGATCTTACATTTGGTATAGTAAA\n+TATATTTTGAAATATACTACAAATCATCATCATACTCAAAATTCTTCCTATATATCGGTA\n+TTGTGTTATCAATCTTTACCAAGTACTATATTCTCCATCTAATTTAAAATGTAAAGTAAT\n+CTTGCATTCCAAAGATATTTTCCGATTTGAATCGCATCATCAATTCCCTCCATTCCTCAT\n+TTATAGCAATGCAGTTTCCTGTTTCCAAACATAGCACATCACATTAGCTAGAATATCTCC\n+AAAATTTTGCTTGAGAATCAAAGCAAAATCTAAATACTCATGCAATATTCCATCTTCTAA\n+ACCATCCAAATCATTCTGTTTTGAATGTACAAGCGTAAAAATAAGGGCAAATGCATCTAC\n+CATATCTCGACAGTTTGATTCAGAATCAAATCAAAATCTAAATACGGCATTTCGATAATT\n+ATTTTGCACTTATGATACTCAAGCATATTCCTATTTTAAAACCATTCCAAATTGTCTGTT\n+TCTATTGAGCTGCGCTTAGAATCTTTTATACGGTGTGAAAATTTAGGTTTACTAACTTAG\n+TGCACCCACTATAAATAAGGTTGCTCATATCATTCCATTTCATCATCACCTACACAGCAA\n+ACTATCTACACATTTCACTTCTACTTCTACAACTTTGTTATTTGAGAAATGGCTGCTACA\n+TTCGCATACCTTAAAGACGTGAGGCCTTACAAAAACGCATGGAGAGTCCAGGTCAAGATT\n+CTTCACTCTTGGAAGCAGTACACAAGCAACACCCGTGAAACCATTGAGTTGGTCATCTCT\n+GATGAATATGAAAAAAGTAAACTTACCATTTTAATTCTTTGCAATCGTAAATTATTATGT\n+CTTGCTAATGCCCAAATGAAAACGTCTATTCTATACTAATATGCTATGACTTCTCTTGGT\n+GGTAAGGGGAAAAAAATGCATGCTACCGTGAAGAAGGAGTTGGTTTCTAAGTTTGTTCAC\n+AAGCTTATTGTTGGCGAATGGGTCTTCATTGAGATTTTTGGTCTTACCTATGCGTCTGGC\n+CAGTTTCGCCCAACCAACCATCTATACAAGATGGCGTTCCAGGTTAGAACTGAAGTCATG\n+GGTTGTGCTTCTGTCTCTGACTCCAACTTCTTGACTTTGGCGCCATTTTCAAAAATCCAG\n+AGTGGTGAGCTTAACCCTCACATGTTGGTTGGTGAGGGCTATATCTCTCAAAGATCCGTG\n+TATTTTAATTGTCATGTTATTCAAGAAACTTTATAGCCATTACGTTTTTTTGAATCTGTG\n+ATTCGTTTTTGTATTAGATGCTATTGGCCAAATCATTACTGTTGGTGAGTTGGAGGAACT\n+CGAGGCTAACAATAAGCCTACAACAAAGATTGACTTTGAGATCAGAGATCAAATGTAAGT\n+TGCTTATTCTCTGATTTTTGGTCCTCAAGAATGCAAACTATATCCAACAATTTATCTTAT\n+AGTAATTGTTTTAGTAAATTTTAAAACTAACATTAAAGTTAACTATTATAAGGATGAGAG\n+AATGCAAGTTACTTTGTGGGGAACATACGCTCAGCAGGTTTACAGAGCATGTCAGGAATC\n+TGAGGGAAAGAATGTGATTTTTCTCATTCGTTTTGCCAAAATTAAGAGTTACAAGGGTAT\n+ATTCAGTTTCTTTGTACCTATTCCTTTTCTTTCCATACTGCATGATCTATGGTCACTTAA\n+ATGTTATGAGACTGCAGGTGTGAAGAGTTTATCCAACTCATTTGATGCATCGCAAGTACA\n+TGTCAATCCGGACTTCCCTGAAGCTCACCATTTCAGTCAAACGTACGAAGATGTCATATT\n+TTCTATAGATTGTAAAGTGCTTACTTTTATAATATGTGATCTATGGTAACTTAACCGTTT\n+ATAAAATTGCTGGTGTGAAAAGTTTATCCAACTCTTTTGATGCATCTCAAGTACATGTCA\n+ATCCTAACTTCCTCGAAGTTGTGGCTTTCAGTCAATCGTAAATTTAGTAATATGATTCTC\n+AAAAAGGATATGCTCATATTGAATTTTGCTTTTTTTGTAATGTATATGAATACCGGTTTA\n+ACTCACTTTCCGTTTATATGTTATTTAACAGACTTCCAAATGATGGTGCTATTTGTGTGT\n+TCCGTGCAAGAGTCCCACGTTTTGAGATGGTTGCAGTTAAAAGGATTGACTACAGTGAGT\n+ACACAAGGAATACCATTGAAGATCTGCTTAGCTCGACTGAGGATTGTTTTTAATCAGATA\n+CTTTGTAATATGCACTTAAAGACATTAAGACTATATACTCATTTATGCTAATACATTGTA\n+TTTACTATATGTTTGTATAATTTCATTTAGGTTGGTAAAGTCAGAGTTTTGTGCACAATC\n+TATGCAATTGATACGGATTGGGCTTGGTATTACATCAGCTGCAAGACATGTAATAAGAAA\n+GTGAATCATATTCATGCTGGTGTTAATGGAGTAAACAACAAGGGTAAGAAGCCTAGATTC\n+TGGTGTGATACATGCAAGTCTGTTGTAACCAATGTGGTCTCTAGGTGCATATGCACTCCC\n+TATGCGTCATTAGTAGTTGCAGAGTATTTAATACATTCAAAAATGTTTATGGATTTTCTC\n+AAACGGTCTTACTTATAATTTATAATCTAAGTGGTTTTGCAAAAAAATGTGACCTATACA\n+ACTCAGTACATGATCTATGCAAAGGTTATGGATAGCACTGGTGAAGCCAAATTGCTTCTG\n+TTTGATTCAATTTGCTCTGAGATCATTGGCGAGTCTGCAACCTCTGTTCTTAATGGATCT\n+GTTGATGAGGTTTGTTTCTTAAAGTTTTCCCGTGTCTACTTTATGTCTTATTCTGATATA\n+TATTAATCTAGATTTTAAATACTATATTATATTACCCTGTTGCAGATTGAGGAT
CCAGAA\n+GATCTTCCTGATTCTGTAAAGAATCTGATTGGTAAGACATTTCTGTTTCTGGTGTGGGTT\n+GAGAAAGACAACATCTCGGATGGAAAAGAAATCTATAAGGTTTCAAAGGTGCTTCTGAAG\n+GATGGACTACTAGAGGAACAATTACTAGAGGATTCTGCTGAACATGTGAACCCTGCATCC\n+ATTGTGTCTGGTGATCAGGTAATATATACTATAAACATATATCATTAATCATAACACTTA\n+TAATAATTTGTATTTATTGTGCTTCCATTCATATGTCTAATCTTTAATCGTGTATTATAA\n+TACAATAGGTTCTACTTATGCTGGAGAATGGTAATGGATCGCCAGACTCTACGACTCCAT\n+CTTCAAAGCGTGTTTACGCTAGAGAAACGAGTGGCTCTGAAGGTTCTTCAAGTTCAAAGA\n+AGGTGTGTGTTGTACCATTAGACTTGGAAAAGTCTTTATCTGAGAATGCTGAACATGGAG\n'..b'\n+TATAAGATTTTATATGACTTATTTTTTTTTTTGAACCGACATATAAGATCTTTAGTCAAG\n+TTAGAGGGTTTTTCATTGAAAGTTATCTTAAATCGTAGGCTTTAAATTTTTAAATGTGAA\n+GAAGGTTCATCTATTGTTTTGACTTTCAAGAAAATTGGTAATACATGCATTTGCATTTTT\n+ACAAAAAAAAAAACTGAATAATTTATGTTTATTTTTTAATATATAGTAAAACATACTGTT\n+TTTACAAAAAAAAATAAAAAAAAATTGAATGCAAACTTTTAATCAACTAAAATGTCTTTG\n+TAAAAAAATCATAATTTTTAAACATCAAAATTACTATTATTAATTATTTTGATCGATATA\n+TTAACAATTATAAATAAGTTTTTTAAATAATTTAATTTAAATATTATTTTATATTCAAAA\n+CTAAAACCGAATATAAAATCCACGCATCGCGTGGACAACTTCTAG\n+>HELITRONY3A\n+TCTACTTAACAATTTTTAAGTACATTTTAAGGAATTTAATCGGATTGGTTTTTTTTTATG\n+GGTTTAACCCTCTTTTTTTTCTGTTTTTTTCGGCCCATTTGTTGAATCTCTTTACTATTT\n+GGTCCACATCCTATAATTTTAAATGTTAATCAAATCTTACCAAAATTTACAAATCGCTTT\n+AAATACTAATTAGTTACAATTAATTATTTCCGTTCAGCAAAGTAAATCCGACTTATATGG\n+AATCGAATATGTAACATTGCAAAACAAATCCGACTCATATGGTAATTTAGTATTTATCCG\n+TTCAGCAAAGTAATTACTTGAGTAAATATTCTATTAACTACAAAATCTTCAAGTTAAACG\n+AAATCAAAATCTGCATTCCACTACATAATTTTCGGAGATCTTAAGAACTAAATTAAAGCA\n+TATTCCTTAAACATCCTCTATTCAATTCCTATAACTTTTGAAAAATATACTTTTCAAAGA\n+ATTCTAACAATCATTATGAAACAACCTAACGAGATACTTTTTCCATAACAACTAATCATT\n+CAATTCATTTTCTAATTCTAGCCAATAAGAAAATAGAAAAAAATATTGCTTGCACACAAA\n+GATATTTTTTTATTCGAAATCAAAACTAACCCTAATTGTGTATGAGCTACTATATATACT\n+CTCTCATTAGCAAGCCAAATATCACATCTTCATTGTTTTTTTCATTACCTGCACCTATAT\n+ATAACATCTCTCATCTTCATCTTCATCGTTTTGTTTTTGTTTTCTGTTATAAGAATTTTC\n+ATATTTTATGTTTTTAGTTAGAAAATATACTAAACATTTAATCTGTTTTGCAGCTAATCT\n+TACAAACATATCAACAAATACACTCTCAAATCTGGTGAGTTATTGAAATGGTCCATAGTC\n+TTTTATTATCTTCATAAATATGATTCTAAATTCTTTTATTATTTACTAACTAAATATGTT\n+CTATATCTTTATAGTTAAAGATTACTTTTTCAATTTCAATCTTCATCTCTATGGTATGGA\n+TGTATCTCTTCAAACTTTGATAGCTCAATCTGCATCAAAACTCCTAGATAATTCATTTGA\n+GCAGTGTTCAAATGTACAAAATTTTATTTTTTATGATGATATAAGAGTTGTATTTCTTAT\n+ACAATTTCTGCTCATCTTTTCTTTGTTAATAACTACAAAATATTAAATATATAGGAATAA\n+ATTTTGCAATATATTGAAAAATATAGCCTTGAATATCAAATCCTACTAATAAGGAAAGTT\n+AAAATTTATTCCTTTAACAACTTATATACTCGCTTACATCTTCTCTAATAAGGCAAATTA\n+CGAAATATTCTCTAAACATTTAATACACGATAGCTAAACGTTATCTATTCCTATTAAAAA\n+AAAAAAAATCTAACAAGAGAAAGTTGCAATTTCGGTAAGATTGAGTACTATATTATGTAT\n+AACTAATATTGTAAACAATCTAACCTTATTTGTATTTTTAAATCTCATAGAATATGCTTC\n+ATATAAACCCTAACATAAAGACCTAACCATAAACACTATAAATATAACCTTCACTATTTT\n+ACCGTAAGTCATAACCAAAAACTAACTATATTTATTTGCCTACACAATTACATTAGATTT\n+TATGTTTAATGACTAGGAAAACTTTGCTTGCTAACCGGATATGGTTGCTCTAAAATATTC\n+ATTGATCCTAATTTCAGATGTTTGGATAAAAAGAACTACATGTAAGTCACTCTCTTTACT\n+TAATTTTCCTAAATGTATAAAAAAAATTAAGGTTTACTTAATGATTTTTTTTGTTGTTTT\n+ATAGGAATGCATTTAAAGGTGATGATGATGACGACTATGAGCTTTATGAGGAGATTGTTG\n+AAACAAACTTCCAAATGTTTTTTAGTATCAAAAAATAGCAAACATTCATGTTTGATTGTC\n+CTTTCGTTTTTTTTTTTGTTTTGTACTTATGACTTATCAACATCGTACATTTTGTTATAT\n+TTCTACTAATCAAACTATCAAGTTATTAAAACAAGTTATAAGATTTTATGTTGGTGGTTT\n+ATTAATTTATTATTACTCAATTACGAGACATACATTCTAACAATATATTTTACAGTTCTA\n+ATACTTACTACTATAATTATATTTGTACTTTATGACAATTAAGTATAAGTATTGATCCCA\n+ACTAACCTTATAAATAGCGATTTATCCTTCACGTGCTCTTCAATCATAGTAATCAACTTC\n+CATCTACCAAATAATTATTTTGGAAATAAATTATGAAGGATGACATTATAACAATGATAT\n+AGTAAAAGGAATAAAATATTACAAAACTCTAATTTAGGTAAATATAATCATTAAGATCTT\n+TTCAAATAAACATAAATCAAAACAGAAGATTTTGGTAATATTAAATATAGGCCTCGATTA\n+CATTAGAAAAC
TTAACTAAGCAGTGATTATAGGGATTCTGTAAAGAAAATAACACGCATG\n+TTACTTTTTCTTTTTGGTTTTGTCATTACTCTCAACCATTTATCTTTCGACACATATTCA\n+CCTCTTAACTCTAAAATAACATCTTAGCTAAGATACATTTCTTAGATTAAGATTGTTAGT\n+TTGATTTAATTTAATCATTTTTAATCCTAAAATTTTGTTAAGATACTAAGCTAAGATATG\n+GGCTAAGATGCACCAATGGAGATGCTCTTAGAAACTAGAGGTAAATGTTTTGTTTCAAAA\n+GTATATATGAATAAATCATATAAAAACTTATAAAACTGAATCGTATATACTCCCGCGGGA\n+CGAATCTAGCATGGCTAGGCGGATTTATTAAAACACTATAACTATTAACTTATTTCAAAT\n+ATTATAGGGTGATCATATTTTAGAACTAATTAACTTACAAATTATATCCATATTTATTAT\n+ATGAACTACAAAATTTAAACCTATTAACACCTTCTTACTTTCAAAACAATTAATTTAAAT\n+TGGTACATTTTCAAATAAATATTTAATTTGTATTTACATCAAACAAACAACTGAGTGTAC\n+TAATTTATAGTATCGTTACATTGCATAATTAAAATAAATGAGTGTACTAATTTATAAACT\n+CGATCGCTTAATAAAATGTCATATATAATATACACACAACAAAAGATATAGTTTTCATAT\n+AAGAAAATGAAATATCAACAATAATTTGAAATCATATGCTTACAGAGATAGACACATTGT\n+ATAGAATAATTTTTATAAATCCGTAGAATAACCAATATTATCGTTACCTTAAATACGTGT\n+CATTTAATTCTACATACAACGGAACATATAATTTGCCAGTAATAAAAAATACAACAATCA\n+TACTATACATATACTACATTGTCAAAACCCAAAAAACCAAAACTATAAACAAACAAAAAT\n+CCTGCGGTGTACCGCGGGTCATATCCTAG\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/Rplots.pdf
Binary file SMART/Java/Python/Rplots.pdf has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/S1_S3_blast.blast
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/S1_S3_blast.blast Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,5405 @@\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t83.33\t72\t12\t0\t20\t91\t1\t72\t3e-07\t48.1\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t96.15\t26\t1\t0\t256\t281\t356\t381\t5e-06\t44.1\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t96.00\t25\t1\t0\t251\t275\t431\t455\t2e-05\t42.1\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t91.67\t24\t2\t0\t588\t611\t13064\t13087\t0.019\t32.2\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t13\t0\t0\t529\t541\t7340\t7328\t1.2\t26.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t81\t92\t785\t796\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t222\t233\t3878\t3889\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t31\t42\t4243\t4232\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t535\t546\t6279\t6290\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t135\t146\t9265\t9254\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t164\t175\t12561\t12550\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t93.75\t16\t1\t0\t225\t240\t12716\t12701\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t376\t387\t12884\t12873\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t217\t228\t13045\t13056\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t95.83\t24\t1\t0\t588\t611\t1649\t1672\t8e-05\t40.1\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_A
t_Col-B-G1463-Map16\tHELITRONY1E\t90.91\t22\t2\t0\t550\t571\t1170\t1149\t0.30\t28.2\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t100.00\t14\t0\t0\t430\t443\t1650\t1663\t0.30\t28.2\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t100.00\t12\t0\t0\t130\t141\t199\t188\t4.6\t24.3\n+chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_A'..b'882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1E\t100.00\t12\t0\t0\t10\t21\t58\t47\t1.2\t24.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1E\t100.00\t12\t0\t0\t10\t21\t291\t280\t1.2\t24.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t12\t0\t0\t66\t77\t4914\t4903\t1.2\t24.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t5\t15\t7896\t7886\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t121\t131\t9920\t9910\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t123\t133\t9977\t9967\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t20\t30\t20090\t20100\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON3\t100.00\t12\t0\t0\t5\t16\t7568\t7557\t1.2\t24.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON3\t100.00\t11\t0\t0\t65\t75\t1998\t2008\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY3\t100.00\t11\t0\t0\t141\t151\t3832\t3822\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t16\t26\t217\t207\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t127\t137\t1692\t1702\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_At
h_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t124\t134\t2354\t2344\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t124\t134\t2502\t2492\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON5\t100.00\t11\t0\t0\t108\t118\t11817\t11807\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON1\t100.00\t11\t0\t0\t146\t156\t9667\t9677\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON1\t100.00\t11\t0\t0\t136\t146\t13674\t13664\t4.8\t22.3\n+chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON1\t100.00\t11\t0\t0\t123\t133\t18664\t18654\t4.8\t22.3\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/SR1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/SR1.fastq Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,5000 @@\n+@HWI-EAS337_3:7:1:415:1217/1\n+GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n++HWI-EAS337_3:7:1:415:1217/1\n+WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n+@HWI-EAS337_3:7:1:208:1489/1\n+GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n++HWI-EAS337_3:7:1:208:1489/1\n+WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n+@HWI-EAS337_3:7:1:278:1153/1\n+GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n++HWI-EAS337_3:7:1:278:1153/1\n+WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n+@HWI-EAS337_3:7:1:1178:755/1\n+GGTGAGAGTGGTTGGTTGATGGTAAAACCATTGAAT\n++HWI-EAS337_3:7:1:1178:755/1\n+WWWWWWWWWVWWWVVWWVVWVVVVWVWVVVUUUUUU\n+@HWI-EAS337_3:7:1:277:1259/1\n+GGGTGACAAAGAAAACAAAAGGGACATGGTACTTGG\n++HWI-EAS337_3:7:1:277:1259/1\n+WWWWWWWWWWWWWWWWWWWWWWVWWWWWWVUUUUUU\n+@HWI-EAS337_3:7:1:447:1231/1\n+GACTTGTGGAAGAGTTGGAATGGAAAGCTGGAGCCT\n++HWI-EAS337_3:7:1:447:1231/1\n+WWWWWWWWWWWVWVWWWVWWWVVVVVVVVVURUSUU\n+@HWI-EAS337_3:7:1:300:1199/1\n+GTTTTTGCATATAGATCTCTTTGTAAAGATATCCAT\n++HWI-EAS337_3:7:1:300:1199/1\n+WVWWWWWWWWWWWVWWWWWWWWQWVVVTWWUUUURU\n+@HWI-EAS337_3:7:1:247:1210/1\n+GATAGCTTTGACTATAGGACTTTTATGTATGTGTTG\n++HWI-EAS337_3:7:1:247:1210/1\n+WWWWWWWWWWWWVWWWVVWWWWWWWWTVVWRULUUR\n+@HWI-EAS337_3:7:1:1154:1517/1\n+GAATGTTGCAGACCTTACTCCTACCTATGAAGCACA\n++HWI-EAS337_3:7:1:1154:1517/1\n+WWWWWWWVWWVWWWWWWWWWWWWWVWVWVWUSUUUU\n+@HWI-EAS337_3:7:1:164:1869/1\n+GTTTGATAGGAATTTATTTCTTCTTCGACATCCACC\n++HWI-EAS337_3:7:1:164:1869/1\n+WWWWWWWWVVWWWWWWWWWWWWWWWWQWWWUUUUUU\n+@HWI-EAS337_3:7:1:415:1194/1\n+GATGGTTGACACATTAAGAACATTCTCACCGGTCTC\n++HWI-EAS337_3:7:1:415:1194/1\n+WWWWWWWWWWWWWWWWWVWVWVWWWWWWWVSUUUUU\n+@HWI-EAS337_3:7:1:645:1892/1\n+GATAGTAAGCACCCCTCACTTCCAACCCAAAGATTG\n++HWI-EAS337_3:7:1:645:1892/1\n+WWWWWWWWWWWWWWWWWVWWWWWVVWVWWVUUUUUU\n+@HWI-EAS337_3:7:1:33:1446/1\n+GTTATTCTTTCTTTCTCAAATGGATGCAGTAATGCA\n++HWI-EAS337_3:7:1:33:1446/1\n+WWWWWWWWWWWWWWWWWWWWWUQWWVVWQWUUUSUU\n+@HWI-EAS337_3:7:1:1194:1427/1\n+GAAAAATCACATTTTTTTGTTTGATAAAAACCCAGA\n++HWI-EAS337_3:7:1:1194:1427/1\n+WWWWWWWWVWVWWWWWWWVWWWUWWWWWWWUUUUSU\n+@HWI-EAS337_3:7:1:624:1913/1\n+GACATCTTCAACTCCGGAGTTTTGAGTAACATTATA\n++HWI-EAS337_3:7:1:624:1913/1\n+WWWWWWWWWWVWWWWVVVVWWWWVVVWWVWUUUUUU\n+@HWI-EAS337_3:7:1:437:1202/1\n+GTACTTATGATGAAACTGAGATCAACTACCACCTCC\n++HWI-EAS337_3:7:1:437:1202/1\n+WWWWWVWWWVWVWWWWWWWWVWWWWVWVVVUUUUUU\n+@HWI-EAS337_3:7:1:1386:1787/1\n+GTTTAGCTAGTATTAAGGCTAGAAATGGATATGATG\n++HWI-EAS337_3:7:1:1386:1787/1\n+WWWWWWWWWWWWWWWWVVWWWVWVVWVVVWUUSUUO\n+@HWI-EAS337_3:7:1:227:1155/1\n+GATAGCAGCAAGGTTATTGGAATCTAAGCAATCTAC\n++HWI-EAS337_3:7:1:227:1155/1\n+WWVWWVWWVVWVVIWVWVVUWVVVVWVTVVUUUUSU\n+@HWI-EAS337_3:7:1:472:1025/1\n+GAAGTGATACTCATAAAACTATTTAGAAAGTTAATT\n++HWI-EAS337_3:7:1:472:1025/1\n+WWWWWWWWWWWWWWWVWVWWWWWWVVWWWVUUUUUU\n+@HWI-EAS337_3:7:1:220:1482/1\n+GCTATATGAGAATTCAGGCCACTTGTAGTTCGATAA\n++HWI-EAS337_3:7:1:220:1482/1\n+WWWWWWWWWVWWWWWWWVWWWWWWWWVVWWURUUUU\n+@HWI-EAS337_3:7:1:1699:1966/1\n+GATGAAGGATACTACAAAAAAAAGGGTTATTTTGTG\n++HWI-EAS337_3:7:1:1699:1966/1\n+WWWWWWWWWWWWWWWWVWVWWWWWWWVWWWUUUSUR\n+@HWI-EAS337_3:7:1:547:1084/1\n+GTGGTCAGGTCCTCTTCAAGTGATACAATGTTCCCC\n++HWI-EAS337_3:7:1:547:1084/1\n+WWWWWWWWWWWWWWWWWWVVWVWWWWWWWVUUUUSU\n+@HWI-EAS337_3:7:1:464:1097/1\n+GAAATTGAAGCTAGTTATTGACAGTTTACCAAGTTA\n++HWI-EAS337_3:7:1:464:1097/1\n+WWWWWWWWWWWVWWVWWWWWWWWVVWWWWVUUUUUR\n+@HWI-EAS337_3:7:1:171:1480/1\n+GATAATACTATTAGCACATCAGCCTCTAGATGAGAC\n++HWI-EAS337_3:7:1:171:1480/1\n+WWWWWWWWWWWWWVWWWWWWWVWWWWWWTVUUUUUU\n+@HWI-EAS337_3:7:1:293:1251/1\n+GTGGTAGTGAGCTCCGTGGTGAACAAGATGACGGAA\n++HWI-EAS337_3:7:1:293:1251/1\n+WWWWWWWVWVWWWWVVWWVVVVVVWVVVVVRPUURR\n+@HWI-EAS337_3:7:1:647:1863
/1\n+GGGTTTCAGATTAGTAAGTTATAGTGAAAAAATATA\n++HWI-EAS337_3:7:1:647:1863/1\n+WWVWWWWWVWWWWVWWVVWWWWWWWVWVVWUUUUUU\n+@HWI-EAS337_3:7:1:263:1275/1\n+GCTACGTCTGCTCTAACTCCTAATATGATCCTGTAT\n++HWI-EAS337_3:7:1:263:1275/1\n+WWWWWWWWWWWWWWWWWWWWWVWWWWQVWWUUOUUU\n+@HWI-EAS337_3:7:1:1112:215/1\n+GGTGTTGATTTCACAAGGAGGAATACTCATCTAAAA\n++HWI-EAS337_3:7:1:1112:215/1\n+WWVWWVVWVVWWWVWWVUWVVVWWWVWTVWUUUUUU\n+@HWI-EAS337_3:7:1:319:1275/1\n+GTTATAGTTCTTGACAACAAAGTACAGAGGTGGTCC\n++HWI-EAS337_3:7:1:319:1275/1\n+WWWWWWWWWWWWVWWWWWWWWWWWWWVWVWUUSUUU\n+@HWI-EAS337_3:7:1:1310:1480/1'..b'A\n++HWI-EAS337_3:7:1:986:591/1\n+WWWWWWWWWVWWWWWWWWWWWWWVWVVWVVUUUUUR\n+@HWI-EAS337_3:7:1:181:1099/1\n+GGTCGACGTAAGAGATCTGCAGGGCTATTACTCATT\n++HWI-EAS337_3:7:1:181:1099/1\n+WWWWWWWWWWWWWVWWWWWWWVVWWWVWWVUUUUUU\n+@HWI-EAS337_3:7:1:509:832/1\n+GTGAAGTATGGGTGGAAATGCTTGCGTATGCTGCTA\n++HWI-EAS337_3:7:1:509:832/1\n+WWWWVWWWWVVWWWWWWVWVWWWVVVVVWVSUUUUR\n+@HWI-EAS337_3:7:1:510:597/1\n+GGGTCTGGAATAGTAATGCGCTGATTCTAGTAAAGT\n++HWI-EAS337_3:7:1:510:597/1\n+VWWWVWWWWWWWWWWWWVWWWWWVWWWWVVUUUUUU\n+@HWI-EAS337_3:7:1:1765:1489/1\n+GTCAATTTTTTCTTTGTTTAAATCCGGGGAGGCTAG\n++HWI-EAS337_3:7:1:1765:1489/1\n+WVWWVWWWWWWWWWWVWWWWWWWWWQQVTVUSUUUR\n+@HWI-EAS337_3:7:1:417:1560/1\n+GTAACCTTCCCAGTGTCTCCTTAAGAAAGACTTGGA\n++HWI-EAS337_3:7:1:417:1560/1\n+WWWWVWWSWSVWVVVVWWWWWWWWVSWWWWUQUUQU\n+@HWI-EAS337_3:7:1:1047:854/1\n+GTTGAAATTCCTGATTTTCCATGTGCATCATAAGCC\n++HWI-EAS337_3:7:1:1047:854/1\n+WWWWWVWWWWWWWWUWWWWWVWVWVVVWVVUUUUUU\n+@HWI-EAS337_3:7:1:1296:202/1\n+GGTGTTGGAGTTGGATTTGTTTCTGCTTTGATATCC\n++HWI-EAS337_3:7:1:1296:202/1\n+WWWWWWWVWVWWWVVWWWWWWWWWVVWWWTUUUUUF\n+@HWI-EAS337_3:7:1:502:642/1\n+GATGATTCTTGCTGGTTAAGTTGAGATGGGTTATAA\n++HWI-EAS337_3:7:1:502:642/1\n+WWWWWVPVVVWWWVVVWWWVWWSVVVWVVVUUUUUR\n+@HWI-EAS337_3:7:1:82:1651/1\n+GGCATCCTGTTCATCCTTCTCTTCATTTTTAGGCGT\n++HWI-EAS337_3:7:1:82:1651/1\n+WWWWWWWWVWWWWWWWWVWWWWWWVWWWWWUKJUQU\n+@HWI-EAS337_3:7:1:1505:1274/1\n+GAAACTTTTCAAAAAAAAAGTTGCATGAGAAATAAG\n++HWI-EAS337_3:7:1:1505:1274/1\n+WWWWWWWWWWWWVWWWWWWWVWWWWWWVWWSUUUUR\n+@HWI-EAS337_3:7:1:5:1770/1\n+GTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\n++HWI-EAS337_3:7:1:5:1770/1\n+WWWWWWWWWWVWWWWWWWWWWWWWWWWWVVSUUQUU\n+@HWI-EAS337_3:7:1:115:1005/1\n+GATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\n++HWI-EAS337_3:7:1:115:1005/1\n+WWWWWWWWWVVWWWVVVVWWVVVWWWVVWVUUUUUU\n+@HWI-EAS337_3:7:1:354:1708/1\n+GCATCCGACAGTGACTTAGACGATGAGGAATACGAG\n++HWI-EAS337_3:7:1:354:1708/1\n+WWWWWWWWWWWWVWWWWWVWWVWWVWWVWWUUUUUR\n+@HWI-EAS337_3:7:1:1639:1500/1\n+GTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\n++HWI-EAS337_3:7:1:1639:1500/1\n+WWWWWWVVWWWWWVVWVVWUVVVVVVVVWVUUUUUR\n+@HWI-EAS337_3:7:1:766:243/1\n+GTGGCATCTATGGAAGATAAATTGGAGATTGTTGCT\n++HWI-EAS337_3:7:1:766:243/1\n+WWVWWWWWWWWVVWWVWWWWWWWVVVTVWWRUUJSU\n+@HWI-EAS337_3:7:1:920:144/1\n+GTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\n++HWI-EAS337_3:7:1:920:144/1\n+WWVVWWWWWVWPWVWWVWWSWVSWWHWWLVUCPUUH\n+@HWI-EAS337_3:7:1:389:268/1\n+GGTCAATTAGAGAGGGCAACCACCCTCAAAGAATTT\n++HWI-EAS337_3:7:1:389:268/1\n+WWWWWWWWWVWWWWVVWWWWWWWWWWWWVVSUUUUU\n+@HWI-EAS337_3:7:1:294:1868/1\n+GAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\n++HWI-EAS337_3:7:1:294:1868/1\n+VWVWWWWWWWWVQWWWWOWVVWWVWVVWQWUURULU\n+@HWI-EAS337_3:7:1:1147:62/1\n+GAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\n++HWI-EAS337_3:7:1:1147:62/1\n+WWWWWWWWWWWWVWWWWWWVWWVWVVWVVWUUUUUU\n+@HWI-EAS337_3:7:1:787:1759/1\n+GGTTTTATTAGAATTGGTAGCTGTTCTGATTTTCTG\n++HWI-EAS337_3:7:1:787:1759/1\n+WVVWWWWVWWWWWWWVUWWUWWUVWVVTVVUUUUUH\n+@HWI-EAS337_3:7:1:425:1939/1\n+GCTAATTGTGGTGTCTGGGTCTATGTGGCTAAACTT\n++HWI-EAS337_3:7:1:425:1939/1\n+WWWWVWWVWWWWVWWWVVVVWWWVVWV
VVWUUUUUU\n+@HWI-EAS337_3:7:1:187:1132/1\n+GTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\n++HWI-EAS337_3:7:1:187:1132/1\n+WVWWWWWWWWWWWWWWWWWWWTVWVWWWVVUUUUUU\n+@HWI-EAS337_3:7:1:1739:1840/1\n+GGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\n++HWI-EAS337_3:7:1:1739:1840/1\n+WWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\n+@HWI-EAS337_3:7:1:1505:1876/1\n+GAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\n++HWI-EAS337_3:7:1:1505:1876/1\n+WWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\n+@HWI-EAS337_3:7:1:447:192/1\n+GACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\n++HWI-EAS337_3:7:1:447:192/1\n+WWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\n+@HWI-EAS337_3:7:1:21:2019/1\n+GTATGAGGTAAAAGATGATAACCTGTCTTCCAGCCC\n++HWI-EAS337_3:7:1:21:2019/1\n+VWWVVWWVVWWWWWWWWWWWWWWWQVVWWWUURUUU\n+@HWI-EAS337_3:7:1:1593:652/1\n+GTGATGAGTAAAACATCATCATATGAACTTGAAGAG\n++HWI-EAS337_3:7:1:1593:652/1\n+WWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\n+@HWI-EAS337_3:7:1:1254:1660/1\n+GAAGTTTGTAATTCCTTTTAGGATTGTGGTTAACAT\n++HWI-EAS337_3:7:1:1254:1660/1\n+WWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\n+@HWI-EAS337_3:7:1:291:629/1\n+GTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\n++HWI-EAS337_3:7:1:291:629/1\n+WWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/Wig/chr1.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/Wig/chr1.wig Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,9 @@
+fixedStep  chrom=chr1  start=11  step=1
+1.1
+1.2
+fixedStep  chrom=chr1  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr1
+17  1.7
+19  1.9
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/adress.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/adress.txt Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,7 @@
+0
+58
+115
+173
+231
+289
+347
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/clusterize_default_expected.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/clusterize_default_expected.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1790 @@\n+chr4\tS-MART\ttranscript\t1\t50000\t.\t+\t.\tnbElements=0;ID=region1;Name=region1\n+chr4\tS-MART\ttranscript\t49951\t99950\t.\t+\t.\tnbElements=0;ID=region2;Name=region2\n+chr4\tS-MART\ttranscript\t99901\t149900\t.\t+\t.\tnbElements=0;ID=region3;Name=region3\n+chr4\tS-MART\ttranscript\t149851\t199850\t.\t+\t.\tnbElements=0;ID=region4;Name=region4\n+chr4\tS-MART\ttranscript\t199801\t249800\t.\t+\t.\tnbElements=0;ID=region5;Name=region5\n+chr4\tS-MART\ttranscript\t249751\t299750\t.\t+\t.\tnbElements=0;ID=region6;Name=region6\n+chr4\tS-MART\ttranscript\t299701\t349700\t.\t+\t.\tnbElements=0;ID=region7;Name=region7\n+chr4\tS-MART\ttranscript\t349651\t399650\t.\t+\t.\tnbElements=0;ID=region8;Name=region8\n+chr4\tS-MART\ttranscript\t399601\t449600\t.\t+\t.\tnbElements=0;ID=region9;Name=region9\n+chr4\tS-MART\ttranscript\t449551\t499550\t.\t+\t.\tnbElements=0;ID=region10;Name=region10\n+chr4\tS-MART\ttranscript\t499501\t549500\t.\t+\t.\tnbElements=0;ID=region11;Name=region11\n+chr4\tS-MART\ttranscript\t549451\t599450\t.\t+\t.\tnbElements=0;ID=region12;Name=region12\n+chr4\tS-MART\ttranscript\t599401\t649400\t.\t+\t.\tnbElements=0;ID=region13;Name=region13\n+chr4\tS-MART\ttranscript\t649351\t699350\t.\t+\t.\tnbElements=0;ID=region14;Name=region14\n+chr4\tS-MART\ttranscript\t699301\t749300\t.\t+\t.\tnbElements=0;ID=region15;Name=region15\n+chr4\tS-MART\ttranscript\t749251\t799250\t.\t+\t.\tnbElements=0;ID=region16;Name=region16\n+chr4\tS-MART\ttranscript\t799201\t849200\t.\t+\t.\tnbElements=0;ID=region17;Name=region17\n+chr4\tS-MART\ttranscript\t849151\t899150\t.\t+\t.\tnbElements=0;ID=region18;Name=region18\n+chr4\tS-MART\ttranscript\t899101\t949100\t.\t+\t.\tnbElements=0;ID=region19;Name=region19\n+chr4\tS-MART\ttranscript\t949051\t999050\t.\t+\t.\tnbElements=0;ID=region20;Name=region20\n+chr4\tS-MART\ttranscript\t999001\t1049000\t.\t+\t.\tnbElements=0;ID=region21;Name=region21\n+chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t+\t.\tnbElements=0;ID=region22;Name=region22\n+chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t+\t.\tnbElements=0;ID=region23;Name=region23\n+chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t+\t.\tnbElements=0;ID=region24;Name=region24\n+chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t+\t.\tnbElements=0;ID=region25;Name=region25\n+chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t+\t.\tnbElements=0;ID=region26;Name=region26\n+chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t+\t.\tnbElements=0;ID=region27;Name=region27\n+chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t+\t.\tnbElements=0;ID=region28;Name=region28\n+chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t+\t.\tnbElements=0;ID=region29;Name=region29\n+chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t+\t.\tnbElements=0;ID=region30;Name=region30\n+chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t+\t.\tnbElements=0;ID=region31;Name=region31\n+chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t+\t.\tnbElements=0;ID=region32;Name=region32\n+chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t+\t.\tnbElements=0;ID=region33;Name=region33\n+chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t+\t.\tnbElements=0;ID=region34;Name=region34\n+chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t+\t.\tnbElements=0;ID=region35;Name=region35\n+chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t+\t.\tnbElements=0;ID=region36;Name=region36\n+chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t+\t.\tnbElements=0;ID=region37;Name=region37\n+chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t+\t.\tnbElements=0;ID=region38;Name=region38\n+chr4\tS-MART\ttranscript\t18
98101\t1948100\t.\t+\t.\tnbElements=0;ID=region39;Name=region39\n+chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t+\t.\tnbElements=0;ID=region40;Name=region40\n+chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t+\t.\tnbElements=0;ID=region41;Name=region41\n+chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t+\t.\tnbElements=0;ID=region42;Name=region42\n+chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t+\t.\tnbElements=0;ID=region43;Name=region43\n+chr4\tS-MART\ttranscript\t2147851\t2197850\t.\t+\t.\tnbElements=0;ID=region44;Name=region44\n+chr4\tS-MART\ttranscript\t2197801\t2247800\t.\t+\t.\tnbElements=0;ID=region45;Name=region45\n+chr4\tS-MART\ttranscript\t2247751\t2297750\t.\t+\t.\tnbElements=0;ID=region46;Name=region46\n+chr4\tS-MART\ttranscript\t2297701\t2347700\t.\t+\t.\tnbElements=0;ID=region47;Name=region47\n+chr4\tS-MART\ttranscript\t2347651\t2397650\t.\t+\t.\tnbEleme'..b'MART\ttranscript\t3946051\t3996050\t.\t+\t.\tnbElements=0;ID=region1746;Name=region1746\n+chr1\tS-MART\ttranscript\t3996001\t4046000\t.\t+\t.\tnbElements=0;ID=region1747;Name=region1747\n+chr1\tS-MART\ttranscript\t4045951\t4095950\t.\t+\t.\tnbElements=0;ID=region1748;Name=region1748\n+chr1\tS-MART\ttranscript\t4095901\t4145900\t.\t+\t.\tnbElements=0;ID=region1749;Name=region1749\n+chr1\tS-MART\ttranscript\t4145851\t4195850\t.\t+\t.\tnbElements=0;ID=region1750;Name=region1750\n+chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;ID=region1751;Name=region1751\n+chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;ID=region1752;Name=region1752\n+chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;ID=region1753;Name=region1753\n+chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;ID=region1754;Name=region1754\n+chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;ID=region1755;Name=region1755\n+chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;ID=region1756;Name=region1756\n+chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;ID=region1757;Name=region1757\n+chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;ID=region1758;Name=region1758\n+chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;ID=region1759;Name=region1759\n+chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;ID=region1760;Name=region1760\n+chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;ID=region1761;Name=region1761\n+chr1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;ID=region1762;Name=region1762\n+chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;ID=region1763;Name=region1763\n+chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;ID=region1764;Name=region1764\n+chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;ID=region1765;Name=region1765\n+chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;ID=region1766;Name=region1766\n+chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;ID=region1767;Name=region1767\n+chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;ID=region1768;Name=region1768\n+chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;ID=region1769;Name=region1769\n+chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;ID=region1770;Name=region1770\n+chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;ID=region1771;Name=region1771\n+chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;ID=region1772;Name=region1772\n+chr1\tS-MART\ttranscript\t5294701\
t5344700\t.\t+\t.\tnbElements=0;ID=region1773;Name=region1773\n+chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;ID=region1774;Name=region1774\n+chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;ID=region1775;Name=region1775\n+chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;ID=region1776;Name=region1776\n+chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;ID=region1777;Name=region1777\n+chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;ID=region1778;Name=region1778\n+chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;ID=region1779;Name=region1779\n+chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;ID=region1780;Name=region1780\n+chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;ID=region1781;Name=region1781\n+chr1\tS-MART\ttranscript\t5744251\t5794250\t.\t+\t.\tnbElements=0;ID=region1782;Name=region1782\n+chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;ID=region1783;Name=region1783\n+chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;ID=region1784;Name=region1784\n+chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;ID=region1785;Name=region1785\n+chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;ID=region1786;Name=region1786\n+chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;ID=region1787;Name=region1787\n+chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;ID=region1788;Name=region1788\n+chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;ID=region1789;Name=region1789\n+chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;ID=region1790;Name=region1790\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/clusterize_default_expected.map
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/clusterize_default_expected.map Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1790 @@\n+region1\tchr4\t1\t50001\n+region2\tchr4\t49951\t99951\n+region3\tchr4\t99901\t149901\n+region4\tchr4\t149851\t199851\n+region5\tchr4\t199801\t249801\n+region6\tchr4\t249751\t299751\n+region7\tchr4\t299701\t349701\n+region8\tchr4\t349651\t399651\n+region9\tchr4\t399601\t449601\n+region10\tchr4\t449551\t499551\n+region11\tchr4\t499501\t549501\n+region12\tchr4\t549451\t599451\n+region13\tchr4\t599401\t649401\n+region14\tchr4\t649351\t699351\n+region15\tchr4\t699301\t749301\n+region16\tchr4\t749251\t799251\n+region17\tchr4\t799201\t849201\n+region18\tchr4\t849151\t899151\n+region19\tchr4\t899101\t949101\n+region20\tchr4\t949051\t999051\n+region21\tchr4\t999001\t1049001\n+region22\tchr4\t1048951\t1098951\n+region23\tchr4\t1098901\t1148901\n+region24\tchr4\t1148851\t1198851\n+region25\tchr4\t1198801\t1248801\n+region26\tchr4\t1248751\t1298751\n+region27\tchr4\t1298701\t1348701\n+region28\tchr4\t1348651\t1398651\n+region29\tchr4\t1398601\t1448601\n+region30\tchr4\t1448551\t1498551\n+region31\tchr4\t1498501\t1548501\n+region32\tchr4\t1548451\t1598451\n+region33\tchr4\t1598401\t1648401\n+region34\tchr4\t1648351\t1698351\n+region35\tchr4\t1698301\t1748301\n+region36\tchr4\t1748251\t1798251\n+region37\tchr4\t1798201\t1848201\n+region38\tchr4\t1848151\t1898151\n+region39\tchr4\t1898101\t1948101\n+region40\tchr4\t1948051\t1998051\n+region41\tchr4\t1998001\t2048001\n+region42\tchr4\t2047951\t2097951\n+region43\tchr4\t2097901\t2147901\n+region44\tchr4\t2147851\t2197851\n+region45\tchr4\t2197801\t2247801\n+region46\tchr4\t2247751\t2297751\n+region47\tchr4\t2297701\t2347701\n+region48\tchr4\t2347651\t2397651\n+region49\tchr4\t2397601\t2447601\n+region50\tchr4\t2447551\t2497551\n+region51\tchr4\t2497501\t2547501\n+region52\tchr4\t2547451\t2597451\n+region53\tchr4\t2597401\t2647401\n+region54\tchr4\t2647351\t2697351\n+region55\tchr4\t2697301\t2747301\n+region56\tchr4\t2747251\t2797251\n+region57\tchr4\t2797201\t2847201\n+region58\tchr4\t2847151\t2897151\n+region59\tchr4\t2897101\t2947101\n+region60\tchr4\t2947051\t2997051\n+region61\tchr4\t2997001\t3047001\n+region62\tchr4\t3046951\t3096951\n+region63\tchr4\t3096901\t3146901\n+region64\tchr4\t3146851\t3196851\n+region65\tchr4\t3196801\t3246801\n+region66\tchr4\t3246751\t3296751\n+region67\tchr4\t3296701\t3346701\n+region68\tchr4\t3346651\t3396651\n+region69\tchr4\t3396601\t3446601\n+region70\tchr4\t3446551\t3496551\n+region71\tchr4\t3496501\t3546501\n+region72\tchr4\t3546451\t3596451\n+region73\tchr4\t3596401\t3646401\n+region74\tchr4\t3646351\t3696351\n+region75\tchr4\t3696301\t3746301\n+region76\tchr4\t3746251\t3796251\n+region77\tchr4\t3796201\t3846201\n+region78\tchr4\t3846151\t3896151\n+region79\tchr4\t3896101\t3946101\n+region80\tchr4\t3946051\t3996051\n+region81\tchr4\t3996001\t4046001\n+region82\tchr4\t4045951\t4095951\n+region83\tchr4\t4095901\t4145901\n+region84\tchr4\t4145851\t4195851\n+region85\tchr4\t4195801\t4245801\n+region86\tchr4\t4245751\t4295751\n+region87\tchr4\t4295701\t4345701\n+region88\tchr4\t4345651\t4395651\n+region89\tchr4\t4395601\t4445601\n+region90\tchr4\t4445551\t4495551\n+region91\tchr4\t4495501\t4545501\n+region92\tchr4\t4545451\t4595451\n+region93\tchr4\t4595401\t4645401\n+region94\tchr4\t4645351\t4695351\n+region95\tchr4\t4695301\t4745301\n+region96\tchr4\t4745251\t4795251\n+region97\tchr4\t4795201\t4845201\n+region98\tchr4\t4845151\t4895151\n+region99\tchr4\t4895101\t4945101\n+region100\tchr4\t4945051\t4995051\n+region101\tchr4\t4995001\t5045001\n+region102\tchr4\t5044951\t5094951\n+region103\tchr4\t5
094901\t5144901\n+region104\tchr4\t5144851\t5194851\n+region105\tchr4\t5194801\t5244801\n+region106\tchr4\t5244751\t5294751\n+region107\tchr4\t5294701\t5344701\n+region108\tchr4\t5344651\t5394651\n+region109\tchr4\t5394601\t5444601\n+region110\tchr4\t5444551\t5494551\n+region111\tchr4\t5494501\t5544501\n+region112\tchr4\t5544451\t5594451\n+region113\tchr4\t5594401\t5644401\n+region114\tchr4\t5644351\t5694351\n+region115\tchr4\t5694301\t5744301\n+region116\tchr4\t5744251\t5794251\n+region117\tchr4\t5794201\t5844201\n+region118\tchr4\t5844151\t5894151\n+region119\tchr4\t5894101\t5944101\n+region120\tchr4\t5944051\t5994051\n+region121\tchr4\t5994001\t6044001\n+region122\tchr4\t6043951\t6093951\n+region123\tchr4\t6093901\t6143901\n+region124\tchr4\t6143851\t6193851\n+region125\tchr4\t6193801\t6243801\n+region126\tchr4\t6243751\t6293751\n+region127\tchr4\t6293701\t6343701\n+region128\tchr4\t6343651\t6393651\n+region129\tchr4\t6393601\t6443601\n+region130'..b'9951\t99951\n+region1669\tchr1\t99901\t149901\n+region1670\tchr1\t149851\t199851\n+region1671\tchr1\t199801\t249801\n+region1672\tchr1\t249751\t299751\n+region1673\tchr1\t299701\t349701\n+region1674\tchr1\t349651\t399651\n+region1675\tchr1\t399601\t449601\n+region1676\tchr1\t449551\t499551\n+region1677\tchr1\t499501\t549501\n+region1678\tchr1\t549451\t599451\n+region1679\tchr1\t599401\t649401\n+region1680\tchr1\t649351\t699351\n+region1681\tchr1\t699301\t749301\n+region1682\tchr1\t749251\t799251\n+region1683\tchr1\t799201\t849201\n+region1684\tchr1\t849151\t899151\n+region1685\tchr1\t899101\t949101\n+region1686\tchr1\t949051\t999051\n+region1687\tchr1\t999001\t1049001\n+region1688\tchr1\t1048951\t1098951\n+region1689\tchr1\t1098901\t1148901\n+region1690\tchr1\t1148851\t1198851\n+region1691\tchr1\t1198801\t1248801\n+region1692\tchr1\t1248751\t1298751\n+region1693\tchr1\t1298701\t1348701\n+region1694\tchr1\t1348651\t1398651\n+region1695\tchr1\t1398601\t1448601\n+region1696\tchr1\t1448551\t1498551\n+region1697\tchr1\t1498501\t1548501\n+region1698\tchr1\t1548451\t1598451\n+region1699\tchr1\t1598401\t1648401\n+region1700\tchr1\t1648351\t1698351\n+region1701\tchr1\t1698301\t1748301\n+region1702\tchr1\t1748251\t1798251\n+region1703\tchr1\t1798201\t1848201\n+region1704\tchr1\t1848151\t1898151\n+region1705\tchr1\t1898101\t1948101\n+region1706\tchr1\t1948051\t1998051\n+region1707\tchr1\t1998001\t2048001\n+region1708\tchr1\t2047951\t2097951\n+region1709\tchr1\t2097901\t2147901\n+region1710\tchr1\t2147851\t2197851\n+region1711\tchr1\t2197801\t2247801\n+region1712\tchr1\t2247751\t2297751\n+region1713\tchr1\t2297701\t2347701\n+region1714\tchr1\t2347651\t2397651\n+region1715\tchr1\t2397601\t2447601\n+region1716\tchr1\t2447551\t2497551\n+region1717\tchr1\t2497501\t2547501\n+region1718\tchr1\t2547451\t2597451\n+region1719\tchr1\t2597401\t2647401\n+region1720\tchr1\t2647351\t2697351\n+region1721\tchr1\t2697301\t2747301\n+region1722\tchr1\t2747251\t2797251\n+region1723\tchr1\t2797201\t2847201\n+region1724\tchr1\t2847151\t2897151\n+region1725\tchr1\t2897101\t2947101\n+region1726\tchr1\t2947051\t2997051\n+region1727\tchr1\t2997001\t3047001\n+region1728\tchr1\t3046951\t3096951\n+region1729\tchr1\t3096901\t3146901\n+region1730\tchr1\t3146851\t3196851\n+region1731\tchr1\t3196801\t3246801\n+region1732\tchr1\t3246751\t3296751\n+region1733\tchr1\t3296701\t3346701\n+region1734\tchr1\t3346651\t3396651\n+region1735\tchr1\t3396601\t3446601\n+region1736\tchr1\t3446551\t3496551\n+region1737\tchr1\t3496501\t3546501\n+region1738\tchr1\t3546451\t3596451\n+region1739\tchr1\t359
6401\t3646401\n+region1740\tchr1\t3646351\t3696351\n+region1741\tchr1\t3696301\t3746301\n+region1742\tchr1\t3746251\t3796251\n+region1743\tchr1\t3796201\t3846201\n+region1744\tchr1\t3846151\t3896151\n+region1745\tchr1\t3896101\t3946101\n+region1746\tchr1\t3946051\t3996051\n+region1747\tchr1\t3996001\t4046001\n+region1748\tchr1\t4045951\t4095951\n+region1749\tchr1\t4095901\t4145901\n+region1750\tchr1\t4145851\t4195851\n+region1751\tchr1\t4195801\t4245801\n+region1752\tchr1\t4245751\t4295751\n+region1753\tchr1\t4295701\t4345701\n+region1754\tchr1\t4345651\t4395651\n+region1755\tchr1\t4395601\t4445601\n+region1756\tchr1\t4445551\t4495551\n+region1757\tchr1\t4495501\t4545501\n+region1758\tchr1\t4545451\t4595451\n+region1759\tchr1\t4595401\t4645401\n+region1760\tchr1\t4645351\t4695351\n+region1761\tchr1\t4695301\t4745301\n+region1762\tchr1\t4745251\t4795251\n+region1763\tchr1\t4795201\t4845201\n+region1764\tchr1\t4845151\t4895151\n+region1765\tchr1\t4895101\t4945101\n+region1766\tchr1\t4945051\t4995051\n+region1767\tchr1\t4995001\t5045001\n+region1768\tchr1\t5044951\t5094951\n+region1769\tchr1\t5094901\t5144901\n+region1770\tchr1\t5144851\t5194851\n+region1771\tchr1\t5194801\t5244801\n+region1772\tchr1\t5244751\t5294751\n+region1773\tchr1\t5294701\t5344701\n+region1774\tchr1\t5344651\t5394651\n+region1775\tchr1\t5394601\t5444601\n+region1776\tchr1\t5444551\t5494551\n+region1777\tchr1\t5494501\t5544501\n+region1778\tchr1\t5544451\t5594451\n+region1779\tchr1\t5594401\t5644401\n+region1780\tchr1\t5644351\t5694351\n+region1781\tchr1\t5694301\t5744301\n+region1782\tchr1\t5744251\t5794251\n+region1783\tchr1\t5794201\t5844201\n+region1784\tchr1\t5844151\t5894151\n+region1785\tchr1\t5894101\t5944101\n+region1786\tchr1\t5944051\t5994051\n+region1787\tchr1\t5994001\t6044001\n+region1788\tchr1\t6043951\t6093951\n+region1789\tchr1\t6093901\t6143901\n+region1790\tchr1\t6143851\t6193851\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/clusterize_normalize_expected.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/clusterize_normalize_expected.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1790 @@\n+chr4\tS-MART\ttranscript\t1\t50000\t.\t+\t.\tnbElements=0;ID=region1;Name=region1\n+chr4\tS-MART\ttranscript\t49951\t99950\t.\t+\t.\tnbElements=0;ID=region2;Name=region2\n+chr4\tS-MART\ttranscript\t99901\t149900\t.\t+\t.\tnbElements=0;ID=region3;Name=region3\n+chr4\tS-MART\ttranscript\t149851\t199850\t.\t+\t.\tnbElements=0;ID=region4;Name=region4\n+chr4\tS-MART\ttranscript\t199801\t249800\t.\t+\t.\tnbElements=0;ID=region5;Name=region5\n+chr4\tS-MART\ttranscript\t249751\t299750\t.\t+\t.\tnbElements=0;ID=region6;Name=region6\n+chr4\tS-MART\ttranscript\t299701\t349700\t.\t+\t.\tnbElements=0;ID=region7;Name=region7\n+chr4\tS-MART\ttranscript\t349651\t399650\t.\t+\t.\tnbElements=0;ID=region8;Name=region8\n+chr4\tS-MART\ttranscript\t399601\t449600\t.\t+\t.\tnbElements=0;ID=region9;Name=region9\n+chr4\tS-MART\ttranscript\t449551\t499550\t.\t+\t.\tnbElements=0;ID=region10;Name=region10\n+chr4\tS-MART\ttranscript\t499501\t549500\t.\t+\t.\tnbElements=0;ID=region11;Name=region11\n+chr4\tS-MART\ttranscript\t549451\t599450\t.\t+\t.\tnbElements=0;ID=region12;Name=region12\n+chr4\tS-MART\ttranscript\t599401\t649400\t.\t+\t.\tnbElements=0;ID=region13;Name=region13\n+chr4\tS-MART\ttranscript\t649351\t699350\t.\t+\t.\tnbElements=0;ID=region14;Name=region14\n+chr4\tS-MART\ttranscript\t699301\t749300\t.\t+\t.\tnbElements=0;ID=region15;Name=region15\n+chr4\tS-MART\ttranscript\t749251\t799250\t.\t+\t.\tnbElements=0;ID=region16;Name=region16\n+chr4\tS-MART\ttranscript\t799201\t849200\t.\t+\t.\tnbElements=0;ID=region17;Name=region17\n+chr4\tS-MART\ttranscript\t849151\t899150\t.\t+\t.\tnbElements=0;ID=region18;Name=region18\n+chr4\tS-MART\ttranscript\t899101\t949100\t.\t+\t.\tnbElements=0;ID=region19;Name=region19\n+chr4\tS-MART\ttranscript\t949051\t999050\t.\t+\t.\tnbElements=0;ID=region20;Name=region20\n+chr4\tS-MART\ttranscript\t999001\t1049000\t.\t+\t.\tnbElements=0;ID=region21;Name=region21\n+chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t+\t.\tnbElements=0;ID=region22;Name=region22\n+chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t+\t.\tnbElements=0;ID=region23;Name=region23\n+chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t+\t.\tnbElements=0;ID=region24;Name=region24\n+chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t+\t.\tnbElements=0;ID=region25;Name=region25\n+chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t+\t.\tnbElements=0;ID=region26;Name=region26\n+chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t+\t.\tnbElements=0;ID=region27;Name=region27\n+chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t+\t.\tnbElements=0;ID=region28;Name=region28\n+chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t+\t.\tnbElements=0;ID=region29;Name=region29\n+chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t+\t.\tnbElements=0;ID=region30;Name=region30\n+chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t+\t.\tnbElements=0;ID=region31;Name=region31\n+chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t+\t.\tnbElements=0;ID=region32;Name=region32\n+chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t+\t.\tnbElements=0;ID=region33;Name=region33\n+chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t+\t.\tnbElements=0;ID=region34;Name=region34\n+chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t+\t.\tnbElements=0;ID=region35;Name=region35\n+chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t+\t.\tnbElements=0;ID=region36;Name=region36\n+chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t+\t.\tnbElements=0;ID=region37;Name=region37\n+chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t+\t.\tnbElements=0;ID=region38;Name=region38\n+chr4\tS-MART\ttranscript\t18
98101\t1948100\t.\t+\t.\tnbElements=0;ID=region39;Name=region39\n+chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t+\t.\tnbElements=0;ID=region40;Name=region40\n+chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t+\t.\tnbElements=0;ID=region41;Name=region41\n+chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t+\t.\tnbElements=0;ID=region42;Name=region42\n+chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t+\t.\tnbElements=0;ID=region43;Name=region43\n+chr4\tS-MART\ttranscript\t2147851\t2197850\t.\t+\t.\tnbElements=0;ID=region44;Name=region44\n+chr4\tS-MART\ttranscript\t2197801\t2247800\t.\t+\t.\tnbElements=0;ID=region45;Name=region45\n+chr4\tS-MART\ttranscript\t2247751\t2297750\t.\t+\t.\tnbElements=0;ID=region46;Name=region46\n+chr4\tS-MART\ttranscript\t2297701\t2347700\t.\t+\t.\tnbElements=0;ID=region47;Name=region47\n+chr4\tS-MART\ttranscript\t2347651\t2397650\t.\t+\t.\tnbEleme'..b'MART\ttranscript\t3946051\t3996050\t.\t+\t.\tnbElements=0;ID=region1746;Name=region1746\n+chr1\tS-MART\ttranscript\t3996001\t4046000\t.\t+\t.\tnbElements=0;ID=region1747;Name=region1747\n+chr1\tS-MART\ttranscript\t4045951\t4095950\t.\t+\t.\tnbElements=0;ID=region1748;Name=region1748\n+chr1\tS-MART\ttranscript\t4095901\t4145900\t.\t+\t.\tnbElements=0;ID=region1749;Name=region1749\n+chr1\tS-MART\ttranscript\t4145851\t4195850\t.\t+\t.\tnbElements=0;ID=region1750;Name=region1750\n+chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;ID=region1751;Name=region1751\n+chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;ID=region1752;Name=region1752\n+chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;ID=region1753;Name=region1753\n+chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;ID=region1754;Name=region1754\n+chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;ID=region1755;Name=region1755\n+chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;ID=region1756;Name=region1756\n+chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;ID=region1757;Name=region1757\n+chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;ID=region1758;Name=region1758\n+chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;ID=region1759;Name=region1759\n+chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;ID=region1760;Name=region1760\n+chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;ID=region1761;Name=region1761\n+chr1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;ID=region1762;Name=region1762\n+chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;ID=region1763;Name=region1763\n+chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;ID=region1764;Name=region1764\n+chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;ID=region1765;Name=region1765\n+chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;ID=region1766;Name=region1766\n+chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;ID=region1767;Name=region1767\n+chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;ID=region1768;Name=region1768\n+chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;ID=region1769;Name=region1769\n+chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;ID=region1770;Name=region1770\n+chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;ID=region1771;Name=region1771\n+chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;ID=region1772;Name=region1772\n+chr1\tS-MART\ttranscript\t5294701\
t5344700\t.\t+\t.\tnbElements=0;ID=region1773;Name=region1773\n+chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;ID=region1774;Name=region1774\n+chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;ID=region1775;Name=region1775\n+chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;ID=region1776;Name=region1776\n+chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;ID=region1777;Name=region1777\n+chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;ID=region1778;Name=region1778\n+chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;ID=region1779;Name=region1779\n+chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;ID=region1780;Name=region1780\n+chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;ID=region1781;Name=region1781\n+chr1\tS-MART\ttranscript\t5744251\t5794250\t.\t+\t.\tnbElements=0;ID=region1782;Name=region1782\n+chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;ID=region1783;Name=region1783\n+chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;ID=region1784;Name=region1784\n+chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;ID=region1785;Name=region1785\n+chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;ID=region1786;Name=region1786\n+chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;ID=region1787;Name=region1787\n+chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;ID=region1788;Name=region1788\n+chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;ID=region1789;Name=region1789\n+chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;ID=region1790;Name=region1790\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/clusterize_output_tag_expected.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/clusterize_output_tag_expected.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1790 @@\n+chr4\tS-MART\ttranscript\t1\t50000\t.\t+\t.\tnbElements=0;newTag=0;ID=region1;Name=region1\n+chr4\tS-MART\ttranscript\t49951\t99950\t.\t+\t.\tnbElements=0;newTag=0;ID=region2;Name=region2\n+chr4\tS-MART\ttranscript\t99901\t149900\t.\t+\t.\tnbElements=0;newTag=0;ID=region3;Name=region3\n+chr4\tS-MART\ttranscript\t149851\t199850\t.\t+\t.\tnbElements=0;newTag=0;ID=region4;Name=region4\n+chr4\tS-MART\ttranscript\t199801\t249800\t.\t+\t.\tnbElements=0;newTag=0;ID=region5;Name=region5\n+chr4\tS-MART\ttranscript\t249751\t299750\t.\t+\t.\tnbElements=0;newTag=0;ID=region6;Name=region6\n+chr4\tS-MART\ttranscript\t299701\t349700\t.\t+\t.\tnbElements=0;newTag=0;ID=region7;Name=region7\n+chr4\tS-MART\ttranscript\t349651\t399650\t.\t+\t.\tnbElements=0;newTag=0;ID=region8;Name=region8\n+chr4\tS-MART\ttranscript\t399601\t449600\t.\t+\t.\tnbElements=0;newTag=0;ID=region9;Name=region9\n+chr4\tS-MART\ttranscript\t449551\t499550\t.\t+\t.\tnbElements=0;newTag=0;ID=region10;Name=region10\n+chr4\tS-MART\ttranscript\t499501\t549500\t.\t+\t.\tnbElements=0;newTag=0;ID=region11;Name=region11\n+chr4\tS-MART\ttranscript\t549451\t599450\t.\t+\t.\tnbElements=0;newTag=0;ID=region12;Name=region12\n+chr4\tS-MART\ttranscript\t599401\t649400\t.\t+\t.\tnbElements=0;newTag=0;ID=region13;Name=region13\n+chr4\tS-MART\ttranscript\t649351\t699350\t.\t+\t.\tnbElements=0;newTag=0;ID=region14;Name=region14\n+chr4\tS-MART\ttranscript\t699301\t749300\t.\t+\t.\tnbElements=0;newTag=0;ID=region15;Name=region15\n+chr4\tS-MART\ttranscript\t749251\t799250\t.\t+\t.\tnbElements=0;newTag=0;ID=region16;Name=region16\n+chr4\tS-MART\ttranscript\t799201\t849200\t.\t+\t.\tnbElements=0;newTag=0;ID=region17;Name=region17\n+chr4\tS-MART\ttranscript\t849151\t899150\t.\t+\t.\tnbElements=0;newTag=0;ID=region18;Name=region18\n+chr4\tS-MART\ttranscript\t899101\t949100\t.\t+\t.\tnbElements=0;newTag=0;ID=region19;Name=region19\n+chr4\tS-MART\ttranscript\t949051\t999050\t.\t+\t.\tnbElements=0;newTag=0;ID=region20;Name=region20\n+chr4\tS-MART\ttranscript\t999001\t1049000\t.\t+\t.\tnbElements=0;newTag=0;ID=region21;Name=region21\n+chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t+\t.\tnbElements=0;newTag=0;ID=region22;Name=region22\n+chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t+\t.\tnbElements=0;newTag=0;ID=region23;Name=region23\n+chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t+\t.\tnbElements=0;newTag=0;ID=region24;Name=region24\n+chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t+\t.\tnbElements=0;newTag=0;ID=region25;Name=region25\n+chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t+\t.\tnbElements=0;newTag=0;ID=region26;Name=region26\n+chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t+\t.\tnbElements=0;newTag=0;ID=region27;Name=region27\n+chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t+\t.\tnbElements=0;newTag=0;ID=region28;Name=region28\n+chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t+\t.\tnbElements=0;newTag=0;ID=region29;Name=region29\n+chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t+\t.\tnbElements=0;newTag=0;ID=region30;Name=region30\n+chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t+\t.\tnbElements=0;newTag=0;ID=region31;Name=region31\n+chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t+\t.\tnbElements=0;newTag=0;ID=region32;Name=region32\n+chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t+\t.\tnbElements=0;newTag=0;ID=region33;Name=region33\n+chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t+\t.\tnbElements=0;newTag=0;ID=region34;Name=region34\n+chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t+\t.\tnbElements=0;newTag=0;ID=region35;Name=region
35\n+chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t+\t.\tnbElements=0;newTag=0;ID=region36;Name=region36\n+chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t+\t.\tnbElements=0;newTag=0;ID=region37;Name=region37\n+chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t+\t.\tnbElements=0;newTag=0;ID=region38;Name=region38\n+chr4\tS-MART\ttranscript\t1898101\t1948100\t.\t+\t.\tnbElements=0;newTag=0;ID=region39;Name=region39\n+chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t+\t.\tnbElements=0;newTag=0;ID=region40;Name=region40\n+chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t+\t.\tnbElements=0;newTag=0;ID=region41;Name=region41\n+chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t+\t.\tnbElements=0;newTag=0;ID=region42;Name=region42\n+chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t+\t.\tnbElements=0;newTag=0;ID=region43;Name=region43\n+chr4\t'..b'851\t4195850\t.\t+\t.\tnbElements=0;newTag=0;ID=region1750;Name=region1750\n+chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;newTag=0;ID=region1751;Name=region1751\n+chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;newTag=0;ID=region1752;Name=region1752\n+chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;newTag=0;ID=region1753;Name=region1753\n+chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;newTag=0;ID=region1754;Name=region1754\n+chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;newTag=0;ID=region1755;Name=region1755\n+chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;newTag=0;ID=region1756;Name=region1756\n+chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;newTag=0;ID=region1757;Name=region1757\n+chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;newTag=0;ID=region1758;Name=region1758\n+chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;newTag=0;ID=region1759;Name=region1759\n+chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;newTag=0;ID=region1760;Name=region1760\n+chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;newTag=0;ID=region1761;Name=region1761\n+chr1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;newTag=0;ID=region1762;Name=region1762\n+chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;newTag=0;ID=region1763;Name=region1763\n+chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;newTag=0;ID=region1764;Name=region1764\n+chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;newTag=0;ID=region1765;Name=region1765\n+chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;newTag=0;ID=region1766;Name=region1766\n+chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;newTag=0;ID=region1767;Name=region1767\n+chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;newTag=0;ID=region1768;Name=region1768\n+chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;newTag=0;ID=region1769;Name=region1769\n+chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;newTag=0;ID=region1770;Name=region1770\n+chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;newTag=0;ID=region1771;Name=region1771\n+chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;newTag=0;ID=region1772;Name=region1772\n+chr1\tS-MART\ttranscript\t5294701\t5344700\t.\t+\t.\tnbElements=0;newTag=0;ID=region1773;Name=region1773\n+chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;newTag=0;ID=region1774;Name=region1774\n+chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;newT
ag=0;ID=region1775;Name=region1775\n+chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;newTag=0;ID=region1776;Name=region1776\n+chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;newTag=0;ID=region1777;Name=region1777\n+chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;newTag=0;ID=region1778;Name=region1778\n+chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;newTag=0;ID=region1779;Name=region1779\n+chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;newTag=0;ID=region1780;Name=region1780\n+chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;newTag=0;ID=region1781;Name=region1781\n+chr1\tS-MART\ttranscript\t5744251\t5794250\t.\t+\t.\tnbElements=0;newTag=0;ID=region1782;Name=region1782\n+chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;newTag=0;ID=region1783;Name=region1783\n+chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;newTag=0;ID=region1784;Name=region1784\n+chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;newTag=0;ID=region1785;Name=region1785\n+chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;newTag=0;ID=region1786;Name=region1786\n+chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;newTag=0;ID=region1787;Name=region1787\n+chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;newTag=0;ID=region1788;Name=region1788\n+chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;newTag=0;ID=region1789;Name=region1789\n+chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;newTag=1.000000;ID=region1790;Name=region1790\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/clusterize_strands_expected.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/clusterize_strands_expected.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,3580 @@\n+chr4\tS-MART\ttranscript\t1\t50000\t.\t-\t.\tnbElements=0;ID=region1;Name=region1\n+chr4\tS-MART\ttranscript\t49951\t99950\t.\t-\t.\tnbElements=0;ID=region2;Name=region2\n+chr4\tS-MART\ttranscript\t99901\t149900\t.\t-\t.\tnbElements=0;ID=region3;Name=region3\n+chr4\tS-MART\ttranscript\t149851\t199850\t.\t-\t.\tnbElements=0;ID=region4;Name=region4\n+chr4\tS-MART\ttranscript\t199801\t249800\t.\t-\t.\tnbElements=0;ID=region5;Name=region5\n+chr4\tS-MART\ttranscript\t249751\t299750\t.\t-\t.\tnbElements=0;ID=region6;Name=region6\n+chr4\tS-MART\ttranscript\t299701\t349700\t.\t-\t.\tnbElements=0;ID=region7;Name=region7\n+chr4\tS-MART\ttranscript\t349651\t399650\t.\t-\t.\tnbElements=0;ID=region8;Name=region8\n+chr4\tS-MART\ttranscript\t399601\t449600\t.\t-\t.\tnbElements=0;ID=region9;Name=region9\n+chr4\tS-MART\ttranscript\t449551\t499550\t.\t-\t.\tnbElements=0;ID=region10;Name=region10\n+chr4\tS-MART\ttranscript\t499501\t549500\t.\t-\t.\tnbElements=0;ID=region11;Name=region11\n+chr4\tS-MART\ttranscript\t549451\t599450\t.\t-\t.\tnbElements=0;ID=region12;Name=region12\n+chr4\tS-MART\ttranscript\t599401\t649400\t.\t-\t.\tnbElements=0;ID=region13;Name=region13\n+chr4\tS-MART\ttranscript\t649351\t699350\t.\t-\t.\tnbElements=0;ID=region14;Name=region14\n+chr4\tS-MART\ttranscript\t699301\t749300\t.\t-\t.\tnbElements=0;ID=region15;Name=region15\n+chr4\tS-MART\ttranscript\t749251\t799250\t.\t-\t.\tnbElements=0;ID=region16;Name=region16\n+chr4\tS-MART\ttranscript\t799201\t849200\t.\t-\t.\tnbElements=0;ID=region17;Name=region17\n+chr4\tS-MART\ttranscript\t849151\t899150\t.\t-\t.\tnbElements=0;ID=region18;Name=region18\n+chr4\tS-MART\ttranscript\t899101\t949100\t.\t-\t.\tnbElements=0;ID=region19;Name=region19\n+chr4\tS-MART\ttranscript\t949051\t999050\t.\t-\t.\tnbElements=0;ID=region20;Name=region20\n+chr4\tS-MART\ttranscript\t999001\t1049000\t.\t-\t.\tnbElements=0;ID=region21;Name=region21\n+chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t-\t.\tnbElements=0;ID=region22;Name=region22\n+chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t-\t.\tnbElements=0;ID=region23;Name=region23\n+chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t-\t.\tnbElements=0;ID=region24;Name=region24\n+chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t-\t.\tnbElements=0;ID=region25;Name=region25\n+chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t-\t.\tnbElements=0;ID=region26;Name=region26\n+chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t-\t.\tnbElements=0;ID=region27;Name=region27\n+chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t-\t.\tnbElements=0;ID=region28;Name=region28\n+chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t-\t.\tnbElements=0;ID=region29;Name=region29\n+chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t-\t.\tnbElements=0;ID=region30;Name=region30\n+chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t-\t.\tnbElements=0;ID=region31;Name=region31\n+chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t-\t.\tnbElements=0;ID=region32;Name=region32\n+chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t-\t.\tnbElements=0;ID=region33;Name=region33\n+chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t-\t.\tnbElements=0;ID=region34;Name=region34\n+chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t-\t.\tnbElements=0;ID=region35;Name=region35\n+chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t-\t.\tnbElements=0;ID=region36;Name=region36\n+chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t-\t.\tnbElements=0;ID=region37;Name=region37\n+chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t-\t.\tnbElements=0;ID=region38;Name=region38\n+chr4\tS-MART\ttranscript\t18
98101\t1948100\t.\t-\t.\tnbElements=0;ID=region39;Name=region39\n+chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t-\t.\tnbElements=0;ID=region40;Name=region40\n+chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t-\t.\tnbElements=0;ID=region41;Name=region41\n+chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t-\t.\tnbElements=0;ID=region42;Name=region42\n+chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t-\t.\tnbElements=0;ID=region43;Name=region43\n+chr4\tS-MART\ttranscript\t2147851\t2197850\t.\t-\t.\tnbElements=0;ID=region44;Name=region44\n+chr4\tS-MART\ttranscript\t2197801\t2247800\t.\t-\t.\tnbElements=0;ID=region45;Name=region45\n+chr4\tS-MART\ttranscript\t2247751\t2297750\t.\t-\t.\tnbElements=0;ID=region46;Name=region46\n+chr4\tS-MART\ttranscript\t2297701\t2347700\t.\t-\t.\tnbElements=0;ID=region47;Name=region47\n+chr4\tS-MART\ttranscript\t2347651\t2397650\t.\t-\t.\tnbEleme'..b'MART\ttranscript\t3946051\t3996050\t.\t+\t.\tnbElements=0;ID=region3536;Name=region3536\n+chr1\tS-MART\ttranscript\t3996001\t4046000\t.\t+\t.\tnbElements=0;ID=region3537;Name=region3537\n+chr1\tS-MART\ttranscript\t4045951\t4095950\t.\t+\t.\tnbElements=0;ID=region3538;Name=region3538\n+chr1\tS-MART\ttranscript\t4095901\t4145900\t.\t+\t.\tnbElements=0;ID=region3539;Name=region3539\n+chr1\tS-MART\ttranscript\t4145851\t4195850\t.\t+\t.\tnbElements=0;ID=region3540;Name=region3540\n+chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;ID=region3541;Name=region3541\n+chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;ID=region3542;Name=region3542\n+chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;ID=region3543;Name=region3543\n+chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;ID=region3544;Name=region3544\n+chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;ID=region3545;Name=region3545\n+chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;ID=region3546;Name=region3546\n+chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;ID=region3547;Name=region3547\n+chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;ID=region3548;Name=region3548\n+chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;ID=region3549;Name=region3549\n+chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;ID=region3550;Name=region3550\n+chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;ID=region3551;Name=region3551\n+chr1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;ID=region3552;Name=region3552\n+chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;ID=region3553;Name=region3553\n+chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;ID=region3554;Name=region3554\n+chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;ID=region3555;Name=region3555\n+chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;ID=region3556;Name=region3556\n+chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;ID=region3557;Name=region3557\n+chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;ID=region3558;Name=region3558\n+chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;ID=region3559;Name=region3559\n+chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;ID=region3560;Name=region3560\n+chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;ID=region3561;Name=region3561\n+chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;ID=region3562;Name=region3562\n+chr1\tS-MART\ttranscript\t5294701\
t5344700\t.\t+\t.\tnbElements=0;ID=region3563;Name=region3563\n+chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;ID=region3564;Name=region3564\n+chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;ID=region3565;Name=region3565\n+chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;ID=region3566;Name=region3566\n+chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;ID=region3567;Name=region3567\n+chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;ID=region3568;Name=region3568\n+chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;ID=region3569;Name=region3569\n+chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;ID=region3570;Name=region3570\n+chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;ID=region3571;Name=region3571\n+chr1\tS-MART\ttranscript\t5744251\t5794250\t.\t+\t.\tnbElements=0;ID=region3572;Name=region3572\n+chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;ID=region3573;Name=region3573\n+chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;ID=region3574;Name=region3574\n+chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;ID=region3575;Name=region3575\n+chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;ID=region3576;Name=region3576\n+chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;ID=region3577;Name=region3577\n+chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;ID=region3578;Name=region3578\n+chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;ID=region3579;Name=region3579\n+chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;ID=region3580;Name=region3580\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/expOutputGff.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/expOutputGff.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,2292 @@\n+C02HBa0185P07_LR40\tS-MART\ttranscript\t3889\t3924\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\n+C02HBa0185P07_LR40\tS-MART\ttranscript\t3830\t3865\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\n+C11SLe0053P22_LR298\tS-MART\ttranscript\t2130\t2165\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\n+C11SLe0053P22_LR298\tS-MART\ttranscript\t1980\t2015\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\n+C06HBa0144J05_LR355\tS-MART\ttranscript\t1\t36\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\n+C06HBa0144J05_LR355\tS-MART\ttranscript\t101\t136\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\n+C08HBa0165B06_LR218\tS-MART\ttranscript\t3619\t3654\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\n+C08HBa0165B06_LR218\tS-MART\ttranscript\t3575\t3610\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\n+C02HBa0329G05_LR52\tS-MART\ttranscript\t4746\t4781\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1154:1517;identity=100.000000;Name=HWI-EAS337_3:7:1:1154:1517\n+C02HBa0329G05_LR52\tS-MART\ttranscript\t4680\t4715\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1154:1517;identity=100.000000;Name=HWI-EAS337_3:7:1:1154:1517\n+C04HBa80D3_LR100\tS-MART\ttranscript\t423\t458\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:164:1869;identity=100.000000;Name=HWI-EAS337_3:7:1:164:1869\n+C04HBa80D3_LR100\tS-MART\ttranscript\t397\t432\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:164:1869;identity=100.000000;Name=HWI-EAS337_3:7:1:164:1869\n+C01HBa0216G16_LR11\tS-MART\ttranscript\t648\t683\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1194;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1194\n+C01HBa0216G16_LR11\tS-MART\ttranscript\t511\t546\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:415:1194;identity=97.222222;Name=HWI-EAS337_3:7:1:415:1194\n+C05HBa0145P19_LR136\tS-MART\ttranscript\t3686\t3721\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=2.000000;ID=HWI-EAS337_3:7:1:645:1892;identity=94.444444;Name=HWI-EAS337_3:7:1:645:1892\n+C05HBa0145P19_LR136\tS-MART\ttranscript\t3573\t3608\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:645:1892;identity=100.000000;Name=HWI-EAS337_3:7:1:645:1892\n+C08HBa0012O06_LR211\tS-MART\ttranscri
pt\t1768\t1803\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:33:1446;identity=97.222222;Name=HWI-EAS337_3:7:1:33:1446\n+C08HBa0012O06_LR211\tS-MART\ttranscript\t1649\t1684\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:33:1446;identity=100.000000;Name=HWI-EAS337_3:7:1:33:1446\n+C09HBa0194K19_LR362\tS-MART\ttranscript\t9168\t9203\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1194:1427;identity=100.000000;Name=HWI-EAS337_3:7:1:1194:1427'..b':1:1147:62\n+C02HBa0204D01_LR334\tS-MART\ttranscript\t6704\t6739\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1147:62;identity=100.000000;Name=HWI-EAS337_3:7:1:1147:62\n+C02SLe0018B07_LR335\tS-MART\ttranscript\t8378\t8413\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:787:1759;identity=100.000000;Name=HWI-EAS337_3:7:1:787:1759\n+C02SLe0018B07_LR335\tS-MART\ttranscript\t8208\t8243\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:787:1759;identity=100.000000;Name=HWI-EAS337_3:7:1:787:1759\n+C09SLm0143I09_LR365\tS-MART\ttranscript\t1546\t1581\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:425:1939;identity=100.000000;Name=HWI-EAS337_3:7:1:425:1939\n+C09SLm0143I09_LR365\tS-MART\ttranscript\t1490\t1525\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:425:1939;identity=100.000000;Name=HWI-EAS337_3:7:1:425:1939\n+C02SLe0018B07_LR335\tS-MART\ttranscript\t9178\t9213\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:187:1132;identity=97.222222;Name=HWI-EAS337_3:7:1:187:1132\n+C02SLe0018B07_LR335\tS-MART\ttranscript\t9065\t9100\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:187:1132;identity=100.000000;Name=HWI-EAS337_3:7:1:187:1132\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t2868\t2903\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1739:1840;identity=100.000000;Name=HWI-EAS337_3:7:1:1739:1840\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t3189\t3224\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:1739:1840;identity=97.222222;Name=HWI-EAS337_3:7:1:1739:1840\n+C07SLe0111B06_LR194\tS-MART\ttranscript\t8673\t8708\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1505:1876;identity=100.000000;Name=HWI-EAS337_3:7:1:1505:1876\n+C07SLe0111B06_LR194\tS-MART\ttranscript\t8677\t8712\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1505:1876;identity=100.000000;Name=HWI-EAS337_3:7:1:1505:1876\n+C09SLm0143I09_LR365\tS-MART\ttranscript\t6957\t6992\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:192;identity=100.000000;Name=HWI-EAS337_3:7:1:447:192\n+C09SLm0143I09_LR365\tS-MART\ttranscript\t7039\t7074\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:192;identity=100.000000;Name=HWI-EAS337_3:7:1:447:192\n+C09SLm0037I08_LR367\tS-MART\ttranscript\t1298\t1333\t.\t-\t.\tquality=
60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:21:2019;identity=100.000000;Name=HWI-EAS337_3:7:1:21:2019\n+C09SLm0037I08_LR367\tS-MART\ttranscript\t955\t990\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:21:2019;identity=100.000000;Name=HWI-EAS337_3:7:1:21:2019\n+C04HBa8K13_LR338\tS-MART\ttranscript\t2175\t2210\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:1593:652;identity=97.222222;Name=HWI-EAS337_3:7:1:1593:652\n+C04HBa8K13_LR338\tS-MART\ttranscript\t2226\t2261\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1593:652;identity=100.000000;Name=HWI-EAS337_3:7:1:1593:652\n+C12HBa326K10_LR306\tS-MART\ttranscript\t8100\t8135\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1254:1660;identity=100.000000;Name=HWI-EAS337_3:7:1:1254:1660\n+C12HBa326K10_LR306\tS-MART\ttranscript\t8243\t8278\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1254:1660;identity=100.000000;Name=HWI-EAS337_3:7:1:1254:1660\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/expRef.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/expRef.fasta Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,33148 @@\n+>C10HBa0111D09_LR276\n+GAACAAACAACCCCTTTTTGGAGGTGTTGGCGCGTCGTGCAGCTTACACTCAAAAGTTAA\n+AAAGTTGCCTTGCGATGCGGTCATGTTACAAACCTCTCTGCCTTAAATTAAATTCCATAA\n+CCAAGATTTGGAGGTGCCTCAACGATGCGCAGCCATGTCCCATATTTGGTCGCCTCGTTT\n+AAAAGTCAAGTTAGACTTAATTAAGAGGTCCAACTAGTGTAGGGGCGTTTTGAGTACTTG\n+TGGGATTTATTATAAACGGTTTTGAGTCACTTTAAACCCACTTCACCAATTAAAACAAAA\n+TCCTCAAGTTAAAACTCAATATCTTTCCATTCTCTCTCTCTAAAACCTTCATTGGAGATA\n+TTTGAAGCTCCACGGAAGAAGGTTAATTTTCCAAGGTTTCAATGAAAATTTCGTGTATAG\n+GTCTTCAATAAGGTATGGTGATTTCATCCTTGATTCTTCTATCATTCAAGGATCCAATTC\n+AAAGGTTTTTCAAAAGATCTCAAAAATCCTATTTCGAATTCTAAGTATGGGTTCTTCCAT\n+TTAAAGGTTTAAATGGATGAATTATGATGTTTTCAATGTTAGTTGATGTTTTTATGATAA\n+AAAAACTCCATGAACCCATGAGCATCCTAATTCTCTAATTTTGTCTTGTAAATTGAGTTT\n+GATAATTGTGATTGGTTATGGATGGAATTGTATTTAGATTGCTCTATATTGTTGATTCTT\n+ATTGTTAACCTATCTCTATATATGTAGAATTGAGATTGTAAGGATGAGTTAGTAATCTTG\n+GCTTTATGGGCTTTCGAATCCGGGTTTACCCCCTGGATGTAACCGGCATCCTCGCCCTTT\n+TTCAAGGACTAAGACCAACCTTTTAGTCTCATGTCATTACATTCATAGGTTGACAAATGC\n+GGAAAAATTTAAAACTTTCATTATCACTACTTGGAGGTTTACATAGACCTCTACATACAC\n+ATAAGATATATTCATATAGAGTATACATAGACCCTTCGTATAGGAAGGTTACATAGCCAT\n+CTACTTTTATTACACATACATATATATAAAATATAAAAATAGTCTAACGATTGTCTCATC\n+TCATACCCTCTAAACGATTATCACAATATGGGCATAACCCTTACATCAATCAAACAAGAG\n+CACATATAGGTCATACAAAAGTATAGTACTCAATTAAAAAGGAAAGAAATGAAAGAGTCT\n+TTAAGCTCATAACAAGTCCATAAGCTAGATTATGGCATTGACCTCAAAAGTTGAGGACCT\n+TATGTGCGTACACAAGCAAAACATGCTAAAAAGGGACTTTTTAGTCAAAACATGCCCATT\n+TATCCCTTTAAGAACCTACTACAAAGCCAACAAGTCATACCAACCAACCAAACATGCTTA\n+CTATCTCAACAAGTAATACTTATCCCAACATACTTGAAACCATGATTTACTACAACCCTA\n+TCACCAAGGAAAAATATCACAAGAATGAATAAGAGTCAATCATATCATGATAGAGAGACA\n+ACTATTCATGAATCCTTATCAACTCAACAAGTGCAATAACCAAGCAAAGCCTCATAACCT\n+TACTCAATCAAGTATCCTCAAAAAGAAACCATGACCAATGTCCAACTTTACCTAACATAG\n+CATTTAGGTTTACATTTTATCATATATTAACATTATGACCCAAGGCATACTCATTAGTAA\n+ACTAATTAATATATAATATCAACAATGTGCCATAGTAATCATATATACATAATATATCAT\n+CATAACATAAACATATATAAAAACCTCCTTCTAAGACTCCCCTCAAGGCTAACTAGTGAA\n+ATGTTTAGGTAGAGCCCCATACCCCTACCTAGATTAAGCTAGACCCCTTAGGTTATCCAA\n+GTTAGAGTTCAAGTCCTTTAATTCGTTTTACCTTTTGGGAACATCTTGCCCTAACCGACA\n+TAGACCACATGAGCTAGTGTGGGATACGGTTCCAAAAAACCCTACACAGAAAGAAGGCGG\n+ACTACTTGCCAAAGTATTACCAAAACATGAAACATAGCAACTACGTTGATCCACTAGCAA\n+GTATTTCTATAGGGGCAACATAGTTCAAGAACTCTGAGATATACTTGAGACCCTCTTTAT\n+GCGCCATGCATTATAGTCTCCAACCTCAAGAGTAATGTAGTGTTCCTACCTTCCCCATGT\n+GAGAAAGGACACTCCTCAATCTAGTTCACTCGGTGCTAAGCTAGAGACCCTTTTTGAAAT\n+GTCTTTAAGCCTTTAATTATCAATCATAGCTTAGCTTAGGTCATAGGGTATATCTCTTGT\n+ATAATCATCATCATCAATAGCTCAATAATAATTGTATGAGTATAAGTCCTTTCATCACAA\n+TTCATATAAGTGAGGTTAACATGTTAGCATTTCATTGCATATCAAGAAACATTGATGATT\n+CTTACCATCCTTGTATCACATACACCTTAATCAATCTCACAACATAGTCAGGACATATCA\n+ATTCAACATCATACCACCCTATAATCCTAATATAAGGCATACTCCAATATAACTTCACGT\n+CTTAACAAAAATTTATCACAATTGGAATTAAAGATAGAGATTCTAAGACTTAACAAGTCT\n+TCCTTGTAGTTCATCATCAAGGTCTTACCATCAACCCATAACTCAACCAAGTTTGGGGAG\n+TAACATCATCACACAATGATAATCAATAGGATAACAAGGCTAATTTCATCTCTATAACAC\n+AATTCAACACTAGATCATAACTTAAGACAAGATACATAGGCTAATTTCACACTATAATTC\n+ATAACCTAAATCACATCTCAAGAAATAGCATTATAGTCCTATAATTCATATTAATTTGTT\n+CATAATAACACAATAGGATAGTAATTTAATCAATAACCAAGTCAATTGAATGATCACAAT\n+ACAATATACATCAATATCACAAGCTAGGGTTAGGGATGAAGGATCATATTCTTCAATTTA\n+GACCAAACCACTAACAATTACCATAATAAAGTTTAAATTCATGTAAATGTATTCAATATA\n+ACCTAAATAAATCATTAACAACTCAATCCATAACTTCAATTTCGTAATTGAATGAAACCC\n+ATAAGAAAATTCACCTTTTGAAATCCATTTTAAAGAAACCCTTTGAGGAAAGAGCCTCAA\n+AGGTGAATTAGATCCCATATATTAATGTTTGATGATGAATTCGCCCCTTTCCATCCCCCA\n+AACCCTTATCCTTGCTAGTTTTTAATGGTGAGTTCAAGTAGAGAGAGAAATAAGAGAGAA\n+GGAAGAGAGTTTTTGTCTTAGAGTTCTAATTAATTTAATTGGGGTTGGGGATTTTATATG\n+CGTTTTAAGTTAGTTAATTAGTCACCCCTCAATACCTAACTAA
CCCCTGAACCACCTAAT\n+TAATTAAATGAATCAATATAAAAACATACAGGAAATTTGACCTTCACAGACGAGACCCCG\n+AACGACGGGCCATCTGTGAGTCAACGGTCCCTCACCCCTCCGTCCTGCACTCTATCGATC\n+AGTTCATAGACTGTGCAGGCAGATCAATTCTTCAACTTGTCTAAGTATGGGATGACGGTG\n+GTATCGACTCCCCGTCAGTCCACACACGGACCGTAGGTGGTCCCATCGATGCGCATTGTC\n+TAGTCCTTGTTTGTTCAAACACAAGGGCCTCAAGGGCCCTTGGTTGGTGCTTGGGGAGTC\n+GTACCCATACGTTTCAATCATGAAACAACTCAAAAACCTATAATCTATCCTTCCACCAAT\n+TTTTGTACCTTTCCGACTCTTAAAAGGTAGTCAAATAGGCTAAGGCACGCTAACACCCCT\n+TTGAACCAACTTCCTGGACGTTCTTATACATTTTGGTTCTTAAACTTCCT'..b'CAATATTCCATATTGATCGCCAGCTTCCATTGCTACAAAAGA\n+TGCATATAGTATCAGCTTCTTTAGACAAAGCTATAAGAAGTATCAGTTGACCCAATTGAC\n+AAGTCACACGTGCTATAACAAGATATCACTTGACGCAAGTGACAAGTGTGAAGCTGACAG\n+GAAAATAGGCAATAGAATCCCTCATTCTTTGTTTATATATAGCAACCTATTACTTCAGTA\n+TCTGTTTACAAGTTCTGCACCACGATAAGTATAACTATTTAGAAATTATGAAGGGAGTGT\n+TGCACAAATTAGTCAGGGTTAGAATTTTAATAATCCAACACACCAGAAATTCTGAGGACT\n+ATGCCTAGCAGCTGAAATCACCACAACAAGTTCAAAGTCAAATCCTGGCTCTTCCACATC\n+CTTTCCCTTCGTACAGTAAACTGAACAGATGCCTTTTGGATATGTTTCACTCACATACTT\n+CATAATTTCAGCATCCATGGCAGACCTACACAAGAATTACCATGAATAGGAGAATTTGGC\n+AAGCAACGGTAATCACAAAGTCAGTACTCATAAACCAAGCCTCTGATGAGAAGAACTAAC\n+CATGTTAAGAAGGGAACTACTCACATAAAACAAAAACATGATTTTTATAATCTGCATGCC\n+AAGCATTAGGAACTATTGAAGAATTCACTGCGATCTCAGAATATAATCCTTTCATACTCT\n+TAATGCCTCCCAATTATACCAAATTTAGGTGTCTTACTACCATTTTAGTATCTGACATTG\n+ATGTCATTACCCATTAATTTGGAGACACTGGTTCCTCACCGATAAAGATGAACAAAAAGG\n+TAAACCAGTGTATTATGAATCAGATCTCTTTATGGAATAAAAACACACATGAATCATGAT\n+GTAGGATCTTATTAACACTTGAGGACTGAGGCGAAGATTACTAAGAATATCCCGAAAGAT\n+AAGACATAGAATGTAATATAGAAGGACAGAGAACAAATGATACTGATTAAGAAGAAGAGC\n+ATCTATATAGGAGAGATTAGCCTAAACTATTTATTTTCAGACTGATTTCGGTGAGCGCAG\n+ACCAAAACATGCAGCTTTTTTTTCAATTAAGCCGGAAAGACAATTTCCACAAGAAATGCA\n+ACTGTTCTAGCATATCTTAAACTATAATCTGAGTGCTGTCTGAGAACTGAGGTTGAATTT\n+GCAAGTCTTGTTGAGCATGGTTAAAAAATAAGTCCAATTAGGCAAAATAATTGTGAATGT\n+CATAATATAGAAGAACTTCAACATCTCAATGGGAAAAACAGAAAGTGAGTAGCTAAAAAG\n+GGGAGCAATACCAAAGATATTAAACTGAGAAATATATCTCATACCCTACAGATGCATACC\n+TATACTCTTCCACGAAAGCAGATGGAAGTTCTTCATCTCTTGCTGGCCTAACGTCTTTAC\n+AAACCTAGAATGCAGACATACCATGAGCTTACAAGAAGGGAGCATAAATTATTACACGGC\n+AACAATAACTAGGAAAAAAAGAGAGAGAGGAAATACATTTAATTTCACAGCCCTTGAATT\n+ATGAATAAAACCATGCACTTGTTATATTAAGCAGAGACTACTTCCACTTTTCCAATCTAT\n+TTACATCTACTTCATCACATTAACAAGAATAGAAAAGAAATGCATGTTATGTGACATTCA\n+GCACTGTAGACTAATGAGGATTAATTTACAACCATGGACCAGCTATAGCAGAAGAGACCT\n+TATTTTACTTTTCCAAAATTGGTATTTACTCCATCACCATTAACAAGAATAAAAAATGCA\n+TCTTATGTGAAGTTCATAACTATTGAATAATGAGGTTAATCAAGAGCTTGCTATATCAAA\n+GCACATTTTTCAAGTTTACAATTTCTTCATTCTTGGTATCCACTTCTATCATGAAAACCA\n+ACCTAACAGTTAACACAATTCACTCTGGTGTTTTGTTCTTTATCCCTAATGCCTGAGTGT\n+GTTTAACTAATCAAGTTCCAATCAGCCAGAAGATGTCTAAACATACTAAACTATAGATAA\n+ACAACATGAGCATAAAACCAGTTTATGTAGAGATTTTTAATTGCACAGCATAAAAAGGAG\n+TACCCCCGGCACTATATGTGCTTCTCTTTCTTTTTTTCTTACTTCGTCCTGATCTTTTTA\n+CTTTTCTTTTCCTTTCTTTCAGAATAAGCACATTTTTGGATATAGTCCCACCATCTCTAC\n+CAGGTTTATGTCTGTCCATATTATTGCTTTTTTGAATTACCTTCTAAACAAAATACTCAT\n+CCTTACAAACCTGCTTCCTCTTCTATGGATCCCACCACTCCATGCCCCGAAAAAAAAGAA\n+AGAAAAAGATAAGTTTTTCATAGTCACTTTAGATTATCATTTATTAAATTCTGTCGGCTC\n+AACTGATAGGAACAGTGAAATGGACTTTTCAATCATAAAAAGATAATAGAAGTTATTAAA\n+TGATTCCCTTCAAGATTATGAGCTTTTAAACTTACATATCATGCAACTATTGGGGAATTA\n+AGGGACTGGGGATTTGATGATAAATTCCAGCACCATTTTTGGTGCTTTTGTGTTTTTGCA\n+AGGTAGTTTGTTAGTGGCACATGGAAGGAGGTGCCTCATCCAATGAAATTATTAATCTTA\n+TCAACAAGAAGAAAGTCAAAACACCAAACTGTAAAAAATCCAAAAATAGCATTTTGCATT\n+GTGTACTAACTGAACAACGTACTTGCTTGACATGGTCAACTCTGGCAACCTGCGCAGTCC\n+GGGGATCAAGATACTCATCCTTATGAACCTCACTAAATGATGTAATCAGTACCTACAAAT\n+TAGTGAACAGCAACTTTACACAGCTAGATCATGAAAAATAGCTTCCAAGTGTCCATTACT\n+ACATAAATGAAAAGCATTATACTTTCTTTTTAG
AAGAGGGGAACAAAATCTTAGACTTCA\n+TAAGGAACAGTTCTCCGGAAAGTTTTCTTTTCTATATTGAAGAAGTAATCATTTATTGAA\n+GTGGTGGAAATTCCCTAAGCTTTAAACAGATGTAGAGAACTTGTCCATAAACATGGTGCT\n+CAACAAACAACACCCAACTCATTCATATATAGTAGCGCCAGCATCCAAGTACCATGAAGT\n+ATATCTCTATCCTAAAAGCTTTACTTGAAGACTTATTAGATTAATCTTACTTATTTCTCA\n+TGCAACTTTTTTTTTGAAAAGTTTCTCAAGCATAACTTTTATCCTTTGTTTTCATTCAGT\n+CTTTGAGCTCTAAAGGATGCCTAGAGAGGTCATGAACCAAGTAAGGAAATTGCAACATCA\n+TATTGCTTCCAATTTACCTCATTTTTTATCTTCAACTACCGAAAACTACCAAATCTGAAA\n+TTATCAACTAAGGAAAATTACAACATCAGTACAATAAGTATTGCTTACAGTTCACTTAAT\n+ATTTCAATCTTCGACTACGGAAAACTGTCAAATCTGAAATTATCAATTTGAATGACACGA\n+TTAGTCTAACTTACATTTTCAAAGATCTAACTTACCCAATGAAAAAAGAAAACAAGAGAG\n+AGACTTACATCGCCACTTCTGTTTGGGAATTCGAGACAAATCAAGTGAGATTTGTTGTAC\n+GAAGGAAATGACTCCTCGGCCGCTTTCTTATATATATTTTCGTCCTTTAAAATAGCTCTA\n+ACATCTGCAAATCCAACCAAATGACGCTCAAAACCAAAAATGTAAAAAATAAACTGCCGA\n+TCGCAAATGAACACCAATGCGGTCACATTTTCAAGCACGAAAAAAGCTTCAAAATACAAA\n+AAACTTTAGCGCAGAAAATAAACGAAAGAGAAGAAGAAGAAGACCTTTGGCGACGTACTG\n+AATTTCGCCGGCTGGGGCATTAAGAAGGAACCATTTGGCAATCTCAAT\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputCR.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputCR.gff3 Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,6 @@
+chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
+chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
+chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
+chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
+chr1 test match 6155418 6155441 24 + . Name=test3/1;occurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50
+chr1 test match 6155418 6155441 24 - . Name=test3/1;occurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputFileTest1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputFileTest1.bed Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+track name=reads description="Reads" useScore=0 visibility=full offset=0
+arm_X 1000 2000 test1.1 1000 + 1000 2000 0 1 1000, 0,
+arm_X 1000 2000 test1.2 1000 - 1000 2000 0 1 1000, 0,
+arm_X 100 200 test1.3 1000 + 100 200 0 1 100, 0,
+arm_X 100 3200 test1.4 1000 + 100 3200 0 1 3100, 0,
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputFileTest2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputFileTest2.bed Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,2 @@
+track name=reads454Relaxed description="reads454Relaxed" useScore=0 visibility=full offset=0
+arm_X 1000 2000 test2.1 1000 + 1000 2000 0 1 1000, 0,
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputMSWC1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputMSWC1.gff3 Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
+chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
+chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
+chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
+chr6 test match 48565007 48565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputMSWC2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputMSWC2.gff3 Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
+chr2 test match 26303990 26304021 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
+chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
+chr4 test match 28565017 28565051 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
+chr5 test match 30000000 30000050 50 + . Name=test3/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputMTC.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputMTC.sam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,2698 @@
[hunk truncated in this rendering: 2,698 added lines — a SAM header of @SQ records for the reference sequences (C10HBa0111D09_LR276 … C12SLm103K8_LR380 and further C01–C12 clones, lengths roughly 9,000–11,000 bp) followed by paired-end 36 bp read alignment records (HWI-EAS337 reads with 36M CIGARs and XT/NM/SM/AM/X0/X1/XM/XO/XG/MD/XA optional tags)]
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/inputMapping.map
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/inputMapping.map Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,21 @@
+BlastclustCluster1Mb1 dmel 44957 60589
+BlastclustCluster2Mb1 dmel 441296 453986
+BlastclustCluster3Mb1 dmel 1263264 1272001
+BlastclustCluster4Mb1 dmel 691910 700435
+BlastclustCluster5Mb1 dmel 4887 13246
+BlastclustCluster6Mb1 dmel 340294 348412
+BlastclustCluster7Mb1 dmel 802363 809343
+BlastclustCluster8Mb1 dmel 303029 309770
+BlastclustCluster9Mb1 dmel 34275 40713
+BlastclustCluster10Mb1 dmel 976199 981423
+BlastclustCluster11Mb1 dmel 231806 236301
+BlastclustCluster12Mb1 dmel 323712 327988
+BlastclustCluster13Mb1 dmel 1011279 1014955
+BlastclustCluster14Mb1 dmel 474293 477597
+BlastclustCluster15Mb1 dmel 930649 933730
+BlastclustCluster16Mb1 dmel 1241523 1244351
+BlastclustCluster17Mb1 dmel 532049 534729
+BlastclustCluster18Mb1 dmel 335473 337381
+BlastclustCluster19Mb1 dmel 686181 687792
+BlastclustCluster20Mb1 dmel 1239136 1240579
+BlastclustCluster21Mb1 dmel 1261233 1262370
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,4 @@
+chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
+chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
+chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
+chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/mapperAnalyzerMappings.axt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/mapperAnalyzerMappings.axt Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,12 @@
+1 chr1 6155418 6155441 test1/1 1 24 + 66
+GTAACAGATTCAGAACATTAGCAG
+GTAACAGATTCAGAACATTAGCAG
+2 chr2 26303950 26303981 test2/1 3 36 + 0
+AT-ATT-AAAAAAAAAAAAAAAAAAAAAAAAAAA
+ATGATTGAAAAAAAAAAAAAAAAAAAAAAAAAAA
+3 chr3 28320540 28320574 test2/1 3 36 + 0
+ATGTTTGACAAAAAAAAAAAAAAAAAAAAAAAAAA
+ATGATTGA-AAAAAAAAAAAAAAAAAAAAAAAAAA
+4 chr4 28565007 28565041 test2/1 1 36 + 0
+ATAAGATT-AAAAAAAAAAAAAAAAAAAAGGAAAAA
+ATATGATTGAAAAAAAAAAAAAAAAAAAAAAAAAAA
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/mapperAnalyzerOutput.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/mapperAnalyzerOutput.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,4 @@
+chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
+chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
+chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
+chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/mapperAnalyzerSequences.mfq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/mapperAnalyzerSequences.mfq Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,12 @@
+@test1/1
+GTAACAGATTCAGAACATTAGCAG
++test1/1
+bb`b_bbbbb_bbbbababbbbb^
+@test2/1
+ATATGATTGAAAAAAAAAAAAAAAAAAAAAAAAAAA
++test2/1
+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+@test3/1
+TTTGAATAAAACGGGAGGATATA
++test3/1
+X^_Y`_____\R^BBBBBBBBBB
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/sorted_file_oneline.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/sorted_file_oneline.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+chr1 test test2.1 0 1000 1001 + . ID=test2.1;Name=test2.1
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/sorted_query.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/sorted_query.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,6 @@
+chr1 query test1.1 25 150 126 + . ID=query_1;Name=test1.1
+chr1 query test1.2 70 850 781 + . ID=query_2;Name=test1.2
+chr1 query test1.3 550 850 201 + . ID=query_3;Name=test1.3
+chr1 query test1.4 925 1025 101 + . ID=query_4;Name=test1.4
+chr1 query test1.5 1201 1210 10 + . ID=query_5;Name=test1.5
+chr1 query test1.6 1500 1600 101 + . ID=query_6;Name=test1.6
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/sorted_query_wig.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/sorted_query_wig.wig Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1040 @@
+track type=wiggle_0 name="SMART"
+variableStep chrom=chr1
+25 1
+26 1
+27 1
+28 1
+29 1
+30 1
+31 1
+32 1
+33 1
+34 1
+35 1
+36 1
+37 1
+38 1
+39 1
+40 1
+41 1
+42 1
+43 1
+44 1
+45 1
+46 1
+47 1
+48 1
+49 1
+50 1
+51 1
+52 1
+53 1
+54 1
+55 1
+56 1
+57 1
+58 1
+59 1
+60 1
+61 1
+62 1
+63 1
+64 1
+65 1
+66 1
+67 1
+68 1
+69 1
+70 2
+71 2
+72 2
+73 2
+74 2
+75 2
+76 2
+77 2
+78 2
+79 2
+80 2
+81 2
+82 2
+83 2
+84 2
+85 2
+86 2
+87 2
+88 2
+89 2
+90 2
+91 2
+92 2
+93 2
+94 2
+95 2
+96 2
+97 2
+98 2
+99 2
+100 2
+101 2
+102 2
+103 2
+104 2
+105 2
+106 2
+107 2
+108 2
+109 2
+110 2
+111 2
+112 2
+113 2
+114 2
+115 2
+116 2
+117 2
+118 2
+119 2
+120 2
+121 2
+122 2
+123 2
+124 2
+125 2
+126 2
+127 2
+128 2
+129 2
+130 2
+131 2
+132 2
+133 2
+134 2
+135 2
+136 2
+137 2
+138 2
+139 2
+140 2
+141 2
+142 2
+143 2
+144 2
+145 2
+146 2
+147 2
+148 2
+149 2
+150 2
+151 1
+152 1
+153 1
+154 1
+155 1
+156 1
+157 1
+158 1
+159 1
+160 1
+161 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+170 1
+171 1
+172 1
+173 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+180 1
+181 1
+182 1
+183 1
+184 1
+185 1
+186 1
+187 1
+188 1
+189 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+198 1
+199 1
+200 1
+201 1
+202 1
+203 1
+204 1
+205 1
+206 1
+207 1
+208 1
+209 1
+210 1
+211 1
+212 1
+213 1
+214 1
+215 1
+216 1
+217 1
+218 1
+219 1
+220 1
+221 1
+222 1
+223 1
+224 1
+225 1
+226 1
+227 1
+228 1
+229 1
+230 1
+231 1
+232 1
+233 1
+234 1
+235 1
+236 1
+237 1
+238 1
+239 1
+240 1
+241 1
+242 1
+243 1
+244 1
+245 1
+246 1
+247 1
+248 1
+249 1
+250 1
+251 1
+252 1
+253 1
+254 1
+255 1
+256 1
+257 1
+258 1
+259 1
+260 1
+261 1
+262 1
+263 1
+264 1
+265 1
+266 1
+267 1
+268 1
+269 1
+270 1
+271 1
+272 1
+273 1
+274 1
+275 1
+276 1
+277 1
+278 1
+279 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+290 1
+291 1
+292 1
+293 1
+294 1
+295 1
+296 1
+297 1
+298 1
+299 1
+300 1
+301 1
+302 1
+303 1
+304 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+312 1
+313 1
+314 1
+315 1
+316 1
+317 1
+318 1
+319 1
+320 1
+321 1
+322 1
+323 1
+324 1
+325 1
+326 1
+327 1
+328 1
+329 1
+330 1
+331 1
+332 1
+333 1
+334 1
+335 1
+336 1
+337 1
+338 1
+339 1
+340 1
+341 1
+342 1
+343 1
+344 1
+345 1
+346 1
+347 1
+348 1
+349 1
+350 1
+351 1
+352 1
+353 1
+354 1
+355 1
+356 1
+357 1
+358 1
+359 1
+360 1
+361 1
+362 1
+363 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+370 1
+371 1
+372 1
+373 1
+374 1
+375 1
+376 1
+377 1
+378 1
+379 1
+380 1
+381 1
+382 1
+383 1
+384 1
+385 1
+386 1
+387 1
+388 1
+389 1
+390 1
+391 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+398 1
+399 1
+400 1
+401 1
+402 1
+403 1
+404 1
+405 1
+406 1
+407 1
+408 1
+409 1
+410 1
+411 1
+412 1
+413 1
+414 1
+415 1
+416 1
+417 1
+418 1
+419 1
+420 1
+421 1
+422 1
+423 1
+424 1
+425 1
+426 1
+427 1
+428 1
+429 1
+430 1
+431 1
+432 1
+433 1
+434 1
+435 1
+436 1
+437 1
+438 1
+439 1
+440 1
+441 1
+442 1
+443 1
+444 1
+445 1
+446 1
+447 1
+448 1
+449 1
+450 1
+451 1
+452 1
+453 1
+454 1
+455 1
+456 1
+457 1
+458 1
+459 1
+460 1
+461 1
+462 1
+463 1
+464 1
+465 1
+466 1
+467 1
+468 1
+469 1
+470 1
+471 1
+472 1
+473 1
+474 1
+475 1
+476 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+486 1
+487 1
+488 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+499 1
+500 1
+501 1
+502 1
+503 1
+504 1
+505 1
+506 1
+507 1
+508 1
+509 1
+510 1
+511 1
+512 1
+513 1
+514 1
+515 1
+516 1
+517 1
+518 1
+519 1
+520 1
+521 1
+522 1
+523 1
+524 1
+525 1
+526 1
+527 1
+528 1
+529 1
+530 1
+531 1
+532 1
+533 1
+534 1
+535 1
+536 1
+537 1
+538 1
+539 1
+540 1
+541 1
+542 1
+543 1
+544 1
+545 1
+546 1
+547 1
+548 1
+549 1
+550 2
+551 2
+552 2
+553 2
+554 2
+555 2
+556 2
+557 2
+558 2
+559 2
+560 2
+561 2
+562 2
+563 2
+564 2
+565 2
+566 2
+567 2
+568 2
+569 2
+570 2
+571 2
+572 2
+573 2
+574 2
+575 2
+576 2
+577 2
+578 2
+579 2
+580 2
+581 2
+582 2
+583 2
+584 2
+585 2
+586 2
+587 2
+588 2
+589 2
+590 2
+591 2
+592 2
+593 2
+594 2
+595 2
+596 2
+597 2
+598 2
+599 2
+600 2
+601 2
+602 2
+603 2
+604 2
+605 2
+606 2
+607 2
+608 2
+609 2
+610 2
+611 2
+612 2
+613 2
+614 2
+615 2
+616 2
+617 2
+618 2
+619 2
+620 2
+621 2
+622 2
+623 2
+624 2
+625 2
+626 2
+627 2
+628 2
+629 2
+630 2
+631 2
+632 2
+633 2
+634 2
+635 2
+636 2
+637 2
+638 2
+639 2
+640 2
+641 2
+642 2
+643 2
+644 2
+645 2
+646 2
+647 2
+648 2
+649 2
+650 2
+651 2
+652 2
+653 2
+654 2
+655 2
+656 2
+657 2
+658 2
+659 2
+660 2
+661 2
+662 2
+663 2
+664 2
+665 2
+666 2
+667 2
+668 2
+669 2
+670 2
+671 2
+672 2
+673 2
+674 2
+675 2
+676 2
+677 2
+678 2
+679 2
+680 2
+681 2
+682 2
+683 2
+684 2
+685 2
+686 2
+687 2
+688 2
+689 2
+690 2
+691 2
+692 2
+693 2
+694 2
+695 2
+696 2
+697 2
+698 2
+699 2
+700 2
+701 2
+702 2
+703 2
+704 2
+705 2
+706 2
+707 2
+708 2
+709 2
+710 2
+711 2
+712 2
+713 2
+714 2
+715 2
+716 2
+717 2
+718 2
+719 2
+720 2
+721 2
+722 2
+723 2
+724 2
+725 2
+726 2
+727 2
+728 2
+729 2
+730 2
+731 2
+732 2
+733 2
+734 2
+735 2
+736 2
+737 2
+738 2
+739 2
+740 2
+741 2
+742 2
+743 2
+744 2
+745 2
+746 2
+747 2
+748 2
+749 2
+750 2
+751 2
+752 2
+753 2
+754 2
+755 2
+756 2
+757 2
+758 2
+759 2
+760 2
+761 2
+762 2
+763 2
+764 2
+765 2
+766 2
+767 2
+768 2
+769 2
+770 2
+771 2
+772 2
+773 2
+774 2
+775 2
+776 2
+777 2
+778 2
+779 2
+780 2
+781 2
+782 2
+783 2
+784 2
+785 2
+786 2
+787 2
+788 2
+789 2
+790 2
+791 2
+792 2
+793 2
+794 2
+795 2
+796 2
+797 2
+798 2
+799 2
+800 2
+801 2
+802 2
+803 2
+804 2
+805 2
+806 2
+807 2
+808 2
+809 2
+810 2
+811 2
+812 2
+813 2
+814 2
+815 2
+816 2
+817 2
+818 2
+819 2
+820 2
+821 2
+822 2
+823 2
+824 2
+825 2
+826 2
+827 2
+828 2
+829 2
+830 2
+831 2
+832 2
+833 2
+834 2
+835 2
+836 2
+837 2
+838 2
+839 2
+840 2
+841 2
+842 2
+843 2
+844 2
+845 2
+846 2
+847 2
+848 2
+849 2
+850 2
+925 1
+926 1
+927 1
+928 1
+929 1
+930 1
+931 1
+932 1
+933 1
+934 1
+935 1
+936 1
+937 1
+938 1
+939 1
+940 1
+941 1
+942 1
+943 1
+944 1
+945 1
+946 1
+947 1
+948 1
+949 1
+950 1
+951 1
+952 1
+953 1
+954 1
+955 1
+956 1
+957 1
+958 1
+959 1
+960 1
+961 1
+962 1
+963 1
+964 1
+965 1
+966 1
+967 1
+968 1
+969 1
+970 1
+971 1
+972 1
+973 1
+974 1
+975 1
+976 1
+977 1
+978 1
+979 1
+980 1
+981 1
+982 1
+983 1
+984 1
+985 1
+986 1
+987 1
+988 1
+989 1
+990 1
+991 1
+992 1
+993 1
+994 1
+995 1
+996 1
+997 1
+998 1
+999 1
+1000 1
+1001 1
+1002 1
+1003 1
+1004 1
+1005 1
+1006 1
+1007 1
+1008 1
+1009 1
+1010 1
+1011 1
+1012 1
+1013 1
+1014 1
+1015 1
+1016 1
+1017 1
+1018 1
+1019 1
+1020 1
+1021 1
+1022 1
+1023 1
+1024 1
+1025 1
+1201 1
+1202 1
+1203 1
+1204 1
+1205 1
+1206 1
+1207 1
+1208 1
+1209 1
+1210 1
+1500 1
+1501 1
+1502 1
+1503 1
+1504 1
+1505 1
+1506 1
+1507 1
+1508 1
+1509 1
+1510 1
+1511 1
+1512 1
+1513 1
+1514 1
+1515 1
+1516 1
+1517 1
+1518 1
+1519 1
+1520 1
+1521 1
+1522 1
+1523 1
+1524 1
+1525 1
+1526 1
+1527 1
+1528 1
+1529 1
+1530 1
+1531 1
+1532 1
+1533 1
+1534 1
+1535 1
+1536 1
+1537 1
+1538 1
+1539 1
+1540 1
+1541 1
+1542 1
+1543 1
+1544 1
+1545 1
+1546 1
+1547 1
+1548 1
+1549 1
+1550 1
+1551 1
+1552 1
+1553 1
+1554 1
+1555 1
+1556 1
+1557 1
+1558 1
+1559 1
+1560 1
+1561 1
+1562 1
+1563 1
+1564 1
+1565 1
+1566 1
+1567 1
+1568 1
+1569 1
+1570 1
+1571 1
+1572 1
+1573 1
+1574 1
+1575 1
+1576 1
+1577 1
+1578 1
+1579 1
+1580 1
+1581 1
+1582 1
+1583 1
+1584 1
+1585 1
+1586 1
+1587 1
+1588 1
+1589 1
+1590 1
+1591 1
+1592 1
+1593 1
+1594 1
+1595 1
+1596 1
+1597 1
+1598 1
+1599 1
+1600 1
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/sorted_ref.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/sorted_ref.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,7 @@
+chr1 test test2.1 9 1000 1001 + . ID=test2.1;Name=test2.1
+chr1 test test2.2 50 350 301 + . ID=test2.2;Name=test2.2
+chr1 test test2.3 100 600 501 + . ID=test2.3;Name=test2.3
+chr1 test test2.4 200 450 251 + . ID=test2.4;Name=test2.4
+chr1 test test2.5 700 950 251 + . ID=test2.5;Name=test2.5
+chr1 test test2.6 800 900 101 + . ID=test2.6;Name=test2.6
+chr1 test test2.7 1200 1300 101 + . ID=test2.7;Name=test2.7
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testBedParser1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testBedParser1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+track name=reads description="Reads" useScore=0 visibility=full offset=0
+arm_X 1000 3000 test1.1 1000 + 1000 3000 0 2 100,1000, 0,1000,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testC2S.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testC2S.fa Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,100 @@
+>chr1
+CAACATTAGCGCCATGCCCACTGTGGGGAATTTACCAGCAGCCCGCACAC
+TTAGCCGGCCTGCTGCAAAGCGGGATTTATTTAATTCATCCTCCAAGAGC
+CCAAACGAGCATCCTATGAGTTTCTCGGAAGTGGTAGCTGGAGCAGGTCC
+AGTTTCTATGGCACCCCCTAATCCGGCACCACTGACGAAAACCCCAGGAA
+AGCGGACAAACGACGATCTGGATTGCTCAAATTTTAAGACGCCCAATAAA
+AAATTATGCGCGACATCCAATTTTGTAACTCCCTGCATTTTTCCCCCGCT
+CATCACACCAGTTTTCAAAAGCAAGGCAGCTCAATCTGTTTATGAGGAAT
+CAAAAGCCAGAAACGGACCCACCCGCCAGCCGTTACCCTGTAGCATCAAT
+GTCTCTGCTTGCAGCGCAGCGGCGCCACCCGTTATCGCCCCCCTACCCCC
+TCAAAATACAGATGCACAGCTGCCTCCATGGAAAATCGTGCCCCAGAGCC
+GTAGAGCACCCCCTATACTCGTCAATGATGTGAAGGAAATTGTCCCTCTC
+CTGGAAAAGCTAAATTATACAGCAGGAGTATCCAGCTACACCACCAGAGC
+AATAGAAGGAAACGGGGTCAGGATCCAGGCCAAGGATATGACCGCCTACA
+ACAAAATTAAAGAAGTCCTGGTGGCCAACGGATTTCCTTTATTCACCAAC
+CAGCCCAAGTCTGAGAGAGGCTTCCGAGTCATCATCAGACACCTCCATCA
+TTCCACACCATGCTCGTGGATAGTCGAGGAGCTGCTGAAGCTCGGTCTCC
+AAGCGCGCTTCGTCAGAAACATGACGAATCCAGCTACAGGTGGCCCCATG
+CGAATGTTTGAAGTGGAGATCGTCATGGCCAAAGATGGCAGCCACGATAA
+AATTCTCTCACTCAAACAAATCGGTGGGCAAAAGGTGGATATTGAAAGGA
+AAAATAGGACACAGGAGCCGGTTCAGTGCTACAGATGCCAGGGCTTCAGA
+CATGCCAAAAATTCATGCATGAGGACGCCTAAATGCATGAAATGCGCTGG
+CGATCACCTGTCATCCTGCTGCACCAAGCCAAAATCCACCCCCGCCACCT
+GCATCAACTGCTCTGGGGAACATATCAGCGCTTATAAAGGATGCCCCGCC
+TACAAGGCCGAAAAACGAAAGCTGGCGGCTAACATTATTGACACAAACAA
+AATAAGGACAATCAAAGACGCAACTAATCACTTTTATAAACGACAAGGCC
+CCCCTCCACGTAATAATACCCCTCGACTACCGCACAGCTCAGCAATCCTG
+ACCAAGTCAATCGCTGAAGCTCGCCAGGAAGCCGCCAAAAAGTCGATGCT
+AAATCCTTATTGGCAAAGCTCGAACGACAGAAGGCCACGTTTCTCCTCCC
+ATGACACTGCCATTCAAAAACGGCTAAACAAATGGCGCCGAAACTCTAAT
+AAGATACCCAAAAAGGGTAGGATATCCTCAAAGGACAATGCAAAGCCAAG
+ACCGGCATCCAAGACAAGCAACCCAGCGCAAAGACATCTGGAAAAATACC
+AGGACATGCTCCGAAACGAAAGGAGTGAAGAAATTGACCATGAACCTGAA
+AAAGGTACTCCTAATCCCAGCCGAGTCGGCAACGACAGCCCTCCGACCAC
+CAGCAGAGCCGCCAGAGCTAACTTTAAGCCAAGAATTATCGATGAAGCTA
+CGCCATCGCCAAGAAACTCCAATCCTTACCTACAGAAGAGCTTCTCGGAC
+GACCCCACCATTAATCTAGCTAATAGAGTCGATAATTTAGAAAAGAAAAT
+TGACATTTTAATGGCTTTAATCATACAAGGAACCAATAACAATAATCTTG
+ACATTGATACATCAATCTAAATTTACATTACACTTATTTATATTTATACC
+TATTATAAATATATATATCCGACACAAAAGCGCACGTCTGCCCACCCTTA
+TAATGTTCTAATTATTATCACCTTCCTCGACGCAAAGCTTAAACCTCTGT
+TGAAAAACAAATCAATTAGATGGATGACATAAAAACGTAAATAAATAATC
+TTCTCACCTCAAGCATCCGGATAAAAAAGGCAATACGCACTCCAACTCCT
+GATGAAGCTATGTGAAGAAAACTACACCAGGATTCAAAAGTCGAATCGGA
+GGATGGACATGAGAAGAATCTGTGCGGCAGAAGCATGATGAATAGAGGCG
+ACTCGCTGCAGCAAAATATGCACTACGCCACTTACCTGAATCTTCTGCGG
+CGCAGTCTTTTTATGTACCATCATCTCCGCCGCAACCGCTTCACACAGCT
+CCACAACATAAGATGCGCCACCAAAGCTGCCTCCGTACTGAACTGGACAT
+CATGCGTTGCGCTGCAAATCCTATCCTATTGACGAGCGCCAACAGCGGGT
+CTGCGCTAAAAACCTAAAAACAAAACAAAACAAAACAATTAATCAACAAC
+AAATTGAACATAACAATCAAACAATAACAATCACTTACCTCCTTGACTGC
+ATCCAATCGCTGACCCAAATCCAACACAACCGACAACAGGAGACGGGCTT
+CGCAAATGCAAAACAAAATCGCCAACTTTTGCGATTATAAATACAAAAAA
+TTGACAATTTTCTGATGCCATCTCCATCCTTTGATCCCACTGCCCAAATA
+AGGATCATTAGCGCGGAGCTGAAGCCACATTAATAAGCTGTAAAATTGAT
+CCCCAAAATGTATATTTCTCCTCAATACCGTATCTTCAACGAACTTTCCG
+CCAACCTGCAATGAAAGGGAAATTAATAATAATGCTATACAGAATTAATC
+AGCGACACATAGAAAATAGCAAACCAGACAGGCAAAGTAGTAGATGCAAA
+CAGGCGACTCCATCCCGCCGACGACAAGCATTCAAATCCTTCATACTGAA
+ACAAGGAAGCACAAGCCAATACTGGGAATTATTTACTCAAACAAAATACT
+TATCTAATTACCAACTCGACGACTCCAAATACGCGGCACACCGGCTGCGA
+TAGCTCTTAAATAAAGGGCCTCCTAATTAACTACAAAACGTACCTGAAAA
+ATAAAACAATTAACGCAATCGTAAATAATTACAATTATAATACTCACCTC
+CAGATTAGCCTAATGTACCTGAAAAACAAAAACAAAAATTAATGCAATAA
+TTATAAAAACAAATAAATACAAACACAATACTTACCTCCAAATTACCTCC
+CAGCCAAAGCACCTGAAATACAAAAACAAAGAATTAATGCAATAAATAAA
+TCAAATAAATACAAATACAATACTCACCCCAAATAACCTCCCAGCTAATT
+TACCTGAAAAAACAAAAATTAATACAATATTAAAAACGAATAACAAATGT
+AATACTTACCAAATTTTAACTTTGTATTCATTTCCATGGCCCAAATCGTT
+GCGACGGTCCTCGGCAACAAATCATGTTCCGGCGGCTCCTAGCTGCCAAT
+CCCGACGCATTGGCCACAAGACGCGGCGCTCCTGGCAACTCTCGATGAAT
+AACCGAGCTCCAATTTCCACGACGACTCTTCTGCCAAACGAGTCAGATTA
+CACCAACATAATGCCAGCAGCTCCCAAACAATGCAATGACGGCTGCGCGG
+GATCCATCTTCAGATTTTCTTCTTCCTGACGACCGGCTAAGCTGCCCTGC
+AATTTAAGAAATTTTATTAAACAATTGCAAATATCTACCACTGAGGGTGG
+TAGAGACAACCACCAAATGACAGCGGCGCGGGATACACCCACCACGAATA
+GGCTTTCTGCAGCGCTGGCCGGACATGCATGTTGCGACGCGCATTCAGCG
+TCCACAACAAGCCCCAGCCAGAATACAACAAACACTCACCTGCAATGTTT
+CCTGAGGCTTCCAGCGACTCGGTGCTTCCGTCCTTCTGGCGGGGGTACCT
+GAAAAGAATTAATTCAAATTATGTTAGTCTTAAATTCCAATGTTTCTTGT
+TAAATAATTCAAATTATCAAATGTAAACATAACATACAATGTGATAATGT
+TACCAGTCCATGTTACTGCCAAAAACCTAAGTTTACAAAAAAATACTTAC
+CTCTTAATATTAATACTAAATCTATGTCCAATCCCCAAACTCACCCCACG
+TAATGTACACCTCAAAAATTCAAATAATTGTACCTACATATTGCATTCTA
+TGTAATCAAAGGCAAAATAAATTGTGGATGCGGAACAGAATTCATTCTGT
+CTCCGTACCTCCACCAGCAAAGTTAAAAATGAAATATCCCTCATCACCGC
+TGCAATCTACATACATGGATACAGCGCAAAAGACGGTCAACCACGTCGTC
+TCCGAGTCGTTCAGGACACCTTGCTGCTCTCAATAACCTCCAGCCTGACG
+AGCGCCAACAGCGAGTTGACGCTAAAACCTAAAAACAAACAACAACAAAT
+TAAATACAAACAAATAAAATAAAATCAAACAAAACACTTACTTCACTGAC
+AACAGCCAATTGCTGATCCACATTCAACGCAACAGACAACAGGAGACGGG
+CCCCGCAAACGCAAAACAAAATCGCCAATTTTTGCGATTTTAAATACAAA
+AAATCGACAATTTTACTAAGCCCTCTCCATCTCCTGATGCCACCGCCACA
+ATAAGGATCACTAGCGCGGCGCTGATGCCACATCAATAAACCGCAATATT
+TGTCCTCAAAACGTATACTTCTTCTCAGTATCGCAACTTCTACGAATTTC
+CCGCTAACCTACAATGAAAGGAAAATCAATAAGAATGTGATACAAAAAAT
+TAATCAAGGGCAAATAGAAAATAGCTTACCGGACAGGCATACTAGCAGAT
+GCTAATATGCAACTCCATCCTTCTGAGACAAATACGCAACTCCTTTTTTC
+CAAGATTGCAAATACTGAAACAAGGAAGCACAAGCCAATACTGGGAATTA
+TTTAATTAAACAAAATACTTATCTAATTGCCAATTCGACGACTCCAAATC
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testC2S.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testC2S.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,3 @@
+chr1 S-MART SMART 1 10 . + . Name=region0
+chr1 S-MART SMART 51 60 . + . Name=region1
+chr1 S-MART SMART 51 60 . - . Name=region2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+chr1 test test1.1 1000 2000 1001 + . ID=test1.1;Name=test1.1
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 test test2.1 1500 2500 1001 + . ID=test2.1;Name=test2.1
+chr1 test test2.2 3000 4000 1001 - . ID=test2.2;Name=test2.2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+chr1 test test1.1 1000 2000 1001 + . ID=test1.1;Name=test1.1
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 test test2.1 1500 2500 1001 + . ID=test2.1;Name=test2.1
+chr1 test test2.2 3000 4000 1001 + . ID=test2.2;Name=test2.2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testDifferentialExpressionExpected.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testDifferentialExpressionExpected.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 S-MART S-MART_transcript 100 200 101 + . Name=test3.1;nbReadsCond1=15;regulation=equal;nbReadsCond2=15;pValue=1.0;ID=test3.1
+chr1 S-MART S-MART_transcript 200 300 101 + . Name=test3.2;nbReadsCond1=30;regulation=equal;nbReadsCond2=30;pValue=1.0;ID=test3.2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testDifferentialExpressionOutput.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testDifferentialExpressionOutput.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 S-MART S-MART_transcript 100 200 101 + . Name=test3.1;nbReadsCond1=15;regulation=equal;nbReadsCond2=15;pValue=1.0;ID=test3.1
+chr1 S-MART S-MART_transcript 200 300 101 + . Name=test3.2;nbReadsCond1=30;regulation=equal;nbReadsCond2=30;pValue=1.0;ID=test3.2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testDifferentialExpressionReference.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testDifferentialExpressionReference.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 S-MART transcript 100 200 100 + . Name=test3.1
+chr1 S-MART transcript 200 300 100 + . Name=test3.2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testDifferentialExpressionSample1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testDifferentialExpressionSample1.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 S-MART S-MART_transcript 100 200 100 + . Name=test1.1;nbElements=10
+chr1 S-MART S-MART_transcript 200 300 100 + . Name=test1.2;nbElements=20
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testDifferentialExpressionSample2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testDifferentialExpressionSample2.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 S-MART S-MART_transcript 100 200 100 + . Name=test2.1;nbElements=20
+chr1 S-MART S-MART_transcript 200 300 100 + . Name=test2.2;nbElements=40
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testGffParser1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testGffParser1.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,7 @@
+arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1
+arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
+arm_X test test_transcript 10000 20000 1 - . ID=id2-1;Name=test2;field=value2
+arm_X test test_exon 10000 10100 1 - . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
+arm_X test test_exon 10500 20000 1 - . ID=id2-1-exon2;Name=test2-exon2;Parent=id2-1
+arm_X test test_transcript 1000 2000 1 + . ID=test1.1-1;Name=test1.1
+arm_X test test_exon 1000 2000 1 + . ID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testPlot.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testPlot.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,6 @@
+chr1 S-MART SMART 100 200 . + . value2=6;value3=1
+chr1 S-MART SMART 200 300 . + . value1=2;value2=5;value3=2
+chr1 S-MART SMART 300 400 . + . value1=3;value2=4;value3=3
+chr1 S-MART SMART 400 500 . + . value1=4;value2=3;value3=4
+chr1 S-MART SMART 500 600 . + . value1=5;value2=2;value3=5
+chr1 S-MART SMART 600 700 . + . value1=6;value3=6
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testSW.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testSW.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,4 @@
+chr1 S-MART SMART 101 111 . + . value1=1
+chr1 S-MART SMART 111 121 . + . value1=2
+chr1 S-MART SMART 201 211 . + . value1=10
+chr1 S-MART SMART 211 221 . + . value1=12
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,4 @@
+arm_X 1000 2000 test1.1 1000 + 1000 2000 0 1 1000, 0,
+arm_X 1000 2000 test1.2 1000 - 1000 2000 0 1 1000, 0,
+arm_X 100 200 test1.3 1000 + 100 200 0 1 100, 0,
+arm_X 100 3200 test1.4 1000 + 100 3200 0 2 100,100, 0,3000,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+arm_X 1000 2000 test2.1 1000 + 1000 2000 0 1 1000, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+arm_X test test_transcript 1000 4000 1 + . ID=id1-1;Name=test1;field=value1;nbElements=2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+arm_X test test_transcript 2000 3000 1 + . ID=id2;Name=test2;field=value1
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+arm_X 10000100 10000200 test1.1 100 - 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.2 100 - 10000000 10000100 0 1 100, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation2.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+arm_X 10000050 10000150 test2.1 100 - 10000050 10000150 0 1 100, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,7 @@
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1_modif.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1_modif.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,112 @@
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
+arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
+arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
+arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
+arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
+arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
+arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway2.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+arm_X 10000000 10000050 test2.1 50 - 10000000 10000050 0 1 50, 0,
+arm_3R 10000000 10000050 test2.2 50 - 10000000 10000050 0 1 50, 0,
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+arm_X 1000 3000 test1.1 1000 + 1000 3000 0 1 2000, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple2.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,1 @@
+arm_X 2000 4000 test1.2 1000 + 2000 4000 0 1 2000, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMerge1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMerge1.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,4 @@
+arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1;nbElements=2
+arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
+arm_X test test_transcript 1000 2000 1 + . ID=id2-1;Name=test2;field=value2
+arm_X test test_exon 1000 2000 1 + . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeDifferentClusters1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeDifferentClusters1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+arm_X 1010 1110 test1.1 1000 + 1010 1110 0 1 100, 0,
+arm_X 100 100100 test1.2 1000 + 100 100100 0 1 100000, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeSense1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeSense1.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+arm_X 1000 6000 test1.1 1000 + 1000 6000 0 2 1000,1000, 0,4000,
+arm_X 1000 4000 test1.2 1000 + 1000 4000 0 2 1000,1000, 0,2000,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/testTranscriptNormalize.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/testTranscriptNormalize.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,2 @@
+chr1 S-MART transcript 1000 2000 1000 + . Name=test1;nbOccurrences=2
+chr1 S-MART transcript 1500 2500 1000 + . Name=test2;nbOccurrences=2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/test_distance.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/test_distance.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,3 @@
+arm_X 1000 2000 test2.1 1000 + 1000 2000 0 1 1000, 0,
+arm_X 250 350 test2.2 1000 + 250 350 0 1 100, 0,
+arm_X 150 250 test2.3 1000 + 150 250 0 1 100, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/TestFiles/test_minoverlapp.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/TestFiles/test_minoverlapp.bed Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,6 @@
+track name=reads description="Reads" useScore=0 visibility=full offset=0
+arm_X 1000 2000 test1.1 1000 + 1000 2000 0 1 1000, 0,
+arm_X 1000 2000 test1.2 1000 - 1000 2000 0 1 1000, 0,
+arm_X 100 200 test1.3 1000 + 100 200 0 1 100, 0,
+arm_X 100 3200 test1.4 1000 + 100 3200 0 2 100,100, 0,3000,
+arm_X 1500 2000 test1.5 1000 + 1500 2000 0 1 500, 0,
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/__init__.pyc
b
Binary file SMART/Java/Python/__init__.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleanGff.pyc
b
Binary file SMART/Java/Python/cleanGff.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleaning/CleanerChooser.pyc
b
Binary file SMART/Java/Python/cleaning/CleanerChooser.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleaning/DefaultCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/DefaultCleaner.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleaning/GffCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/GffCleaner.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleaning/GtfCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/GtfCleaner.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleaning/TranscriptListCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/TranscriptListCleaner.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/cleaning/__init__.pyc
b
Binary file SMART/Java/Python/cleaning/__init__.pyc has changed
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/compare_TAIR10_Reiterative4th.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/compare_TAIR10_Reiterative4th.gff3 Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,18028 @@
[hunk truncated in this rendering: 18,028 added lines — GFF3 annotation records (source S-MART) on chromosomes chr1–chr5 with TAIR locus identifiers (e.g. AT5G13440 … AT5G13990), feature types gene, pseudogene and transposable_element_gene, each carrying Note, ID and Name attributes]
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/fo.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/fo.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,341 @@
[hunk truncated in this rendering: 341 added lines — new script fo.py, a CeCILL-licensed Python program defining class FindOverlapsOptim, which sorts and converts the reference and query annotation files to NC-lists (SMART.Java.Python.ncList modules), finds overlapping intervals, writes each overlapping query transcript with overlapsWith and nbOverlaps tags through Gff3Writer, and exposes an OptionParser command line (-i/--query, -f/--queryFormat, -j/--ref, -g/--refFormat, -o/--output, -v/--verbosity)]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/genes.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/genes.gtf Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,27989 @@\n+I\tprotein_coding\tCDS\t335\t646\t.\t+\t0\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; protein_id "YAL069W"; transcript_name "YAL069W"; tss_id "TSS1127";\n+I\tprotein_coding\texon\t335\t649\t.\t+\t.\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; seqedit "false"; transcript_name "YAL069W"; tss_id "TSS1127";\n+I\tprotein_coding\tstart_codon\t335\t337\t.\t+\t0\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; transcript_name "YAL069W"; tss_id "TSS1127";\n+I\tprotein_coding\tCDS\t538\t789\t.\t+\t0\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; protein_id "YAL068W-A"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n+I\tprotein_coding\texon\t538\t792\t.\t+\t.\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; seqedit "false"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n+I\tprotein_coding\tstart_codon\t538\t540\t.\t+\t0\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n+I\tprotein_coding\tstop_codon\t647\t649\t.\t+\t0\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; transcript_name "YAL069W"; tss_id "TSS1127";\n+I\tprotein_coding\tstop_codon\t790\t792\t.\t+\t0\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n+I\tprotein_coding\texon\t1807\t2169\t.\t-\t.\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; seqedit "false"; transcript_name "PAU8"; tss_id "TSS248";\n+I\tprotein_coding\tstop_codon\t1807\t1809\t.\t-\t0\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; transcript_name "PAU8"; tss_id "TSS248";\n+I\tprotein_coding\tCDS\t1810\t2169\t.\t-\t0\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; protein_id "YAL068C"; transcript_name "PAU8"; tss_id "TSS248";\n+I\tprotein_coding\tstart_codon\t2167\t2169\t.\t-\t0\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; transcript_name "PAU8"; tss_id "TSS248";\n+I\tprotein_coding\tCDS\t2480\t2704\t.\t+\t0\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; protein_id "YAL067W-A"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n+I\tprotein_coding\texon\t2480\t2707\t.\t+\t.\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; seqedit "false"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n+I\tprotein_coding\tstart_codon\t2480\t2482\t.\t+\t0\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n+I\tprotein_coding\tstop_codon\t2705\t2707\t.\t+\t0\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n+I\tprotein_coding\texon\t7235\t9016\t.\t-\t.\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; seqedit "false"; transcript_name "SEO1"; tss_id "TSS5465";\n+I\tprotein_coding\tstop_codon\t7235\t7237\t.\t-\t0\tgene_id "YAL067C"; 
transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; transcript_name "SEO1"; tss_id "TSS5465";\n+I\tprotein_coding\tCDS\t7238\t9016\t.\t-\t0\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; protein_id "YAL067C"; transcript_name "SEO1"; tss_id "TSS5465";\n+I\tprotein_coding\tstart_codon\t9014\t9016\t.\t-\t0\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; transcript_name "SEO1"; tss_id "TSS5465";\n+I\tprotein_coding\tCDS\t10091\t10396\t.\t+\t0\tgene_id "YAL066W"; transcript_id "YAL066W"; exon_number "1"; gene_name "YAL066W"; p_'..b'203";\n+XVI\tprotein_coding\tstart_codon\t939922\t939924\t.\t+\t0\tgene_id "YPR201W"; transcript_id "YPR201W"; exon_number "1"; gene_name "ARR3"; p_id "P1664"; transcript_name "ARR3"; tss_id "TSS5203";\n+XVI\tprotein_coding\tstop_codon\t941134\t941136\t.\t+\t0\tgene_id "YPR201W"; transcript_id "YPR201W"; exon_number "1"; gene_name "ARR3"; p_id "P1664"; transcript_name "ARR3"; tss_id "TSS5203";\n+XVI\tprotein_coding\tCDS\t943032\t943050\t.\t+\t0\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "1"; gene_name "YPR202W"; p_id "P3577"; protein_id "YPR202W"; transcript_name "YPR202W"; tss_id "TSS6873";\n+XVI\tprotein_coding\texon\t943032\t943050\t.\t+\t.\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "1"; gene_name "YPR202W"; p_id "P3577"; seqedit "false"; transcript_name "YPR202W"; tss_id "TSS6873";\n+XVI\tprotein_coding\tstart_codon\t943032\t943034\t.\t+\t0\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "1"; gene_name "YPR202W"; p_id "P3577"; transcript_name "YPR202W"; tss_id "TSS6873";\n+XVI\tprotein_coding\tCDS\t943199\t943893\t.\t+\t1\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "2"; gene_name "YPR202W"; p_id "P3577"; protein_id "YPR202W"; transcript_name "YPR202W"; tss_id "TSS6873";\n+XVI\tprotein_coding\texon\t943199\t943896\t.\t+\t.\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "2"; gene_name "YPR202W"; p_id "P3577"; seqedit "false"; transcript_name "YPR202W"; tss_id "TSS6873";\n+XVI\tprotein_coding\tCDS\t943880\t944185\t.\t+\t0\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; protein_id "YPR203W"; transcript_name "YPR203W"; tss_id "TSS2481";\n+XVI\tprotein_coding\texon\t943880\t944188\t.\t+\t.\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; seqedit "false"; transcript_name "YPR203W"; tss_id "TSS2481";\n+XVI\tprotein_coding\tstart_codon\t943880\t943882\t.\t+\t0\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; transcript_name "YPR203W"; tss_id "TSS2481";\n+XVI\tprotein_coding\tstop_codon\t943894\t943896\t.\t+\t0\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "2"; gene_name "YPR202W"; p_id "P3577"; transcript_name "YPR202W"; tss_id "TSS6873";\n+XVI\tprotein_coding\tstop_codon\t944186\t944188\t.\t+\t0\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; transcript_name "YPR203W"; tss_id "TSS2481";\n+XVI\tprotein_coding\tCDS\t944603\t947698\t.\t+\t0\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; protein_id "YPR204W"; transcript_name "YPR204W"; tss_id "TSS839";\n+XVI\tprotein_coding\texon\t944603\t947701\t.\t+\t.\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; seqedit "false"; transcript_name "YPR204W"; tss_id 
"TSS839";\n+XVI\tprotein_coding\tstart_codon\t944603\t944605\t.\t+\t0\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; transcript_name "YPR204W"; tss_id "TSS839";\n+XVI\tprotein_coding\texon\t946856\t947338\t.\t-\t.\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; seqedit "false"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n+XVI\tprotein_coding\tstop_codon\t946856\t946858\t.\t-\t0\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n+XVI\tprotein_coding\tCDS\t946859\t947338\t.\t-\t0\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; protein_id "YPR204C-A"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n+XVI\tprotein_coding\tstart_codon\t947336\t947338\t.\t-\t0\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n+XVI\tprotein_coding\tstop_codon\t947699\t947701\t.\t+\t0\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; transcript_name "YPR204W"; tss_id "TSS839";\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/genome.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/genome.fasta Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,92 @@
+>chr1
+GTAATCAACTACCAATTCCAGCTCTCTTTTGACAACTGGTCTTATACCCACTTCCGTACACTTGCAACCC
+TCGTAAGACAATTGCAAATGAGTAATGGCCTTCCAATTAGCATTGGACGCCCTTGCACCCACGACTCACA
+GAGATCCCTCTCTGCACCCGATTCTCGAATCCACAGTGGATTCAATTCGCTCCTCGATACAGACCTACCC
+ATGGTCCATTCCGAAGGAACTTCTACCCCTACTCAACTCCTACGGCATCCCAACATCTGGTTTGGGAACC
+TCCCACCACCCCCACGCCGCCCACAAGACAATCGAGACTTTTCTCCTTTGCACCCACTGGTCTTTCCAGG
+CCACCACTCCCAGCTCCGTCATGTTCATGAAACCCAGCAAGTTCAACAAACTTGCCCAGGTAAACTCAAA
+CTTTCGGGAGCTGAAGAACTACCGCCTGCACCCCAACGACAGCACTCGTTACCCCTTCACATCACCAGAC
+CTTCCCGTTTTCCCCACCATTTTCATGCACGACGCCCTGATGTATTACCATCCGTCCCAGATCATGGACC
+TGTTCTTRCGGAAACCAAACCTCGAACGTCTGTACGCCAGCCTCGTAGTGCCACCCGAGGCCCATCTTTC
+CGACCAATCCTTCTACCCAAAGTTGTACACGTACACGACGACCCGCCACACTCTTCACTACGTCCCAGAG
+GGTCACGAAGCCGGCAGCTACAACCAACCGTCCGACGCCCACTCTTGGCTCCGAATCAATTCCATTCGCC
+TCGGCAACCACCACCTCTCAGTGACGATCCTGGAATCCTGGGGCCCCGTCCACTCGCTCCTCATTCAACG
+AGGGACCCCCCCCCCCGACCCATCACTCCAGGCCCCTCCAACACTCATGRCCTCAGACCTCTTTCGGTCT
+TACCAAGARCCTCGCCTCGACGTGGTCTCCTTCCGAATCCCCGACGCCATCGAACTTCCACAGGCCACAT
+TCCTCCAACAACCACTTCGAGACCGACTGGTCCCCCGAGCCGTCTACAACGCCCTGTTCACCTATACCAG
+AGCAGTCCGCACACTCCGAACTTCAGACCCAGCAGCATTCGTAAGGATGCACTCCTCCAAACCGGACCAC
+GATTGGGTCACCTCGAACGCCTGGGACAACCTGCAAACCTTCGCACTTCTGAACGTTCCCCTCCGACCAA
+ACGTCGTCTACCACGTTCTTCAGAGCCCAATCGCCTCCCTAAGCCTTTACCTGAGGCAACATTGGCGCCG
+TCTTACCGCCACCGCCGTCCCTATCCTCTCCTTCCTAACCCTCCTGCAGCGCTTCCTTCCATTGCCTATA
+CCTCTAGCAGAGGTAAAATCCATCACAGCCTTCCGAAGGGAGCTTTACCGAAAGAAGGAGCCCCACCACC
+CCCTCGACGTCTTCCATCTCCAGCACCGCGTCCGCAACTACCACTCCGCGATCTCGGCCGTACGCCCGGC
+TTCCCCACCCCACCAAAAACTCCCACACGCACTCCAGAAAGCCGCATTACTGCTTCTCCGACCGATATCG
+CCCCTCTTGACAGCGACCCCGTTCTTTCGGTCCGAACAGAAGTCCATGCTCCCGAACGCCGAACTTTCAT
+GGACCCTGAAGCGCTTCGCTCTGCCCTGGCAAGCCTCCCTAGTCCTCCTCGCTCTGTCGGAATCATCCAT
+ACTGCTCCACAAACTGTTCTCCCCGCCAACCCTCCAAGCCCAACACGACACCTACCACCGACATCTCCAC
+CCTGGATCCTACAGTCTCCAGTGGGAGAGGACGCCATTGTCGATTCCGAGGACGACAGCATTTCTTCCTT
+TCACTCCCACGACTTCGACAGCCCCTCCGGACCGCTCCGAAGCCAGTCTCCCTCCCGCTTTCGCCTCCAC
+CTTCGTTCCCCGTCCACCTCCAGCGGCATCGAGCCCTGGAGCCCAGCCTCCTACGACTACGGCAGCGCCC
+CCGACACCGATTGAACCCACCCAACGCACCCATCAAAATTCTGACCTCGCGCTCGAAAGTTCAACCTCAA
+CCGAACCTCCCCCACCCCCCATCCGATCCCCCGACAYGACGCCCTCCGCCCCCGTCCTTTTCCCAGAAAT
+CAACTCACCTCGTCGTTTTCCCCCCCAACTTCCCGCCACGCCCGATCTCGAACCCGCCCACACTCCACCC
+CCTCTTTCCATCCCGCACCAAGATCCGACTGACTCAGCGGACCCCCTCATGGGCTCCCACCTTTTGCACC
+ATTCACTGCCTGCACCCCCCACCCACCCCCTTCCATCTTCACAGCTGTTACCCGCACCTTTAACGAACGA
+CCCCACTGCGATCGGCCCGGTGCTCCCCTTTGAAGAACTCCACCCACGCAGGTACCCTGAAAACACCGCC
+ACTTTCCTCACGAGGCTCCGTTCACTCCCATCAAACCATCTTCCACAACCCACCTTGAATTGTCTTCTCT
+CCGCTGTCTCCGACCAAACCAAGGTTTCCGAGGAGCACCTCTGGGAGTCCCTACAGACAATTCTCCCAGA
+CAGCCAACTCAGCAATGAAGAGACCAACACTCTCGGGCTTTCAACTGAACACCTCACTGCGTTGGCCCAC
+CTTTACAACTTCCAGGCAACCGTTTACTCCGATCGCGGCCCCATCCTCTTCGGCCCCTCCGACACCATCA
+AGAGGATAGACATCACCCACACCACCGGACCGCCATCCCACTTTTCACCCGGCAAAAGACTCCTAGGCAG
+CCAACCCTCCGCTAAGGGCCATCCCTCCGACCCACTCATCAGAGCCATGAAGTCTTTCAAAGTATCCGGC
+AACTACCTTCCCTTCTCTGAGGCCCACAACCATCCCACCTCCATCTCACACGCCAAGAACTTGATTTCAA
+ACATGAAGAATGGTTTCGACGGCGTCCTCTCCCTCCTCGACGTCTCCACGGGCCAACGAACCGGACCCRC
+CCCCAAAGAACGGATCATCCAGATAGACCACTACCTTGACACCAACCCCGGCAAAACCACTCCTGTGGTG
+CATTTCGCTGGCTTCGCTGGCTGTGGGAAGACATATCCGATCCAACAGCTCCTCAAAACCAAACTGTTCA
+AAGACTTCCGGGTCTCTTGCCCTACCACAGAACTCAGAACCGAATGGAAGACAGCGATGGAACTCCACGG
+CTCCCAGTCATGGCGCTTTAACACTTGGGAGTCTTCCATTCTCAAGTCATCCAGAATCCTGGTCATTGAT
+GAGATCTACAAAATGCCAAGAGGGTACCTCGACCTTTCCATCCTCGCCGACCCCGCCCTCGAGCTCGTCA
+TAATTCTCGGCGATCCTCTMCAGGGCGAGTACCACTCCCAATCGAAAGACTCATCCAACCACCGCCTTCC
+CTCTGAAACTCTCAGGCTGCTACCATACATCGACATGTACTGCTGGTGGAGTTACCGCATTCCTCAATGC
+ATCGCCCGACTCTTCCAAATTCACAGCTTCAATGCCTGGCAAGGAGTTATCGGGTCCGTTTCCACTCCCC
+ATGATCAATCCCCCGTCCTCACCAACAGTCATGCCTCATCTCTTACCTTCAACAGCCTGGGATATCGCTC
+CTGCACGATCAGCTCTAGCCAAGGCCTCACATTCTGCGACCCCGCCATAATCGTCCTGGACAACTACACC
+AAGTGGCTCTCCTCGGCTAACGGCCTCGTCGCCCTCACTCGATCCAGATCAGGCGTCCAATTCATGKGCC
+CCTCTTCCTACGTCGGGGGAACCAACGGCTCTTCCGCCATGTTTTCCGACGCCTTCAACAACAGCCTCAT
+CATCATGGATCGCTACTTCCCATCCCTGTTCCCGCAACTCAAGCTCATCACCTCCCCCCTCACAACTCGC
+GGCCCCAAACTCAACGGGGCCACCCCCAGCGCATCCCCCACCCACCGTTCGCCAAACTTCCACCTTCCCC
+CACACATTCCGCTCTCCTATGATCGTGATTTTGTTACGGTGAACCCAACTCTCCCCGACCAAGGACCCGA
+AACAAGACTCGACACCCACTTTCTCCCACCGTCTCGGCTCCCTCTCCATTTCGATCTCCCACCGGCTATC
+ACCCCACCCCCGGTTTCCACAAGCGTCGACCCGCCACAAGCGAAAGCTAGCCCCGTCTACCCAGGCGAGT
+TCTTCGATTCTCTGGCGGCGTTCTTCTTACCAGCACACGACCCATCAACAAGGGAAATACTCCACAAAGA
+TCAATCTAGCAACCAGTTCCCCTGGTTCGACCGACCCTTCAGCCTGTCCTGCCAGCCCTCAAGTCTGATT
+TCCGCCAAGCATGCACCCAACCATGATCCGACCCTTCTACCGGCCTCCATCAACAAACGCTTGCGATTCA
+GACCCAGTGACTCACCGCACCAAATCACCGCGGACGACGTGGTCCTAGGCCTGCAACTCTTTCACTCTCT
+TTGTCGCGCCTACTCACGTCAACCCAACAGCACCGTTCCATTCAACCCTGAACTTTTCGCAGAATGCATC
+TCTCTGAATGAGTACGCACAGCTCAGTTCCAAAACCCAATCCACCATAGTGGCCAACGCTTCACGCTCCG
+ACCCAGACTGGCGACACACCACCGTCAAGATCTTCGCGAAAGCCCAACACAAAGTCAACGACGGCTCCAT
+CTTCGGCTCGTGGAAAGCCTGCCAGACCCTCGCACTCATGCACGACTACGTGATTCTGGTTCTTGGACCC
+GTCAAGAAATACCAGAGAATCTTCGACAACGCTGACCGGCCACCTAACATCTACTCACACTGCGGCAAGA
+CACCCAACCAACTTCGAGATTGGTGCCAGGAACATCTCACTCATTCCACCCCCAAAATCGCAAACGACTA
+CACCGCTTTCGACCAGTCCCAGCATGGAGAATCCGTGGTCCTTGAAGCCCTCAAAATGAAGAGACTGAAC
+ATTCCRAGCCATCTGATTCAGCTCCACGTCCACCTCAAGACCAACGTCTCCACCCAGTTCGGCCCCCTCA
+CATGCATGCGCCTAACCGGGGAACCCGGAACTTACGACGACAACACTGACTATAACCTCGCAGTCATCTA
+CTCCCAGTATGACGTCGGTTCCTGCCCCATCATGGTTTCTGGCGACGACTCACTCATAGACCACCCCCTT
+CCCACTCGCCACGACTGGCCATCCGTTCTCAAACGCCTCCACCTCCGCTTCAAACTTGAACTCACCTCTC
+ACCCCCTCTTCTGTGGCTACTACGTCGGTCCAGCCGGCTGCATCCGCAACCCCCTGGCCCTTTTCTGCAA
+GCTCATGATCGCCGTGGACGACGACGCCCTCGACGACCGACGACTCAGCTACCTCACCGAGTTCACCACC
+GGACACCTCCTTGGCGAATCACTGTGGCACCTCCTCCCTGAAACCCATGTTCAGTATCAGTCAGCCTGCT
+TTGACTTCTTCTGCAGGCGGTGCCCAAGACACGAGAAAATGCTCCTCGACGACTCCACACCCGCACTCAG
+CCTCCTCGAACGAATCACTTCTTCGCCGAGGTGGCTCACCAAAAATGCCATGTACCTCCTCCCTGCCAAG
+CTACGACTGGCCATCACCTCTCTATCTCAAACGCAGTCCTTCCCAGAATCCATCGAGGTTTCCCACGCTG
+AGTCTGAATTGCTTCACTACGTCCAATAGCAATCAGCCCCAACATGGAAATCGACAAAGAACTCGCCCCC
+CAAGACCGCACCGTCACCGTCGCCACCGTCCTACCAGCTGTCCCCGGCCCATCACCTCTCACCATCAAAC
+AACCGTTYCAGTCTGAAGTTCTATTTGCTGGAACCAAAGATGCCGAGGCTTCTCTCACCATCGCCAACAT
+CGACAGCGTTTCCACCCTCACCACCTTCTACCGTCATGCATCTCTGGAATCACTCTGGGTCACTATCCAT
+CCCACCTTGCAAGCCCCAGCTTTCCCGACCACGGTCGGTGTCTGCTGGGTACCCGCCAATTCTCCAGTCA
+CTCCCGCCCAAATCACCAAGACCTATGGTGGCCAGATCTTCTGCATTGGCGGCGCCATCAACACCCTCTC
+ACCTCTCATCGTCAAGTGCCCACTTGAAATGATGAACCCCCGGGTCAAGGATTCGATTCAGTACCTTGAC
+TCGCCCAAACTCCTCATCTCCATCACCGCTCAACCCACCGCTCCCCCCGCATCGACCTGCATAATAACTG
+TATCAGGAACTCTCTCGATGCACTCTCCGCTCATCACGGACACTTCCACCTAAGTTCTCGATCTTTAAAA
+TCGTTAGCTCGCCAGTTAGCGAGGTCTGTCCCCACACGACAGATAATCGGGTGCAACTCCCGCCCCTCTT
+CCGAGGGTCATCGGAACC
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/gf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/gf.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,231 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2011\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+QUERY        = 0\n+REFERENCE    = 1\n+INPUTS       = (QUERY, REFERENCE)\n+STRANDS      = (-1, 1)\n+TAG_DISTANCE = "distance_"\n+TAG_SENSE    = "_sense"\n+TAG_REGION   = "_region"\n+TAGS_REGION  = {-1: "_upstream", 0: "", 1: "_downstream"}\n+TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}\n+TAGS_SENSE   = {-1: "antisense", 0: "", 1: "colinear"}\n+STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}\n+\n+\n+def getOrderKey(transcript, direction):\n+    if direction == 1:\n+        return transcript.getEnd()\n+    return - transcript.getStart()\n+\n+def isInGoodRegion(transcriptRef, transcriptQuery, direction):\n+    if direction == 1:\n+        return transcriptQuery.getEnd() > transcriptRef.getEnd()\n+    return transcriptQuery.getStart() < transcriptRef.getStart()\n+\n+\n+class GetFlanking(object):\n+\n+    def __init__(self, verbosity):\n+        self.verbosity   = verbosity\n+        self.transcripts = dict([id, {}] for id in INPUTS)\n+        self.directions  = []\n+        self.noOverlap   = False\n+        self.colinear    = False\n+        self.antisense   = False\n+        self.distance    = None\n+        self.minDistance = None\n+        self.maxDistance = None\n+        self.tagName     = "flanking"\n+\n+    def setInputFile(self, fileName, format, id):\n+        chooser = ParserChooser(self.verbosity)\n+        chooser.findFormat(format)\n+        parser = chooser.getParser(fileName)\n+        for transcript in parser.getIterator():\n+            chromosome = transcript.getChromosome()\n+            if chromosome not in self.transcripts[id]:\n+                
self.transcripts[id][chromosome] = []\n+            self.transcripts[id][chromosome].append(transcript)\n+\n+    def setOutputFile(self, fileName):\n+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+    def addUpstreamDirection(self, upstream):\n+        if upstream:\n+            self.directions.append(-1)\n+\n+    def addDownstreamDirection(self, downstream):\n+        if downstream:\n+            self.directions.append(1)\n+\n+    def setColinear(self, colinear):\n+        self.colinear = colinear\n+\n+    def setAntisense(self,'..b'    progress.inc()\n+        for transcript in sorted(list(outputs), key = lambda flanking: (flanking.getChromosome(), flanking.getStart(), flanking.getEnd())):\n+            self.writer.addTranscript(transcript)\n+        self.writer.close()\n+        progress.done()\n+\n+    def run(self):\n+        self.flankings = {}\n+        for direction in STRANDS:\n+            self.getFlanking(direction)\n+        self.write()\n+\n+if __name__ == "__main__":\n+    \n+    description = "Get Flanking v1.0.1: Get the flanking regions of a set of reference. [Category: Data Selection]"\n+\n+    parser = OptionParser(description = description)\n+    parser.add_option("-i", "--input1",      dest="inputFileName1", action="store",                          type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+    parser.add_option("-f", "--format1",     dest="format1",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+    parser.add_option("-j", "--input2",      dest="inputFileName2", action="store",                          type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n+    parser.add_option("-g", "--format2",     dest="format2",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+    parser.add_option("-5", "--upstream",    dest="upstream",       action="store_true", default=False,                     help="output upstream elements [format: boolean] [default: False]")\n+    parser.add_option("-3", "--downstream",  dest="downstream",     action="store_true", default=False,                     help="output downstream elements [format: boolean] [default: False]")\n+    parser.add_option("-c", "--colinear",    dest="colinear",       action="store_true", default=False,                     help="find first colinear element [format: boolean] [default: False]")\n+    parser.add_option("-a", "--antisense",   dest="antisense",      action="store_true", default=False,                     help="find first anti-sense element [format: boolean] [default: False]")\n+    parser.add_option("-e", "--noOverlap",   dest="noOverlap",      action="store_true", default=False,                     help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")\n+    parser.add_option("-d", "--minDistance", dest="minDistance",    action="store",      default=None,       type="int",    help="minimum distance between 2 elements [format: int]")\n+    parser.add_option("-D", "--maxDistance", dest="maxDistance",    action="store",      default=None,       type="int",    help="maximum distance between 2 elements [format: int]")\n+    parser.add_option("-t", "--tag",         dest="tagName",        action="store",      default="flanking", 
type="string", help="name of the new tag [format: string] [default: flanking]")\n+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                          type="string", help="output file [format: output file in GFF3 format]")\n+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,          type="int",    help="trace level [format: int]")\n+    (options, args) = parser.parse_args()\n+\n+    gf = GetFlanking(options.verbosity)\n+    gf.setInputFile(options.inputFileName1, options.format1, QUERY)\n+    gf.setInputFile(options.inputFileName2, options.format2, REFERENCE)\n+    gf.setOutputFile(options.outputFileName)\n+    gf.addUpstreamDirection(options.upstream)\n+    gf.addDownstreamDirection(options.downstream)\n+    gf.setColinear(options.colinear)\n+    gf.setAntisense(options.antisense)\n+    gf.setNoOverlap(options.noOverlap)\n+    gf.setMinDistance(options.minDistance)\n+    gf.setMaxDistance(options.maxDistance)\n+    gf.setNewTagName(options.tagName)\n+    gf.run()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/MultipleRPlotter.pyc
Binary file SMART/Java/Python/misc/MultipleRPlotter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/Progress.pyc
Binary file SMART/Java/Python/misc/Progress.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/RPlotter.py
--- a/SMART/Java/Python/misc/RPlotter.py Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/Java/Python/misc/RPlotter.py Tue Apr 30 14:33:21 2013 -0400
@@ -639,7 +639,6 @@
         if self.minimumY != None:
             yMin = self.minimumY
         yMax = self.getNewYMaxWithTopMargin()
-        yMax += min(1, yMax / 100.0)
         if self.maximumY != None:
             yMax = self.maximumY
 
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/RPlotter.pyc
Binary file SMART/Java/Python/misc/RPlotter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/UnlimitedProgress.pyc
Binary file SMART/Java/Python/misc/UnlimitedProgress.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/Utils.pyc
Binary file SMART/Java/Python/misc/Utils.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/misc/__init__.pyc
Binary file SMART/Java/Python/misc/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/mySql/MySqlConnection.pyc
Binary file SMART/Java/Python/mySql/MySqlConnection.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/mySql/MySqlExonTable.pyc
Binary file SMART/Java/Python/mySql/MySqlExonTable.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/mySql/MySqlQuery.pyc
Binary file SMART/Java/Python/mySql/MySqlQuery.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/mySql/MySqlTable.pyc
Binary file SMART/Java/Python/mySql/MySqlTable.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/mySql/MySqlTranscriptTable.pyc
Binary file SMART/Java/Python/mySql/MySqlTranscriptTable.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/mySql/__init__.pyc
Binary file SMART/Java/Python/mySql/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/.NCList.py.swp
Binary file SMART/Java/Python/ncList/.NCList.py.swp has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/.NCListCursor.py.swp
Binary file SMART/Java/Python/ncList/.NCListCursor.py.swp has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/Benchmark.py
--- a/SMART/Java/Python/ncList/Benchmark.py Mon Apr 29 03:45:52 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b'@@ -1,357 +0,0 @@\n-import os, os.path, random, glob, subprocess, threading, time, resource\n-from optparse import OptionParser\n-from SMART.Java.Python.misc.Progress import *\n-from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from commons.core.parsing.GffParser import GffParser\n-\n-#TYPES = ("bin", "has", "seg", "fj", "nc", "new")\n-TYPES = ("new", )\n-\n-class RunCmd(threading.Thread):\n-\tdef __init__(self, cmd, out, err, time, memory):\n-\t\tthreading.Thread.__init__(self)\n-\t\tself._cmd    = cmd\n-\t\tself._out    = out\n-\t\tself._err    = err\n-\t\tself._time   = time\n-\t\tself._memory = memory\n-\t\tself._id\t = os.getpid()\n-\t\tself._mem    = 0.0\n-\t\tself._outputFileName = "tmp_%d.out" % (self._id)\n-\n-\tdef run(self):\n-\t\tself._p = subprocess.Popen(self._cmd, stdout = self._out, stderr = self._err, shell = True)\n-\t\t#self._p.wait()\n-\n-\tdef _runShellCommand(self, command):\n-\t\tp = subprocess.call("%s > %s" % (command, self._outputFileName), shell=True)\n-\t\thandle = open(self._outputFileName)\n-\t\tdata   = [line.split() for line in handle.readlines()[1:] if line]\n-\t\thandle.close()\n-\t\tos.remove(self._outputFileName)\n-\t\treturn data\n-\n-\tdef _getPid(self):\n-\t\tself._pid\t  = None\n-\t\tcpt           = 1\n-\t\twhile True:\n-\t\t\tcommandsFound = []\n-\t\t\tfor line in self._runShellCommand("ps -o pid,cmd"):\n-\t\t\t\tif line[1:] == self._cmd.split(" "):\n-\t\t\t\t\tself._pid = int(line[0])\n-\t\t\t\tcommandsFound.append(" ".join(line[1:]))\n-\t\t\tif self._pid != None:\n-\t\t\t\treturn True\n-\t\t\ttime.sleep(1)\n-\t\t\tif cpt % 100 == 0:\n-\t\t\t\tprint "pid of \'%s\' not found after %d seconds. Found: %s" % (self._cmd, cpt, " --- ".join(commandsFound))\n-\t\t\tcpt += 1\n-\t\t\tif cpt > 300:\n-\t\t\t\treturn False\n-\n-\tdef _fetchMemory(self):\n-\t\tlines = self._runShellCommand("ps u -p %d" % (self._pid))\n-\t\tfor line in lines:\n-\t\t\tself._mem = max(self._mem, float(line[3]))\n-\t\t\treturn self._mem >= self._memory\n-\t\t#print "Cannot find the memory of the current PID (%d) in: %s" % (self._pid, " --- ".join([" ".join(line) for line in lines]))\n-\t\treturn False\n-\t\n-\tdef getMemory(self):\n-\t\treturn self._mem\n-\n-\tdef _abort(self):\n-\t\ttry:\n-\t\t\tself._p.terminate()\n-\t\texcept Exception:\n-\t\t\tpass\n-\t\tself._killSubThreads()\n-\t\n-\tdef _killSubThreads(self):\n-\t\tfor line in self._runShellCommand("ps --ppid %d -o pid" % (self._pid)):\n-\t\t\tself._runShellCommand("kill %s" % (line[0]))\n-\t\tself._runShellCommand("kill %s" % (self._pid))\n-\n-\tdef go(self):\n-\t\tstartTime = time.time()\n-\t\tself.run()\n-\t\t#self.start()\n-\t\twhile not self._getPid():\n-\t\t\t#self.start()\n-\t\t\tself.run()\n-\t\twhile True:\n-\t\t\tif self._time != None and time.time() - startTime > self._time:\n-\t\t\t\tprint "\\nCommand \'%s\' did not finish in time. Aborting it." % (self._cmd)\n-\t\t\t\tself._abort()\n-\t\t\t\tbreak\n-\t\t\tif self._memory != None and self._fetchMemory():\n-\t\t\t\tprint "\\nCommand \'%s\' required too much memory (%f). Aborting it." 
% (self._cmd, self._mem)\n-\t\t\t\tself._abort()\n-\t\t\t\tbreak\n-\t\t\t#self.join(0.1)\n-\t\t\ttime.sleep(0.1)\n-\t\t\t#if not self.isAlive():\n-\t\t\tif self._p.poll() != None:\n-\t\t\t\treturn True\n-\t\treturn False\n-\n-\n-class DataStructure(object):\n-\tdef __init__(self):\n-\t\tself._structure = {}\n-\n-\tdef addData(self, data):\n-\t\tif data._nbRefs not in self._structure:\n-\t\t\tself._structure[data._nbRefs] = {}\n-\t\tif data._nbQueries not in self._structure[data._nbRefs]:\n-\t\t\tself._structure[data._nbRefs][data._nbQueries] = {}\n-\t\tif data._genomeSize not in self._structure[data._nbRefs][data._nbQueries]:\n-\t\t\tself._structure[data._nbRefs][data._nbQueries][data._genomeSize] = {}\n-\t\tif data._type not in self._structure[data._nbRefs][data._nbQueries][data._genomeSize]:\n-\t\t\tself._structure[data._nbRefs][data._nbQueries][data._genomeSize][data._type] = []\n-\t\tself._structure[data._nbRefs][data._nbQueries][data._genomeSize][data._type].append(data._group)\n-\n-\tdef export(self):\n-\t\toutputString = "#refs\\t#queries\\tgenome size\\ttype\\t# written\\t# overlaps\\tbuild t.\\trun t.\\tmem\\n"\n-\t\tf'..b' nbReferences, nbQueries, "NA", "NA", genomeSize)\n-\t\t\t\t\t\t\telse:\n-\t\t\t\t\t\t\t\tdata[type] = self._parseTrace(type, fileName, genomeSize)\n-\t\t\t\t\t\t\t\tself._structure.addData(data[type])\n-\t\t\t\t\t\t\t\tos.remove(fileName)\n-\t\t\t\t\t\t\tself._cleanTmpFiles()\n-\t\t\t\t\t\tself._cleanTmpFiles(True)\n-\t\t\t\t\t\tfirstType = TYPES[0]\n-\t\t\t\t\t\tfor type in TYPES[1:]:\n-\t\t\t\t\t\t\tif not data[firstType].checkConsistency(data[type]):\n-\t\t\t\t\t\t\t\traise Exception("Outputs are not consistent.\\n  # outputs: %d vs %d.\\n  # overlaps: %d vs %d.\\n  %s: %f + %f; %s: %f + %f.\\n  Files are %s and %s." % (data[firstType]._group._nbOutputs, data[type]._group._nbOutputs, data[firstType]._group._nbOverlaps, data[type]._group._nbOverlaps, firstType, data[firstType]._group._buildTime, data[firstType]._group._runTime, data[firstType]._group._mem, type, data[type]._group._buildTime, data[type]._group._runTime, data[type]._group._mem, refFileName, queryFileName))\n-\t\t\t\t\t\tfor fileName in (queryFileName, refFileName):\n-\t\t\t\t\t\t\tif os.path.exists(fileName):\n-\t\t\t\t\t\t\t\tos.remove(fileName)\n-\t\t\t\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\t\thandle = open(self._outputFileName, "w")\n-\t\thandle.write(self._structure.export())\n-\t\thandle.close()\n-\n-\n-\n-if __name__ == "__main__":\n-\t\n-\tdescription = "Benchmark v1.0.2: Compare NC-List with other tools. Only work under Linux. 
[Category: Other]"\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-r", "--nbReferences", dest="nbReferences",   action="store", default=None,   type="string", help="number of references (list of integers separated by commas) [compulsory] [format: string]")\n-\tparser.add_option("-q", "--nbQueries",    dest="nbQueries",\t\t action="store", default=None,   type="string", help="number of queries as a factor of the number of references (list of floats separated by commas) [compulsory] [format: string]")\n-\tparser.add_option("-R", "--nbReplicates", dest="nbReplicates",   action="store", default=None,   type="int",\thelp="number of replicates [compulsory] [format: int]")\n-\tparser.add_option("-s", "--genomeSizes",  dest="genomeSizes",    action="store", default=None,   type="string", help="genome size as a factor of the number of references (list of floats separated by commas) [compulsory] [format: string]")\n-\tparser.add_option("-c", "--chromosome",   dest="chromosome",\t action="store", default="chr1", type="string", help="name of the chromosome [default: chr1] [format: string]")\n-\tparser.add_option("-z", "--minSize",      dest="minSize",        action="store", default=None,   type="int",\thelp="minimum size of the reads [compulsory] [format: int]")\n-\tparser.add_option("-Z", "--maxSize",      dest="maxSize",        action="store", default=None,   type="int",\thelp="maximum size of the reads [compulsory] [format: int]")\n-\tparser.add_option("-o", "--output",       dest="outputFileName", action="store",\t\t\t\t type="string", help="output file [compulsory] [format: output file in TXT format]")\n-\tparser.add_option("-t", "--time",         dest="time",           action="store", default=None,   type="int",\thelp="maximum time to wait (in seconds) [default: None] [format: int]")\n-\tparser.add_option("-m", "--memory",\t      dest="memory",\t\t action="store", default=None,   type="float",\thelp="maximum memory usage (in %) [default: None] [format: float]")\n-\tparser.add_option("-v", "--verbosity",    dest="verbosity",      action="store", default=1,\t     type="int",\thelp="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tbenchmark = Benchmark(options.verbosity)\n-\tbenchmark.setNbReferences(map(int, options.nbReferences.split(",")))\n-\tbenchmark.setNbQueries(map(float, options.nbQueries.split(",")))\n-\tbenchmark.setGenomeSizes(map(float, options.genomeSizes.split(",")))\n-\tbenchmark.setNbReplicates(options.nbReplicates)\n-\tbenchmark.setChromosomeName(options.chromosome)\n-\tbenchmark.setSizes(options.minSize, options.maxSize)\n-\tbenchmark.setLimits(options.time, options.memory)\n-\tbenchmark.setOutputFileName(options.outputFileName)\n-\tbenchmark.run()\n-\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/ConvertToNCList.pyc
Binary file SMART/Java/Python/ncList/ConvertToNCList.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/FileSorter.pyc
Binary file SMART/Java/Python/ncList/FileSorter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCIndex.pyc
Binary file SMART/Java/Python/ncList/NCIndex.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCList.pyc
Binary file SMART/Java/Python/ncList/NCList.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCListCursor.pyc
Binary file SMART/Java/Python/ncList/NCListCursor.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCListFilePickle.pyc
Binary file SMART/Java/Python/ncList/NCListFilePickle.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCListHandler.pyc
Binary file SMART/Java/Python/ncList/NCListHandler.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCListMerger.pyc
Binary file SMART/Java/Python/ncList/NCListMerger.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/NCListParser.pyc
Binary file SMART/Java/Python/ncList/NCListParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/ncList/__init__.pyc
Binary file SMART/Java/Python/ncList/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/re_filter_ratio_5_NbReads_100_samples_all_norm_Window100overlap50.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/re_filter_ratio_5_NbReads_100_samples_all_norm_Window100overlap50.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,296 @@\n+chr2\tS-MART\tsample9Unique_transcript\t11739884\t11740222\t21\t-\t.\tnbOverlaps1=11091;nbOverlaps2=2.71789;nbOverlaps3=6262.63;nbOverlaps4=5519.92;nbOverlaps5=8514.14;nbOverlaps6=6150.58;nbOverlaps7=33.3514;nbOverlaps8=1542.36;nbOverlaps9=501.044;nbOverlaps10=462.834;ID=HWUSI-EAS454_0005:4:6:8871:3706#0/1;overlapsWith=HWUSI-EAS454_0005:5:83:18620:7295#0/1--HWUSI-EAS454_0005:5:114:4513:11369#0--HWUSI-EAS454_0005:5:2:1;Name=HWUSI-EAS454_0005:4:6:8871:3706#0/1--HWUSI-EAS454_0005:3:78:19264:12888#0/1--HWUSI-EAS454_0001:4:90:;nbElements=57149.000000;nbE1/nbE2=2983.143665;filter=S1>S2,\n+chr2\tS-MART\tsample5Unique_transcript\t2731028\t2731271\t24\t-\t.\tnbOverlaps1=690;nbOverlaps2=6.11525;nbOverlaps3=491.553;nbOverlaps4=463.775;nbOverlaps5=535.969;nbOverlaps6=325.238;nbOverlaps7=7.13514;nbOverlaps8=33.0133;nbOverlaps9=84.0599;nbOverlaps10=35.1854;ID=HWUSI-EAS454_0001:7:35:2542:19410#0/1;overlapsWith=HWUSI-EAS454_0005:5:115:16901:16071#0/1--HWUSI-EAS454_0005:5:33:3771:9306#0/1--HWUSI-EAS454_0005:5:3;Name=HWUSI-EAS454_0001:7:35:2542:19410#0/1--HWUSI-EAS454_0005:1:68:6471:6188#0/1--HWUSI-EAS454_0001:4:74:;nbElements=3755.000000;nbE1/nbE2=96.974808;filter=S1>S2,\n+chr5\tS-MART\tsample8Unique_transcript\t15253504\t15253638\t19\t+\t.\tnbOverlaps1=274;nbOverlaps2=16.3073;nbOverlaps3=82.8734;nbOverlaps4=76.5867;nbOverlaps5=104.667;nbOverlaps6=46.3123;nbOverlaps7=0.324324;nbOverlaps8=22.838;nbOverlaps9=14.8226;nbOverlaps10=10.0132;ID=HWUSI-EAS454_0005:3:31:2846:9089#0;overlapsWith=HWUSI-EAS454_0005:5:97:8048:11119#0/1--HWUSI-EAS454_0005:5:97:4678:4418#0--HWUSI-EAS454_0005:5:36:17;Name=HWUSI-EAS454_0005:3:31:2846:9089#0--HWUSI-EAS454_0001:4:68:6299:4556#0/1--HWUSI-EAS454_0005:5:79:101;nbElements=913.000000;nbE1/nbE2=15.831470;filter=S1>S2,\n+chr4\tS-MART\tsample2Unique_transcript\t9401695\t9401791\t23\t+\t.\tnbOverlaps1=1;nbOverlaps2=748.099;nbOverlaps3=1.62497;nbOverlaps4=1.41827;nbOverlaps5=0.902304;nbOverlaps6=1.57883;nbOverlaps7=0;nbOverlaps8=2.03507;nbOverlaps9=0.195035;nbOverlaps10=0;ID=HWUSI-EAS454_0001:3:88:5184:17397#0/1;overlapsWith=HWUSI-EAS454_0005:4:109:7472:8125#0/1;Name=HWUSI-EAS454_0001:3:88:5184:17397#0/1--HWUSI-EAS454_0001:3:18:15316:12317#0/1--HWUSI-EAS454_0001:3:9;nbElements=1119.000000;nbE2/nbE1=374.049500;filter=S2>S1,\n+chr1\tS-MART\tsample1Unique_transcript\t11592768\t11592855\t24\t+\t.\tnbOverlaps1=377;nbOverlaps2=77.4598;nbOverlaps3=8.93733;nbOverlaps4=8.50964;nbOverlaps5=11.73;nbOverlaps6=4.21021;nbOverlaps7=0.0540541;nbOverlaps8=0.226119;nbOverlaps9=0;nbOverlaps10=0.417218;ID=HWUSI-EAS454_0005:1:59:8362:4670#0;overlapsWith=HWUSI-EAS454_0005:5:85:4327:6835#0--HWUSI-EAS454_0005:5:76:14344:4377#0;Name=HWUSI-EAS454_0005:1:59:8362:4670#0--HWUSI-EAS454_0001:3:24:2476:19461#0--HWUSI-EAS454_0001:3:85:9481;nbElements=534.000000;nbE1/nbE3=37.937756;filter=S1>S3,\n+chr5\tS-MART\tsample7Unique_transcript\t18274485\t18274638\t22\t+\t.\tnbOverlaps1=274;nbOverlaps2=50.9604;nbOverlaps3=219.371;nbOverlaps4=43.9665;nbOverlaps5=177.754;nbOverlaps6=68.4158;nbOverlaps7=0.918919;nbOverlaps8=11.532;nbOverlaps9=2.34041;nbOverlaps10=0.973509;ID=HWUSI-EAS454_0013_FC:1:64:3764:13895#0;overlapsWith=HWUSI-EAS454_0005:5:113:1828:9830#0/1--HWUSI-EAS454_0005:5:12:13473:6177#0/1--HWUSI-EAS454_0005:5:42;Name=HWUSI-EAS454_0013_FC:1:64:3764:13895#0--HWUSI-EAS454_0001:6:35:1740:1405#0/1--HWUSI-EAS454_0001:7:43;nbElements=1063.000000;nbE1/nbE2=5.273247;filter=S1>S2,\n+chr5\tS-MART\tsample6Unique_transcript\t16212511\t16212648\t24\t+\t.\tnbOverlaps1=56;nbOverlaps2=59.1141;nbOverlaps3=344.493;n
bOverlaps4=25.5289;nbOverlaps5=51.4313;nbOverlaps6=165.777;nbOverlaps7=1.2973;nbOverlaps8=9.04474;nbOverlaps9=0.390069;nbOverlaps10=0.139073;ID=HWUSI-EAS454_0004:5:73:8987:9418#0/1;overlapsWith=HWUSI-EAS454_0005:5:111:18220:9874#0/1;Name=HWUSI-EAS454_0004:5:73:8987:9418#0/1--HWUSI-EAS454_0004:5:73:2669:12596#0/1--HWUSI-EAS454_0004:5:60:;nbElements=1024.000000;nbE3/nbE1=6.043737;filter=S3>S1\n+chr4\tS-MART\tsample9Unique_transcript\t10266529\t10266697\t23\t-\t.\tnbOverlaps1=469;nbOverlaps2=679.472;nbOverlaps3=38.1'..b'WUSI-EAS454_0005:3:69:12301:18998#0/1--HWUSI-EAS454_0004:5:51;nbElements=1166.000000;nbE1/nbE2=5.771658;filter=S1>S2,\n+chr1\tS-MART\tsample3Unique_transcript\t26111387\t26111467\t23\t-\t.\tnbOverlaps1=534;nbOverlaps2=36.012;nbOverlaps3=140.56;nbOverlaps4=87.9329;nbOverlaps5=37.8968;nbOverlaps6=169.461;nbOverlaps7=0.324324;nbOverlaps8=9.94922;nbOverlaps9=1.75531;nbOverlaps10=0.417218;ID=HWUSI-EAS454_0001:4:27:10831:18663#0/1;overlapsWith=HWUSI-EAS454_0005:5:110:16902:16613#0/1--HWUSI-EAS454_0005:5:80:7345:3357#0/1--HWUSI-EAS454_0005:5:3;Name=HWUSI-EAS454_0001:4:27:10831:18663#0/1--HWUSI-EAS454_0005:1:12:16643:14153#0--HWUSI-EAS454_0005:3:10;nbElements=1248.000000;nbE1/nbE2=14.427753;filter=S1>S2,\n+chr1\tS-MART\tsample2Unique_transcript\t3855600\t3855715\t23\t+\t.\tnbOverlaps1=166;nbOverlaps2=474.951;nbOverlaps3=6.49988;nbOverlaps4=7.09136;nbOverlaps5=5.41382;nbOverlaps6=22.6299;nbOverlaps7=0;nbOverlaps8=37.3096;nbOverlaps9=0.195035;nbOverlaps10=0.139073;ID=HWUSI-EAS454_0001:3:31:13200:14532#0;overlapsWith=HWUSI-EAS454_0005:5:71:9926:19481#0;Name=HWUSI-EAS454_0001:3:31:13200:14532#0--HWUSI-EAS454_0001:3:13:5219:7425#0--HWUSI-EAS454_0001:6:68:106;nbElements=1094.000000;nbE1/nbE3=22.133687;filter=S1>S3,\n+chr1\tS-MART\tsample4Unique_transcript\t21609487\t21609537\t23\t-\t.\tnbOverlaps1=122;nbOverlaps2=20.3842;nbOverlaps3=23.562;nbOverlaps4=56.7309;nbOverlaps5=17.1438;nbOverlaps6=15.262;nbOverlaps7=0.216216;nbOverlaps8=3.39178;nbOverlaps9=3.51062;nbOverlaps10=0.834436;ID=HWUSI-EAS454_0001:7:58:122:1727#0;overlapsWith=HWUSI-EAS454_0005:5:31:18659:14130#0--HWUSI-EAS454_0005:5:75:15252:4379#0--HWUSI-EAS454_0005:5:74:14;Name=HWUSI-EAS454_0001:7:58:122:1727#0--HWUSI-EAS454_0001:4:14:10300:13699#0--HWUSI-EAS454_0001:3:88:7480;nbElements=312.000000;nbE1/nbE2=5.705147;filter=S1>S2,\n+chr5\tS-MART\tsample2Unique_transcript\t7185205\t7185373\t24\t+\t.\tnbOverlaps1=39;nbOverlaps2=377.107;nbOverlaps3=207.996;nbOverlaps4=17.0193;nbOverlaps5=55.0405;nbOverlaps6=40.5232;nbOverlaps7=0.27027;nbOverlaps8=0.678356;nbOverlaps9=0.195035;nbOverlaps10=0.278145;ID=HWUSI-EAS454_0001:3:55:14092:13307#0;overlapsWith=HWUSI-EAS454_0005:5:79:13096:9168#0/1--HWUSI-EAS454_0005:5:37:11901:7436#0/1;Name=HWUSI-EAS454_0001:3:55:14092:13307#0--HWUSI-EAS454_0001:3:54:19540:17685#0/1--HWUSI-EAS454_0001:3:27;nbElements=990.000000;nbE2/nbE1=9.427675;nbE3/nbE1=5.199900;filter=S2>S1,S3>S1\n+chr4\tS-MART\tsample10Uniqu_transcript\t6551532\t6551826\t20\t+\t.\tnbOverlaps1=1109;nbOverlaps2=876.519;nbOverlaps3=104.81;nbOverlaps4=55.3126;nbOverlaps5=65.8682;nbOverlaps6=96.3084;nbOverlaps7=30.1622;nbOverlaps8=213.456;nbOverlaps9=391.629;nbOverlaps10=75.7946;ID=HWUSI-EAS454_0005:5:119:2219:3398#0/1;overlapsWith=HWUSI-EAS454_0005:5:109:4734:5243#0/1--HWUSI-EAS454_0005:5:43:4352:9936#0/1--HWUSI-EAS454_0005:5:64:;Name=HWUSI-EAS454_0005:5:119:2219:3398#0/1--HWUSI-EAS454_0005:3:85:14318:4211#0--HWUSI-EAS454_0005:5:105:;nbElements=6739.000000;nbE1/nbE3=10.481051;filter=S1>S3,\n+chr1\tS-MART\tsample2Unique_transcript\t25427657\t25427769\
t24\t+\t.\tnbOverlaps1=31;nbOverlaps2=216.752;nbOverlaps3=1.62497;nbOverlaps4=0;nbOverlaps5=0;nbOverlaps6=0;nbOverlaps7=0;nbOverlaps8=0;nbOverlaps9=0.195035;nbOverlaps10=0;ID=HWUSI-EAS454_0001:3:64:17647:16938#0/1;overlapsWith=HWUSI-EAS454_0005:4:59:16218:16037#0/1;Name=HWUSI-EAS454_0001:3:64:17647:16938#0/1--HWUSI-EAS454_0001:3:91:1451:16969#0/1--HWUSI-EAS454_0001:3:8;nbElements=352.000000;nbE2/nbE1=6.773500;filter=S2>S1,\n+chr1\tS-MART\tsample3Unique_transcript\t79000\t79112\t21\t+\t.\tnbOverlaps1=436;nbOverlaps2=82.2161;nbOverlaps3=63.3738;nbOverlaps4=83.6781;nbOverlaps5=46.0175;nbOverlaps6=52.1013;nbOverlaps7=54.3784;nbOverlaps8=64.2177;nbOverlaps9=42.7126;nbOverlaps10=37.1324;ID=HWUSI-EAS454_0001:4:78:7399:13906#0;overlapsWith=HWUSI-EAS454_0005:5:114:13819:16401#0--HWUSI-EAS454_0005:5:21:18580:10056#0--HWUSI-EAS454_0005:5:79:;Name=HWUSI-EAS454_0001:4:78:7399:13906#0--HWUSI-EAS454_0001:3:82:16783:2949#0--HWUSI-EAS454_0001:7:56:690;nbElements=2620.000000;nbE1/nbE2=5.239371;nbE1/nbE3=6.772942;filter=S1>S2,S1>S3,\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/removeEmptySequences.py
--- a/SMART/Java/Python/removeEmptySequences.py Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/Java/Python/removeEmptySequences.py Tue Apr 30 14:33:21 2013 -0400
@@ -66,9 +66,9 @@
 
     def setOutputFileName(self, fileName):
         if options.format == "fasta":
-            self.writer = FastaWriter(fileName, self.verbosity)
+            self.writer = FastaWriter("%s.mfa" % (fileName), self.verbosity)
         elif options.format == "fastq":
-            self.writer = FastqWriter(fileName, self.verbosity)
+            self.writer = FastqWriter("%s.mfq" % (fileName), self.verbosity)
 
 
     def parse(self):
@@ -91,13 +91,13 @@
     description = "Remove Empty Sequences v1.0.2: Remove all the empty sequences in a list. [Category: Personal]"
 
     parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",   action="store",                     type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",          action="store",                     type="string", help="format of the input file [compulsory] [format: sequence file format]")
-    parser.add_option("-j", "--input2",    dest="inputFileName2",  action="store",                     type="string", help="input file 2 (in case of pair end reads) [format: file in sequence format given by -f] [default: None]")
-    parser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in format given by -f]")
-    parser.add_option("-p", "--output2",   dest="outputFileName2", action="store",      default=None,  type="string", help="output file 2 (in case of pair end reads) [format: output file in sequence format given by -f] [default: None]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
-    parser.add_option("-l", "--log",       dest="log",             action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+    parser.add_option("-i", "--input",         dest="inputFileName",     action="store",                                         type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",        dest="format",                    action="store",                                         type="string", help="format of the input file [compulsory] [format: sequence file format]")
+    parser.add_option("-j", "--input2",        dest="inputFileName2",    action="store",                                         type="string", help="input file 2 (in case of pair end reads) [format: file in sequence format given by -f] [default: None]")
+    parser.add_option("-o", "--output",        dest="outputFileName",    action="store",            default=None,    type="string", help="output file [compulsory] [format: output file in format given by -f]")
+    parser.add_option("-p", "--output2",     dest="outputFileName2", action="store",            default=None,    type="string", help="output file 2 (in case of pair end reads) [format: output file in sequence format given by -f] [default: None]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",             action="store",            default=1,         type="int",        help="trace level [format: int] [default: 1]")
+    parser.add_option("-l", "--log",             dest="log",                         action="store_true", default=False,                                help="write a log file [format: bool] [default: false]")
     (options, args) = parser.parse_args()
 
     if options.log:
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/script.Rout
--- a/SMART/Java/Python/script.Rout Mon Apr 29 03:45:52 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-Fatal error: cannot open file 'script.R': No such file or directory
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/Bins.pyc
Binary file SMART/Java/Python/structure/Bins.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/Interval.pyc
Binary file SMART/Java/Python/structure/Interval.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/Mapping.pyc
Binary file SMART/Java/Python/structure/Mapping.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/Sequence.pyc
Binary file SMART/Java/Python/structure/Sequence.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/SequenceList.pyc
Binary file SMART/Java/Python/structure/SequenceList.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/SubMapping.pyc
Binary file SMART/Java/Python/structure/SubMapping.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/Transcript.pyc
Binary file SMART/Java/Python/structure/Transcript.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/TranscriptContainer.pyc
Binary file SMART/Java/Python/structure/TranscriptContainer.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/TranscriptList.pyc
Binary file SMART/Java/Python/structure/TranscriptList.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/structure/__init__.pyc
Binary file SMART/Java/Python/structure/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,100000 @@\n+chr1\tS-MART\ttranscript\t13\t33\t21\t-\t.\tbestRegion=(self);nbGaps=0;nbMismatches=0;ID=HWUSI-EAS454_0005:1:29:15426:13405#0/1;identity=100;Name=HWUSI-EAS454_0005:1:29:15426:13405#0/1\n+chr1\tS-MART\ttranscript\t14\t37\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:36:917:291#0;identity=100;Name=HWUSI-EAS454_0001:6:36:917:291#0\n+chr1\tS-MART\ttranscript\t14\t36\t23\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:39:1005:1200#0;identity=100;Name=HWUSI-EAS454_0001:6:39:1005:1200#0\n+chr1\tS-MART\ttranscript\t15\t38\t24\t-\t.\tquality=25;nbElements=3.000000;ID=HWUSI-EAS454_0005:1:88:8852:4891#0;Name=HWUSI-EAS454_0005:1:88:8852:4891#0--HWUSI-EAS454_0005:1:16:12336:7772#0--HWUSI-EAS454_0001:6:29:1176\n+chr1\tS-MART\ttranscript\t16\t39\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:12:605:1006#0;identity=100;Name=HWUSI-EAS454_0001:6:12:605:1006#0\n+chr1\tS-MART\ttranscript\t16\t38\t23\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:41:570:1349#0;identity=100;Name=HWUSI-EAS454_0001:6:41:570:1349#0\n+chr1\tS-MART\ttranscript\t17\t40\t24\t-\t.\tquality=37;nbElements=11.000000;ID=HWUSI-EAS454_0005:1:81:1819:13108#0;Name=HWUSI-EAS454_0005:1:81:1819:13108#0--HWUSI-EAS454_0005:1:75:6916:8155#0--HWUSI-EAS454_0005:1:67:2702\n+chr1\tS-MART\ttranscript\t17\t39\t23\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:90:6409:15569#0;identity=100;Name=HWUSI-EAS454_0005:1:90:6409:15569#0\n+chr1\tS-MART\ttranscript\t18\t41\t24\t-\t.\tquality=25;nbElements=11.000000;ID=HWUSI-EAS454_0005:1:8:14764:13869#0;Name=HWUSI-EAS454_0005:1:8:14764:13869#0--HWUSI-EAS454_0005:1:80:16600:20813#0--HWUSI-EAS454_0005:1:68:16\n+chr1\tS-MART\ttranscript\t19\t42\t24\t-\t.\tquality=37;nbElements=2.000000;ID=HWUSI-EAS454_0005:1:55:7969:5875#0;Name=HWUSI-EAS454_0005:1:55:7969:5875#0--HWUSI-EAS454_0001:6:20:1413:2018#0\n+chr1\tS-MART\ttranscript\t20\t42\t23\t-\t.\tquality=25;nbElements=2.000000;ID=HWUSI-EAS454_0005:1:8:12144:21397#0;Name=HWUSI-EAS454_0005:1:8:12144:21397#0--HWUSI-EAS454_0005:1:81:7335:14824#0\n+chr1\tS-MART\ttranscript\t20\t42\t23\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:117:3168:18210#0;identity=100;Name=HWUSI-EAS454_0005:1:117:3168:18210#0\n+chr1\tS-MART\ttranscript\t21\t44\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:71:270:748#0;identity=100;Name=HWUSI-EAS454_0001:6:71:270:748#0\n+chr1\tS-MART\ttranscript\t21\t41\t21\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:70:1405:628#0;identity=100;Name=HWUSI-EAS454_0001:6:70:1405:628#0\n+chr1\tS-MART\ttranscript\t22\t45\t24\t-\t.\tquality=37;nbElements=4.000000;ID=HWUSI-EAS454_0005:1:5:3208:12720#0;Name=HWUSI-EAS454_0005:1:5:3208:12720#0--HWUSI-EAS454_0001:6:77:795:914#0--HWUSI-EAS454_0001:6:67:1386:90\n+chr1\tS-MART\ttranscript\t23\t46\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:72:315:1393#0;identity=100;Name=HWUSI-EAS454_0001:6:72:315:1393#0\n+chr1\tS-MART\ttranscript\t23\t45\t23\t-\t.\tqu
ality=37;nbElements=7.000000;ID=HWUSI-EAS454_0005:1:78:14145:12130#0;Name=HWUSI-EAS454_0005:1:78:14145:12130#0--HWUSI-EAS454_0005:1:54:2352:2026#0--HWUSI-EAS454_0005:1:23:106\n+chr1\tS-MART\ttranscript\t24\t48\t25\t-\t.\toccurrence=1;quality=25;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWUSI-EAS454_0001:6:57:318:773#0;identity=96;Name=HWUSI-EAS454_0001:6:57:318:773#0\n+chr1\tS-MART\ttranscript\t24\t47\t24\t-\t.\tquality=25;nbElements=20.000000;ID=HWUSI-EAS454_0005:1:95:13627:10660#0;Name=HWUSI-EAS454_0005:1:95:13627:10660#0--HWUSI-EAS454_0005:1:31:5776:16476#0--HWUSI-EAS454_0005:1:31:'..b'ART\ttranscript\t9315588\t9315612\t25\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:20:1492:802#0;identity=100;Name=HWUSI-EAS454_0001:6:20:1492:802#0\n+chr1\tS-MART\ttranscript\t9315654\t9315680\t27\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:5:3941:11137#0;identity=100;Name=HWUSI-EAS454_0005:1:5:3941:11137#0\n+chr1\tS-MART\ttranscript\t9315856\t9315882\t27\t-\t.\toccurrence=1;quality=25;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWUSI-EAS454_0005:1:119:16446:8333#0;identity=96;Name=HWUSI-EAS454_0005:1:119:16446:8333#0\n+chr1\tS-MART\ttranscript\t9315864\t9315883\t20\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:4:1116:1279#0;identity=100;Name=HWUSI-EAS454_0001:6:4:1116:1279#0\n+chr1\tS-MART\ttranscript\t9316130\t9316160\t31\t-\t.\toccurrence=1;quality=25;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWUSI-EAS454_0005:1:57:2462:8082#0;identity=96;Name=HWUSI-EAS454_0005:1:57:2462:8082#0\n+chr1\tS-MART\ttranscript\t9316130\t9316149\t20\t-\t.\toccurrence=1;quality=23;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWUSI-EAS454_0001:6:63:152:1917#0-1;identity=100;Name=HWUSI-EAS454_0001:6:63:152:1917#0\n+chr1\tS-MART\ttranscript\t9316240\t9316263\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:64:1494:1889#0;identity=100;Name=HWUSI-EAS454_0001:6:64:1494:1889#0\n+chr1\tS-MART\ttranscript\t9316788\t9316808\t21\t-\t.\tnbElements=2.000000;ID=HWUSI-EAS454_0005:1:37:6611:7504#0/1;Name=HWUSI-EAS454_0005:1:37:6611:7504#0/1--HWUSI-EAS454_0005:1:32:15853:3213#0/1\n+chr1\tS-MART\ttranscript\t9317112\t9317135\t24\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:5:14934:13715#0;identity=100;Name=HWUSI-EAS454_0005:1:5:14934:13715#0\n+chr1\tS-MART\ttranscript\t9318166\t9318186\t21\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:78:8582:8151#0;identity=100;Name=HWUSI-EAS454_0005:1:78:8582:8151#0\n+chr1\tS-MART\ttranscript\t9318484\t9318504\t21\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:9:894:642#0;identity=100;Name=HWUSI-EAS454_0001:6:9:894:642#0\n+chr1\tS-MART\ttranscript\t9318602\t9318621\t20\t-\t.\toccurrence=1;quality=0;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=HWUSI-EAS454_0005:1:40:5793:3039#0-1;identity=95;Name=HWUSI-EAS454_0005:1:40:5793:3039#0\n+chr1\tS-MART\ttranscript\t9319135\t9319156\t22\t-\t.\toccurrence=1;quality=0;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWUSI-EAS4
54_0005:1:30:14232:2440#0-1;identity=100;Name=HWUSI-EAS454_0005:1:30:14232:2440#0\n+chr1\tS-MART\ttranscript\t9321769\t9321794\t26\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:81:1482:1344#0;identity=100;Name=HWUSI-EAS454_0001:6:81:1482:1344#0\n+chr1\tS-MART\ttranscript\t9322055\t9322078\t24\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:54:17933:11240#0;identity=100;Name=HWUSI-EAS454_0005:1:54:17933:11240#0\n+chr1\tS-MART\ttranscript\t9322742\t9322762\t21\t-\t.\tbestRegion=(self);nbGaps=0;nbMismatches=3;ID=HWUSI-EAS454_0005:1:39:12064:3568#0/1;identity=85;Name=HWUSI-EAS454_0005:1:39:12064:3568#0/1\n+chr1\tS-MART\ttranscript\t9324388\t9324411\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:64:8973:18503#0;identity=100;Name=HWUSI-EAS454_0005:1:64:8973:18503#0\n+chr1\tS-MART\ttranscript\t9325384\t9325407\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:54:2957:1577#0;identity=100;Name=HWUSI-EAS454_0005:1:54:2957:1577#0\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test.pdf
Binary file SMART/Java/Python/test.pdf has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test.png
Binary file SMART/Java/Python/test.png has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/MockGetLetterDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/MockGetLetterDistribution.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,69 @@
+class MockGetLetterDistributionFasta (object) :
+ def write(self,inFileName):
+ f = open(inFileName,'w')
+ f.write('>HWI-EAS337_3:7:1:415:1217/1\n')
+ f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
+ f.write('>HWI-EAS337_3:7:1:208:1489/1\n')
+ f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
+ f.write('>HWI-EAS337_3:7:1:278:1153\n')
+ f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
+ f.close()
+
+class MockGetLetterDistributionFastq (object) :
+ def write(self,inFileName):
+ f = open(inFileName,'w')
+ f.write('@HWI-EAS337_3:7:1:415:1217/1\n')
+ f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
+ f.write('+HWI-EAS337_3:7:1:415:1217/1\n')
+ f.write('WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n')
+ f.write('@HWI-EAS337_3:7:1:208:1489/1\n')
+ f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
+ f.write('+HWI-EAS337_3:7:1:208:1489/1\n')
+ f.write('WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n')
+ f.write('@HWI-EAS337_3:7:1:278:1153/1\n')
+ f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
+ f.write('+HWI-EAS337_3:7:1:278:1153/1\n')
+ f.write('WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n')
+ f.close()
+
+class MockGetLetterDistributionExpectedCSV (object) :
+ def write(self,inFileName):
+ f = open(inFileName,'w')
+ f.write('1;A=0%;C=0%;T=0%;G=0%;\n')
+ f.write('2;A=0%;C=0%;T=0%;G=100.00%;\n')
+ f.write('3;A=66.67%;C=0%;T=0%;G=33.33%;\n')
+ f.write('4;A=33.33%;C=0%;T=33.33%;G=33.33%;\n')
+ f.write('5;A=66.67%;C=0%;T=0%;G=33.33%;\n')
+ f.write('6;A=66.67%;C=0%;T=33.33%;G=0%;\n')
+ f.write('7;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
+ f.write('8;A=66.67%;C=33.33%;T=0%;G=0%;\n')
+ f.write('9;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
+ f.write('10;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
+ f.write('11;A=33.33%;C=0%;T=66.67%;G=0%;\n')
+ f.write('12;A=0%;C=33.33%;T=0%;G=66.67%;\n')
+ f.write('13;A=0%;C=33.33%;T=66.67%;G=0%;\n')
+ f.write('14;A=66.67%;C=0%;T=33.33%;G=0%;\n')
+ f.write('15;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
+ f.write('16;A=33.33%;C=0%;T=66.67%;G=0%;\n')
+ f.write('17;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
+ f.write('18;A=100.00%;C=0%;T=0%;G=0%;\n')
+ f.write('19;A=66.67%;C=33.33%;T=0%;G=0%;\n')
+ f.write('20;A=33.33%;C=0%;T=33.33%;G=33.33%;\n')
+ f.write('21;A=33.33%;C=66.67%;T=0%;G=0%;\n')
+ f.write('22;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
+ f.write('23;A=0%;C=0%;T=66.67%;G=33.33%;\n')
+ f.write('24;A=0%;C=0%;T=33.33%;G=66.67%;\n')
+ f.write('25;A=66.67%;C=0%;T=0%;G=33.33%;\n')
+ f.write('26;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
+ f.write('27;A=66.67%;C=0%;T=33.33%;G=0%;\n')
+ f.write('28;A=66.67%;C=0%;T=0%;G=33.33%;\n')
+ f.write('29;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
+ f.write('30;A=66.67%;C=33.33%;T=0%;G=0%;\n')
+ f.write('31;A=66.67%;C=0%;T=33.33%;G=0%;\n')
+ f.write('32;A=0%;C=33.33%;T=33.33%;G=33.33%;\n')
+ f.write('33;A=33.33%;C=0%;T=33.33%;G=33.33%;\n')
+ f.write('34;A=33.33%;C=66.67%;T=0%;G=0%;\n')
+ f.write('35;A=0%;C=0%;T=66.67%;G=33.33%;\n')
+ f.write('36;A=0%;C=0%;T=33.33%;G=66.67%;\n')
+ f.close()
+
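The three mock classes above are pure fixture writers. A minimal sketch of how a test could use them to materialise its input reads and the matching expected per-position distribution, assuming the test directory is importable as a package (the output file names below are illustrative):

    from SMART.Java.Python.test import MockGetLetterDistribution as mock

    # Write the three-read FASTA/FASTQ inputs and the expected CSV side by side.
    mock.MockGetLetterDistributionFasta().write("reads.fasta")
    mock.MockGetLetterDistributionFastq().write("reads.fastq")
    mock.MockGetLetterDistributionExpectedCSV().write("expected.csv")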
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_Clusterize.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_Clusterize.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,119 @@\n+import unittest\n+import os, os.path\n+from optparse import OptionParser\n+from SMART.Java.Python.misc import Utils\n+from SMART.Java.Python.clusterize import Clusterize\n+#TODO: test for n option when option corrected\n+\n+SMART_PATH = os.environ["REPET_PATH"] + "/SMART"\n+\n+class Test_F_Clusterize(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputFileName     = "inputFileTest.bed"\n+        self._expOutputFileName = "expOutput.gff3"\n+        self._outputFileName    = "output.gff3"\n+        self._writeInputFile()\n+        \n+    def tearDown(self):\n+        for file in (self._inputFileName, self._expOutputFileName, self._outputFileName):\n+            if os.path.exists(file):\n+                os.remove(file)\n+\n+    def test_run_2DifferentStrand(self):\n+        self._writeOutputFile_2DiffStrand(self._expOutputFileName)\n+        \n+        args = ["-i", self._inputFileName, "-f", "bed", "-o", self._outputFileName, "-v", "0"]\n+        parser = OptionParser()\n+        parser.add_option("-i", "--input",          dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+        parser.add_option("-f", "--format",         dest="format",         action="store",                     type="string", help="format of file [format: transcript file format]")\n+        parser.add_option("-o", "--output",         dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+        parser.add_option("-c", "--colinear",       dest="colinear",       action="store_true", default=False,                help="merge colinear transcripts only [format: bool] [default: false]")\n+        parser.add_option("-d", "--distance",       dest="distance",       action="store",      default=0,     type="int",    help="max. 
distance between two transcripts to be merged [format: int] [default: 0]")\n+        parser.add_option("-n", "--normalize",      dest="normalize",      action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n+        parser.add_option("-v", "--verbosity",      dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")\n+        (options, args) = parser.parse_args(args)\n+        iClusterize = Clusterize(options.verbosity)\n+        iClusterize.setInputFile(options.inputFileName, options.format)\n+        iClusterize.setOutputFileName(options.outputFileName)\n+        iClusterize.setColinear(options.colinear)\n+        iClusterize.setDistance(options.distance)\n+        iClusterize.setNormalize(options.normalize)\n+        iClusterize.run()\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_runAsScript_2DifferentStrand(self):\n+        self._writeOutputFile_2DiffStrand(self._expOutputFileName)\n+        os.system("python %s/Java/Python/clusterize.py -i %s -f bed -o %s -v 0" % (SMART_PATH, self._inputFileName, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+        \n+    def test_runAsScript_2DifferentStrand_map_output(self):\n+        self._expOutputFileName = "expOutput.map"\n+        self._outputFileName    = "output.map"\n+        self._writeOutputFile_2DiffStrandMapFormat(self._expOutputFileName)\n+        os.system("python %s/Java/Python/clusterize.py -i %s -f bed -o %s -u map -v 0" % (SMART_PATH, self._inputFileName, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+        \n+    def test_runAsScript_2SameStrand(self):\n+        self._writeOutputFile_2SameStrand(self._expOutputFileName)\n+        os.system("python %s/Java/Python/clusterize.py -i %s -f bed -o %s -c -v 0" % (SMART_PATH, self._inputFileName, self._outputFileNam'..b'f.write("arm_X\\t10000100\\t10000200\\ttest1.1\\t100\\t+\\t10000100\\t10000200\\t0\\t1\\t100,\\t0,\\n")\n+        f.write("arm_X\\t10000100\\t10000200\\ttest1.2\\t100\\t-\\t10000100\\t10000200\\t0\\t1\\t100,\\t0,\\n")\n+        f.write("arm_2R\\t10000100\\t10000200\\ttest1.3\\t100\\t+\\t10000100\\t10000200\\t0\\t1\\t100,\\t0,\\n")\n+        f.write("arm_X\\t10000000\\t10000100\\ttest1.4\\t100\\t+\\t10000000\\t10000100\\t0\\t1\\t100,\\t0,\\n")\n+        f.write("arm_X\\t10000200\\t10000300\\ttest1.5\\t100\\t+\\t10000200\\t10000300\\t0\\t1\\t100,\\t0,\\n")\n+        f.write("arm_X\\t9999900\\t9999950\\ttest1.6\\t100\\t+\\t9999900\\t9999950\\t0\\t1\\t50,\\t0,\\n")\n+        f.write("arm_X\\t10000000\\t10000050\\ttest1.7\\t100\\t-\\t10000000\\t10000050\\t0\\t1\\t50,\\t0,\\n")\n+        f.close()\n+        \n+    def _writeOutputFile_2DiffStrand(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_2R\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t9999900\\t9999949\\t.\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000099\\t.\\t-\\t.\\tnbElements=2.000000;ID=test1.7--test1.4;Name=test1.7--test1.4\\n")\n+        
f.write("arm_X\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t-\\t.\\tnbElements=2.000000;ID=test1.2--test1.1;Name=test1.2--test1.1\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000200\\t10000299\\t.\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n+        f.close()\n+        \n+    def _writeOutputFile_2SameStrand(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_2R\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n") \n+        f.write("arm_X\\tS-MART\\ttranscript\\t9999900\\t9999949\\t.\\t+\\t.\\tID=test1.6;Name=test1.6\\n") \n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000099\\t.\\t+\\t.\\tID=test1.4;Name=test1.4\\n") \n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000049\\t.\\t-\\t.\\tID=test1.7;Name=test1.7\\n") \n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.1;Name=test1.1\\n") \n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t-\\t.\\tID=test1.2;Name=test1.2\\n") \n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000200\\t10000299\\t.\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n+        f.close()\n+        \n+    def _writeOutputFile_distance(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_2R\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t9999900\\t9999949\\t.\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000299\\t.\\t+\\t.\\tnbElements=5.000000;ID=test1.5--test1.2--test1.1--test1.7--test1.4;Name=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n+        f.write("arm_X\\tS-MART\\texon\\t10000000\\t10000099\\t.\\t+\\t.\\tID=test1.5--test1.2--test1.1--test1.7--test1.4-exon1;Name=test1.5--test1.2--test1.1--test1.7--test1.4-exon1;Parent=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n+        f.write("arm_X\\tS-MART\\texon\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.5--test1.2--test1.1--test1.7--test1.4-exon2;Name=test1.5--test1.2--test1.1--test1.7--test1.4-exon2;Parent=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n+        f.write("arm_X\\tS-MART\\texon\\t10000200\\t10000299\\t.\\t+\\t.\\tID=test1.5--test1.2--test1.1--test1.7--test1.4-exon3;Name=test1.5--test1.2--test1.1--test1.7--test1.4-exon3;Parent=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n+        f.close()\n+        \n+    def _writeOutputFile_2DiffStrandMapFormat(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("test1.3\\tarm_2R\\t10000100\\t10000200\\n")\n+        f.write("test1.6\\tarm_X\\t9999900\\t9999950\\n")\n+        f.write("test1.7--test1.4\\tarm_X\\t10000000\\t10000100\\n")\n+        f.write("test1.2--test1.1\\tarm_X\\t10000100\\t10000200\\n")\n+        f.write("test1.5\\tarm_X\\t10000200\\t10000300\\n")\n+        f.close()\n+                \n+\n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_ClusterizeByTags.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_ClusterizeByTags.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,144 @@\n+import unittest, os, os.path\n+from SMART.Java.Python.misc import Utils\n+from SMART.Java.Python.ClusterizeByTags import ClusterizeByTags\n+\n+class Test_F_ClusterizeByTags(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputFileName     = "inputFileTest.gff3"\n+        self._expOutputFileName = "expOutput.gff3"\n+        self._outputFileName    = "output.gff3"\n+        \n+    def tearDown(self):\n+        for file in (self._inputFileName, self._expOutputFileName, self._outputFileName):\n+            if os.path.exists(file):\n+                os.remove(file)\n+\n+    def test_diff_simple(self):\n+        handle = open(self._inputFileName, "w")\n+        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n+chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n+chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n+chr1\\tS-MART\\ttest1.4\\t700\\t800\\t.\\t+\\t.\\tName=test1.4;score=100;ID=test1.4\n+chr1\\tS-MART\\ttest1.5\\t900\\t1000\\t.\\t+\\t.\\tName=test1.5;score=110;ID=test1.5\n+chr1\\tS-MART\\ttest1.6\\t1100\\t1200\\t.\\t+\\t.\\tName=test1.6;score=105;ID=test1.6\n+""")\n+        handle.close()\n+        handle = open(self._expOutputFileName, "w")\n+        handle.write("""chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n+chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n+chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n+chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n+chr1\tS-MART\ttest1.4\t700\t1200\t315\t+\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n+chr1\tS-MART\texon\t700\t800\t100\t+\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n+chr1\tS-MART\texon\t900\t1000\t110\t+\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n+chr1\tS-MART\texon\t1100\t1200\t105\t+\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n+""")\n+        handle.close()\n+        cbt = ClusterizeByTags(0)\n+        cbt.setInputFile(self._inputFileName, "gff3")\n+        cbt.setOutputFile(self._outputFileName)\n+        cbt.setTag("score", None)\n+        cbt.setThreshold(20)\n+        cbt.setOperation("diff")\n+        cbt.setMaxDistance(None)\n+        cbt.run()\n+        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n+\n+    def test_diff_two_strands(self):\n+        handle = open(self._inputFileName, "w")\n+        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n+chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n+chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n+chr1\\tS-MART\\ttest1.4\\t700\\t800\\t.\\t-\\t.\\tName=test1.4;score=10;ID=test1.4\n+chr1\\tS-MART\\ttest1.5\\t900\\t1000\\t.\\t-\\t.\\tName=test1.5;score=15;ID=test1.5\n+chr1\\tS-MART\\ttest1.6\\t1100\\t1200\\t.\\t-\\t.\\tName=test1.6;score=15;ID=test1.6\n+""")\n+        handle.close()\n+        handle = open(self._expOutputFileName, "w")\n+        
handle.write("""chr1\tS-MART\ttest1.4\t700\t1200\t40\t-\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n+chr1\tS-MART\texon\t700\t800\t10\t-\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n+chr1\tS-MART\texon\t900\t1000\t15\t-\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n+chr1\tS-MART\texon\t1100\t1200\t15\t-\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n+chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n+chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n+chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n+chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n+""")\n+        handle.cl'..b'tance(None)\n+        cbt.run()\n+        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n+    \n+    def test_diff_one_strands(self):\n+        handle = open(self._inputFileName, "w")\n+        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n+chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n+chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n+chr1\\tS-MART\\ttest1.4\\t700\\t800\\t.\\t-\\t.\\tName=test1.4;score=10;ID=test1.4\n+chr1\\tS-MART\\ttest1.5\\t900\\t1000\\t.\\t-\\t.\\tName=test1.5;score=15;ID=test1.5\n+chr1\\tS-MART\\ttest1.6\\t1100\\t1200\\t.\\t-\\t.\\tName=test1.6;score=15;ID=test1.6\n+""")\n+        handle.close()\n+        handle = open(self._expOutputFileName, "w")\n+        handle.write("""chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n+chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n+chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n+chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n+chr1\tS-MART\ttest1.4\t700\t1200\t40\t-\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n+chr1\tS-MART\texon\t700\t800\t10\t-\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n+chr1\tS-MART\texon\t900\t1000\t15\t-\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n+chr1\tS-MART\texon\t1100\t1200\t15\t-\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n+""")\n+        handle.close()\n+        cbt = ClusterizeByTags(0)\n+        cbt.setInputFile(self._inputFileName, "gff3")\n+        cbt.setOutputFile(self._outputFileName)\n+        cbt.setTag("score", None)\n+        cbt.setThreshold(20)\n+        cbt.setOperation("diff")\n+        cbt.setMaxDistance(None)\n+        cbt.setOneStrand(True)\n+        cbt.run()\n+        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n+\n+    def test_diff_distance(self):\n+        handle = open(self._inputFileName, "w")\n+        
handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n+chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n+chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n+chr1\\tS-MART\\ttest1.4\\t1000\\t1100\\t.\\t+\\t.\\tName=test1.4;score=10;ID=test1.4\n+chr1\\tS-MART\\ttest1.5\\t1200\\t1300\\t.\\t+\\t.\\tName=test1.5;score=15;ID=test1.5\n+chr1\\tS-MART\\ttest1.6\\t1400\\t1500\\t.\\t+\\t.\\tName=test1.6;score=15;ID=test1.6\n+""")\n+        handle.close()\n+        handle = open(self._expOutputFileName, "w")\n+        handle.write("""chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n+chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n+chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n+chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n+chr1\tS-MART\ttest1.4\t1000\t1500\t40\t+\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n+chr1\tS-MART\texon\t1000\t1100\t10\t+\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n+chr1\tS-MART\texon\t1200\t1300\t15\t+\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n+chr1\tS-MART\texon\t1400\t1500\t15\t+\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n+""")\n+        handle.close()\n+        cbt = ClusterizeByTags(0)\n+        cbt.setInputFile(self._inputFileName, "gff3")\n+        cbt.setOutputFile(self._outputFileName)\n+        cbt.setTag("score", None)\n+        cbt.setThreshold(20)\n+        cbt.setOperation("diff")\n+        cbt.setMaxDistance(200)\n+        cbt.run()\n+        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n+\n+\n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_CollapseReads.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_CollapseReads.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,92 @@
+import unittest
+import os
+from SMART.Java.Python.CollapseReads import CollapseReads
+from SMART.Java.Python.misc import Utils
+from commons.core.utils.FileUtils import FileUtils
+from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample_NonOrder
+
+class Test_F_CollapseReads(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName = 'inputCR.gff3'
+        self._writeInput(self._inputFileName)
+        self._outputFileName = 'outputCR.gff3'
+        self._expOutputFileName = 'expOutputCR.gff3'
+        
+    def tearDown(self):
+        return
+
+    def test_run_default_option(self):
+        iCR = CollapseReads(0)
+        iCR.setInputFile(self._inputFileName, 'gff3')
+        iCR.setOutputFile(self._outputFileName)
+        iCR.strands = False
+        iCR.collapse()
+        self._writeExp_strand_False(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+            
+    def test_run_strand_option(self):
+        iCR = CollapseReads(0)
+        iCR.setInputFile(self._inputFileName, 'gff3')
+        iCR.setOutputFile(self._outputFileName)
+        iCR.strands = True
+        iCR.collapse()
+        self._writeExp_strand_True(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+        
+    def test_run_asScript_default_option(self):
+        cmd = 'python ../CollapseReads.py -i %s -f gff3 -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExp_strand_True(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))    
+        
+    def test_run_asScript_strand_option(self):
+        cmd = 'python ../CollapseReads.py -i %s -f gff3 -o %s -s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExp_strand_False(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))    
+        
+    def test_run_toOrderGff(self):
+        iMRE = MockFindOverlaps_randomExample_NonOrder(self._inputFileName, 'chrom', 10, 1000)    
+        iMRE.write()
+        iCR = CollapseReads(0)
+        iCR.setInputFile(self._inputFileName, 'gff3')
+        iCR.setOutputFile(self._outputFileName)
+        iCR.strands = False
+        iCR.collapse()
+        f = open(self._expOutputFileName, "w")
+        f.close()
+        cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._expOutputFileName, self._inputFileName)
+        os.system(cmd) 
+
+    def _writeInput(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\ttest\tmatch\t6155418\t6155441\t.\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
+        f.write("chr2\ttest\tmatch\t26303950\t26303981\t.\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
+        f.write("chr3\ttest\tmatch\t28320540\t28320574\t.\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
+        f.write("chr4\ttest\tmatch\t28565007\t28565041\t.\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
+        f.write("chr1\ttest\tmatch\t6155418\t6155441\t.\t+\t.\toccurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50;Name=test3/1\n")
+        f.write("chr1\ttest\tmatch\t6155418\t6155441\t.\t-\t.\toccurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50;Name=test3/1\n")
+        f.close()
+
+    def _writeExp_strand_False(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr1 S-MART match 6155418 6155441 . - . nbElements=3.000000;ID=test3/1;Name=test3/1--test3/1--test1/1
+chr2 S-MART match 26303950 26303981 . + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
+chr3 S-MART match 28320540 28320574 . + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
+chr4 S-MART match 28565007 28565041 . + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
+""")
+        f.close()
+        
+    def _writeExp_strand_True(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr1 S-MART match 6155418 6155441 . + . nbElements=2.000000;ID=test3/1;Name=test3/1--test1/1
+chr1 S-MART match 6155418 6155441 . - . occurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50;Name=test3/1
+chr2 S-MART match 26303950 26303981 . + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
+chr3 S-MART match 28320540 28320574 . + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
+chr4 S-MART match 28565007 28565041 . + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
+""")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
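For reference, the programmatic interface exercised above reduces to the sketch below; file names are illustrative, and the strands flag follows test_run_strand_option (judging from the two expected outputs, strands = True keeps the two strands separate):

    from SMART.Java.Python.CollapseReads import CollapseReads

    collapser = CollapseReads(0)                       # verbosity 0 (quiet)
    collapser.setInputFile("mappedReads.gff3", "gff3")
    collapser.setOutputFile("collapsed.gff3")
    collapser.strands = True                           # cf. _writeExp_strand_True above
    collapser.collapse()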
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_CombineTags.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_CombineTags.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,109 @@
+import unittest
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.parsing.GffParser import GffParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.CombineTags import CombineTags
+
+class Test_F_CombineTags(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName  = "inputFileTest.gff3"
+        self._outputFileName = "outputFileTest.gff3"
+        self._tag1           = "tag1"
+        self._tag2           = "tag2"
+        self._outputTag      = "outputTag"
+        
+    def tearDown(self):
+        os.remove(self._inputFileName)
+        os.remove(self._outputFileName)
+
+    def test_plus(self):
+        transcript = self._createDummyTranscript()
+        transcript.setTagValue(self._tag1, 1)
+        transcript.setTagValue(self._tag2, 2)
+        self._writeTranscript(transcript)
+        combiner = CombineTags(0)
+        combiner.setInputFile(self._inputFileName, "gff3")
+        combiner.setOutputFile(self._outputFileName)
+        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
+        combiner.setOperation("plus")
+        combiner.run()
+        self._testOutputTag(3)
+
+    def test_minus(self):
+        transcript = self._createDummyTranscript()
+        transcript.setTagValue(self._tag1, 2)
+        transcript.setTagValue(self._tag2, 1)
+        self._writeTranscript(transcript)
+        combiner = CombineTags(0)
+        combiner.setInputFile(self._inputFileName, "gff3")
+        combiner.setOutputFile(self._outputFileName)
+        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
+        combiner.setOperation("minus")
+        combiner.run()
+        self._testOutputTag(1)
+
+    def test_times(self):
+        transcript = self._createDummyTranscript()
+        transcript.setTagValue(self._tag1, 6)
+        transcript.setTagValue(self._tag2, 3)
+        self._writeTranscript(transcript)
+        combiner = CombineTags(0)
+        combiner.setInputFile(self._inputFileName, "gff3")
+        combiner.setOutputFile(self._outputFileName)
+        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
+        combiner.setOperation("times")
+        combiner.run()
+        self._testOutputTag(18)
+
+    def test_div(self):
+        transcript = self._createDummyTranscript()
+        transcript.setTagValue(self._tag1, 6)
+        transcript.setTagValue(self._tag2, 3)
+        self._writeTranscript(transcript)
+        combiner = CombineTags(0)
+        combiner.setInputFile(self._inputFileName, "gff3")
+        combiner.setOutputFile(self._outputFileName)
+        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
+        combiner.setOperation("div")
+        combiner.run()
+        self._testOutputTag(2)
+
+    def test_default(self):
+        transcript = self._createDummyTranscript()
+        transcript.setTagValue(self._tag1, 1)
+        self._writeTranscript(transcript)
+        combiner = CombineTags(0)
+        combiner.setInputFile(self._inputFileName, "gff3")
+        combiner.setOutputFile(self._outputFileName)
+        combiner.setTags(self._tag1, self._tag2, self._outputTag, 0)
+        combiner.setOperation("plus")
+        combiner.run()
+        self._testOutputTag(0)
+
+    def _createDummyTranscript(self):
+        transcript = Transcript()
+        transcript.setChromosome("chr1")
+        transcript.setName("test1")
+        transcript.setEnd(200)
+        transcript.setStart(100)
+        transcript.setDirection("+")
+        return transcript
+    
+    def _testOutputTag(self, value):
+        parser = GffParser(self._outputFileName, 0)
+        for transcript in parser.getIterator():
+            self.assertEquals(float(transcript.getTagValue(self._outputTag)), value)
+        parser.close()
+
+    def _writeTranscript(self, transcript):
+        f = open(self._inputFileName, "w")
+        f.write(transcript.printGff3("test"))
+        f.close()
+        
+
+if __name__ == "__main__":
+    unittest.main()
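The four operations covered above ("plus", "minus", "times", "div") all go through the same call sequence. A minimal sketch with illustrative tag and file names:

    from SMART.Java.Python.CombineTags import CombineTags

    combiner = CombineTags(0)                         # verbosity 0 (quiet)
    combiner.setInputFile("input.gff3", "gff3")
    combiner.setOutputFile("combined.gff3")
    combiner.setTags("tag1", "tag2", "outputTag", 0)  # last argument is the default value, cf. test_default
    combiner.setOperation("plus")                     # or "minus", "times", "div"
    combiner.run()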
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_FindOverlapsOptim.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_FindOverlapsOptim.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,500 @@\n+import unittest\n+import os\n+from SMART.Java.Python.misc import Utils\n+from commons.core.utils.FileUtils import FileUtils\n+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim\n+from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n+\n+class Test_F_FindOverlapsOptim(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputRefGff3FileName = \'sorted_Ref.gff3\'\n+        self._writeGFF3File(self._inputRefGff3FileName)\n+        self._inputQueryGff3FileName = \'sorted_Query.gff3\'\n+        self._writeQueryGff3File(self._inputQueryGff3FileName)\n+        self._outputGff3FileName = \'output.gff3\'\n+        self._expOutputFileName = \'expOutGff3.gff3\'\n+        \n+    def tearDown(self):\n+        for fileName in (self._inputRefGff3FileName, self._inputQueryGff3FileName, self._outputGff3FileName, self._expOutputFileName):\n+            if os.path.exists(fileName):\n+                os.remove(fileName)\n+        \n+    def test_run_general_asScript(self):\n+        cmd = \'python ../FindOverlapsOptim.py -i %s -f gff -j %s -g gff -o %s -v 0\' % (self._inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName)      \n+        os.system(cmd)\n+        self._writeExpOutFile_general(self._expOutputFileName)\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n+\n+    def test_run_overlap_special_case(self):\n+        inputQueryGff3FileName = \'query2.gff3\'\n+        self._writeQueryGff3File2(inputQueryGff3FileName)\n+        iFOO = FindOverlapsOptim(0)\n+        iFOO.setRefFileName(self._inputRefGff3FileName, "gff")\n+        iFOO.setQueryFileName(inputQueryGff3FileName, "gff")\n+        iFOO.setOutputFileName(self._outputGff3FileName)\n+        iFOO.run()\n+        iFOO.close()\n+        self._writeExpOutFile_special_case(self._expOutputFileName)        \n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))   \n+        os.remove(inputQueryGff3FileName) \n+        \n+    def test_run_overlap_special_case_asScript(self):\n+        inputQueryGff3FileName = \'query2.gff3\'\n+        self._writeQueryGff3File2(inputQueryGff3FileName)\n+        cmd = \'python ../FindOverlapsOptim.py -i %s -f gff -j %s -g gff -o %s -v 0\' % (inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName)      \n+        os.system(cmd) \n+        self._writeExpOutFile_special_case(self._expOutputFileName)        \n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n+        os.remove(inputQueryGff3FileName) \n+        \n+    def test_case_2(self):\n+        inputRefGff3FileName = \'ref_case2.gff3\'\n+        iMock = MockFindOverlapsWithServeralIntervals_case2()\n+        iMock.write(inputRefGff3FileName)\n+        inputQueryGff3FileName = \'query_case2.gff3\'\n+        self._writeQueryGff3File_case2(inputQueryGff3FileName)  \n+        iFOO = FindOverlapsOptim(0)\n+        iFOO.setRefFileName(inputRefGff3FileName, "gff")\n+        iFOO.setQueryFileName(inputQueryGff3FileName, "gff")\n+        iFOO.setOutputFileName(self._outputGff3FileName)\n+        iFOO.run()\n+        iFOO.close()\n+        self._writeExpOutFile_case2(self._expOutputFileName)        \n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))   \n+        os.remove(inputQueryGff3FileName) \n+        os.remove(inputRefGff3FileName) \n+        \n+    def test_case_3(self):\n+        
inputRefGff3FileName = \'ref_case3.gff3\'\n+        iMock = MockFindOverlapsWithServeralIntervals_case3()\n+        iMock.write(inputRefGff3FileName)\n+        inputQueryGff3FileName = \'query_case3.gff3\'\n+        self._writeQueryGff3File_case3(inputQueryGff3FileName)  \n+        iFOO = FindOverlapsOptim(0)\n+        iFOO.setRefFileName(inputRefGff3FileName, "gff")\n+        iFOO.setQueryFileName(inputQueryGff3FileName, "gff")\n+        iFOO.setOutputFileName(self._outputGff3FileName)\n+        iFOO.run()\n+        iFOO.close()\n+        self._writeExpOu'..b'ry_4;Name=test3.4\\n")\n+        f.write("chr1\\tquery\\ttest3.5\\t900\\t950\\t51\\t+\\t.\\tID=query_5;Name=test3.5\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case4(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest4.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test4.1\\n")\n+        f.write("chr1\\tquery\\ttest4.2\\t450\\t600\\t151\\t+\\t.\\tID=query_2;Name=test4.2\\n")\n+        f.write("chr1\\tquery\\ttest4.3\\t700\\t800\\t101\\t+\\t.\\tID=query_3;Name=test4.3\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case5(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest5.1\\t850\\t950\\t101\\t+\\t.\\tID=query_1;Name=test5.1\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case6(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest6.1\\t200\\t300\\t101\\t+\\t.\\tID=query_1;Name=test6.1\\n")\n+        f.write("chr1\\tquery\\ttest6.2\\t800\\t900\\t101\\t+\\t.\\tID=query_2;Name=test6.2\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case7(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest7.1\\t530\\t550\\t21\\t+\\t.\\tID=query_1;Name=test7.1\\n")\n+        f.write("chr1\\tquery\\ttest7.2\\t600\\t700\\t101\\t+\\t.\\tID=query_2;Name=test7.2\\n")\n+        f.write("chr1\\tquery\\ttest7.3\\t650\\t900\\t251\\t+\\t.\\tID=query_3;Name=test7.3\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case8(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest8.1\\t500\\t600\\t101\\t+\\t.\\tID=query_1;Name=test8.1\\n")\n+        f.write("chr1\\tquery\\ttest8.2\\t700\\t800\\t101\\t+\\t.\\tID=query_2;Name=test8.2\\n")\n+        f.write("chr1\\tquery\\ttest8.3\\t900\\t1100\\t201\\t+\\t.\\tID=query_3;Name=test8.3\\n")\n+        f.write("chr1\\tquery\\ttest8.4\\t1200\\t1300\\t101\\t+\\t.\\tID=query_4;Name=test8.4\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case9(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest9.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test9.1\\n")\n+        f.write("chr1\\tquery\\ttest9.2\\t550\\t650\\t101\\t+\\t.\\tID=query_2;Name=test9.2\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case10(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest10.1\\t700\\t800\\t101\\t+\\t.\\tID=query_1;Name=test10.1\\n")\n+        f.write("chr1\\tquery\\ttest10.2\\t900\\t1000\\t101\\t+\\t.\\tID=query_2;Name=test10.2\\n")\n+        f.write("chr1\\tquery\\ttest10.3\\t1100\\t1300\\t201\\t+\\t.\\tID=query_3;Name=test10.3\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case11(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest11.1\\t420\\t480\\t61\\t+\\t.\\tID=query_1;Name=test11.1\\n")\n+        
f.write("chr1\\tquery\\ttest11.2\\t450\\t715\\t266\\t+\\t.\\tID=query_2;Name=test11.2\\n")\n+        f.close()\n+        \n+    def _writeQueryGff3File_case12(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest12.1\\t200\\t400\\t.\\t+\\t.\\tID=query_1;Name=test12.1\\n")\n+        f.write("chr1\\tquery\\ttest12.2\\t600\\t900\\t.\\t+\\t.\\tID=query_2;Name=test12.2\\n")\n+        f.write("chr1\\tquery\\ttest12.3\\t700\\t1000\\t.\\t+\\t.\\tID=query_3;Name=test12.3\\n")\n+        f.close()\n+        \n+    def _writeGFF3File(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("chr1\\ttest\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n+        f.write("chr1\\ttest\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n+        f.write("chr1\\ttest\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n+        f.write("chr1\\ttest\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n+        f.write("chr1\\ttest\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n+        f.write("chr1\\ttest\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tID=test2.6;Name=test2.6\\n")\n+        f.write("chr1\\ttest\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test2.7;Name=test2.7\\n")\n+        f.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_GetDifferentialExpression.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_GetDifferentialExpression.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,50 @@
+import unittest
+import os, os.path, glob
+from SMART.Java.Python.GetDifferentialExpression import GetDifferentialExpression
+
+
+class Test_F_GetDifferentialExpression(unittest.TestCase):
+
+    def setUp(self):
+        self.inputFileName1 = "inputFile1.gff3"
+        self.inputFileName2 = "inputFile2.gff3"
+        self.refFileName    = "refFile.gff3"
+        self.outputFileName = "outputFile.gff3"
+        self.plotFileName   = "outputFile.png"
+         
+    def tearDown(self):
+        for fileRoot in (self.inputFileName1, self.inputFileName2, self.refFileName, self.outputFileName, self.plotFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+        os.remove(".RData")
+
+    def test_run_simple(self):
+        handle = open(self.inputFileName1, "w")
+        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1;nbElements=100\n")
+        handle.write("chr2\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test2;Name=test2;nbElements=1000\n")
+        handle.close()
+        handle = open(self.inputFileName2, "w")
+        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1;nbElements=200\n")
+        handle.write("chr2\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test2;Name=test2;nbElements=1000\n")
+        handle.close()
+        handle = open(self.refFileName, "w")
+        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1")
+        handle.close()
+        differentialExpression = GetDifferentialExpression(0)
+        differentialExpression.setInputFile(0, self.inputFileName1, "gff3")
+        differentialExpression.setInputFile(1, self.inputFileName2, "gff3")
+        differentialExpression.setReferenceFile(self.refFileName, "gff3")
+        differentialExpression.setOutputFile(self.outputFileName)
+        differentialExpression.setPlotterName(self.plotFileName)
+        differentialExpression.setPlotter()
+        differentialExpression.getDifferentialExpression()
+        differentialExpression.mySqlConnection.deleteDatabase()
+        handle = open(self.outputFileName)
+        lines = handle.readlines()
+        self.assertEquals(len(lines), 1)
+        self.assertEquals(lines[0], "chr1\tS-MART\ttranscript\t1000\t2000\t.\t+\t.\tnbReadsCond1=100;nbReadsCond2=200;ID=test1;regulation=up;nbUnnormalizedReadsCond2=200;nbUnnormalizedReadsCond1=100;pValue=6.010045e-08;Name=test1\n")
+        self.assertTrue(os.path.exists(self.plotFileName))
+        
+if __name__ == "__main__":
+    unittest.main()
+
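The single test above already spells out the full driver sequence; a sketch of the same calls with illustrative file names follows. The .RData clean-up in tearDown suggests the statistics are delegated to R, so R is presumably required at run time (an inference, not stated in the changeset):

    from SMART.Java.Python.GetDifferentialExpression import GetDifferentialExpression

    de = GetDifferentialExpression(0)               # verbosity 0 (quiet)
    de.setInputFile(0, "condition1.gff3", "gff3")   # first condition
    de.setInputFile(1, "condition2.gff3", "gff3")   # second condition
    de.setReferenceFile("reference.gff3", "gff3")
    de.setOutputFile("differential.gff3")           # output regions carry regulation= and pValue= tags
    de.setPlotterName("differential.png")
    de.setPlotter()
    de.getDifferentialExpression()
    de.mySqlConnection.deleteDatabase()             # drop the temporary database, as the test does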
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_GetFlanking.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_GetFlanking.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,238 @@\n+import unittest\n+import os, os.path, glob\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.GetFlanking import GetFlanking\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from commons.core.parsing.GffParser import GffParser\n+\n+class Test_F_GetFlanking(unittest.TestCase):\n+\n+    def setUp(self):\n+        self.queryFileName     = "testQuery.gff3"\n+        self.referenceFileName = "testReference.gff3"\n+        self.outputFileName    = "testOutput.gff3"\n+         \n+    def tearDown(self):\n+        for fileRoot in (self.queryFileName, self.referenceFileName, self.outputFileName):\n+            for file in glob.glob("%s*" % (fileRoot)):\n+                os.remove(file)\n+\n+    def test_run_simple(self):\n+        #return\n+        reference1 = self._createTranscript("chr1", 1000, 1100, "+", "ref1")\n+        reference2 = self._createTranscript("chr1", 2000, 2100, "+", "ref2")\n+        reference3 = self._createTranscript("chr1", 1000000, 1200000, "+", "ref3")\n+        writer = Gff3Writer(self.referenceFileName, 0)\n+        writer.addTranscript(reference1)\n+        writer.addTranscript(reference2)\n+        writer.addTranscript(reference3)\n+        writer.close()\n+        query1 = self._createTranscript("chr1", 100, 200, "+", "query1")\n+        query2 = self._createTranscript("chr1", 10000, 10100, "+", "query2")\n+        writer = Gff3Writer(self.queryFileName, 0)\n+        writer.addTranscript(query1)\n+        writer.addTranscript(query2)\n+        writer.close()\n+        gf = GetFlanking(0)\n+        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n+        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n+        gf.setOutputFile(self.outputFileName)\n+        gf.run()\n+        parser = GffParser(self.outputFileName)\n+        self.assertEqual(parser.getNbTranscripts(), 2)\n+        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n+            if i == 0:\n+                self._checkTranscript(transcript, "chr1", 100, 200, "+", "query1")\n+                self.assertEqual(transcript.getTagValue("flanking"), "ref1")\n+                self.assertEqual(transcript.getTagValue("_region_flanking"), "downstream")\n+                self.assertEqual(transcript.getTagValue("_sense_flanking"), "collinear")\n+            else:\n+                self._checkTranscript(transcript, "chr1", 10000, 10100, "+", "query2")\n+                self.assertEqual(transcript.getTagValue("flanking"), "ref2")\n+                self.assertEqual(transcript.getTagValue("_region_flanking"), "upstream")\n+                self.assertEqual(transcript.getTagValue("_sense_flanking"), "collinear")\n+\n+    def test_run_simple_downstream(self):\n+        return\n+        reference1 = self._createTranscript("chr1", 300, 400, "+", "ref1")\n+        reference2 = self._createTranscript("chr1", 1000, 1100, "+", "ref2")\n+        writer = Gff3Writer(self.referenceFileName, 0)\n+        writer.addTranscript(reference1)\n+        writer.addTranscript(reference2)\n+        writer.close()\n+        query1 = self._createTranscript("chr1", 100, 200, "+", "query1")\n+        query2 = self._createTranscript("chr1", 1200, 1300, "+", "query2")\n+        query3 = self._createTranscript("chr1", 1400, 1500, "+", "query3")\n+        writer = Gff3Writer(self.queryFileName, 0)\n+        writer.addTranscript(query1)\n+        writer.addTranscript(query2)\n+        writer.addTranscript(query3)\n+        
writer.close()\n+        gf = GetFlanking(0)\n+        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n+        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n+        gf.setOutputFile(self.outputFileName)\n+        gf.addDownstreamDirection(True)\n+        gf.run()\n+        parser = GffParser(self.outputFileName)\n+        self.assertEqual(parser.getNbTranscripts(), 3)\n+        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n+            if i == 0:\n+                self._checkTranscript'..b' "chr1", 100, 200, "+", "query1")\n+                self.assertIsNone(transcript.getTagValue("flanking_upstream"))\n+            if i == 1:\n+                self._checkTranscript(transcript, "chr1", 300, 400, "+", "query2")\n+                self.assertIsNone(transcript.getTagValue("flanking_upstream"))\n+            if i == 2:\n+                self._checkTranscript(transcript, "chr1", 1200, 1300, "+", "query3")\n+                self.assertEqual(transcript.getTagValue("flanking_upstream"), "ref2")\n+\n+    def test_run_simple_colinear(self):\n+        return\n+        reference1 = self._createTranscript("chr1", 100, 200, "+", "ref1")\n+        reference2 = self._createTranscript("chr1", 1000, 1100, "+", "ref2")\n+        reference3 = self._createTranscript("chr1", 1600, 1700, "+", "ref3")\n+        writer = Gff3Writer(self.referenceFileName, 0)\n+        writer.addTranscript(reference1)\n+        writer.addTranscript(reference2)\n+        writer.addTranscript(reference3)\n+        writer.close()\n+        query1 = self._createTranscript("chr1", 1200, 1300, "-", "query1")\n+        query2 = self._createTranscript("chr1", 1400, 1500, "+", "query2")\n+        writer = Gff3Writer(self.queryFileName, 0)\n+        writer.addTranscript(query1)\n+        writer.addTranscript(query2)\n+        writer.close()\n+        gf = GetFlanking(0)\n+        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n+        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n+        gf.setOutputFile(self.outputFileName)\n+        gf.addUpstreamDirection(True)\n+        gf.setColinear(True)\n+        gf.run()\n+        parser = GffParser(self.outputFileName)\n+        self.assertEqual(parser.getNbTranscripts(), 2)\n+        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n+            if i == 0:\n+                self._checkTranscript(transcript, "chr1", 1200, 1300, "-", "query1")\n+                self.assertIsNone(transcript.getTagValue("flanking"))\n+            if i == 1:\n+                self._checkTranscript(transcript, "chr1", 1400, 1500, "+", "query2")\n+                self.assertEqual(transcript.getTagValue("flanking_upstream"), "ref2")\n+\n+    def test_run_simple_max_distance(self):\n+        return\n+        reference = self._createTranscript("chr1", 1000, 1100, "+", "ref")\n+        writer = Gff3Writer(self.referenceFileName, 0)\n+        writer.addTranscript(reference)\n+        writer.close()\n+        query1 = self._createTranscript("chr1", 2000, 2100, "-", "query1")\n+        writer = Gff3Writer(self.queryFileName, 0)\n+        writer.addTranscript(query1)\n+        writer.close()\n+        gf = GetFlanking(0)\n+        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n+        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n+        gf.setOutputFile(self.outputFileName)\n+        gf.setMaxDistance(100)\n+        gf.run()\n+        parser = GffParser(self.outputFileName)\n+        
self.assertEqual(parser.getNbTranscripts(), 1)\n+        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n+            if i == 0:\n+                self._checkTranscript(transcript, "chr1", 2000, 2100, "-", "query1")\n+                self.assertIsNone(transcript.getTagValue("flanking"))\n+\n+    def _createTranscript(self, chromosome, start, end, strand, name):\n+        transcript = Transcript()\n+        transcript.setChromosome(chromosome)\n+        transcript.setStart(start)\n+        transcript.setEnd(end)\n+        transcript.setDirection(strand)\n+        transcript.setName(name)\n+        return transcript\n+\n+    def _checkTranscript(self, transcript, chromosome, start, end, strand, name):\n+        self.assertEqual(transcript.getChromosome(), chromosome)\n+        self.assertEqual(transcript.getStart(), start)\n+        self.assertEqual(transcript.getEnd(), end)\n+        self.assertEqual(transcript.getStrand(), strand)\n+        self.assertEqual(transcript.getName(), name)\n+\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n'
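All GetFlanking scenarios above share the same skeleton. A minimal sketch with illustrative file names; as in the tests, index 0 marks the query file and index 1 the reference, and the optional restrictions are shown commented out:

    from SMART.Java.Python.GetFlanking import GetFlanking

    gf = GetFlanking(0)                             # verbosity 0 (quiet)
    gf.setInputFile("query.gff3", "gff3", 0)        # 0: query
    gf.setInputFile("reference.gff3", "gff3", 1)    # 1: reference
    gf.setOutputFile("flanking.gff3")
    # gf.addUpstreamDirection(True)                 # cf. test_run_simple_upstream cases
    # gf.addDownstreamDirection(True)               # cf. test_run_simple_downstream
    # gf.setColinear(True)                          # cf. test_run_simple_colinear
    # gf.setMaxDistance(100)                        # cf. test_run_simple_max_distance
    gf.run()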
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_GetRandomSubset.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_GetRandomSubset.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,55 @@
+import unittest
+import os
+from SMART.Java.Python.GetRandomSubset import GetRandomSubset
+from commons.core.parsing.BedParser import BedParser
+from commons.core.parsing.GffParser import GffParser
+
+class Test_F_GetRandomSubset(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName = "inputFileTest.bed"
+        self._writeInputFile()
+        self._outputFileName = "output.gff3"
+        
+    def tearDown(self):
+        os.remove(self._inputFileName)
+        os.remove(self._outputFileName)
+
+    def test_run_simple(self):
+        nbElements = 2
+        inputTranscripts = []
+        parser = BedParser(self._inputFileName, 0)
+        for transcript in parser.getIterator():
+            inputTranscripts.append(transcript)
+        grs = GetRandomSubset(0)
+        grs.setInputFile(self._inputFileName, "bed")
+        grs.setNumber(nbElements, None)
+        grs.setOutputFile(self._outputFileName)
+        grs.run()
+        outputTranscripts = []
+        parser = GffParser(self._outputFileName, 0)
+        for transcript in parser.getIterator():
+            outputTranscripts.append(transcript)
+        self.assertEqual(len(outputTranscripts), nbElements)
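+        # every selected transcript must correspond to one of the input transcripts; the for/else below fails the test when no match is found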
+        for outputTranscript in outputTranscripts:
+            for inputTranscript in inputTranscripts:
+                if (outputTranscript.getChromosome() == inputTranscript.getChromosome()
+                        and outputTranscript.getDirection() == inputTranscript.getDirection()
+                        and outputTranscript.getStart() == inputTranscript.getStart()
+                        and outputTranscript.getEnd() == inputTranscript.getEnd()
+                        and outputTranscript.getName() == inputTranscript.getName()):
+                    break
+            else:
+                self.fail()
+
+    def _writeInputFile(self):
+        f = open(self._inputFileName, "w")
+        f.write("arm_X\t10000100\t10000200\ttest1.1\t100\t+\t10000100\t10000200\t0\t1\t100,\t0,\n")
+        f.write("arm_X\t10000100\t10000200\ttest1.2\t100\t-\t10000100\t10000200\t0\t1\t100,\t0,\n")
+        f.write("arm_2R\t10000100\t10000200\ttest1.3\t100\t+\t10000100\t10000200\t0\t1\t100,\t0,\n")
+        f.write("arm_X\t10000000\t10000100\ttest1.4\t100\t+\t10000000\t10000100\t0\t1\t100,\t0,\n")
+        f.write("arm_X\t10000200\t10000300\ttest1.5\t100\t+\t10000200\t10000300\t0\t1\t100,\t0,\n")
+        f.write("arm_X\t9999900\t9999950\ttest1.6\t100\t+\t9999900\t9999950\t0\t1\t50,\t0,\n")
+        f.write("arm_X\t10000000\t10000050\ttest1.7\t100\t-\t10000000\t10000050\t0\t1\t50,\t0,\n")
+        f.close()
+        
+                
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_GetSizes.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_GetSizes.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,78 @@
+import unittest
+import os, glob
+import subprocess
+from SMART.Java.Python.getSizes import GetSizes
+
+class Test_F_GetSizes(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFastaFileName =  "inputFile.fa"
+        self._inputFastqFileName =  "inputFile.fq"
+         
+    def tearDown(self):
+        try:
+            os.remove(self._inputFastaFileName)
+        except OSError:
+            pass
+    
+    def test_run(self):
+        self.writeInputFasta(self._inputFastaFileName)
+        format = "fasta"
+        iGetSizes = GetSizes(inFileName = self._inputFastaFileName, inFormat=format)
+        iGetSizes.run()
+        
+        self.assertEquals(iGetSizes.items, 3)
+        self.assertEquals(iGetSizes.subItems, 3)
+        self.assertEquals(iGetSizes.nucleotides, 108)
+        self.assertEquals(iGetSizes.minAvgMedMax, (36, 36.0, 36, 36))
+    
+    def test_run_as_script(self):
+        self.writeInputFasta(self._inputFastaFileName)
+        format = "fasta"
+        cmd = "%s/SMART/Java/Python/getSizes.py -i %s -f %s -v 0" % (os.environ["REPET_PATH"], self._inputFastaFileName, format)
+        process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        obsOutput = process.communicate()[0]
+        expOutput = """3 items\n3 sub-items\n108 nucleotides\nmin/avg/med/max transcripts: 36/36.00/36.0/36\n"""
+        
+        self.assertEquals(expOutput, obsOutput)
+    
+    def writeInputFile(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\ttest2.1\t9\t1000\t1001\t+\t.\tName=test2.1;overlapsWith=query_3;ID=test2.1\n")
+        f.write("chr1\tS-MART\ttest2.3\t100\t600\t501\t+\t.\tName=test2.3;overlapsWith=query_3;ID=test2.3\n")
+        f.write("chr1\tS-MART\ttest2.5\t700\t950\t251\t+\t.\tName=test2.5;overlapsWith=query_3;ID=test2.5\n")
+        f.write("chr1\tS-MART\ttest2.6\t800\t900\t101\t+\t.\tName=test2.6;overlapsWith=query_3;ID=test2.6\n")
+        f.close()     
+        
+    
+    def writeInputFasta(self,inFileName):
+        f = open(inFileName,'w')
+        f.write('>HWI-EAS337_3:7:1:415:1217/1\n')
+        f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
+        f.write('>HWI-EAS337_3:7:1:208:1489/1\n')
+        f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
+        f.write('>HWI-EAS337_3:7:1:278:1153\n')
+        f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
+        f.close()
+
+
+    def writeInputFastq(self,inFileName):
+        f = open(inFileName,'w')
+        f.write('@HWI-EAS337_3:7:1:415:1217/1\n')
+        f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
+        f.write('+HWI-EAS337_3:7:1:415:1217/1\n')
+        f.write('WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n')
+        f.write('@HWI-EAS337_3:7:1:208:1489/1\n')
+        f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
+        f.write('+HWI-EAS337_3:7:1:208:1489/1\n')
+        f.write('WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n')
+        f.write('@HWI-EAS337_3:7:1:278:1153/1\n')
+        f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
+        f.write('+HWI-EAS337_3:7:1:278:1153/1\n')
+        f.write('WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n')
+        f.close()
+        
+            
+        
+if __name__ == "__main__":
+    unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_RestrictFromCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_RestrictFromCoverage.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,133 @@
+import unittest
+import os, os.path
+from optparse import OptionParser
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.RestrictFromCoverage import RestrictFromCoverage
+
+REFERENCE = 0
+QUERY     = 1
+
+class Test_F_RestrictFromCoverage(unittest.TestCase):
+
+    def setUp(self):
+        self._queryFileName     = "testQuery.gff3"
+        self._refFileName       = "testRef.gff3"
+        self._outputFileName    = "output.gff3"
+        self._writers           = {QUERY: Gff3Writer(self._queryFileName, 0), REFERENCE: Gff3Writer(self._refFileName, 0)}
+        self._writeQuery()
+        
+    def tearDown(self):
+        for file in (self._queryFileName, self._refFileName, self._outputFileName):
+            if os.path.exists(file):
+                os.remove(file)
+
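+    # every test uses the same single query transcript, chr1:1000-2000 on the "+" strand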
+    def _writeQuery(self):
+        self._addTranscript(QUERY, 1, 1000, 2000, "+")
+        self._writers[QUERY].close()
+
+    def _writeReferences(self, values):
+        for value in values:
+            self._addTranscript(REFERENCE, value["cpt"], value["start"], value["end"], value["strand"])
+        self._writers[REFERENCE].close()
+
+    def _addTranscript(self, type, cpt, start, end, strand):
+        t = Transcript()
+        t.setChromosome("chr1")
+        t.setName("test%d" % (cpt))
+        t.setStart(start)
+        t.setEnd(end)
+        t.setDirection(strand)
+        self._writers[type].addTranscript(t)
+
+    def _checkTranscript(self, transcript, start, end, strand):
+        self.assertEquals(transcript.getStart(),     start)
+        self.assertEquals(transcript.getEnd(),       end)
+        self.assertEquals(transcript.getDirection(), strand)
+
+    def _startTool(self, minNucleotides = None, maxNucleotides = None, minPercent = None, maxPercent = None, minOverlap = None, maxOverlap = None, strands = False):
+        rfc = RestrictFromCoverage(0)
+        rfc.setInputFileName(self._queryFileName, "gff3", QUERY)
+        rfc.setInputFileName(self._refFileName,   "gff3", REFERENCE)
+        rfc.setOutputFileName(self._outputFileName)
+        rfc.setNbNucleotides(minNucleotides, maxNucleotides)
+        rfc.setPercent(minPercent, maxPercent)
+        rfc.setOverlap(minOverlap, maxOverlap)
+        rfc.setStrands(strands)
+        rfc.run()
+        return GffParser(self._outputFileName, 0)
+
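+    # _startTool argument order: minNucleotides, maxNucleotides, minPercent, maxPercent, minOverlap, maxOverlap, strands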
+    def test_simple(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}])
+        parser = self._startTool()
+        self.assertEquals(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, 1000, 2000, 1)
+
+    def test_nbOverlapsMin_pos(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}, {"cpt": 2, "start": 1000, "end": 2000, "strand": "+"}])
+        parser = self._startTool(1, None, None, None, 2)
+        self.assertEquals(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, 1000, 2000, 1)
+
+    def test_nbOverlapsMin_neg(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}])
+        parser = self._startTool(1, None, None, None, 2)
+        self.assertEquals(parser.getNbTranscripts(), 0)
+
+    def test_nbOverlapsMax_pos(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}])
+        parser = self._startTool(1, None, None, None, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, 1000, 2000, 1)
+
+    def test_nbOverlapsMax_neg(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}, {"cpt": 2, "start": 1000, "end": 2000, "strand": "+"}])
+        parser = self._startTool(1, None, None, None, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 0)
+
+    def test_nbNucleotidesMin_pos(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1100, "strand": "+"}])
+        parser = self._startTool(100, None, None, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, 1000, 2000, 1)
+
+    def test_nbNucleotidesMin_neg(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1100, "strand": "+"}])
+        parser = self._startTool(200, None, None, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 0)
+
+    def test_PercentMin_pos(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "+"}])
+        parser = self._startTool(None, None, 50, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, 1000, 2000, 1)
+
+    def test_PercentMin_neg(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "+"}])
+        parser = self._startTool(None, None, 100, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 0)
+
+    def test_NoStrand_neg(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "-"}])
+        parser = self._startTool(1, None, None, None, 1)
+        self.assertEquals(parser.getNbTranscripts(), 1)
+
+    def test_strand_pos(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "+"}])
+        parser = self._startTool(1, None, None, None, 1, None, True)
+        self.assertEquals(parser.getNbTranscripts(), 1)
+
+    def test_strand_neg(self):
+        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "-"}])
+        parser = self._startTool(1, None, None, None, 1, None, True)
+        self.assertEquals(parser.getNbTranscripts(), 0)
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_clusterizeBySlidingWindows.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_clusterizeBySlidingWindows.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,79 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from SMART.Java.Python.misc import Utils
+
+
+class Test_F_clusterizeBySlidingWindows(unittest.TestCase):
+
+    def setUp(self):
+        self._outputFileName         = 'output.gff3'
+        self._outputCsvFileName      = 'output.csv'
+        self._outputPngFileName      = 'output.png'
+        self._expectedOutputFileName = 'expOut.png'
+        
+    def tearDown(self):
+        for fileName in (self._outputFileName, self._outputCsvFileName, self._outputPngFileName, self._expectedOutputFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+        
+    def test_run_default_option(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerExpected.gff3 -f gff3 -o %s -s 50000 -e 50 -v 0" % (self._outputFileName)
+        os.system(cmd)
+        exp = '../TestFiles/clusterize_default_expected.gff3' 
+        self.assertTrue(Utils.diff(exp, self._outputFileName))
+        
+    def test_run_default_option_map_output(self):
+        self._outputFileName         = 'output.map'
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerExpected.gff3 -f gff3 -o %s -s 50000 -e 50 -u map -v 0" % (self._outputFileName)
+        os.system(cmd)
+        exp = '../TestFiles/clusterize_default_expected.map' 
+        self.assertTrue(Utils.diff(exp, self._outputFileName))
+        
+    def test_run_newTag_option(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerExpected.gff3 -f gff3 -o %s -s 50000 -e 50 -w newTag -v 0" % (self._outputFileName)
+        os.system(cmd)
+        exp = '../TestFiles/clusterize_output_tag_expected.gff3' 
+        self.assertTrue(Utils.diff(exp, self._outputFileName))
+        
+    def test_run_normalize_option(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -m -v 0" % (self._outputFileName)
+        os.system(cmd)
+        exp = '../TestFiles/clusterize_normalize_expected.gff3' 
+        self.assertTrue(Utils.diff(exp, self._outputFileName))
+
+    def test_run_strand_option(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -2 -v 0"  % (self._outputFileName)
+        os.system(cmd)
+        exp = '../TestFiles/clusterize_strands_expected.gff3'
+        self.assertTrue(Utils.diff(exp, self._outputFileName))
+
+    def test_run_excel_option(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -x %s -v 0" % (self._outputFileName, self._outputCsvFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self._outputCsvFileName))
+        
+        
+    def test_run_normalize_strand_excel_option(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -m -2 -x %s -v 0" % (self._outputFileName, self._outputCsvFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self._outputCsvFileName))
+      
+    def test_run_tag_operation_options(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/testSW.gff3 -f gff -s 100 -e 0 -g value1 -r min -o %s -v 0" % (self._outputFileName)
+        os.system(cmd)
+        exp = open(self._expectedOutputFileName, 'w')
+        exp.write("chr1\tS-MART\ttranscript\t1\t100\t.\t+\t.\tnbElements=0;minValue1=0;ID=region1;Name=region1\n")
+        exp.write("chr1\tS-MART\ttranscript\t101\t200\t.\t+\t.\tnbElements=2.000000;minValue1=1.0;ID=region2;Name=region2\n")
+        exp.write("chr1\tS-MART\ttranscript\t201\t300\t.\t+\t.\tnbElements=2.000000;minValue1=10.0;ID=region3;Name=region3\n")
+        exp.close()   
+        self.assertTrue(Utils.diff(self._outputFileName, self._expectedOutputFileName))
+        
+    def test_run_plot_options(self):
+        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/testSW.gff3 -f gff -s 100 -e 0 -g value1 -r min -o %s -p %s -v 0" % (self._outputFileName, self._outputPngFileName)
+        os.system(cmd)     
+        self.assertTrue(FileUtils.isRessourceExists(self._outputPngFileName))
+        
+        
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_compareOverlapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_compareOverlapping.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,199 @@\n+import unittest\n+import os, os.path\n+from SMART.Java.Python.misc import Utils\n+\n+SMART_PATH = os.environ["REPET_PATH"] + "/SMART"\n+\n+class Test_F_compareOverlapping(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputFileQuery     = "inputFileTest1.bed"\n+        self._inputFileReference = "inputFileTest2.bed"\n+        self._inputFileName3     = "inputFileTest3.bed"\n+        self._expOutputFileName  = "expOutput.gff3"  \n+        self._outputFileName     = "output.gff3"   \n+        self._writeInputFileQuery()\n+        self._writeInputFileRefence()\n+        self._writeInputFile3()  \n+        \n+    def tearDown(self):\n+        for fileName in (self._inputFileQuery, self._inputFileReference, self._inputFileName3, self._expOutputFileName, self._outputFileName):\n+            if os.path.exists(fileName):\n+                os.remove(fileName)\n+\n+    def test_runAsScript_withoutOption(self):\n+        self._writeOutputFile_withoutOption(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+            \n+    def test_runAsScript_optionNFirstNucleotide(self):\n+        self._writeOutputFile_NFirstNucleotide(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -S 200 -s 200 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_runAsScript_optionNLastNucleotide(self):\n+        self._writeOutputFile_NLastNucleotide(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -U 200 -u 200 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_runAsScript_option_5PrimeExtension(self):\n+        self._writeOutputFile_5PrimeExtension(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -E 110 -e 110 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_runAsScript_option_3PrimeExtension(self):\n+        self._writeOutputFile_3PrimeExtension(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -N 110 -n 110 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+        \n+    def test_runAsScript_colinear(self):\n+        self._writeOutputFile_colinear(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -c -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_runAsScript_antisense(self):\n+        self._writeOutputFile_antisense(self._expOutputFileName) \n+        os.system("python 
%s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -a -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_runAsScript_minOverlap(self):\n+        self._writeOutputFile_minOverlap(self._expOutputFileName) \n+        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -m 51 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputF'..b'890\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close()   \n+        \n+    def _writeOutputFile_colinear(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.4;Name=test1.4\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n+        f.close()    \n+        \n+    def _writeOutputFile_antisense(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close()  \n+           \n+    def _writeOutputFile_minOverlap(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.3,test2.2,test2.1;nbOverlaps=3.000000;ID=test1.4;Name=test1.4\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close() \n+        \n+    def _writeOutputFile_pcOverlap(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close() \n+        \n+    def _writeOutputFile_includeNotOverlap(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.4;Name=test1.4\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t199\\t.\\t+\\t.\\tnbOverlaps=0;ID=test1.3;Name=test1.3\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close()\n+\n+    def _writeOutputFile_exclude(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n+        f.close()\n+        \n+    def _writeOutputFile_included(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n+        
f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close()\n+\n+    def _writeOutputFile_including(self, outputFileName):\n+        f = open(outputFileName, "w")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.4;Name=test1.4\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n+        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n+        f.close()\n+\n+    def _writeInputFileQuery_withExon(self):\n+        f = open(self._inputFileQuery, "w")\n+        f.write("arm_X\\t1000\\t2000\\ttest1.1\\t1000\\t+\\t1000\\t2000\\t0\\t1\\t1000,\\t0,\\n")\n+        f.write("arm_X\\t1000\\t2000\\ttest1.2\\t1000\\t-\\t1000\\t2000\\t0\\t1\\t1000,\\t0,\\n")\n+        f.write("arm_X\\t100\\t200\\ttest1.3\\t1000\\t+\\t100\\t200\\t0\\t1\\t100,\\t0,\\n")\n+        f.write("arm_X\\t100\\t3200\\ttest1.4\\t1000\\t+\\t100\\t3200\\t0\\t2\\t100,100,\\t0,3000,\\n")\n+        f.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_convertTranscriptFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_convertTranscriptFile.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,125 @@\n+import unittest\n+import os\n+from SMART.Java.Python.misc import Utils\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_F_convertTranscriptFile(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputFileName     = None\n+        self._expOutputFileName = None\n+        self._outputFileName    = None\n+\n+    def tearDown(self):\n+        for fileName in (self._expOutputFileName, self._outputFileName):\n+            if fileName != None and os.path.exists(fileName):\n+                os.remove(fileName)\n+\n+    def test_run_SAMtoGFF3(self):\n+        self._inputFileName = "%s/SMART/Java/Python/test/input.sam" % (os.environ["REPET_PATH"])\n+        self._writeInputSam(self._inputFileName)\n+        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.gff3\' % (os.environ["REPET_PATH"])\n+        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f sam -o %s -g gff3 -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n+        os.system(cmd)\n+        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.gff3\' % (os.environ["REPET_PATH"])\n+        self._writeExpGff3File_test1(self._expOutputFileName)\n+        \n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+        os.remove(self._inputFileName)\n+        \n+    def test_run_BEDtoGFF3(self):\n+        self._inputFileName = "%s/SMART/Java/Python/TestFiles/test_distance.bed" % (os.environ["REPET_PATH"])\n+        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.gff3\' % (os.environ["REPET_PATH"])\n+        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f bed -o %s -g gff3 -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n+        os.system(cmd)\n+        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.gff3\' % (os.environ["REPET_PATH"])\n+        self._writeExpGff3File_test2(self._expOutputFileName)\n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+              \n+    def test_run_GFF3toCSV(self):\n+        self._inputFileName = "%s/SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3" % (os.environ["REPET_PATH"])\n+        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.csv\' % (os.environ["REPET_PATH"])\n+        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f gff3 -o %s -g csv -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n+        os.system(cmd)\n+        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.csv\' % (os.environ["REPET_PATH"])\n+        self._writeExpCsvFile(self._expOutputFileName)\n+        \n+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+              \n+    def test_run_GFF3toSam(self):\n+        self._inputFileName = "%s/SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3" % (os.environ["REPET_PATH"])\n+        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.sam\' % (os.environ["REPET_PATH"])\n+        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f gff3 -o %s -g sam -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n+        os.system(cmd)\n+        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.sam\' % (os.environ["REPET_PATH"])\n+        self._writeExpSamFile(self._expOutputFileName)\n+        
self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n+\n+    def test_run_Gff3toWig(self):\n+        self._inputFileName = \'%s/SMART/Java/Python/TestFiles/sorted_query.gff3\' % (os.environ["REPET_PATH"])\n+        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.wig\' % (os.environ["REPET_PATH"])\n+        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f gff3 -o %s -g wig -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n+        os.system(cmd) \n+        outputFile = \'%s/SMART/Java/Python/Test'..b'WWWWVWWWWVVWWWWWVVWWWWVVWWTTTTTR\\tXT:A:U\\tNM:i:0\\tSM:i:37\\tAM:i:37\\tX0:i:1\\tX1:i:0\\tXM:i:0\\tXO:i:0\\tXG:i:0\\tMD:Z:36\\n\' )\n+        file.close()\n+        \n+    def _writeExpGff3File_test1(self, fileName):\n+        file = open(fileName, \'w\')\n+        file.write( \'C02HBa0185P07_LR40\\tSMART\\ttranscript\\t3889\\t3924\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\\n\')\n+        file.write( \'C02HBa0185P07_LR40\\tSMART\\ttranscript\\t3830\\t3865\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\\n\')\n+        file.write( \'C11SLe0053P22_LR298\\tSMART\\ttranscript\\t2130\\t2165\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\\n\')\n+        file.write( \'C11SLe0053P22_LR298\\tSMART\\ttranscript\\t1980\\t2015\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\\n\')\n+        file.write( \'C06HBa0144J05_LR355\\tSMART\\ttranscript\\t1\\t36\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\\n\')\n+        file.write( \'C06HBa0144J05_LR355\\tSMART\\ttranscript\\t101\\t136\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\\n\')\n+        file.write( \'C08HBa0165B06_LR218\\tSMART\\ttranscript\\t3619\\t3654\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\\n\')\n+        file.write( \'C08HBa0165B06_LR218\\tSMART\\ttranscript\\t3575\\t3610\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\\n\')\n+        file.close()\n+        \n+    def _writeExpGff3File_test2(self, fileName):\n+        file = open(fileName, \'w\')\n+        file.write( \'arm_X\\tSMART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\tID=test2.1;Name=test2.1\\n\' )\n+        file.write( \'arm_X\\tSMART\\ttranscript\\t250\\t349\\t.\\t+\\t.\\tID=test2.2;Name=test2.2\\n\' )\n+        file.write( \'arm_X\\tSMART\\ttranscript\\t150\\t249\\t.\\t+\\t.\\tID=test2.3;Name=test2.3\\n\' )\n+        file.close()\n+        \n+    def _writeExpCsvFile(self, fileName):\n+        file = open(fileName, \'w\')      \n+        file.write( 
\'chromosome,start,end,strand,exons,ID,bestRegion,feature,identity,nbGaps,nbMismatches,nbOccurrences,occurrence,rank,score\\n\' )\n+        file.write( \'chr1,6155418,6155441,"+",None,test1/1,(self),match,100,0,0,1,1,1,24\\n\' )\n+        file.write( \'chr2,26303950,26303981,"+",None,test2/1-1,(self),match,93,0,2,3,1,1,32\\n\' )\n+        file.write( \'chr3,28320540,28320574,"+",None,test2/1-2,chr2:26303950-26303981,match,94,0,2,3,2,None,35\\n\' )\n+        file.write( \'chr4,28565007,28565041,"+",None,test2/1-3,chr2:26303950-26303981,match,88,0,4,3,3,3,35\\n\' )\n+        file.close()\n+        \n+    def _writeExpSamFile(self, fileName):\n+        file = open(fileName, \'w\')\n+        file.write(\'@SQ\\tSN:chr4\\tLN:28565041\\n\')\n+        file.write(\'@SQ\\tSN:chr3\\tLN:28320574\\n\')\n+        file.write(\'@SQ\\tSN:chr2\\tLN:26303981\\n\')\n+        file.write(\'@SQ\\tSN:chr1\\tLN:6155441\\n\')\n+        file.write(\'test1/1\\t0\\tchr1\\t6155418\\t255\\t24M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n+        file.write(\'test2/1\\t0\\tchr2\\t26303950\\t255\\t32M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n+        file.write(\'test2/1\\t0\\tchr3\\t28320540\\t255\\t35M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n+        file.write(\'test2/1\\t0\\tchr4\\t28565007\\t255\\t35M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n+        file.close()\n+\n+\n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_coordinatesToSequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_coordinatesToSequence.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,31 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+TESTFILES_PATH = os.environ['REPET_PATH'] + '/SMART/Java/Python/TestFiles'
+
+class Test_F_coordinatesToSequence(unittest.TestCase):
+
+    def test_run(self):
+        cmd = "python ../coordinatesToSequence.py -i %s/testC2S.gff3 -f gff3 -s %s/testC2S.fa -o testOut.fa -v 10 " % (TESTFILES_PATH, TESTFILES_PATH)
+        os.system(cmd)
+        obs = 'testOut.fa'
+        exp = 'expOut.fa'
+        self._writeExpOut(exp)
+        self.assertTrue(FileUtils.isRessourceExists(obs))
+        self.assertTrue(FileUtils.are2FilesIdentical(obs, exp))
+        os.remove(obs)
+        os.remove(exp)       
+
+    def _writeExpOut(self, outputFileName):
+        f = open(outputFileName, "w")
+        f.write(">region0\n")
+        f.write("CAACATTAGC\n")
+        f.write(">region1\n")
+        f.write("TTAGCCGGCC\n")
+        f.write(">region2\n")
+        f.write("GGCCGGCTAA\n")
+        f.close()
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_findTss.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_findTss.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,93 @@
+import unittest
+import os, os.path
+from SMART.Java.Python.misc import Utils
+
+
+TestF_PATH = os.environ['REPET_PATH'] + '/SMART/Java/Python/TestFiles'
+
+
+class Test_F_findTss(unittest.TestCase):
+    
+    def setUp(self):
+        self._outputFileName = 'output.gff3'
+        self._expOutputFileName = 'expOutput.gff3'
+        
+    def tearDown(self):
+        os.remove(self._outputFileName)
+        os.remove(self._expOutputFileName)
+
+    def test_run_default_option(self):
+        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -v 0" % TestF_PATH
+        os.system(cmd)
+        self._writeExpDefaultOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+        
+    def test_run_normalize_option(self):
+        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -n -v 0" % TestF_PATH
+        os.system(cmd)
+        self._writeExpNormalizeOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+
+    def test_run_distance_option(self):
+        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -d 20 -v 0" % TestF_PATH
+        os.system(cmd)
+        self._writeExpDistance_option(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+
+    def test_run_colinear_option(self):
+        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -e -v 0" % TestF_PATH
+        os.system(cmd)
+        self._writeExpColinearOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+        
+    def test_run_excel_option(self):
+        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -c output.csv -v 0" % TestF_PATH
+        os.system(cmd)
+        obsCsv = 'output.csv'
+        self.assertTrue(os.path.exists(obsCsv))
+        os.remove(obsCsv)
+        self._writeExpExcelOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+
+    def _writeExpDefaultOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
+        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
+        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
+        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
+        f.close()
+        
+    def _writeExpNormalizeOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
+        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
+        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
+        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
+        f.close()
+        
+    def _writeExpDistance_option(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
+        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
+        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
+        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
+        f.close()
+        
+    def _writeExpColinearOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
+        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
+        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
+        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
+        f.close()
+        
+    def _writeExpExcelOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
+        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
+        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
+        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getExons.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getExons.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,145 @@
+import unittest
+import os
+from SMART.Java.Python.getExons import GetExons
+from commons.core.parsing.GffParser import GffParser
+
+class Test_F_GetExons(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName = "inputFileTest.bed"
+        self._writeInputFile()
+        self._outputFileName = "output.gff3"
+        
+    def tearDown(self):
+        os.remove(self._inputFileName)
+        os.remove(self._outputFileName)
+
+    def test_run_simple(self):
+        ge = GetExons(0)
+        ge.setInputFile(self._inputFileName, "bed")
+        ge.setOutputFile(self._outputFileName)
+        ge.run()
+        parser = GffParser(self._outputFileName, 0)
+        self.assertEqual(parser.getNbTranscripts(), 5)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            if cpt == 0:
+                self.assertEqual(transcript.getStart(), 10000001)
+                self.assertEqual(transcript.getEnd(),   10000100)
+            elif cpt == 1:
+                self.assertEqual(transcript.getStart(), 10000201)
+                self.assertEqual(transcript.getEnd(),   10000300)
+            elif cpt == 2:
+                self.assertEqual(transcript.getStart(), 10000401)
+                self.assertEqual(transcript.getEnd(),   10000500)
+            elif cpt == 3:
+                self.assertEqual(transcript.getStart(), 10000601)
+                self.assertEqual(transcript.getEnd(),   10000700)
+            elif cpt == 4:
+                self.assertEqual(transcript.getStart(), 10000801)
+                self.assertEqual(transcript.getEnd(),   10000900)
+
+    def test_run_firstExon(self):
+        ge = GetExons(0)
+        ge.setInputFile(self._inputFileName, "bed")
+        ge.setOutputFile(self._outputFileName)
+        ge.setSelection("1")
+        ge.run()
+        parser = GffParser(self._outputFileName, 0)
+        self.assertEqual(parser.getNbTranscripts(), 1)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            self.assertEqual(transcript.getStart(), 10000001)
+            self.assertEqual(transcript.getEnd(),   10000100)
+
+    def test_run_lastExon(self):
+        ge = GetExons(0)
+        ge.setInputFile(self._inputFileName, "bed")
+        ge.setOutputFile(self._outputFileName)
+        ge.setSelection("-1")
+        ge.run()
+        parser = GffParser(self._outputFileName, 0)
+        self.assertEqual(parser.getNbTranscripts(), 1)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            self.assertEqual(transcript.getStart(), 10000801)
+            self.assertEqual(transcript.getEnd(),   10000900)
+
+    def test_run_first_lastExon(self):
+        ge = GetExons(0)
+        ge.setInputFile(self._inputFileName, "bed")
+        ge.setOutputFile(self._outputFileName)
+        ge.setSelection("1,-1")
+        ge.run()
+        parser = GffParser(self._outputFileName, 0)
+        self.assertEqual(parser.getNbTranscripts(), 2)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            if cpt == 0:
+                self.assertEqual(transcript.getStart(), 10000001)
+                self.assertEqual(transcript.getEnd(),   10000100)
+            elif cpt == 1:
+                self.assertEqual(transcript.getStart(), 10000801)
+                self.assertEqual(transcript.getEnd(),   10000900)
+
+    def test_run_interval(self):
+        ge = GetExons(0)
+        ge.setInputFile(self._inputFileName, "bed")
+        ge.setOutputFile(self._outputFileName)
+        ge.setSelection("2..3")
+        ge.run()
+        parser = GffParser(self._outputFileName, 0)
+        self.assertEqual(parser.getNbTranscripts(), 2)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            if cpt == 0:
+                self.assertEqual(transcript.getStart(), 10000201)
+                self.assertEqual(transcript.getEnd(),   10000300)
+            elif cpt == 1:
+                self.assertEqual(transcript.getStart(), 10000401)
+                self.assertEqual(transcript.getEnd(),   10000500)
+
+    def test_run_interval_element(self):
+        ge = GetExons(0)
+        ge.setInputFile(self._inputFileName, "bed")
+        ge.setOutputFile(self._outputFileName)
+        ge.setSelection("2..3,-1")
+        ge.run()
+        parser = GffParser(self._outputFileName, 0)
+        self.assertEqual(parser.getNbTranscripts(), 3)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            if cpt == 0:
+                self.assertEqual(transcript.getStart(), 10000201)
+                self.assertEqual(transcript.getEnd(),   10000300)
+            elif cpt == 1:
+                self.assertEqual(transcript.getStart(), 10000401)
+                self.assertEqual(transcript.getEnd(),   10000500)
+            elif cpt == 2:
+                self.assertEqual(transcript.getStart(), 10000801)
+                self.assertEqual(transcript.getEnd(),   10000900)
+
+
+
+    def _writeInputFile(self):
+        f = open(self._inputFileName, "w")
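+        # single BED12 record: one transcript on arm_X with five 100 bp exons starting every 200 bp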
+        f.write("arm_X\t10000001\t10000900\ttest1.1\t100\t+\t10000100\t10000200\t0\t5\t100,100,100,100,100,\t0,200,400,600,800,\n")
+        f.close()
+        
+                
+
+if __name__ == "__main__":
+    unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getLetterDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getLetterDistribution.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,118 @@
+import os
+import sys
+import unittest
+from SMART.Java.Python.test.MockGetLetterDistribution import MockGetLetterDistributionFasta
+from SMART.Java.Python.test.MockGetLetterDistribution import MockGetLetterDistributionFastq
+from SMART.Java.Python.test.MockGetLetterDistribution import MockGetLetterDistributionExpectedCSV
+
+class Test_F_getLetterDistribution(unittest.TestCase):
+
+    def tearDown(self):
+        os.system("rm tmp*.*")
+
+    def test_getLetterDistributionWithFasta(self):
+        iFastaMock = MockGetLetterDistributionFasta()
+        fastaFileName = "MockFasta_GetLetterDistribution.fa"
+        iFastaMock.write(fastaFileName)
+
+        outputName = "dummy_result_fasta"
+        os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fasta -o %s" % (os.environ["REPET_PATH"], fastaFileName, outputName))
+
+        self.assertTrue(os.path.exists(outputName + ".png"))
+        self.assertTrue(os.path.exists(outputName + "PerNt.png"))
+
+        os.remove(outputName + ".png")
+        os.remove(outputName + "PerNt.png")
+        os.remove(fastaFileName)
+
+    def test_getLetterDistributionWithFastq(self):
+        iFastqMock = MockGetLetterDistributionFastq()
+        fastqFileName = "MockFastq_GetLetterDistribution.fastq"
+        iFastqMock.write(fastqFileName)
+
+        outputName = "dummy_result_fastq"
+        os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fastq -o %s" % (os.environ["REPET_PATH"], fastqFileName, outputName))
+
+        self.assertTrue(os.path.exists(outputName + ".png"))
+        self.assertTrue(os.path.exists(outputName + "PerNt.png"))
+
+        os.remove(fastqFileName)
+        os.remove(outputName + ".png")
+        os.remove(outputName + "PerNt.png")
+
+    def test_getLetterDistributionWithFastaCSVOutput(self):
+        iFastaMock = MockGetLetterDistributionFasta()
+        fastaFileName = "MockFasta_GetLetterDistribution.fa"
+        iFastaMock.write(fastaFileName)
+
+        iCSVMock = MockGetLetterDistributionExpectedCSV()
+        expCSVFileName = "expCSV.csv"
+        iCSVMock.write(expCSVFileName)
+
+        outputName = "dummy_result_fasta"
+        os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fasta -o %s -c" % (os.environ["REPET_PATH"], fastaFileName, outputName))
+
+        obsCSVFileName = outputName + ".csv"
+
+        self.assertTrue(os.path.exists(outputName + ".png"))
+        self.assertTrue(os.path.exists(outputName + "PerNt.png"))
+        self.assertTrue(self._are2FilesIdentical(expCSVFileName, obsCSVFileName))
+
+        os.remove(outputName + ".png")
+        os.remove(outputName + "PerNt.png")
+        os.remove(fastaFileName)
+        os.remove(expCSVFileName)
+        os.remove(obsCSVFileName)
+
+    def test_getLetterDistributionWithFastqCSVOutput(self):
+        iFastqMock = MockGetLetterDistributionFastq()
+        fastqFileName = "MockFastq_GetLetterDistribution.fastq"
+        iFastqMock.write(fastqFileName)
+
+        iCSVMock = MockGetLetterDistributionExpectedCSV()
+        expCSVFileName = "expCSV.csv"
+        iCSVMock.write(expCSVFileName)
+
+        outputName = "dummy_result_fastq"
+        os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fastq -o %s -c" % (os.environ["REPET_PATH"], fastqFileName, outputName))
+
+        obsCSVFileName = outputName + ".csv"
+
+        self.assertTrue(os.path.exists(outputName + ".png"))
+        self.assertTrue(os.path.exists(outputName + "PerNt.png"))
+        self.assertTrue(self._are2FilesIdentical(expCSVFileName, obsCSVFileName))
+
+        os.remove(fastqFileName)
+        os.remove(outputName + ".png")
+        os.remove(outputName + "PerNt.png")
+        os.remove(expCSVFileName)
+        os.remove(obsCSVFileName)
+
+    def _are2FilesIdentical(self, file1, file2):
+        tmpFile = "diff_%s_%s" % (os.path.basename(file1), os.path.basename(file2))
+        cmd = "diff %s %s >> %s" % (file1, file2, tmpFile)
+        returnStatus = os.system(cmd)
+        if returnStatus != 0:
+            msg = "ERROR: 'diff' returned '%i'" % returnStatus
+            sys.stderr.write("%s\n" % msg)
+            sys.stderr.flush()
+            os.remove(tmpFile)
+            return False
+        if self.isEmpty(tmpFile):
+            os.remove(tmpFile)
+            return True
+        else:
+            os.remove(tmpFile)
+            return False
+
+    def getNbLinesInSingleFile(self, fileName):
+        fileHandler = open(fileName, "r")
+        lines = fileHandler.readlines()
+        fileHandler.close()
+        return len(lines)
+
+    def isEmpty(self, fileName):
+        return 0 == self.getNbLinesInSingleFile(fileName)
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getRandomRegions.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getRandomRegions.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,66 @@
+import unittest
+import os
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
+
+MIN_SIZE = 36
+MAX_SIZE = 100
+CHR      = "chr1"
+CHR_SIZE = 1000000
+NB_READS = 1000
+
+class Test_F_getRandomRegions(unittest.TestCase):
+
+    def setUp(self):
+        self._outputFileName = 'output.gff3'
+        
+    def tearDown(self):
+        for fileName in [self._outputFileName]:
+            if os.path.exists(fileName):
+                os.remove(fileName)
+        
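+    # each test asks RandomRegionsGenerator for NB_READS regions of MIN_SIZE to MAX_SIZE bp on one chromosome of CHR_SIZE bp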
+    def test_simple(self):
+        iRR = RandomRegionsGenerator(0)
+        iRR.setMinSize(MIN_SIZE)
+        iRR.setMaxSize(MAX_SIZE)
+        iRR.setGenomeSize(CHR_SIZE)
+        iRR.setChromosomeName(CHR)
+        iRR.setStrands(False)
+        iRR.setNumber(NB_READS)
+        iRR.setOutputFile(self._outputFileName)
+        iRR.run()
+        parser = GffParser(self._outputFileName, 0)
+        starts = set()
+        self.assertEquals(parser.getNbTranscripts(), NB_READS)
+        for transcript in parser.getIterator():
+            start      = transcript.getStart()
+            end        = transcript.getEnd()
+            size       = transcript.getSize()
+            chromosome = transcript.getChromosome()
+            strand     = transcript.getDirection()
+            self.assertTrue(start not in starts)
+            self.assertTrue(start >= 1)
+            self.assertTrue(end <= CHR_SIZE)
+            self.assertEquals(chromosome, CHR)
+            self.assertEquals(strand, 1)
+            starts.add(start)
+
+    def test_both_strands(self):
+        iRR = RandomRegionsGenerator(0)
+        iRR.setMinSize(MIN_SIZE)
+        iRR.setMaxSize(MAX_SIZE)
+        iRR.setGenomeSize(CHR_SIZE)
+        iRR.setChromosomeName(CHR)
+        iRR.setStrands(True)
+        iRR.setNumber(NB_READS)
+        iRR.setOutputFile(self._outputFileName)
+        iRR.run()
+        parser = GffParser(self._outputFileName, 0)
+        strands = set()
+        for transcript in parser.getIterator():
+            strands.add(transcript.getDirection())
+        self.assertTrue(1 in strands)
+        self.assertTrue(-1 in strands)
+        
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getReadDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getReadDistribution.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,37 @@
+import unittest
+import os, glob
+
+
+class Test_F_getReadDistribution(unittest.TestCase):
+
+    def setUp(self):
+        self.inputFileName  = "inputFile.fasta"
+        self.outputFileName = "outputFile"
+         
+    def tearDown(self):
+        for fileRoot in (self.inputFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+        os.system("rm .RData")
+
+    def test_run_simple(self):
+        handle = open(self.inputFileName, "w")
+        handle.write(">test1\n")
+        handle.write("AAAAAA\n")
+        handle.write(">test2\n")
+        handle.write("AAAAAA\n")
+        handle.write(">test3\n")
+        handle.write("CCCCCC\n")
+        handle.close()
+        os.system("python ../getReadDistribution.py -i %s -f fasta -n 1 -o %s -v 0" % (self.inputFileName, self.outputFileName))
+        self.assertTrue(os.path.exists("%s.png" % (self.outputFileName)))
+        handle = open("%s.txt" % (self.outputFileName))
+        lines = handle.readlines()
+        self.assertEquals(len(lines), 1)
+        self.assertEquals(lines[0], "AAAAAA\t2\n")
+
+        
+if __name__ == "__main__":
+    unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getWigData.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getWigData.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,71 @@
+import unittest
+import os, os.path
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.getWigData import *
+
+class Test_F_getWigData(unittest.TestCase):
+
+
+    def setUp(self):
+        self._inputGffFileName = 'inputGff.gff3'
+        self._writeInputGff(self._inputGffFileName)
+        self._inputWigFileName = '../TestFiles/sorted_query_wig.wig'
+        self._expOutFileName = 'expOut.gff3'
+        self._outFileName = 'outGffWig.gff3'
+
+
+    def tearDown(self):
+        os.remove(self._inputGffFileName)
+        os.remove(self._expOutFileName)
+        os.remove(self._outFileName)
+
+
+    def test_getWigData_DefaultOption_asScript(self):
+        cmd = 'python ../getWigData.py -i %s -f gff3 -w %s -t wigValue -o %s -v 0' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
+        os.system(cmd)
+        self._writeExpDefaultOption(self._expOutFileName)
+        self.assertTrue(Utils.diff(self._expOutFileName, self._outFileName))
+        
+    def test_getWigData_strandsOption(self):
+        cmd = 'python ../getWigData.py -i %s -f gff3 -w %s -t wigValue -o %s -s -v 0' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
+        os.system(cmd)
+        self._writeExpStrandsOption(self._expOutFileName)
+        self.assertTrue(Utils.diff(self._expOutFileName, self._outFileName))  
+        
+    
+    def _writeInputGff(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=query_1;Name=test1.1\n")
+        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t-\t.\tID=query_2;Name=test1.2\n")
+        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t-\t.\tID=query_3;Name=test1.3\n")
+        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=query_4;Name=test1.4\n")
+        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=query_5;Name=test1.5\n")
+        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=query_6;Name=test1.6\n")
+        f.close()
+        
+    def _writeExpDefaultOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\ttest1.1\t25\t150\t126\t+\t.\twigValue=1.64285714286;ID=query_1;Name=test1.1\n")
+        f.write("chr1\tS-MART\ttest1.2\t70\t850\t781\t-\t.\twigValue=1.48911651729;ID=query_2;Name=test1.2\n")
+        f.write("chr1\tS-MART\ttest1.3\t550\t850\t201\t-\t.\twigValue=2.0;ID=query_3;Name=test1.3\n")
+        f.write("chr1\tS-MART\ttest1.4\t925\t1025\t101\t+\t.\twigValue=1.0;ID=query_4;Name=test1.4\n")
+        f.write("chr1\tS-MART\ttest1.5\t1201\t1210\t10\t+\t.\twigValue=1.0;ID=query_5;Name=test1.5\n")
+        f.write("chr1\tS-MART\ttest1.6\t1500\t1600\t101\t+\t.\twigValue=1.0;ID=query_6;Name=test1.6\n")
+        f.close()
+        
+    def _writeExpStrandsOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tS-MART\ttest1.1\t25\t150\t126\t+\t.\twigValue=0.0;ID=query_1;Name=test1.1\n")
+        f.write("chr1\tS-MART\ttest1.2\t70\t850\t781\t-\t.\twigValue=0.0;ID=query_2;Name=test1.2\n")
+        f.write("chr1\tS-MART\ttest1.3\t550\t850\t201\t-\t.\twigValue=0.0;ID=query_3;Name=test1.3\n")
+        f.write("chr1\tS-MART\ttest1.4\t925\t1025\t101\t+\t.\twigValue=0.0;ID=query_4;Name=test1.4\n")
+        f.write("chr1\tS-MART\ttest1.5\t1201\t1210\t10\t+\t.\twigValue=0.0;ID=query_5;Name=test1.5\n")
+        f.write("chr1\tS-MART\ttest1.6\t1500\t1600\t101\t+\t.\twigValue=0.0;ID=query_6;Name=test1.6\n")
+        f.close() 
+        
+    
+        
+
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getWigDistance.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getWigDistance.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,45 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from SMART.Java.Python.getWigDistance import *
+
+class Test_F_getWigDistance(unittest.TestCase):
+
+
+    def setUp(self):
+        self._inputGffFileName = 'inputGff.gff3'
+        self._writeInputGff(self._inputGffFileName)
+        self._inputWigFileName = '../TestFiles/sorted_query_wig.wig'
+        self._outFileName = 'outGffWig.png'
+
+    def tearDown(self):
+        os.remove(self._inputGffFileName)
+        os.remove(self._outFileName)        
+
+    def test_getWigDistance_defaultOption(self):
+        cmd = 'python ../getWigDistance.py -i %s -f gff3 -w %s -d 10 -o %s' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
+        os.system(cmd)
+
+    def test_getWigDistance_strandsOption(self):
+        cmd = 'python ../getWigDistance.py -i %s -f gff3 -w %s -d 10 -o %s -s' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
+        os.system(cmd)      
+          
+    def test_getWigDistance_logOption(self):
+        cmd = 'python ../getWigDistance.py -i %s -f gff3 -w %s -d 10 -o %s -l' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
+        os.system(cmd)
+    
+    def _writeInputGff(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=query_1;Name=test1.1\n")
+        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t-\t.\tID=query_2;Name=test1.2\n")
+        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t-\t.\tID=query_3;Name=test1.3\n")
+        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=query_4;Name=test1.4\n")
+        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=query_5;Name=test1.5\n")
+        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=query_6;Name=test1.6\n")
+        f.close()
+
+
+
+if __name__ == "__main__":
+    #import sys;sys.argv = ['', 'Test.testName']
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_getWigProfile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_getWigProfile.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,65 @@
+import unittest
+import os, glob
+from SMART.Java.Python.getWigProfile import GetWigProfile
+
+
+class Test_F_GetWigProfile(unittest.TestCase):
+
+    def setUp(self):
+        self.transcriptFileName = "transcriptFile.gff3"
+        self.wigFileName        = "file.wig"
+        self.outputFileName     = "outputFile.png"
+         
+    def tearDown(self):
+        for fileRoot in (self.transcriptFileName, self.wigFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+        os.system("rm .RData .chr1.index ")
+
+    def test_run_simple(self):
+        handle = open(self.transcriptFileName, "w")
+        handle.write("chr1\tSMART\tmRNA\t10\t20\t.\t+\t.\tID=test1;Name=test1")
+        handle.close()
+        handle = open(self.wigFileName, "w")
+        handle.write("variableStep chrom=chr1\n")
+        handle.write("1 1\n")
+        handle.write("2 1\n")
+        handle.write("3 1\n")
+        handle.write("4 1\n")
+        handle.write("5 1\n")
+        handle.write("6 1\n")
+        handle.write("7 1\n")
+        handle.write("8 1\n")
+        handle.write("9 1\n")
+        handle.write("10 1\n")
+        handle.write("11 2\n")
+        handle.write("12 3\n")
+        handle.write("13 4\n")
+        handle.write("14 5\n")
+        handle.write("15 5\n")
+        handle.write("16 5\n")
+        handle.write("17 5\n")
+        handle.write("18 5\n")
+        handle.write("19 5\n")
+        handle.write("20 5\n")
+        handle.write("21 1\n")
+        handle.write("21 1\n")
+        handle.close()
+        wigProfile = GetWigProfile(0)
+        wigProfile.strands        = False
+        wigProfile.inputFileName  = self.transcriptFileName
+        wigProfile.inputFormat    = "gff3"
+        wigProfile.wig            = self.wigFileName
+        wigProfile.nbPoints       = 11
+        wigProfile.distance       = 1
+        wigProfile.smoothenForce  = None
+        wigProfile.log            = False
+        wigProfile.outputFileName = self.outputFileName
+        wigProfile.readTranscripts()
+        wigProfile.smoothen()
+        wigProfile.plot()
+        self.assertTrue(os.path.exists(self.outputFileName))
+        
+if __name__ == "__main__":
+    unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_mapperAnalyzer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_mapperAnalyzer.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,55 @@
+import unittest
+import os, glob
+from SMART.Java.Python.mapperAnalyzer import MapperAnalyzer
+from commons.core.parsing.GffParser import GffParser
+
+class Test_F_mapperAnalyzer(unittest.TestCase):
+
+    def setUp(self):
+        self.readsFileName   = "inputFile.fastq"
+        self.mappingFileName = "inputFile.sam"
+        self.outputFileName  = "outputFile.gff3"
+         
+    def tearDown(self):
+        for fileRoot in (self.readsFileName, self.mappingFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+        os.system("rm tmpNames_* tmpSequences_* smartdb*")
+
+    def test_run_simple(self):
+        handle = open(self.readsFileName, "w")
+        handle.write("@read1\n")
+        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
+        handle.write("+\n")
+        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
+        handle.write("@read2\n")
+        handle.write("CCCCCCCCCCCCCCCCCCCC\n")
+        handle.write("+\n")
+        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
+        handle.close()
+        handle = open(self.mappingFileName, "w")
+        handle.write("read1\t0\tchr1\t1\t30\t20M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAA\tAAAAAAAAAAAAAAAAAAAA\tNM:i:0\n")
+        handle.write("read2\t0\tchr2\t1\t30\t20M\t*\t0\t0\tCCCCCCCCCCCCCCCCCCCC\tAAAAAAAAAAAAAAAAAAAA\tNM:i:0\n")
+        handle.write("read2\t0\tchr3\t1\t30\t20M\t*\t0\t0\tCCCCCCCCCCCCCCCCCCCC\tAAAAAAAAAAAAAAAAAAAA\tNM:i:0\n")
+        handle.close()
+        analyzer = MapperAnalyzer(0)
+        analyzer.setMappingFile(self.mappingFileName, "sam")
+        analyzer.setSequenceFile(self.readsFileName, "fastq")
+        analyzer.setOutputFile(self.outputFileName, "S-MART")
+        analyzer.setMaxMappings(1)
+        analyzer.mergeExons(True)
+        analyzer.analyze()
+
+        parser = GffParser(self.outputFileName)
+        self.assertEqual(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self.assertEqual(transcript.getName(), "read1")
+            self.assertEqual(transcript.getChromosome(), "chr1")
+            self.assertEqual(transcript.getStart(), 1)
+            self.assertEqual(transcript.getEnd(), 20)
+            self.assertEqual(transcript.getDirection(), 1)
+            
+        
+if __name__ == "__main__":
+    unittest.main()
+
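Aside (not part of the changeset): with setMaxMappings(1), only read1 survives because read2 has two SAM hits. A hypothetical sketch of that filtering rule, assuming hits are (readName, chromosome) pairs taken from the SAM lines above:

    from collections import Counter

    def keep_at_most(hits, maxMappings=1):
        # Discard reads that map more often than maxMappings allows.
        counts = Counter(name for name, _ in hits)
        return [(name, chrom) for name, chrom in hits if counts[name] <= maxMappings]

    hits = [("read1", "chr1"), ("read2", "chr2"), ("read2", "chr3")]
    assert keep_at_most(hits) == [("read1", "chr1")]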
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_mappingToCoordinates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_mappingToCoordinates.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,22 @@
+import unittest
+import os
+from SMART.Java.Python.misc import Utils
+
+class Test_F_mappingToCoordinates(unittest.TestCase):
+
+
+    def setUp(self):
+        self._inputFileName = 'inputMTC.sam'
+        self._outputFileName = 'outputGff.gff3'
+        self._expOutputFileName = '../TestFiles/expOutputGff.gff3'
+        
+    def tearDown(self):
+        os.remove(self._outputFileName)
+
+    def test_run_default_option(self):
+        cmd = 'python ../mappingToCoordinates.py -i ../TestFiles/%s -f sam -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))
+        
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_mergeSlidingWindowsClusters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_mergeSlidingWindowsClusters.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,80 @@
+import unittest
+import os
+from SMART.Java.Python.mergeSlidingWindowsClusters import MergeSlidingWindowsClusters
+from SMART.Java.Python.misc import Utils
+
+class Test_F_mergeSlidingWindowsClusters(unittest.TestCase):
+
+
+    def setUp(self):
+        self._outputFileName = 'outputMSWC'
+        self._inputFileName1 = 'inputMSWC1.gff3'
+        self._inputFileName2 = 'inputMSWC2.gff3'
+        self._writeInput1(self._inputFileName1)
+        self._writeInput2(self._inputFileName2)
+        self._expOutput = 'expOutputMSWC.gff3'
+        self._writeExpOutput(self._expOutput)
+
+    def tearDown(self):
+        os.remove(self._inputFileName1)
+        os.remove(self._inputFileName2)
+        os.remove(self._outputFileName+'.gff3')
+        os.remove(self._expOutput)
+
+
+    def test_run(self):
+        iMSWC = MergeSlidingWindowsClusters(0)
+        iMSWC.addInput(self._inputFileName1, 'gff3')
+        iMSWC.addInput(self._inputFileName2, 'gff3')
+        iMSWC.setOutput(self._outputFileName)
+        iMSWC.merge()
+        self.assertTrue(Utils.diff(self._outputFileName+'.gff3', self._expOutput))
+        
+    def test_run_asScript(self):
+        cmd = 'python ../mergeSlidingWindowsClusters.py -i %s -f gff3 -j %s -g gff3 -o outputMSWC.gff3 --galaxy -v 0' % (self._inputFileName1, self._inputFileName2)
+        os.system(cmd)
+        self.assertTrue(Utils.diff(self._outputFileName+'.gff3', self._expOutput)) 
+        
+    def _writeInput1(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\ttest\tmatch\t6155418\t6155441\t24\t+\t.\tName=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100\n")
+        f.write("chr2\ttest\tmatch\t26303950\t26303981\t32\t+\t.\tName=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
+        f.write("chr3\ttest\tmatch\t28320540\t28320574\t35\t+\t.\tName=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94\n")
+        f.write("chr4\ttest\tmatch\t28565007\t28565041\t35\t+\t.\tName=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
+        f.write("chr6\ttest\tmatch\t48565007\t48565041\t35\t+\t.\tName=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80\n")
+        f.close()
+
+    def _writeInput2(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\ttest\tmatch\t6155418\t6155441\t24\t+\t.\tName=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100\n")
+        f.write("chr2\ttest\tmatch\t26303990\t26304021\t32\t+\t.\tName=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
+        f.write("chr3\ttest\tmatch\t28320540\t28320574\t35\t+\t.\tName=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94\n")
+        f.write("chr4\ttest\tmatch\t28565017\t28565051\t35\t+\t.\tName=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
+        f.write("chr5\ttest\tmatch\t30000000\t30000050\t50\t+\t.\tName=test3/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50\n")
+        f.close()   
+        
+    def _writeExpOutput(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr6 S-MART match 48565007 48565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80;Name=region_1
+chr5 S-MART match 30000000 30000050 50 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50;Name=region_2
+chr4 S-MART match 28565017 28565051 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=region_3
+chr4 S-MART match 28565007 28565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=region_4
+chr3 S-MART match 28320540 28320574 35 + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=region_5
+chr2 S-MART match 26303990 26304021 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=region_6
+chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=region_7
+chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=region_8
+""")
+#       f.write("chr6\tS-MART\tmatch\t48565007\t48565041\t35\t+\t.\tName=region_1;occurrence=3;feature=match;rank=3;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80\n")
+#       f.write("chr5\tS-MART\tmatch\t30000000\t30000050\t50\t+\t.\tName=region_2;occurrence=3;feature=match;rank=3;score=50;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50\n")
+#       f.write("chr4\tS-MART\tmatch\t28565017\t28565051\t35\t+\t.\tName=region_3;occurrence=3;feature=match;rank=3;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
+#       f.write("chr4\tS-MART\tmatch\t28565007\t28565041\t35\t+\t.\tName=region_4;occurrence=3;feature=match;rank=3;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
+#       f.write("chr3\tS-MART\tmatch\t28320540\t28320574\t35\t+\t.\tName=region_5;occurrence=2;feature=match;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94\n")
+#       f.write("chr2\tS-MART\tmatch\t26303990\t26304021\t32\t+\t.\tName=region_6;occurrence=1;feature=match;rank=1;score=32;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
+#       f.write("chr2\tS-MART\tmatch\t26303950\t26303981\t32\t+\t.\tName=region_7;occurrence=1;feature=match;rank=1;score=32;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
+#       f.write("chr1\tS-MART\tmatch\t6155418\t6155441\t24\t+\t.\tName=region_8;occurrence=1;feature=match;rank=1;score=24;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100\n")
+        f.close()
+    
+            
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_mergeTranscriptLists.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_mergeTranscriptLists.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,92 @@
+import unittest
+import os, os.path, glob
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.mergeTranscriptLists import MergeLists
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.parsing.GffParser import GffParser
+
+class Test_F_mergeTranscriptLists(unittest.TestCase):
+
+    def setUp(self):
+        self.queryFileName     = "testQuery.gff3"
+        self.referenceFileName = "testReference.gff3"
+        self.outputFileName    = "testOutput.gff3"
+         
+    def tearDown(self):
+        for fileRoot in (self.queryFileName, self.referenceFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+        for file in glob.glob("tmp_*.gff3"):
+            os.remove(file)
+
+    def test_run_simple(self):
+        reference1 = self._createTranscript("chr1", 1000, 2000, "+", "ref1")
+        reference2 = self._createTranscript("chr1", 3000, 4000, "+", "ref2")
+        reference3 = self._createTranscript("chr1", 5000, 6000, "+", "ref3")
+        writer = Gff3Writer(self.referenceFileName, 0)
+        writer.addTranscript(reference1)
+        writer.addTranscript(reference2)
+        writer.addTranscript(reference3)
+        writer.close()
+        query1 = self._createTranscript("chr1", 1500, 3500, "+", "query1")
+        writer = Gff3Writer(self.queryFileName, 0)
+        writer.addTranscript(query1)
+        writer.close()
+        ml = MergeLists(0)
+        ml.setInputFileName(self.queryFileName, 'gff3', 0)
+        ml.setInputFileName(self.referenceFileName, 'gff3', 1)
+        ml.setOutputFileName(self.outputFileName)
+        ml.run()
+        parser = GffParser(self.outputFileName)
+        self.assertEqual(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, "chr1", 1000, 4000, "+", None)
+
+    def test_run_simple_aggregate(self):
+        reference1 = self._createTranscript("chr1", 1000, 2000, "+", "ref1")
+        reference2 = self._createTranscript("chr1", 3000, 4000, "+", "ref2")
+        reference3 = self._createTranscript("chr1", 5000, 6000, "+", "ref3")
+        writer = Gff3Writer(self.referenceFileName, 0)
+        writer.addTranscript(reference1)
+        writer.addTranscript(reference2)
+        writer.addTranscript(reference3)
+        writer.close()
+        query1 = self._createTranscript("chr1", 1500, 3500, "+", "query1")
+        writer = Gff3Writer(self.queryFileName, 0)
+        writer.addTranscript(query1)
+        writer.close()
+        ml = MergeLists(0)
+        ml.setInputFileName(self.queryFileName, 'gff3', 0)
+        ml.setInputFileName(self.referenceFileName, 'gff3', 1)
+        ml.setOutputFileName(self.outputFileName)
+        ml.setAggregate(True)
+        ml.run()
+        parser = GffParser(self.outputFileName)
+        self.assertEqual(parser.getNbTranscripts(), 2)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            if cpt == 0:
+                self._checkTranscript(transcript, "chr1", 1000, 4000, "+", None)
+            else:
+                self._checkTranscript(transcript, "chr1", 5000, 6000, "+", None)
+
+    def _createTranscript(self, chromosome, start, end, strand, name):
+        transcript = Transcript()
+        transcript.setChromosome(chromosome)
+        transcript.setStart(start)
+        transcript.setEnd(end)
+        transcript.setDirection(strand)
+        transcript.setName(name)
+        return transcript
+
+    def _checkTranscript(self, transcript, chromosome, start, end, strand, name):
+        self.assertEqual(transcript.getChromosome(), chromosome)
+        self.assertEqual(transcript.getStart(), start)
+        self.assertEqual(transcript.getEnd(), end)
+        self.assertEqual(transcript.getStrand(), strand)
+        if name != None:
+            self.assertEqual(transcript.getName(), name)
+
+        
+if __name__ == "__main__":
+    unittest.main()
+
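Aside (not part of the changeset): test_run_simple expects the query interval (1500-3500) to be fused with every reference it overlaps, giving 1000-4000. A hypothetical sketch of that fusion on closed intervals:

    def merge_query_with_references(query, references):
        # Extend the query to cover each overlapping reference interval.
        start, end = query
        for refStart, refEnd in references:
            if refStart <= end and refEnd >= start:
                start, end = min(start, refStart), max(end, refEnd)
        return start, end

    references = [(1000, 2000), (3000, 4000), (5000, 6000)]
    assert merge_query_with_references((1500, 3500), references) == (1000, 4000)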
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_plot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_plot.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,42 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+TestF_PATH = os.environ['REPET_PATH'] + '/SMART/Java/Python/TestFiles'
+
+
+class Test_F_plot(unittest.TestCase):
+    
+    def setUp(self):
+        self.outputFileName = "testOut.png"
+
+    def tearDown(self):
+        os.remove(self.outputFileName)
+
+    def test_run_default_option(self):
+        cmd = "python ../plot.py -i %s/mapperAnalyzerOutput.gff3 -f gff3 -x identity -y nbMismatches -s line -o %s " % (TestF_PATH, self.outputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
+
+    def test_run_log_option(self):
+        cmd = "python ../plot.py -i %s/mapperAnalyzerOutput.gff3 -f gff3 -x identity -y nbMismatches -s line -o %s -l xy" % (TestF_PATH, self.outputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
+        
+    def test_run_z_xDefault_yDefault_heatPoints_option(self):
+        cmd = "python ../plot.py -i %s/testPlot.gff3 -f gff3 -x value1 -X 1 -y value2 -Y 1 -z value3 -s heatPoints -o %s -v 10 " % (TestF_PATH, self.outputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
+        
+    def test_points_option(self):
+        cmd = "python ../plot.py -i %s/testPlot.gff3 -f gff3 -x value1 -X 1 -y value2 -Y 1 -s points -o %s -v 10 " % (TestF_PATH, self.outputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
+        
+    def test_xDefault_points_option(self):
+        cmd = "python ../plot.py -i %s/testPlot.gff3 -f gff3 -x value1 -X 1 -n 2 -s barplot -o %s -v 10 " % (TestF_PATH, self.outputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
+    
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_plotCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_plotCoverage.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,39 @@
+import unittest
+import os, glob
+from SMART.Java.Python.plotCoverage import PlotParser
+
+
+class Test_F_PlotCoverage(unittest.TestCase):
+
+    def setUp(self):
+        self.queryFileName  = "queryFile.gff3"
+        self.refFileName    = "refFile.gff3"
+        self.outputFileName = "outputFile"
+         
+    def tearDown(self):
+        for fileRoot in (self.queryFileName, self.refFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+        os.remove(".RData")
+
+    def test_run_simple(self):
+        handle = open(self.refFileName, "w")
+        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1")
+        handle.close()
+        handle = open(self.queryFileName, "w")
+        handle.write("chr1\tSMART\tmRNA\t1100\t1200\t.\t+\t.\tID=test2.1;Name=test2.1\n")
+        handle.write("chr1\tSMART\tmRNA\t1300\t1400\t.\t+\t.\tID=test2.2;Name=test2.2\n")
+        handle.close()
+        pp = PlotParser(0)
+        pp.addInput(0, self.queryFileName, "gff3")
+        pp.addInput(1, self.refFileName, "gff3")
+        pp.setLabels("x", "y")
+        pp.setPlotSize(1000, 500)
+        pp.setOutput(self.outputFileName)
+        pp.start()
+        self.assertTrue(os.path.exists("%s_test1_overlap.png" % (self.outputFileName)))
+        self.assertTrue(os.path.exists("%s_test1_coverage.png" % (self.outputFileName)))
+        
+if __name__ == "__main__":
+    unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_qualToFastq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_qualToFastq.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,38 @@
+import unittest
+import os, glob
+
+
+class Test_F_qualToFastq(unittest.TestCase):
+
+    def setUp(self):
+        self.fastaFileName  = "file.fasta"
+        self.qualFileName   = "file.qual"
+        self.outputFileName = "outputFile.fastq"
+         
+    def tearDown(self):
+        for fileRoot in (self.fastaFileName, self.qualFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+
+    def test_run_simple(self):
+        handle = open(self.fastaFileName, "w")
+        handle.write(">test1\n")
+        handle.write("AAAAAA")
+        handle.close()
+        handle = open(self.qualFileName, "w")
+        handle.write(">test1\n")
+        handle.write("32\t32\t32\t32\t32\t32")
+        handle.close()
+        os.system("python ../qualToFastq.py -f %s -q %s -o %s -v 0" % (self.fastaFileName, self.qualFileName, self.outputFileName))
+        handle = open(self.outputFileName)
+        lines = handle.readlines()
+        self.assertEquals(len(lines), 4)
+        self.assertEquals(lines[0], "@test1\n")
+        self.assertEquals(lines[1], "AAAAAA\n")
+        self.assertEquals(lines[2], "+\n")
+        self.assertEquals(lines[3], "AAAAAA\n")
+
+        
+if __name__ == "__main__":
+    unittest.main()
+
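Aside (not part of the changeset): the expected quality string "AAAAAA" follows from the usual Sanger/Phred+33 encoding, since chr(32 + 33) is 'A'. A small illustration:

    def encode_qualities(values, offset=33):
        # Map numeric Phred scores to their ASCII characters (Sanger offset assumed).
        return "".join(chr(value + offset) for value in values)

    assert encode_qualities([32] * 6) == "AAAAAA"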
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_restrictSequenceList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_restrictSequenceList.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,37 @@
+import unittest
+import os, glob
+
+class Test_F_restrictSequenceList(unittest.TestCase):
+
+    def setUp(self):
+        self.sequencesFileName = "inputFile.fasta"
+        self.namesFileName     = "names.txt"
+        self.outputFileName    = "outputFile.fasta"
+         
+    def tearDown(self):
+        for fileRoot in (self.sequencesFileName, self.namesFileName, self.outputFileName):
+            for file in glob.glob("%s*" % (fileRoot)):
+                os.remove(file)
+
+    def test_run_simple(self):
+        handle = open(self.sequencesFileName, "w")
+        handle.write(">sequence1\n")
+        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
+        handle.write(">sequence2\n")
+        handle.write("CCCCCCCCCCCCCCCCCCCC\n")
+        handle.close()
+        handle = open(self.namesFileName, "w")
+        handle.write("""sequence1""")
+        handle.close()
+        os.system("python ../restrictSequenceList.py -i %s -f fasta -n %s -o %s -v 0" % (self.sequencesFileName, self.namesFileName, self.outputFileName))
+        handle = open(self.outputFileName)
+        lines = handle.readlines()
+        handle.close()
+        self.assertEqual(len(lines), 2)
+        self.assertEqual(lines[0], ">sequence1\n")
+        self.assertEqual(lines[1], "AAAAAAAAAAAAAAAAAAAA\n")
+
+        
+if __name__ == "__main__":
+    unittest.main()
+
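Aside (not part of the changeset): the assertions keep only the records whose header appears in the names file. A hypothetical sketch of that restriction for a simple one-line-per-record FASTA:

    def restrict(fasta_lines, names):
        # Keep a record's lines only while the current header is in the wanted set.
        keep, kept = False, []
        for line in fasta_lines:
            if line.startswith(">"):
                keep = line[1:].strip() in names
            if keep:
                kept.append(line)
        return kept

    records = [">sequence1\n", "AAAAAAAAAAAAAAAAAAAA\n", ">sequence2\n", "CCCCCCCCCCCCCCCCCCCC\n"]
    assert restrict(records, {"sequence1"}) == [">sequence1\n", "AAAAAAAAAAAAAAAAAAAA\n"]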
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_selectByTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_selectByTag.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,86 @@
+import unittest
+import os
+from SMART.Java.Python.misc import Utils
+
+class Test_F_selectByTag(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName = "%s/SMART/Java/Python/TestFiles/inputMSWC1.gff3" % os.environ["REPET_PATH"]
+        self._outputFileName = "outputSBT.gff3"
+        self._expOutputFileName = "expSBT.gff3"
+
+    def tearDown(self):
+        os.remove(self._outputFileName)
+        os.remove(self._expOutputFileName)
+
+    def test_run_compulsory_option(self):
+        cmd = 'python ../SelectByTag.py -i %s -f gff3 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExpDefault(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+        
+    def test_run_value_option(self):
+        cmd = 'python ../SelectByTag.py -i %s -f gff3 -a 1 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExpValueOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+        
+    def test_run_min_option(self):
+        cmd = 'python ../SelectByTag.py -i %s -f gff3 -m 3 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExpMinOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+    
+    def test_run_max_option(self):
+        cmd = 'python ../SelectByTag.py -i %s -f gff3 -M 1 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExpMaxOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+
+    def test_run_max_min_option(self):
+        cmd = 'python ../SelectByTag.py -i %s -f gff3 -M 2 -m 1 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
+        os.system(cmd)
+        self._writeExpMaxMinOption(self._expOutputFileName)
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
+        
+    def _writeExpDefault(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
+chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
+chr3 S-MART match 28320540 28320574 35 + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
+chr4 S-MART match 28565007 28565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
+chr6 S-MART match 48565007 48565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80;Name=test2/1
+""")
+        f.close()
+        
+    def _writeExpValueOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
+chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
+""")
+        f.close()
+        
+    def _writeExpMinOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr4 S-MART match 28565007 28565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
+chr6 S-MART match 48565007 48565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80;Name=test2/1
+""")
+        f.close()
+        
+    def _writeExpMaxOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
+chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
+""")
+        f.close()        
+        
+    def _writeExpMaxMinOption(self, fileName):
+        f = open(fileName, 'w')
+        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
+chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
+chr3 S-MART match 28320540 28320574 35 + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
+""")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
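Aside (not part of the changeset): the expected files above are consistent with keeping a record when the chosen tag is present and, when bounds are given, its value lies within [min, max]. A hypothetical sketch over the occurrence values 1, 1, 2, 3, 3 used by the input file:

    def select_by_tag(records, tag, minValue=None, maxValue=None):
        # Keep records carrying the tag, optionally bounded by minValue/maxValue.
        kept = []
        for record in records:
            if tag not in record:
                continue
            value = record[tag]
            if minValue is not None and value < minValue:
                continue
            if maxValue is not None and value > maxValue:
                continue
            kept.append(record)
        return kept

    records = [{"occurrence": v} for v in (1, 1, 2, 3, 3)]
    assert len(select_by_tag(records, "occurrence", minValue=3)) == 2
    assert len(select_by_tag(records, "occurrence", maxValue=1)) == 2
    assert len(select_by_tag(records, "occurrence", minValue=1, maxValue=2)) == 3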
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_F_trimSequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_F_trimSequences.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,28 @@
+import unittest, os, os.path
+from SMART.Java.Python.misc import Utils
+SMART_PATH = os.environ["REPET_PATH"] + "/SMART"
+
+class Test_F_trimSequences(unittest.TestCase):
+
+    def setUp(self):
+        self._expOutputFileName = "expOutputFile.fasta"
+        self._obsOutputFileName = "obsOutputFile.mfa"
+        self._inputFileName     = "inputFile.fasta"
+
+    def tearDown(self):
+        for fileName in (self._expOutputFileName, self._obsOutputFileName, self._inputFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+
+    def test_simple(self):
+        expOutputFile = open(self._expOutputFileName, "w")
+        expOutputFile.write(">sequence1\nTTGCATAGCGCTACGTA\n")
+        expOutputFile.close()
+        inputFile = open(self._inputFileName, "w")
+        inputFile.write(">sequence1\nAGCTCGGGTATTGCATAGCGCTACGTACCCTTTATATC\n")
+        inputFile.close()
+        os.system("python %s/Java/Python/trimSequences.py -i %s -f fasta -3 CCCTTTATATC -5 AGCTCGGGTA -o %s -v 0" % (SMART_PATH, self._inputFileName, self._obsOutputFileName))
+        self.assertTrue(Utils.diff(self._expOutputFileName, self._obsOutputFileName))
+
+if __name__ == "__main__":
+    unittest.main()
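Aside (not part of the changeset): the expected FASTA follows from stripping the 5' adapter AGCTCGGGTA and the 3' adapter CCCTTTATATC when they flank the read. A hypothetical sketch:

    def trim(sequence, adapter5, adapter3):
        # Remove the 5' adapter prefix and the 3' adapter suffix when present.
        if sequence.startswith(adapter5):
            sequence = sequence[len(adapter5):]
        if sequence.endswith(adapter3):
            sequence = sequence[:-len(adapter3)]
        return sequence

    read = "AGCTCGGGTATTGCATAGCGCTACGTACCCTTTATATC"
    assert trim(read, "AGCTCGGGTA", "CCCTTTATATC") == "TTGCATAGCGCTACGTA"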
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_FindOverlapsOptim.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_FindOverlapsOptim.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,174 @@\n+import unittest\n+import os\n+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n+\n+REFERENCE = 0\n+QUERY = 1\n+\n+class Test_FindOverlapsOptim(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputRefGff3FileName = \'sorted_Ref.gff3\'\n+        self._inputQueryGff3FileName = \'sorted_Query.gff3\'\n+        self._writeQueryGff3File(self._inputQueryGff3FileName)\n+        self._outputGff3FileName = \'overlaps.gff3\'\n+        iMock = MockFindOverlapsWithServeralIntervals_case1()\n+        iMock.write(self._inputRefGff3FileName)\n+        self._iFOO = FindOverlapsOptim(0)\n+        self._iFOO.setRefFileName(self._inputRefGff3FileName, "gff3")\n+        self._iFOO.setQueryFileName(self._inputQueryGff3FileName, "gff3")\n+        self._iFOO.setOutputFileName(self._outputGff3FileName)\n+        self._iFOO.prepareIntermediateFiles()\n+        self._iFOO.createNCLists()\n+        self._queryNcList = self._iFOO._ncLists[QUERY]["chr1"]\n+        self._refNcList   = self._iFOO._ncLists[REFERENCE]["chr1"]\n+        \n+    def tearDown(self):\n+        os.remove(self._inputRefGff3FileName)\n+        os.remove(self._inputQueryGff3FileName)\n+        os.remove(self._outputGff3FileName)\n+    \n+    def test_isOverlapping_true(self):\n+        queryCursor = NCListCursor(None, self._queryNcList, 5, 0)\n+        refCursor   = NCListCursor(None, self._refNcList,   4, 0)\n+        obs = self._iFOO.isOverlapping(queryCursor, refCursor)\n+        exp = 0\n+        self.assertEquals(exp, obs)\n+        \n+    def test_isOverlapping_false_left(self):\n+        queryCursor = NCListCursor(None, self._queryNcList, 5, 0)\n+        refCursor   = NCListCursor(None, self._refNcList,   2, 0)\n+        obs = self._iFOO.isOverlapping(queryCursor, refCursor)\n+        exp = -1\n+        self.assertEquals(exp, obs)\n+    \n+    def test_isOverlapping_false_right(self):\n+        queryCursor = NCListCursor(None, self._queryNcList, 5, 0)\n+        refCursor   = NCListCursor(None, self._refNcList,   1, 0)\n+        obs = self._iFOO.isOverlapping(queryCursor, refCursor)\n+        exp = 1\n+        self.assertEquals(exp, obs) \n+           \n+    def test_isLastElement_true(self):\n+        refCursor = NCListCursor(None, self._refNcList, 4, 0)\n+        obsBool   = refCursor.isLast()\n+        expBool   = True\n+        self.assertEquals(expBool, obsBool)\n+    \n+    def test_isLastElement_false(self):\n+        refCursor = NCListCursor(None, self._refNcList, 3, 0)\n+        obsBool   = refCursor.isLast()\n+        expBool   = False\n+        self.assertEquals(expBool, obsBool)  \n+        \n+    def test_isLastElement_highestLevel_true(self):\n+        refCursor = NCListCursor(None, self._refNcList, 1, 0)\n+        obsBool   = refCursor.isLast()\n+        expBool   = True\n+        self.assertEquals(expBool, obsBool)\n+    \n+    def test_isLastElement_highestLevel_false(self):\n+        refCursor = NCListCursor(None, self._refNcList, 0, 0)\n+        obsBool   = refCursor.isLast()\n+        expBool   = False\n+        self.assertEquals(expBool, obsBool)           \n+\n+    def test_findOverlapIter(self):\n+        queryCursor           = NCListCursor(None, self._queryNcList, 2, 0)\n+        refCursor             = NCListCursor(None, self._refNcList,   0, 0)\n+        queryTranscript       = queryCursor.getTranscript()\n+        
done                  = False\n+        (cursor, done, empty) = self._iFOO.findOverlapIter(queryTranscript, refCursor, done)\n+        obsFirstOverlapLAddr  = (cursor._lIndex, done, empty)\n+        expFirstOverlapLAddr  = 4, True, False\n+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n+        \n+    def test_not_findOverlapIter(self):\n+        queryCursor           = NCListCursor(None, self._queryNcList, 4, 0)\n+        refCursor             = NCListCursor(None, self._refNcList,   1, 0)\n+        queryTranscript   '..b' done, empty)\n+        expFirstOverlapLAddr  = -1, False, True\n+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n+        \n+    def test_findOverlapIter_not_the_first_RefOverlap(self):\n+        queryCursor           = NCListCursor(None, self._queryNcList, 3, 0)\n+        refCursor             = NCListCursor(None, self._refNcList,   4, 0)\n+        queryTranscript       = queryCursor.getTranscript()\n+        done                  = True\n+        (cursor, done, empty) = self._iFOO.findOverlapIter(queryTranscript, refCursor, done)\n+        obsFirstOverlapLAddr  = (cursor._lIndex, done, empty)\n+        expFirstOverlapLAddr  = 1, True, False\n+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n+        \n+    def test_moveDown(self):\n+        refCursor = NCListCursor(None, self._refNcList, 0, 0)\n+        refCursor.moveDown()\n+        expFirstChildLAddr = 2\n+        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n+    \n+    def test_moveUp(self):\n+        refCursor = NCListCursor(None, self._refNcList, 4, 0)\n+        refCursor.moveUp()\n+        expFirstChildLAddr = 0\n+        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n+    \n+    def test_moveRight(self):\n+        refCursor = NCListCursor(None, self._refNcList, 3, 0)\n+        refCursor.moveRight()\n+        expFirstChildLAddr = 4\n+        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n+       \n+    def test_moveNext(self):\n+        refCursor = NCListCursor(None, self._refNcList, 6, 0)\n+        refCursor.moveNext()\n+        expFirstChildLAddr = 1\n+        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n+\n+    def test_not_findOverlapIter_between2RefIntervals(self):\n+        inputQueryGff3FileName = \'query2.gff3\'\n+        self._writeQueryGff3File2(inputQueryGff3FileName)\n+        self._outputGff3FileName = \'overlaps.gff3\'\n+        iMock = MockFindOverlapsWithServeralIntervals_case1()\n+        iMock.write(self._inputRefGff3FileName)\n+        _iFOO = FindOverlapsOptim(0)\n+        _iFOO.setRefFileName(self._inputRefGff3FileName, "gff3")\n+        _iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")\n+        _iFOO.setOutputFileName(self._outputGff3FileName)\n+        _iFOO.prepareIntermediateFiles()\n+        _iFOO.createNCLists()\n+        _queryNcList          = _iFOO._ncLists[QUERY]["chr1"]\n+        _refNcList            = _iFOO._ncLists[REFERENCE]["chr1"]\n+        queryCursor           = NCListCursor(None, _queryNcList, 0, 0)\n+        refCursor             = NCListCursor(None, _refNcList,   0, 0)\n+        queryTranscript       = queryCursor.getTranscript()\n+        done                  = True\n+        (cursor, done, empty) = _iFOO.findOverlapIter(queryTranscript, refCursor, done)\n+        lIndex                = cursor._lIndex\n+        obsFirstOverlapLAddr  = (lIndex, done, empty)\n+        expFirstOverlapLAddr  = 1, False, True\n+        
self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n+        os.remove(inputQueryGff3FileName) \n+\n+    def _writeQueryGff3File2(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest1\\t1100\\t1150\\t126\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n+        f.write("chr1\\tquery\\ttest2\\t1250\\t1300\\t781\\t+\\t.\\tID=test1.2;Name=test1.2\\n")\n+        f.close()  \n+        \n+    def _writeQueryGff3File(self, fileName):\n+        f = open(fileName, \'w\')\n+        f.write("chr1\\tquery\\ttest1.1\\t25\\t150\\t126\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n+        f.write("chr1\\tquery\\ttest1.2\\t70\\t850\\t781\\t+\\t.\\tID=test1.2;Name=test1.2\\n")\n+        f.write("chr1\\tquery\\ttest1.3\\t550\\t850\\t201\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n+        f.write("chr1\\tquery\\ttest1.4\\t925\\t1025\\t101\\t+\\t.\\tID=test1.4;Name=test1.4\\n")\n+        f.write("chr1\\tquery\\ttest1.5\\t1201\\t1210\\t10\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n+        f.write("chr1\\tquery\\ttest1.6\\t1500\\t1600\\t101\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n+        f.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/Test_FindOverlaps_optim.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/Test_FindOverlaps_optim.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,149 @@
+import unittest
+import os
+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
+from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *
+
+class Test_FindOverlaps_optim(unittest.TestCase):
+
+    def setUp(self):
+        self._inputRefGff3FileName = 'sorted_Ref.gff3'
+        iMock = MockFindOverlapsWithServeralIntervals_case1()
+        iMock.write(self._inputRefGff3FileName)
+        self._inputQueryGff3FileName = 'sorted_Query.gff3'
+        self._writeQueryGff3File(self._inputQueryGff3FileName)
+        self._outputGff3FileName = 'overlaps.gff3'
+        self._iFOO = FindOverlaps_optim(self._inputRefGff3FileName, self._inputQueryGff3FileName)
+        self._iFOO.prepareIntermediateFiles_sorted()
+        self._iFOO.setOutputGff3FileName(self._outputGff3FileName)
+        
+    def tearDown(self):
+        os.remove(self._inputRefGff3FileName)
+        os.remove(self._inputQueryGff3FileName)
+        os.remove(self._outputGff3FileName)
+        self._iFOO.deletIntermediateFiles()
+    
+    def test_isOverlapping_true(self):
+        queryGff3Addr = 116
+        RefGff3Addr = 231
+        obs = self._iFOO.isOverlapping(queryGff3Addr, RefGff3Addr)
+        exp = 0
+        self.assertEquals(exp, obs)
+        
+    def test_isOverlapping_false_left(self):
+        queryGff3Addr = 116
+        RefGff3Addr = 58
+        obs = self._iFOO.isOverlapping(queryGff3Addr, RefGff3Addr)
+        exp = -1
+        self.assertEquals(exp, obs)
+    
+    def test_isOverlapping_false_right(self):
+        queryGff3Addr = 116
+        RefGff3Addr = 347
+        obs = self._iFOO.isOverlapping(queryGff3Addr, RefGff3Addr)
+        exp = 1
+        self.assertEquals(exp, obs) 
+           
+    def test_getHisFirstChild(self):
+        firstRefLAddr = 0
+        obsFirstChildLAddr = self._iFOO.getHisFirstChild(firstRefLAddr)
+        expFirstChildLAddr = 48
+        self.assertEquals(expFirstChildLAddr, obsFirstChildLAddr) 
+    
+    def test_isLastElement_true(self):
+        refLAddr = 96
+        obsBool = self._iFOO.isLastElement(refLAddr)
+        expBool = True
+        self.assertEquals(expBool, obsBool)
+    
+    def test_isLastElement_false(self):
+        refLAddr = 72
+        obsBool = self._iFOO.isLastElement(refLAddr)
+        expBool = False
+        self.assertEquals(expBool, obsBool)  
+        
+    def test_isLastElement_highestLevel_true(self):
+        refLAddr = 24
+        obsBool = self._iFOO.isLastElement(refLAddr)
+        expBool = True
+        self.assertEquals(expBool, obsBool)
+    
+    def test_isLastElement_highestLevel_false(self):
+        refLAddr = 0
+        obsBool = self._iFOO.isLastElement(refLAddr)
+        expBool = False
+        self.assertEquals(expBool, obsBool)           
+
+    def test_findOverlapIter(self):
+        queryGff3Addr = 175
+        firstRefLAddr = 0 
+        done = False
+        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
+        expFirstOverlapLAddr = 96, True
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
+        
+    def test_not_findOverlapIter(self):
+        queryGff3Addr = 295
+        firstRefLAddr = 24 
+        done = False
+        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
+        expFirstOverlapLAddr = None, False
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)   
+        
+    def test_findOverlapIter_not_the_first_RefOverlap(self):
+        queryGff3Addr = 235
+        firstRefLAddr = 96 
+        done = True
+        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
+        expFirstOverlapLAddr = 24, False
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)  
+        
+    def test_changeToNewSubEndLAddr(self):
+        firstChildLAddr = 48
+        subEndLAddr = 48
+        expSubEndLAddr = 120
+        obsSubEndLAddr = self._iFOO.changeToNewSubEndLAddr(firstChildLAddr, subEndLAddr)
+        self.assertEquals(expSubEndLAddr, obsSubEndLAddr) 
+        
+    def test_defineSubEndLaddr(self):
+        parentLAddr = -1
+        expSubEndLAddr = 48
+        obsSubEndLAddr = self._iFOO.defineSubEndLaddr(parentLAddr)
+        self.assertEquals(expSubEndLAddr, obsSubEndLAddr)
+        
+    def test_getNextRefIntervalInCaseNotOverLap(self):
+        firstRefLAddr = 96
+        expRefLAddr = 24
+        obsRefLAddr = self._iFOO.getNext(firstRefLAddr)
+        self.assertEquals(expRefLAddr, obsRefLAddr)
+        
+
+    def test_not_findOverlapIter_between2RefIntervals(self):
+        inputQueryGff3FileName = 'query2.gff3'
+        self._writeQueryGff3File2(inputQueryGff3FileName)
+        self._iFOO.setQueryGff3FileName(inputQueryGff3FileName)
+        queryGff3Addr = 0
+        firstRefLAddr = 0
+        done = False
+        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
+        expFirstOverlapLAddr = 24, False
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr) 
+        os.remove(inputQueryGff3FileName) 
+
+    def _writeQueryGff3File2(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tquery\ttest1\t1100\t1150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
+        f.write("chr1\tquery\ttest2\t1250\t1300\t781\t+\t.\tID=test1.2;Name=test1.2\n")
+        f.close()  
+        
+    def _writeQueryGff3File(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
+        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t+\t.\tID=test1.2;Name=test1.2\n")
+        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t+\t.\tID=test1.3;Name=test1.3\n")
+        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=test1.4;Name=test1.4\n")
+        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=test1.5;Name=test1.5\n")
+        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=test1.6;Name=test1.6\n")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test/timeResults.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test/timeResults.R Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,4 @@
+data <- read.table("timeResults.dat", header=TRUE)
+attach(data)
+plot(numberOfReads, time, xlab="number of reads", ylab="time used")
+title("7 overlaps and random reference input fixed")
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test3.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/test3.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,501 @@\n+chr1\tS-MART\tgene\t658657\t659771\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G02920;nbOverlaps=1;ID=AT1G02920;Name=AT1G02920\n+chr1\tS-MART\tgene\t306384\t306456\t.\t+\t.\tNote=tRNA;overlapWith=AT1G01870;nbOverlaps=1;ID=AT1G01870;Name=AT1G01870\n+chr1\tS-MART\tgene\t28500\t28706\t.\t+\t.\tNote=miRNA;overlapWith=AT1G01046,AT1G01040;nbOverlaps=2;ID=AT1G01046;Name=AT1G01046\n+chr1\tS-MART\tgene\t78932\t79032\t.\t-\t.\tNote=miRNA;overlapWith=AT1G01183;nbOverlaps=1;ID=AT1G01183;Name=AT1G01183\n+chr1\tS-MART\tgene\t31170\t33153\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01050,AT1G01040;nbOverlaps=2;ID=AT1G01050;Name=AT1G01050\n+chr1\tS-MART\tgene\t38752\t40944\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01070;nbOverlaps=1;ID=AT1G01070;Name=AT1G01070\n+chr1\tS-MART\tgene\t47485\t49286\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01090;nbOverlaps=1;ID=AT1G01090;Name=AT1G01090\n+chr1\tS-MART\tgene\t56624\t56740\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01115;nbOverlaps=1;ID=AT1G01115;Name=AT1G01115\n+chr1\tS-MART\tgene\t72339\t74096\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01160,AT1G01170;nbOverlaps=2;ID=AT1G01160;Name=AT1G01160\n+chr1\tS-MART\tgene\t73931\t74737\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01160,AT1G01170;nbOverlaps=2;ID=AT1G01170;Name=AT1G01170\n+chr1\tS-MART\tgene\t75583\t76758\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01180;nbOverlaps=1;ID=AT1G01180;Name=AT1G01180\n+chr1\tS-MART\tgene\t111890\t111961\t.\t-\t.\tNote=tRNA;overlapWith=AT1G01270;nbOverlaps=1;ID=AT1G01270;Name=AT1G01270\n+chr1\tS-MART\tgene\t88898\t89745\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01210;nbOverlaps=1;ID=AT1G01210;Name=AT1G01210\n+chr1\tS-MART\tgene\t91376\t95651\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01220;nbOverlaps=1;ID=AT1G01220;Name=AT1G01220\n+chr1\tS-MART\tgene\t95987\t97407\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01225;nbOverlaps=1;ID=AT1G01225;Name=AT1G01225\n+chr1\tS-MART\tgene\t97456\t99240\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01230;nbOverlaps=1;ID=AT1G01230;Name=AT1G01230\n+chr1\tS-MART\tgene\t99894\t101834\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01240;nbOverlaps=1;ID=AT1G01240;Name=AT1G01240\n+chr1\tS-MART\tgene\t104491\t105330\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01250;nbOverlaps=1;ID=AT1G01250;Name=AT1G01250\n+chr1\tS-MART\tgene\t108946\t111609\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01260;nbOverlaps=1;ID=AT1G01260;Name=AT1G01260\n+chr1\tS-MART\tgene\t163419\t166239\t.\t+\t.\tNote=other_RNA;overlapWith=AT1G01448,AT1G01450;nbOverlaps=2;ID=AT1G01448;Name=AT1G01448\n+chr1\tS-MART\tgene\t114286\t116108\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01290;nbOverlaps=1;ID=AT1G01290;Name=AT1G01290\n+chr1\tS-MART\tgene\t116943\t118764\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01300;nbOverlaps=1;ID=AT1G01300;Name=AT1G01300\n+chr1\tS-MART\tgene\t119397\t119997\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01305;nbOverlaps=1;ID=AT1G01305;Name=AT1G01305\n+chr1\tS-MART\tgene\t120154\t121130\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01310;nbOverlaps=1;ID=AT1G01310;Name=AT1G01310\n+chr1\tS-MART\tgene\t132328\t135831\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01340;nbOverlaps=1;ID=AT1G01340;Name=AT1G01340\n+chr1\tS-MART\tgene\t136124\t138162\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01350;nbOverlaps=1;ID=AT1G01350;Name=AT1G01350\n+chr1\tS-MART\tgene\t141971\t143183\t.\t+\t.\tNote=protein_coding_gen
e;overlapWith=AT1G01360;nbOverlaps=1;ID=AT1G01360;Name=AT1G01360\n+chr1\tS-MART\tgene\t143564\t145684\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01370;nbOverlaps=1;ID=AT1G01370;Name=AT1G01370\n+chr1\tS-MART\tgene\t147153\t147942\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01380;nbOverlaps=1;ID=AT1G01380;Name=AT1G01380\n+chr1\tS-MART\tgene\t148120\t149806\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01390;nbOverlaps=1;ID=AT1G01390;Name=AT1G01390\n+chr1\tS-MART\tgene\t150689\t152210\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01400;nbOverlaps=1;ID=AT1G01400;Name=AT1G01400\n+chr1\tS-MART\tgene\t153113\t154198\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01410;nbOverlaps=1;ID=AT1G01410;Name=AT1G'..b'1G07050;nbOverlaps=1;ID=AT1G07050;Name=AT1G07050\n+chr1\tS-MART\tgene\t2177885\t2177958\t.\t+\t.\tNote=tRNA;overlapWith=AT1G07100;nbOverlaps=1;ID=AT1G07100;Name=AT1G07100\n+chr1\tS-MART\tgene\t2167107\t2168397\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07060;nbOverlaps=1;ID=AT1G07060;Name=AT1G07060\n+chr1\tS-MART\tgene\t2168564\t2169851\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07070;nbOverlaps=1;ID=AT1G07070;Name=AT1G07070\n+chr1\tS-MART\tgene\t2169982\t2172194\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07080;nbOverlaps=1;ID=AT1G07080;Name=AT1G07080\n+chr1\tS-MART\tgene\t2173952\t2174894\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07090;nbOverlaps=1;ID=AT1G07090;Name=AT1G07090\n+chr1\tS-MART\tgene\t2184347\t2186539\t.\t+\t.\tNote=other_RNA;overlapWith=AT1G07119,AT1G07120;nbOverlaps=2;ID=AT1G07119;Name=AT1G07119\n+chr1\tS-MART\tgene\t2187621\t2188417\t.\t-\t.\tNote=other_RNA;overlapWith=AT1G07128;nbOverlaps=1;ID=AT1G07128;Name=AT1G07128\n+chr1\tS-MART\tgene\t2184759\t2186580\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07119,AT1G07120;nbOverlaps=2;ID=AT1G07120;Name=AT1G07120\n+chr1\tS-MART\tgene\t2193941\t2195798\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07150;nbOverlaps=1;ID=AT1G07150;Name=AT1G07150\n+chr1\tS-MART\tgene\t2200123\t2201265\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07170;nbOverlaps=1;ID=AT1G07170;Name=AT1G07170\n+chr1\tS-MART\tgene\t2202330\t2202774\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07175;nbOverlaps=1;ID=AT1G07175;Name=AT1G07175\n+chr1\tS-MART\tgene\t2204320\t2206934\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07180;nbOverlaps=1;ID=AT1G07180;Name=AT1G07180\n+chr1\tS-MART\tgene\t2208013\t2208177\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07190;nbOverlaps=1;ID=AT1G07190;Name=AT1G07190\n+chr1\tS-MART\tgene\t2208719\t2212546\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07200;nbOverlaps=1;ID=AT1G07200;Name=AT1G07200\n+chr1\tS-MART\tgene\t2215223\t2216982\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07210;nbOverlaps=1;ID=AT1G07210;Name=AT1G07210\n+chr1\tS-MART\tgene\t2245758\t2246492\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07300;nbOverlaps=1;ID=AT1G07300;Name=AT1G07300\n+chr1\tS-MART\tgene\t2249133\t2250529\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07320;nbOverlaps=1;ID=AT1G07320;Name=AT1G07320\n+chr1\tS-MART\tgene\t2260389\t2262865\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07360;nbOverlaps=1;ID=AT1G07360;Name=AT1G07360\n+chr1\tS-MART\tgene\t2263140\t2264551\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07370;nbOverlaps=1;ID=AT1G07370;Name=AT1G07370\n+chr1\tS-MART\tgene\t2290201\t2290977\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07460;nbOverlaps=1;ID=AT1G07460;Name=AT1G07460\n+chr1\tS-MART\tgene\t2382251\t2382331\t.\
t-\t.\tNote=snoRNA;overlapWith=AT1G07702;nbOverlaps=1;ID=AT1G07702;Name=AT1G07702\n+chr1\tS-MART\tgene\t2338904\t2339321\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07600,AT1G07590;nbOverlaps=2;ID=AT1G07600;Name=AT1G07600\n+chr1\tS-MART\tgene\t2349097\t2351692\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07630;nbOverlaps=1;ID=AT1G07630;Name=AT1G07630\n+chr1\tS-MART\tgene\t2354354\t2356227\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07640;nbOverlaps=1;ID=AT1G07640;Name=AT1G07640\n+chr1\tS-MART\tgene\t2367437\t2368385\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07645;nbOverlaps=1;ID=AT1G07645;Name=AT1G07645\n+chr1\tS-MART\tgene\t2408203\t2409580\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07770;nbOverlaps=1;ID=AT1G07770;Name=AT1G07770\n+chr1\tS-MART\tgene\t2410056\t2412677\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07780;nbOverlaps=1;ID=AT1G07780;Name=AT1G07780\n+chr1\tS-MART\tgene\t2412980\t2413708\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07790;nbOverlaps=1;ID=AT1G07790;Name=AT1G07790\n+chr1\tS-MART\tgene\t2414286\t2414967\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07795;nbOverlaps=1;ID=AT1G07795;Name=AT1G07795\n+chr1\tS-MART\tgene\t2421216\t2421947\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07820;nbOverlaps=1;ID=AT1G07820;Name=AT1G07820\n+chr1\tS-MART\tgene\t2416265\t2420757\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07810;nbOverlaps=1;ID=AT1G07810;Name=AT1G07810\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test3.png
Binary file SMART/Java/Python/test3.png has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/test3.png_I.png
Binary file SMART/Java/Python/test3.png_I.png has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/testInstall.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/testInstall.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,103 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Test if the configuration is sound
+"""
+
+import sys
+import os
+import subprocess
+
+# Test Python files
+try :
+    from SMART.Java.Python.misc.RPlotter import *
+except:
+    print "Cannot find Python scripts! Update PYTHONPATH (currently %s) environment variable and see configuration in the documentation!" % (os.environ["PYTHONPATH"] if "PYTHONPATH" in os.environ else "empty")
+    sys.exit(3)
+
+try :
+    from SMART.Java.Python.mySql.MySqlTranscriptTable import *
+    from SMART.Java.Python.mySql.MySqlConnection import *
+except:
+    print "SQLite is not installed ! Please read the documentation!"
+    sys.exit(4)
+
+
+if __name__ == "__main__":
+    
+    print "Python scripts are correctly read."
+    
+    # Test mySQL
+    connection = MySqlConnection()
+    table = MySqlTranscriptTable(connection)
+
+    try:
+        table.createTranscriptTable()
+    except:
+        print "Cannot connect to the SQLite database! See configuration in the documentation!"
+        sys.exit(5)
+        
+    print "SQLite database is correctly set up."
+
+        
+    # Test R
+    fileName = "tmpFile.R"
+    file = open(fileName, "w")
+    file.write("?licence\n")
+    file.close()
+    rCommand = "R"
+    if "SMARTRPATH" in os.environ:
+        rCommand = os.environ["SMARTRPATH"]
+    command = "\"%s\" CMD BATCH %s" % (rCommand, fileName)
+    status    = subprocess.call(command, shell=True)
+    os.remove(fileName)
+    outputFileName = "%sout" % (fileName)
+    if os.path.exists(outputFileName):
+        os.remove(outputFileName)
+
+    if status != 0:
+        print "Problem with the execution of R script (command '%s' did not work, current directory is %s, status is %d)! See configuration in the documentation!" % (command, os.getcwd(), status)
+        sys.exit(6)
+
+    line = {0: 1, 1: 2}
+    pngFileName = "tmpFile.png"
+    plotter = RPlotter(pngFileName)
+    plotter.addLine(line)
+    try:
+        plotter.plot()
+    except:
+        print "Problem with the execution of R script: library 'RColorBrewer' is missing! See configuration in the documentation!"
+        sys.exit(7)
+    os.remove(pngFileName)
+
+    print "R is available."
+
+    print "Set up is fine! Enjoy S-MART!"
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Python/testOut.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/testOut.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,7949 @@\n+X\tS-MART\ttranscript\t559416\t566828\t.\t+\t.\tseqedit=false;p_id=P3233;gene_id=YJR066W;tss_id=TSS6291;nbOverlaps=0;ID=YJR066W;Name=TOR1\n+X\tS-MART\ttranscript\t567019\t567444\t.\t-\t.\tseqedit=false;p_id=P5345;gene_id=YJR067C;tss_id=TSS1719;nbOverlaps=0;ID=YJR067C;Name=YAE1\n+X\tS-MART\ttranscript\t567643\t568704\t.\t+\t.\tseqedit=false;p_id=P3144;gene_id=YJR068W;tss_id=TSS5398;nbOverlaps=0;ID=YJR068W;Name=RFC2\n+X\tS-MART\ttranscript\t568806\t569399\t.\t-\t.\tseqedit=false;p_id=P860;gene_id=YJR069C;tss_id=TSS4309;nbOverlaps=0;ID=YJR069C;Name=HAM1\n+X\tS-MART\ttranscript\t569621\t570598\t.\t-\t.\tseqedit=false;p_id=P6450;gene_id=YJR070C;tss_id=TSS1494;nbOverlaps=0;ID=YJR070C;Name=LIA1\n+X\tS-MART\ttranscript\t570967\t572124\t.\t-\t.\tseqedit=false;p_id=P5586;gene_id=YJR072C;tss_id=TSS6083;nbOverlaps=0;ID=YJR072C;Name=NPA3\n+X\tS-MART\ttranscript\t570405\t570773\t.\t+\t.\tseqedit=false;p_id=P6019;gene_id=YJR071W;tss_id=TSS738;nbOverlaps=0;ID=YJR071W;Name=YJR071W\n+XV\tS-MART\ttranscript\t80348\t81190\t.\t+\t.\tseqedit=false;p_id=P4151;gene_id=YOL127W;tss_id=TSS4223;nbOverlaps=0;ID=YOL127W;Name=RPL25\n+XV\tS-MART\texon\t80348\t80360\t.\t+\t.\tID=YOL127W-exon1;Name=RPL25-exon1;Parent=YOL127W\n+XV\tS-MART\texon\t80775\t81190\t.\t+\t.\tID=YOL127W-exon2;Name=RPL25-exon2;Parent=YOL127W\n+X\tS-MART\ttranscript\t572315\t572935\t.\t-\t.\tseqedit=false;p_id=P2012;gene_id=YJR073C;tss_id=TSS710;nbOverlaps=0;ID=YJR073C;Name=OPI3\n+VI\tS-MART\ttranscript\t157916\t158007\t.\t-\t.\tseqedit=false;overlapsWith=HWUSI-EAS1656_0009_FC:3:74:18104:5842#0--HWUSI-EAS1656_0009_FC:3:56:11954:3071#0--HWUSI-EAS1656_0009;gene_id=tF(GAA)F;tss_id=TSS3380;nbOverlaps=184;ID=tF(GAA)F;Name=tF(GAA)F\n+VI\tS-MART\texon\t157916\t157951\t.\t-\t.\tID=tF(GAA)F-exon1;Name=tF(GAA)F-exon1;Parent=tF(GAA)F\n+VI\tS-MART\texon\t157971\t158007\t.\t-\t.\tID=tF(GAA)F-exon2;Name=tF(GAA)F-exon2;Parent=tF(GAA)F\n+X\tS-MART\ttranscript\t573095\t573751\t.\t+\t.\tseqedit=false;p_id=P5576;gene_id=YJR074W;tss_id=TSS2929;nbOverlaps=0;ID=YJR074W;Name=MOG1\n+X\tS-MART\ttranscript\t573981\t575171\t.\t+\t.\tseqedit=false;p_id=P1330;gene_id=YJR075W;tss_id=TSS857;nbOverlaps=0;ID=YJR075W;Name=HOC1\n+X\tS-MART\ttranscript\t575354\t576601\t.\t-\t.\tseqedit=false;p_id=P1404;gene_id=YJR076C;tss_id=TSS1037;nbOverlaps=0;ID=YJR076C;Name=CDC11\n+X\tS-MART\ttranscript\t577255\t578190\t.\t-\t.\tseqedit=false;p_id=P1673;gene_id=YJR077C;tss_id=TSS2801;nbOverlaps=0;ID=YJR077C;Name=MIR1\n+X\tS-MART\ttranscript\t578860\t580221\t.\t+\t.\tseqedit=false;p_id=P2480;gene_id=YJR078W;tss_id=TSS3796;nbOverlaps=0;ID=YJR078W;Name=BNA2\n+X\tS-MART\ttranscript\t580432\t581616\t.\t-\t.\tseqedit=false;p_id=P3568;gene_id=YJR080C;tss_id=TSS3503;nbOverlaps=0;ID=YJR080C;Name=AIM24\n+X\tS-MART\ttranscript\t580205\t581239\t.\t+\t.\tseqedit=false;p_id=P2074;gene_id=YJR079W;tss_id=TSS3170;nbOverlaps=0;ID=YJR079W;Name=YJR079W\n+X\tS-MART\texon\t580205\t580347\t.\t+\t.\tID=YJR079W-exon1;Name=YJR079W-exon1;Parent=YJR079W\n+X\tS-MART\texon\t581053\t581239\t.\t+\t.\tID=YJR079W-exon2;Name=YJR079W-exon2;Parent=YJR079W\n+X\tS-MART\ttranscript\t581914\t582255\t.\t-\t.\tseqedit=false;p_id=P5333;gene_id=YJR082C;tss_id=TSS407;nbOverlaps=0;ID=YJR082C;Name=EAF6\n+X\tS-MART\ttranscript\t582608\t583537\t.\t-\t.\tseqedit=false;p_id=P1588;gene_id=YJR083C;tss_id=TSS3054;nbOverlaps=0;ID=YJR083C;Name=ACF4\n+X\tS-MART\ttranscript\t586068\t586400\t.\t+\t.\tseqedit=false;p_id=P636;gene_id=YJR086W;tss_id=TSS1801;nbOverlaps=0;ID=YJR086W;Name=STE18\n+X\tS-MART\ttranscript\t583733\t585004\t.\t+\t
.\tseqedit=false;p_id=P6689;gene_id=YJR084W;tss_id=TSS4164;nbOverlaps=0;ID=YJR084W;Name=YJR084W\n+X\tS-MART\ttranscript\t585120\t585437\t.\t-\t.\tseqedit=false;p_id=P4342;gene_id=YJR085C;tss_id=TSS1798;nbOverlaps=0;ID=YJR085C;Name=YJR085C\n+X\tS-MART\ttranscript\t586495\t587373\t.\t-\t.\tseqedit=false;p_id=P5189;gene_id=YJR088C;tss_id=TSS5912;nbOverlaps=0;ID=YJR088C;Name=EMC2\n+X\tS-MART\ttranscript\t586400\t586750\t.\t+\t.\tseqedit=false;p_id=P5459;gene_id=YJR087W;tss_id=TSS2614;nbOverlaps=0;ID=YJR087W;Name=YJR087W\n+X\tS-MART\ttranscript\t587718\t590582\t.\t+\t.\tseqedit=false;p_id=P2527;gene_id=YJR089W;tss_id=TSS2068;nbOverlaps=0;ID=YJR089W;Name=BIR1\n+XV\tS-MART\ttranscript\t78352\t79479\t.\t-\t.\tseqedit=false;p_id=P637'..b'0;ID=YML081W;Name=TDA9\n+III\tS-MART\ttranscript\t186489\t190169\t.\t+\t.\tseqedit=false;p_id=P5337;gene_id=YCR033W;tss_id=TSS5862;nbOverlaps=0;ID=YCR033W;Name=SNT1\n+XIII\tS-MART\ttranscript\t97941\t99400\t.\t-\t.\tseqedit=false;p_id=P3499;gene_id=YML085C;tss_id=TSS4259;nbOverlaps=0;ID=YML085C;Name=TUB1\n+XIII\tS-MART\texon\t97941\t99259\t.\t-\t.\tID=YML085C-exon1;Name=TUB1-exon1;Parent=YML085C\n+XIII\tS-MART\texon\t99376\t99400\t.\t-\t.\tID=YML085C-exon2;Name=TUB1-exon2;Parent=YML085C\n+XIII\tS-MART\ttranscript\t101862\t103811\t.\t+\t.\tseqedit=false;p_id=P2203;gene_id=YML082W;tss_id=TSS1709;nbOverlaps=0;ID=YML082W;Name=YML082W\n+XIII\tS-MART\ttranscript\t99794\t101050\t.\t-\t.\tseqedit=false;p_id=P5692;gene_id=YML083C;tss_id=TSS948;nbOverlaps=0;ID=YML083C;Name=YML083C\n+XIII\tS-MART\ttranscript\t99489\t99797\t.\t+\t.\tseqedit=false;p_id=P3429;gene_id=YML084W;tss_id=TSS5738;nbOverlaps=0;ID=YML084W;Name=YML084W\n+VII\tS-MART\ttranscript\t790459\t793053\t.\t-\t.\tseqedit=false;p_id=P4647;gene_id=YGR150C;tss_id=TSS3036;nbOverlaps=0;ID=YGR150C;Name=CCM1\n+VII\tS-MART\ttranscript\t789031\t790329\t.\t+\t.\tseqedit=false;p_id=P267;gene_id=YGR149W;tss_id=TSS6312;nbOverlaps=0;ID=YGR149W;Name=YGR149W\n+XVI\tS-MART\ttranscript\t919381\t920487\t.\t+\t.\tseqedit=false;p_id=P2858;gene_id=YPR191W;tss_id=TSS1183;nbOverlaps=0;ID=YPR191W;Name=QCR2\n+XIV\tS-MART\ttranscript\t340352\t340858\t.\t+\t.\tseqedit=false;p_id=P3270;gene_id=YNL157W;tss_id=TSS5800;nbOverlaps=0;ID=YNL157W;Name=IGO1\n+XIII\tS-MART\ttranscript\t95791\t97371\t.\t-\t.\tseqedit=false;p_id=P392;gene_id=YML086C;tss_id=TSS3973;nbOverlaps=0;ID=YML086C;Name=ALO1\n+XIII\tS-MART\ttranscript\t94431\t95369\t.\t-\t.\tseqedit=false;p_id=P6540;gene_id=YML087C;tss_id=TSS643;nbOverlaps=0;ID=YML087C;Name=AIM33\n+XIII\tS-MART\ttranscript\t92235\t94241\t.\t+\t.\tseqedit=false;p_id=P4927;gene_id=YML088W;tss_id=TSS3372;nbOverlaps=0;ID=YML088W;Name=UFO1\n+XIII\tS-MART\ttranscript\t87123\t90731\t.\t-\t.\tseqedit=false;p_id=P927;gene_id=YML091C;tss_id=TSS632;nbOverlaps=0;ID=YML091C;Name=RPM2\n+XIII\tS-MART\ttranscript\t91041\t91409\t.\t-\t.\tseqedit=false;p_id=P4844;gene_id=YML089C;tss_id=TSS911;nbOverlaps=0;ID=YML089C;Name=YML089C\n+XIII\tS-MART\ttranscript\t90744\t91130\t.\t+\t.\tseqedit=false;p_id=P849;gene_id=YML090W;tss_id=TSS5516;nbOverlaps=0;ID=YML090W;Name=YML090W\n+XIII\tS-MART\ttranscript\t85987\t86739\t.\t-\t.\tseqedit=false;p_id=P5314;gene_id=YML092C;tss_id=TSS3012;nbOverlaps=0;ID=YML092C;Name=PRE8\n+XIII\tS-MART\ttranscript\t83090\t85789\t.\t+\t.\tseqedit=false;p_id=P3589;gene_id=YML093W;tss_id=TSS794;nbOverlaps=0;ID=YML093W;Name=UTP14\n+XIII\tS-MART\ttranscript\t82219\t82620\t.\t-\t.\tseqedit=false;p_id=P3718;gene_id=YML094C-A;tss_id=TSS4585;nbOverlaps=0;ID=YML094C-A;Name=YML094C-A\n+VII\tS-MART\ttranscript\t817747\t823015
\t.\t-\t.\tseqedit=false;overlapsWith=HWUSI-EAS1656_0009_FC:3:49:2195:2756#0--HWUSI-EAS1656_0009_FC:3:12:10443:12335#0--HWUSI-EAS1656_0009;p_id=P3114;gene_id=YGR161C-D;tss_id=TSS2033;nbOverlaps=431;ID=YGR161C-D;Name=YGR161C-D\n+VII\tS-MART\texon\t817747\t821709\t.\t-\t.\tID=YGR161C-D-exon1;Name=YGR161C-D-exon1;Parent=YGR161C-D\n+VII\tS-MART\texon\t821711\t823015\t.\t-\t.\tID=YGR161C-D-exon2;Name=YGR161C-D-exon2;Parent=YGR161C-D\n+XIII\tS-MART\ttranscript\t82275\t82849\t.\t+\t.\tseqedit=false;p_id=P1742;gene_id=YML094W;tss_id=TSS1864;nbOverlaps=0;ID=YML094W;Name=GIM5\n+XIII\tS-MART\texon\t82275\t82290\t.\t+\t.\tID=YML094W-exon1;Name=GIM5-exon1;Parent=YML094W\n+XIII\tS-MART\texon\t82374\t82849\t.\t+\t.\tID=YML094W-exon2;Name=GIM5-exon2;Parent=YML094W\n+VII\tS-MART\ttranscript\t807073\t807684\t.\t+\t.\tseqedit=false;p_id=P5320;gene_id=YGR160W;tss_id=TSS4839;nbOverlaps=0;ID=YGR160W;Name=YGR160W\n+VII\tS-MART\ttranscript\t821693\t823015\t.\t-\t.\tseqedit=false;overlapsWith=HWUSI-EAS1656_0009_FC:3:69:10212:8264#0--HWUSI-EAS1656_0009_FC:3:26:11389:20176#0--HWUSI-EAS1656_000;p_id=P2676;gene_id=YGR161C-C;tss_id=TSS2033;nbOverlaps=12;ID=YGR161C-C;Name=YGR161C-C\n+XII\tS-MART\ttranscript\t660716\t662833\t.\t+\t.\tseqedit=false;p_id=P1153;gene_id=YLR258W;tss_id=TSS609;nbOverlaps=0;ID=YLR258W;Name=GSY2\n+XIII\tS-MART\ttranscript\t81481\t82113\t.\t-\t.\tseqedit=false;p_id=P6599;gene_id=YML095C;tss_id=TSS4095;nbOverlaps=0;ID=YML095C;Name=RAD10\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/PythonHelperReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/PythonHelperReader.java Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,336 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.io.File;\n+import java.io.*;\n+import java.util.regex.*;\n+\n+public class PythonHelperReader {\n+\n+  String         fileName;\n+  Program        program;\n+  BufferedReader reader;\n+  String         message;\n+\n+  public PythonHelperReader(String fileName) {\n+    this.fileName = fileName;  \n+    this.reader   = reader;\n+    this.message  = null;\n+  }\n+\n+  public void setReader(BufferedReader reader) {\n+    this.reader = reader;\n+  }\n+  \n+  public void run() {\n+    this.program                     = new Program();\n+    boolean         inBeginning      = true;\n+    boolean         inUsage          = false;\n+    boolean         afterUsage       = false;\n+    boolean         inDescription    = false;\n+    boolean         afterDescription = false;\n+    boolean         inOptions        = false;\n+    boolean         inOptionBlank    = false;\n+    boolean         inError          = false;\n+    String          usage            = null;\n+    String          description      = null;\n+    String          option           = null;\n+    Vector <String> options          = new Vector < String > ();\n+    String[]        optionSplitted;\n+\n+    // Parse file\n+    try {\n+      String line = null;\n+\n+      while ((line = reader.readLine()) != null) {\n+        line = line.trim();\n+        if (line.startsWith("Traceback")) {\n+          this.message     = "Problem with header of \'" + this.fileName + "\':\\n" + line + "\\n";\n+          inError          = true;\n+          inBeginning      = false;\n+          inUsage          = false;\n+          afterUsage       = false;\n+          inDescription    = false;\n+          afterDescription = false;\n+          inOptions        = false;\n+          inOptionBlank    = false;\n+        }\n+        else if (inError) {\n+          this.message += line + "\\n";\n+        }\n+        else if (inBeginning) {\n+          if 
(line.startsWith("Usage:")) {\n+            inUsage     = true;\n+            inBeginning = false;\n+            usage       = line;\n+          }\n+        }\n+        else if (inUsage) {\n+          if ("".equals(line)) {\n+            inUsage    = false;\n+            afterUsage = true;\n+          }\n+          else {\n+            usage += " " + line;\n+          }\n+        }\n+        else if (afterUsage) {\n+          if (! "".equals(line)) {\n+            description   = line;\n+            afterUsage    = false;\n+            inDescription = true;\n+          }\n+ '..b'     int pos     = value.indexOf(" ");\n+              currentWord = value.substring(0, pos);\n+              rest        = value.substring(pos+1);\n+            }\n+            else {\n+              currentWord = value;\n+            }\n+            // Output file type\n+            if ("output".compareToIgnoreCase(currentWord) == 0) {\n+              programOption.setInput(false);\n+              int pos     = rest.indexOf(" ");\n+              currentWord = rest.substring(0, pos).trim();\n+              rest        = rest.substring(pos+1).trim();\n+            }\n+            // File (input or output file)\n+            if ("file".compareToIgnoreCase(currentWord) == 0) {\n+              programOption.setType("file");\n+              // Format given by an associated option (to be found later)\n+              if (rest.startsWith("in format given by ")) {\n+                associatedOption.put(programOption, rest.substring(rest.indexOf("format given by ") + "format given by ".length() + 1).trim());\n+              }\n+              else {\n+                if (! rest.startsWith("in ")) {\n+                  this.message = "Descriptor " + option + " does not have a proper format.\\n";\n+                  return;\n+                }\n+                rest = rest.substring("in ".length());\n+                int pos = rest.indexOf(" format");\n+                if (pos == -1) {\n+                  this.message = "Descriptor " + option + " does not have a proper format.\\n";\n+                  return;\n+                }\n+                programOption.setFormat(rest.substring(0, pos).trim().toLowerCase().split(" or "));\n+              }\n+            }\n+            // Format type\n+            else if (rest.endsWith("file format")) {\n+              programOption.setFormat((currentWord + " " + rest.substring(0, rest.indexOf("file format"))).trim().toLowerCase().split(" or "));\n+              programOption.setType("format");\n+            }\n+            // Choice type\n+            else if ("choice".compareToIgnoreCase(currentWord) == 0) {\n+              programOption.setChoices(rest.replace("(", "").replace(")", "").split(", "));\n+              programOption.setType("choice");\n+            }\n+            // Boolean type\n+            else if ("bool".compareToIgnoreCase(currentWord) == 0) {\n+              programOption.setType("boolean");\n+            }\n+            // Other type\n+            else {\n+              if (currentWord == null) {\n+                this.message = "Program \'" + this.fileName + "\' has a problem concerning the type of option \'" + identifier + "\'.\\n";\n+                return;\n+              }\n+              programOption.setType(currentWord);\n+            }\n+          }\n+          // Default value\n+          else if ("default".compareToIgnoreCase(type) == 0) {\n+            programOption.setDefault(value);\n+          }\n+          else {\n+            this.message = "Do 
not understand option descriptor \'" + inner + "\'.\\n";\n+            return;\n+          }\n+        }\n+        else {\n+          // Compulsory option\n+          if ("compulsory".compareToIgnoreCase(inner) == 0) {\n+            programOption.setCompulsory(true);\n+          }\n+          else {\n+            this.message = "Do not understand option descriptor \'" + inner + "\'.\\n";\n+            return;\n+          }\n+        }\n+      }\n+      if (! programOption.checkSettings()) {\n+        this.message = "Program \'" + this.fileName + "\' has a problem concerning option \'" + identifier + "\'.\\n";\n+        return;\n+      }\n+      program.addOption(programOption);\n+    }\n+\n+    // Set associated option\n+    Iterator it = associatedOption.keySet().iterator();\n+    while (it.hasNext()) {\n+      ProgramOption programOption = (ProgramOption) it.next();\n+      programOption.setAssociatedOption(identifierToOptions.get(associatedOption.get(programOption)));\n+    }\n+  }\n+\n+  public String getMessage () {\n+    return this.message;\n+  }\n+\n+  public Program getProgram () {\n+    return this.program;\n+  }\n+}\n+\n+\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/PythonProgramFinder$1.class
Binary file SMART/Java/PythonProgramFinder$1.class has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/PythonProgramFinder.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/PythonProgramFinder.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,92 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.io.*;
+import java.util.*;
+
+public class PythonProgramFinder {
+
+  String             dirName;
+  Vector < Program > programs;
+
+  public PythonProgramFinder(String dirName) {
+    this.dirName = dirName;  
+  }
+
+  public String findPrograms() {
+    java.io.File directory = new java.io.File(this.dirName);
+    String[] files         = directory.list(new FilenameFilter() {public boolean accept(java.io.File dir, String name) {return ((! name.startsWith(".")) && (! name.startsWith("test")) && ((new java.io.File(dir, name)).isFile()) && (name.endsWith(".py")) && (name.compareToIgnoreCase("__init__.py") != 0));}});
+    this.programs          = new Vector < Program > ();
+
+    for (int i = 0; i < files.length; i++) {
+      String[] commandList  = {Global.pythonCommand, "Python" + java.io.File.separator + files[i], "-h"};
+      String command = "";
+      for (int j = 0; j < commandList.length; j++) {
+        command += commandList[j] + " ";
+      }
+      ProcessBuilder pb = new ProcessBuilder(commandList);
+      pb = pb.redirectErrorStream(true);
+      Map<String, String> env = pb.environment();
+      env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+      env.put("SMARTRPATH", Global.rCommand);
+
+      PythonHelperReader helperReader = new PythonHelperReader(files[i]);
+      try {
+        final Process process = pb.start();
+        InputStream is        = process.getInputStream();
+        InputStreamReader isr = new InputStreamReader(is);
+        BufferedReader br     = new BufferedReader(isr);
+        helperReader.setReader(br);
+        helperReader.run();
+      }
+      catch (IOException e) {
+        final Writer result = new StringWriter();
+        final PrintWriter printWriter = new PrintWriter(result);
+        e.printStackTrace(printWriter);
+        return "Command '" + command + "' failed (I/O error)...\n" + result.toString();
+      }
+      String comments = helperReader.getMessage();
+      if (comments != null) return comments;
+      Program program = helperReader.getProgram();
+      if (("Personnal".compareToIgnoreCase(program.getSection()) != 0) && ("Personal".compareToIgnoreCase(program.getSection()) != 0)) {
+        this.programs.add(program);
+      }
+    }
+    return null;
+  }
+
+  public Vector <Program> getPrograms () {
+    return this.programs;
+  }
+}
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/README.txt
--- a/SMART/Java/README.txt Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/Java/README.txt Tue Apr 30 14:33:21 2013 -0400
@@ -13,9 +13,14 @@
 S-MART does not require a computer science background and thus can be used by all biologists through a graphical interface. S-MART can run on any personal computer, yielding results within an hour for most queries. 
 
 
+Instructions
+------------
+Installation instructions and the user guide are available in the file "doc.pdf".
+
+
 Copyright
 ---------
-Copyright INRA-URGI 2009-2013
+Copyright INRA-URGI 2009-2010
 
 
 Authors
@@ -35,41 +40,12 @@
 See the LICENSE.txt file.
 
 
-Installation under Galaxy
--------------------------
-S-MART is available under the Galaxy Tool Shed: http://toolshed.g2.bx.psu.edu/
-Remember to set the variables "tool_config_file" and "tool_dependency_dir" accordingly. Please look up the Galaxy Tool Shed wiki to know more about it.
-It assumes you have R installed, as well as two packages: RColorBrewer (for colors in graphics), and Hmisc (for statistics). You can install them as root with the commands:
- - R --slave --no-save --no-restore --quiet -e 'if("RColorBrewer" %in% rownames(installed.packages()) == FALSE){install.packages("RColorBrewer", repos = c("http://cran.rstudio.com/"), dependencies = TRUE)}'
- - R --slave --no-save --no-restore --quiet -e 'if("Hmisc" %in% rownames(installed.packages()) == FALSE){install.packages("Hmisc", repos = c("http://cran.rstudio.com/"), dependencies = TRUE)}'
-
-Optionally, you can organize the layout of S-MART tools following these instructions. This way, all the tools will be correctly sorted and appear in categories.
- - Locate the directory where S-MART has been installed: probably in "<galaxy install dir>/shed_tool/toolshed.g2.bx.psu.edu/repos/yufei-luo/s_mart/XXX/s_mart/"
- - Create a symbolic link "<galaxy install dir>/tools/s_mart" directing to "<S-MART install dir>/SMART/galaxy/"
- - Paste the content of "<S-MART install dir>/SMART/galaxy/tool_conf.xml" to your local "<galaxy install dir>/tool_conf.xml", for instance, right before the </toolbox> mark-up.
- - Remove the S-MART layout in "<galaxy install dir>/shed_tool_conf.xml" (the name may vary depending on your "universe_wgsi.ini" file) which has been automatically generated: remove the whole block between the markup <section id="s-mart" name="S-MART" version="XXX"> and the corresponding </section>.
- - Restart Galaxy to complete the install.
-
-
-Stand-alone installation
-------------------------
-This product needs the following softwares :
- - R, under the GNU General Public License, and several R package (under the same License)
- - Python, under the Python License, compatible with the GNU General Public License
- - Java, under the GNU General Public License
-
-
-Instructions
-------------
-Further installation instructions and the user guide are available in the file "doc.pdf".
-
-
 Acknowledgements
 ----------------
-Many thanks go helping developers:
- - Yufei Luo
- - the URGI team
-and the beta-testers:
- - Claire Toffano-Nioche
- - Claire Kuchly
- - among others...
+This product needs the following software:
+ * R, under the GNU General Public License
+ * MySQL, under the GNU General Public License
+ * Python, under the Python License, compatible with the GNU General Public License
+ * MySQL for Python, under the GNU General Public License
+ * Java, under the GNU General Public License
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/File.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/File.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,55 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+public class File {
+  String name;
+  String formatType;
+  String format;
+
+
+  public File(String name, String type, String format) {
+    this.name       = name;
+    this.formatType = type;
+    this.format     = format;
+  }
+
+  public String getName() {
+    return this.name;
+  }
+
+  public String getFormatType() {
+    return this.formatType;
+  }
+
+  public String getFormat() {
+    return this.format;
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/Files.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Files.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,71 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class Files {
+  HashMap <String, File> files;  
+
+  public Files () {
+    files = new HashMap < String, File> ();
+  }
+
+  public void addFile(String fileName, String type, String format) {
+    this.addFile(new File(fileName, type, format));
+  }
+
+  public void addFile(File file) {
+    files.put(file.name, file);
+  }
+
+  public String getType(String fileName) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+    }
+    if (! files.containsKey(fileName)) {
+      System.out.println("Oops! Format type of file " + fileName + " is not found!");
+      return null;
+    }
+    return files.get(fileName).formatType;
+  }
+
+  public String getFormat(String fileName) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+    }
+    if (! files.containsKey(fileName)) {
+      System.out.println("Oops! Format of file " + fileName + " is not found!");
+      return null;
+    }
+    return files.get(fileName).format;
+  }
+}
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/FormatType.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/FormatType.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,64 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class FormatType {
+  String type;
+  Vector < String > formats;
+
+  public FormatType (String type) {
+    this.type    = type;
+    this.formats = new Vector < String > ();
+  }
+
+  public String getType () {
+    return this.type;
+  }
+
+  public void addFormat (String format) {
+    formats.add(format);
+  }
+
+  public boolean containsFormat (String format) {
+    for (int i = 0; i < formats.size(); i++) {
+      if (((String) formats.get(i)).compareToIgnoreCase(format) == 0) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  public Vector < String > getFormats () {
+    return formats;
+  }
+}
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/FormatsContainer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/FormatsContainer.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,81 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class FormatsContainer {
+
+  HashMap < String, FormatType > formatTypes;
+
+
+  public FormatsContainer() {
+    this.formatTypes = new HashMap < String, FormatType > ();
+  }
+
+
+  public void addFormat(String type, String format) {
+    FormatType formatType;
+    if (formatTypes.containsKey(type)) {
+      formatType = this.formatTypes.get(type);
+    }
+    else {
+      formatType = new FormatType(type);
+      this.formatTypes.put(type, formatType);
+    }
+    formatType.addFormat(format);
+  }
+
+
+  public Vector < String > getFormatTypes () {
+    Vector < String > v = new Vector < String > ();
+    v.addAll(this.formatTypes.keySet());
+    return v;
+  }
+
+
+  public FormatType getFormats (String type) {
+    return formatTypes.get(type);
+  }
+
+
+  public String getFormatType (String format) {
+    for (Iterator it = formatTypes.keySet().iterator(); it.hasNext(); ) {
+      Object type       =  it.next();
+      Object formatType = formatTypes.get(type);
+      if (((FormatType) formatType).containsFormat(format)) {
+        return (String) type;
+      }
+    }
+    return null;
+  }
+}
+
+
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/FormatsReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/FormatsReader.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,83 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.File;
+import java.io.*;
+
+
+public class FormatsReader {
+
+  String fileName;
+  Vector < FormatType > formatTypes;
+  Vector < String > typeNames;
+
+
+  public FormatsReader(String fileName) {
+    this.fileName    = fileName;  
+    this.formatTypes = new Vector < FormatType > ();
+  }
+
+
+  public boolean read() {
+    File file = new File(this.fileName);
+
+    try {
+      BufferedReader reader = new BufferedReader(new FileReader(file));
+      String     line = null;
+      String[]   lineElements;
+      String[]   formats;
+      String     typeName;
+
+      while ((line = reader.readLine()) != null) {
+        if (line.length() > 0) {
+          lineElements = line.split(":");
+          typeName     = lineElements[0].trim();
+          formats      = lineElements[1].split(",");
+          for (int i = 0; i < formats.length; i++) {
+            Global.formats.addFormat(typeName, formats[i].trim());
+          }
+        }
+      }
+
+      reader.close();
+    }
+    catch (FileNotFoundException e) {
+      return false;
+    }
+    catch (IOException e) {
+      return false;
+    }
+
+    return true;
+  }
+}
+
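FormatsReader.read() above feeds Global.formats: each non-empty line of the formats.txt file (named in Global.java below) is split on a colon into a format-type name followed by a comma-separated list of formats. A minimal sketch of the expected syntax, with assumed type and format names (the actual file ships with S-MART):

    transcript: gff, gff2, gff3, bed
    sequence: fasta, fastq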
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/Global.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Global.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,70 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.Vector;
+import java.util.HashMap;
+import javax.swing.DefaultListModel;
+import javax.swing.JButton;
+import javax.swing.JTextField;
+
+public class Global {
+
+  public static int logAreaSize = 100;
+
+  public static String smartConfFileName = "smart.conf";
+
+  public static String smartProgramsFileName = "programs.txt";
+
+  public static String smartFormatsFileName = "formats.txt";
+
+  public static String pythonPath = new String();
+
+  public static String pythonCommand = "python";
+
+  public static String mysqlCommand = "mysql";
+
+  public static String rCommand = "R";
+
+  public static Files files = new Files();
+
+  public static DefaultListModel fileNames = new DefaultListModel();
+
+  public static FormatsContainer formats = new FormatsContainer();
+
+  public static boolean programRunning = false;
+
+  public static HashMap < JButton, JTextField > otherFilesChooser = new HashMap < JButton, JTextField >();
+
+  public static HashMap < JButton, JTextField > otherDirectoriesChooser = new HashMap < JButton, JTextField >();
+
+  public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >();
+
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/Program.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Program.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,175 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.awt.*;
+import javax.swing.*;
+
+
+public class Program {
+  String                 shortName;
+  String                 name;
+  String                 section;
+  String                 description;
+  Vector <ProgramOption> options;
+  JPanel                 panel;
+  JButton                button;
+
+
+  public Program() {
+    this.shortName = null;  
+    this.name      = null;  
+    this.options   = new Vector <ProgramOption> ();  
+  }
+
+
+  public void setShortName(String shortName) {
+    this.shortName = shortName;
+  }
+
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+
+  public void setSection(String section) {
+    this.section = section;
+  }
+
+  public void setDescription(String description) {
+    this.description = description;
+  }
+
+
+  public void addOption(ProgramOption option) {
+    options.add(option);
+  }
+
+
+  public String getShortName() {
+    return this.shortName;
+  }
+
+
+  public String getName() {
+    return this.name;
+  }
+
+
+  public String getSection() {
+    return this.section;
+  }
+
+  public String getDescription() {
+    return this.description;
+  }
+
+
+  public String checkValues() {
+    for (int i = 0; i < options.size(); i++) {
+      String comment = options.get(i).checkValue();
+      if (comment != null) {
+        return comment;
+      }
+    }
+    return null;
+  }
+
+
+  public LinkedList<String> getCommand() {
+    LinkedList<String> parameterList = new LinkedList<String>();
+    parameterList.add(Global.pythonCommand);
+    parameterList.add("Python" + java.io.File.separator + this.shortName);
+    for (int i = 0; i < options.size(); i++) {
+      ProgramOption option = options.get(i);
+      parameterList.addAll(option.getCommand());
+    }
+    return parameterList;
+  }
+
+
+  public JPanel getPanel() {
+    if (this.panel != null) {
+      return this.panel;
+    }
+    
+    this.panel = new JPanel(false);
+    this.panel.setLayout(new FlowLayout());
+    Box box = Box.createVerticalBox();
+
+    JPanel descriptionPanel = new JPanel(false);
+    JLabel descriptionLabel = new JLabel(this.description);
+    descriptionPanel.add(descriptionLabel);
+    box.add(descriptionPanel);
+
+    for (int i = 0; i < options.size(); i++) {
+      ProgramOption option = options.get(i);
+      JPanel        panel  = option.getPanel();
+      if (panel == null) {
+        System.out.println("Problem with Python program '" + this.shortName + "'.");
+        return null;
+      }
+      box.add(option.getPanel());
+    }
+
+    JPanel buttonPanel = new JPanel(false);
+    this.button = new JButton("GO!");
+
+    buttonPanel.add(button);
+
+    box.add(buttonPanel);
+
+    this.panel.add(box);
+
+    return this.panel;
+  }
+
+
+  public JButton getButton() {
+    if (this.button == null) {
+      this.getPanel();
+    }
+    return this.button;
+  }
+
+  
+  public Vector < File > getOutputFiles() {
+    Vector < File > files = new Vector < File > ();
+    for (int i = 0; i < options.size(); i++) {
+      ProgramOption option = options.get(i);
+      if (! option.isInput()) {
+        files.add(option.getOutputFile());
+      }
+    }
+    return files;
+  }
+}
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/ProgramFileReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/ProgramFileReader.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,174 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.File;
+import java.io.*;
+
+
+public class ProgramFileReader {
+  String fileName;
+  Vector <Program> programs;
+
+
+  public ProgramFileReader(String fileName) {
+    this.fileName = fileName;  
+    this.programs = new Vector <Program> ();
+  }
+
+
+  public boolean read() {
+//  File    file    = new File(this.fileName);
+//  Program program = null;
+//  int     step    = 0;
+//  TreeMap <String, ProgramOption> options = new TreeMap <String, ProgramOption> ();
+
+//  try {
+//    BufferedReader reader = new BufferedReader(new FileReader(file));
+//    String line    = null;
+//    String section = null;
+
+//    while ((line = reader.readLine()) != null) {
+
+//      line = line.trim();
+
+//      if (line.length() == 0) {
+//        if (program != null) {
+//          programs.add(program);
+//        }
+//        program = null;
+//        step = 0;
+//        continue;
+//      }
+
+//      if ((line.charAt(0) == '[') && (line.charAt(line.length() - 1) == ']')) {
+//        section = line.substring(1, line.length() - 1).trim();
+//        continue;
+//      }
+//      switch (step) {
+//        case 0:
+//        program = new Program();
+//          program.setName(line);
+//          if (section == null) {
+//            System.out.println("Error! Section of program '" + line + "' is not set!");
+//          }
+//          program.setSection(section);
+//          step = 1;
+//          break;
+//        case 1:
+//          program.setShortName(line);
+//          step = 2;
+//          break;
+//        case 2:
+//          ProgramOption option = new ProgramOption();
+
+//          String[] elements    = line.split(":");
+//          boolean  input       = elements[0].trim().equalsIgnoreCase("input")? true: false;
+//          String[] subElements = elements[1].split(";");
+//          String   identifier = subElements[0].trim();
+
+//          option.setInput(input);
+
+//          if (input) {
+
+//            if (subElements.length < 4) {
+//              System.out.println("Line '" + line + "' is weird...");
+//            }
+
+//            String   type       = subElements[1].trim();
+//            String   comment    = subElements[2].trim();
+//            boolean  compulsory = subElements[3].trim().equalsIgnoreCase("0")? false: true;
+
+//            option.setIdentifier(identifier);
+//            option.setType(type);
+//            option.setComment(comment);
+//            option.setCompulsory(compulsory);
+
+//            if ("file".compareToIgnoreCase(type) == 0) {
+//              if (subElements.length < 5) {
+//                System.out.println("Line '" + line + "' is weird...");
+//              }
+
+//              String formatIdentifier = subElements[4].trim();
+//              option.setFormatIdentifier(formatIdentifier);
+//            }
+//            else if ("choice".compareToIgnoreCase(type) == 0) {
+//              if (subElements.length < 5) {
+//                System.out.println("Line '" + line + "' is weird...");
+//              }
+
+//              String[] choices = subElements[4].trim().split(",");
+//              for (int i = 0; i < choices.length; i++) {
+//                choices[i] = choices[i].trim();
+//              }
+//              option.setChoices(choices);
+//            }
+//            options.put(identifier, option);
+//          }
+//          else {
+//            String format = subElements[1].trim();
+
+//            option.setFormat(format);
+//            option.setAssociatedOption(options.get(identifier));
+//          }
+
+//          program.addOption(option);
+
+//          break;
+//        default:
+//          return false;
+//      }
+//    }
+
+//    reader.close();
+//  }
+//  catch (FileNotFoundException e) {
+//    return false;
+//  }
+//  catch (IOException e) {
+//    return false;
+//  }
+
+//  if (program != null) {
+//    programs.add(program);
+//  }
+
+    return true;
+  }
+
+  public int getNbPrograms() {
+    return programs.size();
+  }
+
+  public Program getProgram(int i) {
+    return programs.get(i);
+  }
+}
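The commented-out read() body above shows the descriptor file this saved copy was meant to parse: blocks separated by blank lines under [Section] header lines, each block giving the program name, its short name, and then one option descriptor per line ("input: identifier; type; comment; compulsory[; format or choices]" for inputs, "output: identifier; format" for outputs). A minimal usage sketch, assuming a hypothetical descriptor file name; as the class stands here, read() returns true without filling the programs vector:

    ProgramFileReader reader = new ProgramFileReader("programs.txt");  // hypothetical file name
    if (reader.read()) {
        for (int i = 0; i < reader.getNbPrograms(); i++) {
            Program program = reader.getProgram(i);
            // hypothetical: hand the Program over to the GUI builder
        }
    }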
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/ProgramLauncher.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/ProgramLauncher.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,191 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.*;
+import javax.swing.SwingUtilities;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+public class ProgramLauncher extends SwingWorker<Boolean, String>  {
+
+  String[]     command;
+  JTextArea    logArea;
+  JLabel       messageField;
+  JProgressBar progressBar;
+  JLabel       etaField;
+  int          exitValue;
+  
+
+  public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = new String[c.size()];
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+    c.toArray(command);
+  }
+
+
+  public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = c;
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+  }
+
+
+  @Override
+  public Boolean doInBackground() {
+    ProcessBuilder pb           = new ProcessBuilder(command);
+    Process process             = null;
+    BufferedReader outputReader = null;
+    pb                          = pb.redirectErrorStream(true);
+    Map<String, String> env     = pb.environment();
+    env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+    env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+    env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+    env.put("SMARTRPATH", Global.rCommand);
+    String commandJoined = Arrays.toString(command);
+
+    try {
+      publish("=== Starting command '" + commandJoined.trim() + "' ===\n");
+      process = pb.start();
+
+      BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream());
+      InputStream is                   = process.getInputStream();
+      InputStreamReader isr            = new InputStreamReader(is);
+      outputReader                     = new BufferedReader(isr);
+    }
+    catch (Exception exception) {
+      publish("!Process cannot be started (command is '" + commandJoined + "')!\n");
+      exception.printStackTrace();
+      return Boolean.FALSE;
+    }
+    if (outputReader == null) {
+      publish("!Problem in the output of the command!\n");
+      return Boolean.FALSE;
+    }
+    else {
+      try {
+        String line;
+        while ((line = outputReader.readLine()) != null) {
+          publish(line + "\n");
+        }
+      }
+      catch (IOException e) {
+        e.printStackTrace();
+        publish("!Cannot get the output of the command!\n");
+        return Boolean.FALSE;
+      }
+    }
+    try {
+      process.waitFor();
+    }
+    catch (InterruptedException e) {
+      e.printStackTrace();
+      publish("!Cannot wait for the end of the command!\n");
+      return Boolean.FALSE;
+    }
+    try {
+      exitValue = process.exitValue();
+      System.out.println(exitValue);
+    }
+    catch (IllegalThreadStateException e) {
+      e.printStackTrace();
+      publish("!Cannot get the exit value of the command!\n");
+      return Boolean.FALSE;
+    }
+    if (exitValue != 0) {
+      publish("!Problem during the execution of the command '" + commandJoined + "'!\n");
+      return Boolean.FALSE;
+    }
+    publish("=== Ending command '" + commandJoined.trim() + "' ===\n");
+    return Boolean.TRUE;
+  }
+
+
+  @Override
+  protected void process(List<String> chunks) {
+    String message = "";
+    String text    = logArea.getText();
+    for (String chunk: chunks) {
+      text += chunk;
+    }
+    for (String lineSeparatedByCarriageReturn: text.split("\n")) {
+      for (String line: lineSeparatedByCarriageReturn.split("\r")) {
+        boolean progressLine = false;
+        if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*")) {
+          String[] ratioElements = line.split("\\]")[1].trim().split("/");
+          int      current       = Integer.parseInt(ratioElements[0].trim());
+          int      aim           = Integer.parseInt(ratioElements[1].trim());
+          messageField.setText(line.split("\\[")[0].trim());
+          progressBar.setValue(current * 100 / aim);
+          etaField.setText("");
+          progressLine = true;
+        }
+        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*")) {
+          String[] ratioElements = line.split("\\]")[1].split("E")[0].trim().split("/");
+          int      current       = Integer.parseInt(ratioElements[0].trim());
+          int      aim           = Integer.parseInt(ratioElements[1].trim());
+          String   eta           = line.split("ETA:")[1].trim();
+          messageField.setText(line.split("\\[")[0].trim());
+          progressBar.setValue(current * 100 / aim);
+          etaField.setText("ETA: " + eta);
+          progressLine = true;
+        }
+        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*")) {
+          String nbElements = line.split("\\]")[1].split("completed")[0].trim();
+          String timeSpent  = line.split("completed in")[1].trim();
+          message          += line.split("\\[")[0].trim() + ": " + nbElements + " elements completed in " + timeSpent + "\n";
+          messageField.setText(line.split("\\[")[0].trim());
+          progressLine = true;
+        }
+        if (! progressLine) {
+          message += line + "\n";
+        }
+      }
+    }
+    String lines[]     = message.split("\n");
+    String toBeWritten = "";
+    for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) {
+      toBeWritten += lines[i] + "\n";
+    }
+    logArea.setText(toBeWritten);
+  }
+
+  public int getExitValue() {
+    return exitValue;
+  }
+}
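The worker merges stderr into stdout, exports PYTHONPATH, SMARTPATH, SMARTMYSQLPATH and SMARTRPATH for the child process, and feeds every output line to process(), which recognises text progress bars of the form "<message> [====    ] <done>/<total>", optionally followed by "ETA: <time>" or by "<n> completed in <time>", and turns them into progress-bar and ETA updates instead of log text. A minimal launch sketch, assuming the Swing fields exist as in Smart.java and using a hypothetical script name:

    String[] cmd = {Global.pythonCommand, "Python" + java.io.File.separator + "someScript.py", "-h"};  // hypothetical command line
    ProgramLauncher launcher = new ProgramLauncher(cmd, logArea, messageField, progressBar, etaField);
    launcher.execute();  // runs doInBackground() off the Event Dispatch Thread
    // once the worker has finished, launcher.getExitValue() holds the child's exit status (-1 if it never ran)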
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/ProgramOption.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/ProgramOption.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,329 @@
[The changeset view renders this new file as a truncated byte string. The recoverable parts show the usual CeCILL license header followed by the ProgramOption class: fields for an option's input flag, identifier, type, comment, compulsory flag, format(s), format identifier, associated option, default value and choices, the corresponding setters, the Swing widgets built in getPanel() (text fields with file/directory browse buttons, combo boxes for choices), and the getValue()/checkValue()/getCommand()/getOutputFile() methods that validate the user's input and turn it into command-line arguments. The middle of the file is elided in the source.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/PythonHelperReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/PythonHelperReader.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,323 @@
[The changeset view renders this new file as a truncated byte string. The recoverable parts show the CeCILL license header followed by the PythonHelperReader class, which reads the "-h" output of a S-MART Python script line by line, tracks whether it is in the usage, description or options block (a leading "Traceback" switches it to error mode and fills the message field), and builds a Program whose ProgramOptions are typed from descriptors found in the help text (file formats, "format given by" another option, choices, booleans, defaults, compulsory flags), resolving associated options at the end; getMessage() and getProgram() expose the result. The middle of the file is elided in the source.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/PythonProgramFinder.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/PythonProgramFinder.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,90 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ * 
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ * 
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ * 
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ * 
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.io.*;
+import java.util.*;
+
+public class PythonProgramFinder {
+
+  String             dirName;
+  Vector < Program > programs;
+
+  public PythonProgramFinder(String dirName) {
+    this.dirName = dirName;  
+  }
+
+  public String findPrograms() {
+    java.io.File directory = new java.io.File(this.dirName);
+    String[] files         = directory.list(new FilenameFilter() {public boolean accept(java.io.File dir, String name) {return ((! name.startsWith(".")) && (! name.startsWith("test")) && ((new java.io.File(dir, name)).isFile()) && (name.endsWith(".py")));}});
+    this.programs          = new Vector < Program > ();
+
+    for (int i = 0; i < files.length; i++) {
+      String[] commandList  = {Global.pythonCommand, "Python" + java.io.File.separator + files[i], "-h"};
+      String command = "";
+      for (int j = 0; j < commandList.length; j++) {
+        command += commandList[j] + " ";
+      }
+      ProcessBuilder pb = new ProcessBuilder(commandList);
+      pb = pb.redirectErrorStream(true);
+      Map<String, String> env = pb.environment();
+      env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+      env.put("SMARTRPATH", Global.rCommand);
+
+      PythonHelperReader helperReader = new PythonHelperReader(files[i]);
+      try {
+        final Process process = pb.start();
+        InputStream is        = process.getInputStream();
+        InputStreamReader isr = new InputStreamReader(is);
+        BufferedReader br     = new BufferedReader(isr);
+        helperReader.setReader(br);
+        helperReader.run();
+      }
+      catch (IOException e) {
+        e.printStackTrace();
+        return "Command '" + command + "' failed (I/O error)...\n";
+      }
+      String comments = helperReader.getMessage();
+      if (comments != null) return comments;
+      Program program = helperReader.getProgram();
+      if ("Personnal".compareToIgnoreCase(program.getSection()) != 0) {
+        this.programs.add(program);
+      }
+    }
+    return null;
+  }
+
+  public Vector <Program> getPrograms () {
+    return this.programs;
+  }
+}
+
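A sketch of how the finder is presumably driven (the "Python" directory name comes from the hard-coded prefix above; the registration step is hypothetical). Note that programs whose section is "Personnal" are silently skipped:

    PythonProgramFinder finder = new PythonProgramFinder("Python");  // scripts are invoked as Python/<name>.py -h
    String error = finder.findPrograms();
    if (error != null) {
        System.out.println(error);  // I/O failure or a parsing problem reported by PythonHelperReader
    } else {
        for (Program program : finder.getPrograms()) {
            // hypothetical: build a menu entry and option panel for the program, as Smart.java does
        }
    }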
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Sav/Smart.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Smart.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,489 @@
[The changeset view renders this new file as a truncated byte string. The recoverable parts show the CeCILL license header and the Swing GUI class Smart (version "1.0.2"): a main frame with format-type and file-format combo boxes, file list, log area, message field, progress bar and ETA label, comparison widgets, and "find..." buttons for the Python, MySQL and R executables; an actionPerformed() that handles the various file and directory choosers and launches the selected Program through ProgramLauncher, registering its non-"other" output files; and the usual createAndShowGUI()/main() bootstrap. The middle of the file is elided in the source.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Smart$1.class
Binary file SMART/Java/Smart$1.class has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Smart$2.class
Binary file SMART/Java/Smart$2.class has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Smart.jar
Binary file SMART/Java/Smart.jar has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/Smart.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Smart.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,668 @@
[The changeset view renders this new file as a truncated byte string. The recoverable parts show the same CeCILL header and Smart GUI class as the Sav copy, bumped to version "1.1.0", with static log/progress fields, a resetFileButton, a removeTmpFiles() hook invoked from a WindowAdapter on window close (it launches Python/removeAllTmpTables.py through ProgramLauncher), a printJavaVersions() helper that logs java.version, java.vm.version and java.runtime.version, and the same actionPerformed()/createAndShowGUI()/main() structure. The middle of the file is elided in the source.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/SmartInstaller.jar
Binary file SMART/Java/SmartInstaller.jar has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/WindowsRegistry.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/WindowsRegistry.java Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,387 @@
[The changeset view renders this new file as a truncated byte string. The recoverable parts show a WindowsRegistry utility class that reaches the Windows registry through reflection on the java.util.prefs.Preferences implementation (WindowsRegOpenKey, WindowsRegCloseKey, WindowsRegQueryValueEx, WindowsRegEnumValue, WindowsRegQueryInfoKey1, WindowsRegEnumKeyEx, WindowsRegCreateKeyEx, WindowsRegSetValueEx, WindowsRegDeleteKey/Value), exposing readString(), readStringValues(), readStringSubKeys(), createKey() and writeStringValue() for HKEY_CURRENT_USER and HKEY_LOCAL_MACHINE, with a toCstr() helper that null-terminates key names. The middle of the file is elided in the source.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/__init__.pyc
Binary file SMART/Java/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/formats.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/formats.txt Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,4 @@
+sequence: fasta, fastq
+transcript: bed, gff, gff2, gff3, csv
+mapping: axt, blast, bowtie, exo, maq, nucmer, psl, sam, seqmap, shrimp, soap, soap2
+other: txt, wig, png, nclist
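Each line maps a format family (the entries of the format-type combo box in the GUI) to the file extensions accepted for it. The FormatsReader class that loads this file is not part of this excerpt; parsing one line presumably amounts to something like the following illustrative fragment:

    String line = "transcript: bed, gff, gff2, gff3, csv";
    String type = line.split(":")[0].trim();              // "transcript"
    String[] extensions = line.split(":")[1].split(",");
    for (int i = 0; i < extensions.length; i++) {
        extensions[i] = extensions[i].trim();             // "bed", "gff", ...
    }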
diff -r 5677346472b5 -r 0ab839023fe4 SMART/Java/manifest.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/manifest.txt Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Created-By: Matthias Zytnicki
+Main-Class: Smart
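With Main-Class set to Smart, packaging the compiled classes with this manifest (for instance "jar cfm Smart.jar manifest.txt *.class") presumably produces the runnable Smart.jar shipped in this changeset, which can then be started with "java -jar Smart.jar".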
diff -r 5677346472b5 -r 0ab839023fe4 SMART/__init__.pyc
Binary file SMART/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/changeName.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/changeName.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,99 @@
+#! /usr/bin/env python
+
+import optparse, os, sys, subprocess, tempfile, shutil
+from optparse import OptionParser
+
+def stop_err(msg):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit()
+    
+def changeName(fileName, format, name, outputName):
+    file = open(fileName, 'r')
+    line = file.readline()
+    if format == "fasta":
+        while not line.startswith('>'):
+            line = file.readline()
+        old_name = line[1:]
+    elif format == "gff":
+        while line.startswith('#'):
+            line = file.readline()
+        old_name = (line.split('\t'))[0]
+    elif format == "sam":
+        while line.startswith('@'):
+            line = file.readline()
+        old_name = (line.split('\t'))[2]
+    file.close()    
+    cmd = "sed \"s/%s/%s/g\" %s >%s " % (old_name.strip(), name.strip(), fileName, outputName)
+    proc = subprocess.Popen(cmd, shell=True)
+    proc.communicate()
+    if proc.returncode != 0:
+        raise Exception("ERROR when launching '%s'" % cmd)
+    
+def getName(fileName, format):
+    file = open(fileName, 'r')
+    line = file.readline()
+    if format == "gff":
+        while line.startswith('#'):
+            line = file.readline()
+        old_name = (line.split('\t'))[0]
+    elif format == "sam":
+        while line.startswith('@') or line.startswith('#'):
+            line = file.readline()
+        old_name = (line.split('\t'))[2]
+    file.close()    
+    return old_name
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option("", "--input1", dest="inputFile1", default=None, help="Choose a fasta file.")
+    parser.add_option("", "--input2", dest="inputFile2", default=None, help="Choose a gff file.")
+    parser.add_option("", "--input3", dest="inputFile3", default=None, help="Choose a sam file.")
+    parser.add_option("", "--name", dest="name", default=None, help="Change to a new name.[compulsory] if there is only one input.")
+    parser.add_option("", "--output1", dest="outputFile1", default=None, help="OutputFile1")
+    parser.add_option("", "--output2", dest="outputFile2", default=None, help="OutputFile2")
+    parser.add_option("", "--output3", dest="outputFile3", default=None, help="OutputFile3")
+    (options, args) = parser.parse_args() 
+    
+#TODO:write raise Exception!!
+    
+    #In case only one input
+    if options.name == None:
+        #find a default_name to unify the name for all input files
+        if options.inputFile1 != None:
+                if options.inputFile2 == None and options.inputFile3 == None:
+                    raise Exception("ERROR: with only one input file, you must provide a new name with --name.")
+                elif options.inputFile2 != None and options.outputFile2 != None:
+                    default_name = getName(options.inputFile2, 'gff')
+                    changeName(options.inputFile1, 'fasta', default_name, options.outputFile1)
+                    changeName(options.inputFile2, 'gff', default_name, options.outputFile2)
+                    if options.inputFile3 != None and options.outputFile3 != None:
+                        changeName(options.inputFile3, 'sam', default_name, options.outputFile3)
+                elif options.inputFile3 != None and options.outputFile3 != None:
+                        default_name = getName(options.inputFile3, 'sam')
+                        changeName(options.inputFile3, 'sam', default_name, options.outputFile3)
+                        changeName(options.inputFile1, 'fasta', default_name, options.outputFile1)
+                        if options.inputFile2 != None and options.outputFile2 != None:
+                            changeName(options.inputFile2, 'gff', default_name, options.outputFile2)
+    else:
+        if options.inputFile1 != None and options.outputFile1 != None:      
+            changeName(options.inputFile1, 'fasta', options.name, options.outputFile1)
+        if options.inputFile2 != None and options.outputFile2 != None:
+            changeName(options.inputFile2, 'gff', options.name, options.outputFile2)
+        if options.inputFile3 != None and options.outputFile3 != None:
+            changeName(options.inputFile3, 'sam', options.name, options.outputFile3)    
+       
+if __name__ == '__main__':__main__()
+
+
+#test commands: 
+#only one input:
+#python changeName.py --input1 NC_011744.fna --name NC_test --output1 out.fna
+#several inputs:
+#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --output1 out.fna --output2 out.gff
+#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --name NC_test --output1 out.fna --output2 out.gff
+#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff  --input3 NC_011744.sam --name NC_test2 --output1 out.fna --output2 out.gff --output3 out.sam
+#python changeName.py --input1 NC_011744.fna --input3 out.sam --output1 out.fna --output3 out.sam
+
+
+    
\ No newline at end of file
b
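As a reading aid only (not part of the changeset), a minimal Python sketch of the renaming step performed by changeName.py above: read the old reference name, then rewrite the file with the new one. The helper name replace_reference_name and the file names are hypothetical.

#!/usr/bin/env python
# Hypothetical sketch: rewrite a file, replacing every occurrence of the old
# reference name with the new one (same effect as the sed call in changeName.py).
def replace_reference_name(in_path, out_path, old_name, new_name):
    with open(in_path) as src, open(out_path, 'w') as dst:
        for line in src:
            dst.write(line.replace(old_name, new_name))

# Example usage (hypothetical file names):
# replace_reference_name("NC_011744.gff", "out.gff", "NC_011744", "NC_test")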
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/changeName.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/changeName.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,80 @@
+<tool id="changeName" name="changeName">
+ <description>Change the chromosome name or gene name of a single fasta, gff or sam file. This tool cannot handle files with multiple chromosomes or genes.</description>
+ <command interpreter="python">
+ changeName.py 
+ #if $optionFasta.fastaFile == 'Yes':
+ --input1 $optionFasta.fasta --output1 $outputFasta
+ #end if
+ #if $optionGff.gffFile == 'Yes':
+ --input2 $optionGff.gff --output2 $outputGff 
+ #end if
+ #if $optionSam.samFile == 'Yes':
+ --input3 $optionSam.sam --output3 $outputSam 
+ #end if
+ #if $optionName.name == 'Yes':
+ --name $optionName.nameValue 
+ #end if
+ </command>
+
+ <inputs>
+ <conditional name="optionFasta">
+ <param name="fastaFile" type="select" label="You can choose a fasta input file to change the name." >
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="fasta" format="fasta" type="data" label="Identify you fasta input file."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionGff">
+ <param name="gffFile" type="select" label="You can choose a supplementary gff input file to change the name." >
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="gff" format="gff" type="data" label="Identify you gff input file."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionSam">
+ <param name="samFile" type="select" label="You can choose a supplementary sam input file to change the name." >
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="sam" format="sam" type="data" label="Identify you sam input file."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionName">
+ <param name="name" type="select" label="Identify a new name to change." >
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="nameValue" type="text" value="None" label="Identify the new name."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ </inputs>
+
+ <outputs>
+ <data name="outputFasta" format="fasta">
+ <filter>optionFasta['fastaFile'] == 'Yes'</filter>
+ </data>
+ <data format="gff" name="outputGff" >
+         <filter>optionGff['gffFile'] == 'Yes'</filter>
+     </data>
+ <data format="sam" name="outputSam" >
+         <filter>optionSam['samFile'] == 'Yes'</filter>
+     </data>     
+ </outputs> 
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/colorGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/colorGff.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,71 @@
+#!/usr/bin/perl -w
+###
+# Purpose: add or modify the color of a gff file
+# 
+# Input: gff file
+#
+# Output: gff printed to standard output
+#
+###------------------------------------------------------
+use vars qw($USAGE);                      
+use strict;                               
+
+=head1 NAME
+
+colorGff.pl - add or change color of a gff file
+
+=head1 SYNOPSIS
+
+% colorGff.pl -i file.gff -c color [-h] 
+
+=head1 DESCRIPTION
+This script will parse a DOOR report file and write the information in gff3 format.
+
+    -i|--input fileName  gff input file name
+    -c|--color RGBcode   RGB code for color
+    -o|--output fileName gff3 output file name
+   [-h|--help]           help mode then die                              
+
+=head1 AUTHOR - Claire Toffano-Nioche - jan.11
+
+=cut
+#-----------------------
+my ($fileName, $colourGff, $outFileName) = ("", "", "colorOut.gff3") ;
+   # command line check
+    foreach my $num (0 .. $#ARGV) {
+        SWITCH: for ($ARGV[$num]) {
+        /--input|-i/ && do { 
+ $fileName=$ARGV[$num+1]; 
+ open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ; 
+ last };
+        /--color|-c/ && do {
+ $colourGff =$ARGV[$num+1]." ".$ARGV[$num+2]." ".$ARGV[$num+3];
+ last };
+#     /--output|-o/ && do { 
+# $outFileName=$ARGV[$num+1]; 
+# last };
+        /--help|-h/ && do { exec("pod2text $0\n") ; die };
+        }
+    }
+#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
+    # informations retrieval
+    my @lines = <fichierGff> ; 
+    close fichierGff ;
+    # treatment
+ #print "gff file read ; number of lines : $#lines\n";
+    for (my $i=0 ; $i <= $#lines ; $i++) {
+ if ($lines[$i] =~ /;/) {
+    if ($lines[$i] =~ /color=/) {
+      $lines[$i] =~ s/color=.*;/color=$colourGff;/ ;
+ } else { # add colour
+      $lines[$i] =~ s/;/;color=$colourGff;/ ;
+      } 
+     } else { # (no = gff bug if col9 begin with semi-coma ?) or only one tag : add color tag
+      chomp($lines[$i]) ;
+      $lines[$i] .= "; color=".$colourGff.";\n";
+ }
+# print OUT $lines[$i] ;
+ print $lines[$i];
+    }
+# close OUT ;
+exit(0);
b
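The color handling above reduces to three cases: replace an existing color value, insert a color tag after the first attribute separator, or append a new tag to column 9. A small Python sketch of the same logic, offered only as a reading aid and not part of the repository:

import re

def set_color(gff_line, rgb):
    # rgb is an RGB triplet string such as "250 128 114" (hypothetical value)
    line = gff_line.rstrip("\n")
    if ";" in line:
        if "color=" in line:
            # replace the existing color value (up to the next ';')
            line = re.sub(r"color=[^;]*;", "color=%s;" % rgb, line)
        else:
            # insert the color tag after the first attribute separator
            line = line.replace(";", ";color=%s;" % rgb, 1)
    else:
        # no attribute separator at all: append a color tag to column 9
        line += "; color=%s;" % rgb
    return line + "\n"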
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/colorGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/colorGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,19 @@
+<tool id="colorGff" name="colorGff">
+ <description> Parses a DOOR report file and writes the information to a gff3 output file. </description>
+ <command interpreter="perl"> 
+ colorGff.pl -i $inputFile -c $RGBcode > $outputFile  
+ </command>
+
+ <inputs>
+ <param name="inputFile" type="data" label="Input File" format="gff"/>
+ <param name="RGBcode" type="text" value="250 128 114" help="RGB code is necessary for choosing the color."/>
+ </inputs>
+
+ <outputs>
+ <data format="gff3" name="outputFile" label="[colorGff] Output File"/>
+ </outputs>
+
+ <help>
+ Command example: perl colorGff.pl -i trans_covUp5_nbEUp10_lgUp50.gff3 -c "250 128 114" > trans_covUp5_nbEUp10_lgUp50_c.gff3
+ </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+###
+# Purpose: add or modify the coverage of a gff file
+# 
+# Input: gff file
+#
+# Output: gff printed to standard output
+#
+###------------------------------------------------------
+
+#!/usr/bin/perl -w                                                                                                                                                     
+              
+use vars qw($USAGE);                      
+use strict;                               
+
+=head1 NAME
+
+coverageGff.pl - add or compute the coverage of a gff file
+
+=head1 SYNOPSIS
+
+% coverageGff.pl -i file.gff -l readLength [-h] 
+
+=head1 DESCRIPTION
+This script will parse a gff file, compute the read coverage from the "nbElements" tag and write the coverage in gff3 format.
+
+    -i|--input fileName     gff input file name
+    -l|--length ReadLength  length of the reads in bp [default 38]
+    -o|--output fileName    gff3 output file name
+   [-h|--help]              help mode then die                              
+
+=head1 AUTHOR - Claire Toffano-Nioche - fev.11
+
+=cut
+#-----------------------
+my ($fileName, $length, $outFileName) = ("", 38, "coverageOut.gff3") ;
+   # command line check
+    foreach my $num (0 .. $#ARGV) {
+        SWITCH: for ($ARGV[$num]) {
+        /--input|-i/ && do { 
+ $fileName=$ARGV[$num+1]; 
+ open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ; 
+ last };
+        /--length|-l/ && do { 
+ $length=$ARGV[$num+1]; 
+ last };
+        /--help|-h/ && do { exec("pod2text $0\n") ; die };
+        }
+    }
+    # informations retrieval
+#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
+    my @lines = <fichierGff> ; 
+    close fichierGff ;
+    # treatment
+ #print "gff file read ; number of lines : $#lines\n";
+    for (my $i=0 ; $i <= $#lines ; $i++) {
+ # compute coverage :
+ if ($lines[$i] =~ /nbElements=/) {
+ my ($nbE)=($lines[$i] =~ /nbElements=(\d+)/) ;
+ my @gffCol=split("\t", $lines[$i]) ;
+ # print "ligne : $i, nbE : $nbE, length : $length, debut : $gffCol[3], fin : $gffCol[4].\n";
+ my $cover=$length*$nbE/($gffCol[4]-$gffCol[3]+1) ;
+ $cover=int(100*$cover+0.5)/100 ; # round up to a precision of 2 decimals
+ if ($lines[$i] =~ /coverage=/) { # replace coverage
+     $lines[$i] =~ s/coverage=.*;/coverage=$cover;/ ;
+ } else { # add coverage
+     $lines[$i] =~ s/;/;coverage=$cover;/ ;
+ }
+ }
+# print OUT $lines[$i] ;
+ print $lines[$i] ;
+    }
+#close OUT ;
+exit(0);
b
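For reference (not part of the changeset), the coverage written above is readLength * nbElements / featureLength, rounded to two decimals. A short Python check of that arithmetic with made-up values:

# Sketch of the coverage formula from coverageGff.pl (example values only).
read_length = 38                      # -l option, default 38
nb_elements = 50                      # value of the nbElements tag
start, end = 1001, 1200               # gff columns 4 and 5
coverage = read_length * nb_elements / float(end - start + 1)
coverage = int(100 * coverage + 0.5) / 100.0   # round to 2 decimals, as in the Perl code
print(coverage)                       # 9.5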
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/coverageGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/coverageGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,17 @@
+<tool id="coverageGff" name="coverage Gff">
+  <description>Computes read coverage from the "nbElements" tag and writes the calculated coverage to a gff3 output file.</description>
+  <command interpreter="perl"> coverageGff.pl -i $inputFile -l $readSize > $outputFile  </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+    <param name="readSize" type="integer" value="38" help="The size of read, default: 38nt"/>
+
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[coveragePl] Output File"/>
+  </outputs>
+
+  <help>
+ command example: perl coverageGff.pl -i *_trans_inIG.gff > *_trans_inIG_cov.gff
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,187 @@
+#!/usr/bin/perl -w 
+###
+# Purpose: protocol for detecting potential non-coding RNAs
+# 
+# Input: Smart gff3 mapping file
+# gff file of the genes
+# gff file of the potential cis-regulatory clusters
+#
+# Output: gff file of the ncRNA clusters
+#
+###------------------------------------------------------
+use vars qw($USAGE);
+use strict; 
+                            
+=head1 NAME
+
+interElementGff.pl - creation of a new gff corresponding to the region between two successive elements
+
+=head1 SYNOPSIS
+
+% interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 [-s 50] [-a 20] [-n seqName] [-h] 
+
+=head1 DESCRIPTION
+This script will determine clusters of potential ncRNAs.
+
+    -i|--input  fileName   gff input file name
+    -o|--output fileName   gff output file name
+    -n|--name seqName      sequence name
+    -p|--print    print parameters used
+
+    -f5ff n   number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0]
+    -ff3f n   number... " ...[default 0]
+
+    -f5fr n   number... " ...[default 0] 
+    -ff3r n   number... " ...[default 0]
+     
+    -fr3f n   number... " ...[default 0]        
+    -fr5f n   number... " ...[default 0]
+    
+    -f3rr n   number... " ...[default 0]
+    -fr5r n   number... " ...[default 0]
+
+    -r5ff n   number... " ...[default 0]
+    -rf3f n   number... " ...[default 0]
+
+    -r5fr n   number... " ...[default 0]        
+    -rf3r n   number... " ...[default 0]
+
+    -r3rf n   number... " ...[default 0]
+    -rr5f n   number... " ...[default 0]
+
+    -r3rr n   number... " ...[default 0]
+    -rr5r n   number... " ...[default 0]
+
+   [-h|--help]           help mode then die                              
+
+
+USAGE_CASE
+
+% interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 -ff 53 -rr 23 -n NC_011744
+
+BUG
+
+Caution : input file needs to be sorted on positions
+
+Caution : for -f/r options add +3 bp to include stop codon if not in input file
+
+=head1 AUTHOR - CTN - apr.11
+(from RNA-Vibrio/protocol_NC_V2.pl - Claire KUCHLY)
+
+=cut
+#----------------------------------------------------------------------------
+# check command line :
+my ($IDfile, $OutputFileName, $f5ff, $ff3f, $f5fr, $ff3r, $f3rf, $fr5f, $f3rr,$fr5r, $r5ff, $rf3f, $r5fr, $rf3r, $r3rf, $rr5f, $r3rr, $rr5r, $seqName, $printParameters) = 
+   (undef, undef , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", 0) ;
+if ($#ARGV==0) {
+ die (exec("pod2text $0\n"));
+} else {
+    foreach my $num (0 .. $#ARGV) {
+ SWITCH: for ($ARGV[$num]) {
+ /--input|-i/ && do { $IDfile=$ARGV[$num+1]; 
+ open(F,"<$IDfile") or die "Error: Can't open \"$IDfile\", $!"; 
+ last; };
+ /-f5ff/ && do { $f5ff=$ARGV[$num+1]+1; last; }; # need +1 for interval computations
+ /-ff3f/ && do { $ff3f=$ARGV[$num+1]+1; last; }; 
+
+ /-f5fr/ && do { $f5fr=$ARGV[$num+1]+1; last; }; 
+ /-ff3r/ && do { $ff3r=$ARGV[$num+1]+1; last; }; 
+
+ /-f3rf/ && do { $f3rf=$ARGV[$num+1]+1; last; };
+ /-fr5f/ && do { $fr5f=$ARGV[$num+1]+1; last; };
+
+ /-f3rr/ && do { $f3rr=$ARGV[$num+1]+1; last; };
+ /-fr5r/ && do { $fr5r=$ARGV[$num+1]+1; last; };
+
+ /-r5ff/ && do { $r5ff=$ARGV[$num+1]+1; last; }; 
+ /-rf3f/ && do { $rf3f=$ARGV[$num+1]+1; last; }; 
+
+ /-r5fr/ && do { $r5fr=$ARGV[$num+1]+1; last; }; 
+ /-rf3r/ && do { $rf3r=$ARGV[$num+1]+1; last; }; 
+
+ /-r3rf/ && do { $r3rf=$ARGV[$num+1]+1; last; };
+ /-rr5f/ && do { $rr5f=$ARGV[$num+1]+1; last; };
+
+ /-r3rr/ && do { $r3rr=$ARGV[$num+1]+1; last; };
+ /-rr5r/ && do { $rr5r=$ARGV[$num+1]+1; last; };
+
+# /--name|-n/ && do { $seqName=$ARGV[$num+1]; last; };
+ /--print|-p/ && do { $printParameters=1; last; };
+ /--output|-o/ && do { $OutputFileName=$ARGV[$num+1]; 
+ open(S,">$OutputFileName") or die "Error : Can't open result file \"$OutputFileName\", $!";
+ last; };
+ /--help|-h/ && do { exec("pod2text $0\n") ; die };
+ }
+    }
+ if ($printParameters) {
+ print "
+        --> f5ff ",$f5ff-1," --> ff3f ",$ff3f-1,"  --> ; 
+        --> f5fr ",$f5fr-1," --> ff3r ",$ff3r-1,"  <-- ; 
+        --> f3rf ",$f3rf-1," <-- fr5f ",$fr5f-1,"  --> ;  
+        --> f3rr ",$f3rr-1," <-- fr5r ",$fr5r-1,"  <-- ; 
+        <-- r5ff ",$r5ff-1," --> rf3f ",$rf3f-1,"  --> ;
+        <-- r5fr ",$r5fr-1," --> rf3r ",$rf3r-1,"  <-- ;
+        <-- r3rf ",$r3rf-1," <-- rr5f ",$rr5f-1,"  --> ; 
+        <-- r3rr ",$r3rr-1," <-- rr5r ",$rr5r-1,"  <-- ;\n";
+   }
+   ##NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789
+   my $finSeedSens;
+   my $finSeedAntisens;
+   my $debSeedSens;
+   my $debSeedAntisens;
+   my $info_gene="";
+   my $sensGeneAvant = "+" ; # 1st seed definition: geneAvant (gene[i-1]) doesn't exist
+   my @chromList;
+   while(my $ligne = <F>){
+ chomp($ligne);
+ my @list = split(/\t/,$ligne);
+ if ((scalar(@chromList) == 0) or ($chromList[$#chromList] ne $list[0])){
+ push(@chromList, $list[0]);
+ my $finSeedSens;
+    my $finSeedAntisens;
+    my $debSeedSens;
+    my $debSeedAntisens;
+    my $info_gene="";
+    my $sensGeneAvant = "+" ; # 1st seed definition: geneAvant (gene[i-1]) doesn't exist
+ }
+ if (($sensGeneAvant eq "+") and ($list[6] eq "+")) { #CTN ie geneavant == f, geneapres == f
+ $debSeedSens += $f5ff;
+ $finSeedSens = $list[3]- $ff3f;
+ $debSeedAntisens += $f3rf;
+ $finSeedAntisens = $list[3]- $fr5f;
+ } elsif (($sensGeneAvant eq "+") and ($list[6] eq "-")) { #CTN ie geneaavant == f, geneapres == r
+ $debSeedSens += $f5fr;
+ $finSeedSens = $list[3]- $ff3r;
+ $debSeedAntisens += $f3rr;
+ $finSeedAntisens = $list[3]- $fr5r;
+ } elsif (($sensGeneAvant eq "-") and ($list[6] eq "+")) { #CTN ie geneaavant == r, geneapres == f
+ $debSeedSens += $r5ff;
+ $finSeedSens = $list[3]- $rf3f;
+ $debSeedAntisens += $r3rf;
+ $finSeedAntisens = $list[3]- $rr5f;
+ } else {                    #CTN ie geneaavant == r, geneapres == r
+ $debSeedSens += $r5fr;
+ $finSeedSens = $list[3]- $rf3r;
+ $debSeedAntisens += $r3rr;
+ $finSeedAntisens = $list[3]- $rr5r;
+ }
+ if ($debSeedSens <= 0) { $debSeedSens=1 ; } # 1srt 
+ if ($debSeedAntisens <= 0) { $debSeedAntisens=1 ; }
+ if($debSeedSens < $finSeedSens){ # only "real" seed 
+ #print "$gene_avant\nNC_011753\tperso\tseed\t$deb_seed\t$fin_seed\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n$ligne\n\n";
+    # 
+
+ print S "$list[0]\tperso\tseedIR\t$debSeedSens\t$finSeedSens\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n";
+ }
+ if ($debSeedAntisens < $finSeedAntisens){
+ print S "$list[0]\tperso\tseedIR\t$debSeedAntisens\t$finSeedAntisens\t.\t-\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n";
+ }
+ $sensGeneAvant = $list[6] ; # GFF : column 6 gives strand
+ $debSeedSens = $list[4];
+ $debSeedAntisens = $list[4];
+ $info_gene = $list[@list-1];
+   }
+   close F;
+   close S;
+   exit(0);
+}
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/interElementGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/interElementGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,284 @@
+<tool id="interElementGff" name="interElementGff">
+ <description>Creates a new gff output, which corresponds to the region between two successive elements. </description>
+ <command interpreter="perl">
+ interElementGff.pl -i $inputFile 
+
+ #if $Optionf5ff.option == "Yes":
+ -f5ff $Optionf5ff.f5ffValue
+ #end if
+
+ #if $Optionff3f.option == "Yes":
+ -ff3f $Optionff3f.ff3fValue
+ #end if
+
+ #if $Optionf5fr.option == "Yes":
+ -f5fr $Optionf5fr.f5frValue
+ #end if
+
+ #if $Optionff3r.option == "Yes":
+ -ff3r $Optionff3r.ff3rValue
+ #end if
+
+ #if $Optionf3rf.option == "Yes":
+ -f3rf $Optionf3rf.f3rfValue
+ #end if
+
+ #if $Optionfr5f.option == "Yes":
+ -fr5f $Optionfr5f.fr5fValue
+ #end if
+
+ #if $Optionf3rr.option == "Yes":
+ -f3rr $Optionf3rr.f3rrValue
+ #end if
+
+ #if $Optionfr5r.option == "Yes":
+ -fr5r $Optionfr5r.fr5rValue
+ #end if
+
+ #if $Optionr5ff.option == "Yes":
+ -r5ff $Optionr5ff.r5ffValue
+ #end if
+
+ #if $Optionrf3f.option == "Yes":
+ -rf3f $Optionrf3f.rf3fValue
+ #end if
+
+ #if $Optionr5fr.option == "Yes":
+ -r5fr $Optionr5fr.r5frValue
+ #end if
+
+ #if $Optionrf3r.option == "Yes":
+ -rf3r $Optionrf3r.rf3rValue
+ #end if
+
+ #if $Optionr3rf.option == "Yes":
+ -r3rf $Optionr3rf.r3rfValue
+ #end if
+
+ #if $Optionrr5f.option == "Yes":
+ -rr5f $Optionrr5f.rr5fValue
+ #end if
+
+ #if $Optionr3rr.option == "Yes":
+ -r3rr $Optionr3rr.r3rrValue
+ #end if
+
+ #if $Optionrr5r.option == "Yes":
+ -rr5r $Optionrr5r.rr5rValue
+ #end if
+
+ -o $outputFile
+ </command>
+
+ <inputs>
+ <param name="inputFile" type="data" label="Input File" format="gff"/>
+
+ <conditional name="Optionf5ff">
+ <param name="option" type="select" label="Option[f5ff]" help="number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="f5ffValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionff3f">
+ <param name="option" type="select" label="Option[ff3f]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="ff3fValue" type="integer" value="30"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionf5fr">
+ <param name="option" type="select" label="Option[f5fr]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="f5frValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionff3r">
+ <param name="option" type="select" label="Option[ff3r]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="ff3rValue" type="integer" value="-10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionf3rf">
+ <param name="option" type="select" label="Option[f3rf]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="f3rfValue" type="integer" value="-10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionfr5f">
+ <param name="option" type="select" label="Option[fr5f]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="fr5fValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionf3rr">
+ <param name="option" type="select" label="Option[f3rr]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="f3rrValue" type="integer" value="-10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionfr5r">
+ <param name="option" type="select" label="Option[fr5r]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="fr5rValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionr5ff">
+ <param name="option" type="select" label="Option[r5ff]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="r5ffValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionrf3f">
+ <param name="option" type="select" label="Option[rf3f]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="rf3fValue" type="integer" value="30"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionr5fr">
+ <param name="option" type="select" label="Option[r5fr]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="r5frValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionrf3r">
+ <param name="option" type="select" label="Option[rf3r]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="rf3rValue" type="integer" value="-10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionr3rf">
+ <param name="option" type="select" label="Option[r3rf]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="r3rfValue" type="integer" value="30"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionrr5f">
+ <param name="option" type="select" label="Option[rr5f]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="rr5fValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionr3rr">
+ <param name="option" type="select" label="Option[r3rr]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="r3rrValue" type="integer" value="30"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="Optionrr5r">
+ <param name="option" type="select" label="Option[rr5r]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="rr5rValue" type="integer" value="10"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFile" format="gff" label="[interElementGff] Output File"/>
+ </outputs> 
+
+ <help>
+ command example: interElementGff.pl -i ${i}_annot.gff -o ${i}_trans_IG.gff -f5ff 10 -ff3f 30 -f5fr 10 -ff3r -10 -f3rf -10 -fr5f 10 -f3rr -10 -fr5r 10 -r5ff 10 -rf3f 30 -r5fr 10 -rf3r -10 -r3rf 30 -rr5f 10 -r3rr 30 -rr5r 10
+ </help>
+
+</tool>
+
+
+
+
+
+
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/listGff.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/listGff.sh Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,3 @@
+#!/bin/bash
+awk '{print $3}' $1 | grep "[[:alpha:]]" | sort -n | uniq -c
+
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.sh Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,5 @@
+#!/bin/bash
+python $GALAXY_ROOT/tools/repet_pipe/SMART/Java/Python/clusterize.py -f gff -i $1 -o intermedia.gff3 -c -d 150
+awk '{if ($3!="exon") {print $0}}' intermedia.gff3 > intermedia.gff
+#perl sortGff.pl -i intermedia.gff > $2
+python $GALAXY_ROOT/tools/repet_pipe/SMART/Java/Python/CollapseReads.py -i intermedia.gff -f gff -o $2
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,15 @@
+<tool id="prepareAnnotation" name="prepare annotation file">
+  <description>Prepares the annotation file: clusterizes, filters exons and sorts annotations.</description>
+  <command interpreter="bash"> prepareAnnot.sh $inputFile $outputFile  </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[prepareAnnotation] Output File"/>
+  </outputs>
+
+  <help>
+ command example: sh prepareAnnot.sh NC_011744r_annot_tmp1.gff NC_011744r_annot_pre1.gff
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,91 @@
+#!/usr/bin/perl -w
+###
+# Purpose: extension of the 5' UTRs from the read clusters
+# 
+# Input: gff annotation + cluster file
+#
+# Output: UTR5.gff
+#
+###------------------------------------------------------      
+use vars qw($USAGE);                      
+use strict;   
+use Getopt::Long;                            
+
+=head1 NAME
+
+seedGff.pl  
+
+=head1 SYNOPSIS
+
+% seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-h] 
+
+=head1 DESCRIPTION
+This script will parse input gff file and write information in gff3 format.
+
+    -i|--input fileName       gff input file name of annotations
+    -p|--pos BeginPosFromAtg  larger positive number for the begin position of the seed from the ATG
+   [-l|--length seedLength]    length of the seed to compute (default 4 nt)
+   [-e|--end seedEnd]       end of the seed to compute (smaller positive number)
+    -o|--output fileName       gff output file name
+   [-h|--help]                help mode then die                              
+
+=head1 AUTHOR - Claire Toffano-Nioche - mar.11
+    from Claire Kuchly initial script
+
+=cut
+#-----------------------
+my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
+   # command line check
+    foreach my $num (0 .. $#ARGV) {
+        SWITCH: for ($ARGV[$num]) {
+        /--input|-i/ && do { 
+ $inFileName=$ARGV[$num+1]; 
+ open (INGFF, "< $inFileName" ) or die "Can't open gff file: \"$inFileName\"\n" ; 
+ last };
+        /--pos|-p/ && do { 
+ $beginSeed=$ARGV[$num+1]; 
+ last };
+        /--end|-e/ && do { 
+ $endSeed=$ARGV[$num+1]; 
+ last };
+        /--length|-l/ && do { 
+ $lgSeed=$ARGV[$num+1]; 
+ last };
+        /--output|-o/ && do { 
+ $outFileName=$ARGV[$num+1]; 
+ last };
+        /--help|-h/ && do { exec("pod2text $0\n") ; die };
+        }
+    }
+    open(UTR5,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
+    if (($endSeed > 0) and ($lgSeed > 0)) {
+ print "Error : only -e or -l definition, not both\n";
+ exec("pod2text $0\n") ; die ;
+    } elsif ($lgSeed > 0) {
+ print "ERROR : Lg Seed => TODO \n";
+    }
+
+    # Creation of the filter files (UTR sequences):
+        #print "Creating the sequence files!\n";
+### Create the files of sequences at the 5' and 3' ends of the genes.
+### Seed for the 5' clusters: they must be anchored at -20 relative to the ATG, hence a seed of -22/-18.
+    while(my $ligne = <INGFF>){
+ chomp($ligne);
+ my @list = split(/\t/,$ligne) ;
+ my $finUTR5 ;
+ my $debUTR5 ;
+ my $strand = $list[6] ;
+ if($strand eq "+"){
+ $finUTR5 = $list[3]-$endSeed;
+ $debUTR5 = $list[3]-$beginSeed;
+ } elsif($strand eq "-"){
+ $debUTR5 = $list[4]+$endSeed;
+ $finUTR5 = $list[4]+$beginSeed;
+ }
+ if($debUTR5 < 0){$debUTR5 =0;}
+ if($finUTR5 < 0){$finUTR5 =0;}
+ print UTR5 "$list[0]\t$list[1]\t5UTR\t$debUTR5\t$finUTR5\t$list[5]\t$list[6]\t$list[7]\t$list[8]\n"; 
+    }
+    close INGFF;
+    close UTR5;
+exit(0);
b
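The per-feature arithmetic in seedGff.pl above can be restated in a few lines; a Python sketch with hypothetical coordinates, not part of the repository:

# Sketch of the seed computation in seedGff.pl (hypothetical values).
# '+' features: the seed lies upstream of the start (gff column 4);
# '-' features: it lies downstream of the end (gff column 5).
begin_seed, end_seed = 25, 15         # -p and -e command-line values
start, end, strand = 5000, 6200, '+'
if strand == '+':
    seed_start, seed_end = start - begin_seed, start - end_seed
else:
    seed_start, seed_end = end + end_seed, end + begin_seed
seed_start = max(seed_start, 0)       # clamp negative coordinates, as the script does
seed_end = max(seed_end, 0)
print(seed_start, seed_end)           # 4975 4985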
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/seedGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/seedGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,35 @@
+<tool id="seedGff" name="seedGff">
+  <description>Creates the seed from -25 to -15 bp before the ATG</description>
+  <command interpreter="perl"> seedGff.pl -i $inputFile -p $startPosFromAtg -e $endPosSeed
+    #if $optionSeedSize.seedSize == "Yes":
+        -l $optionSeedSize.seedLength
+    #end if
+    -o $outputFile
+  </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+    <param name="startPosFromAtg" type="integer" value="25" help="greather positive number for the start position of the seed from Atg "/>
+    <param name="endPosSeed" type="integer" value="15" help="end of the seed (smaller than the sart of the seed, positive number)"/>
+    <conditional name="optionSeedSize">
+ <param name="seedSize" type="select" label="The length of seed." help="If you have choosed the value of start/end position of seed, you don't need to fill this option.">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="seedLength" type="integer" value="4" label="The length of seed, default: 4nt" />
+ </when>
+ <when value="No">
+ </when>
+    </conditional>
+    
+
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[seedGff] Output File"/>
+  </outputs>
+
+  <help>
+ command example: perl seedGff.pl -i input_annot.gff -p 25 -e 15 -o output_cis_seed.gff
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/sortGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/sortGff.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,88 @@
+#!/usr/bin/perl -w
+###
+# Purpose: sort a gff file
+# 
+# Input: gff file
+#
+# Output: gff printed to standard output
+#
+###------------------------------------------------------
+
+#!/usr/bin/perl -w                                                                                                                                                     
+              
+use vars qw($USAGE);                      
+use strict;                               
+
+=head1 NAME
+
+sortGff.pl - sort a gff file
+
+=head1 SYNOPSIS
+
+% sortGff.pl -i file.gff [-h] 
+
+=head1 DESCRIPTION
+This script will sort a gff file (it only handles inversions of two successive lines).
+
+    -i|--input fileName  gff input file name
+    -o|--output fileName  gff3 output file name
+   [-h|--help]           help mode then die                              
+
+=head1 AUTHOR - Claire Toffano-Nioche - mar.11
+
+=cut
+
+#-----------------------
+my ($fileName, $colourGff, $outFileName) = ("", "", "sortOut.gff3") ;
+   # command line check
+    foreach my $num (0 .. $#ARGV) {
+        SWITCH: for ($ARGV[$num]) {
+        /--input|-i/ && do { 
+ $fileName=$ARGV[$num+1]; 
+ open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ; 
+ last };
+# /--output|-o/ && do { 
+# $outFileName=$ARGV[$num+1]; 
+# last };
+        /--help|-h/ && do { exec("pod2text $0\n") ; die };
+        }
+    }
+    # informations retrieval
+#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
+    my @lines = <fichierGff> ; 
+    close fichierGff ;
+    # treatment
+ #print "gff file read ; number of lines : $#lines\n";
+    my $previous = 0;
+    my $i = 0;
+ #print "$#lines\n" ;
+    while ($i <= $#lines) {
+ my @infos = split('\t', $lines[$i]) ;
+ #print "info[3]:$infos[3]; prv:$previous!\n";
+ if ($infos[3] < $previous) {
+     &exchange($i, $infos[3]) ;
+     $previous=$infos[3] ; 
+     $i--;
+ } 
+ $previous=$infos[3];
+ $i++;
+    }
+    for (my $i=0 ; $i <= $#lines ; $i++) {
+# print OUT $lines[$i] ;
+ print $lines[$i] ;
+    }
+#close OUT ;
+exit(0);
+#-----------------------
+sub exchange {
+ my ($index, $position) = @_ ;
+ my @info_col = split("\t", $lines[$index-1]) ;
+ if ($info_col[3] > $position) {
+ #print "$lines[$index]";
+ my $line_to_push = $lines[$index-1] ;
+ $lines[$index-1] = $lines[$index] ;
+ $lines[$index] = $line_to_push ;
+ } else {
+ print "TODO : push > one line\n" ;
+ }
+}
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/sortGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/sortGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,15 @@
+<tool id="sortGff" name="sortGff">
+  <description>Sorts a gff file.</description>
+  <command interpreter="perl"> sortGff.pl -i $inputFile > $outputFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[sortGff] Output File"/>
+  </outputs>
+
+  <help>
+ command example: perl sortGff.pl -i *_unsort.gff3 > *_sort.gff3
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.pl Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,189 @@\n+#!/usr/bin/perl -w\n+###\n+# Main : defining utr and intergenic operonic intervalles from a transcripts file following a referencies file \n+# \n+# Input : 2 gff files to intersect, transcript queries vs referencies\n+#\n+# Output : resulting gff file printing to standard output\n+#\n+###------------------------------------------------------\n+use vars qw($USAGE);                      \n+use strict;                               \n+\n+=head1 NAME\n+\n+splitTranscriptGff.pl - compare 2 input gff files and define utr and intergenic operonic intervalles by couple of overlapping elements\n+\n+=head1 SYNOPSIS\n+\n+% intervallsExtractorGff.pl -i referencies.gff -j transcriptQueries.gff -s strand [-h] \n+\n+=head1 DESCRIPTION\n+This script will intersect 2 gff files and compute distance between 2 successives lines. Take care both of sorting by positions the input files and of that referencies are included in transcriptQueries.\n+\n+    -i|--input1 fileName   gff input file name: included elements\n+    -j|--input2 fileName   gff input file name: extended elements\n+   [-s|--strand] [s|d]\t   s for single strand (colinear) or d for double strands (antisense) [default d]\n+   [-h|--help]             help mode then die                              \n+\n+=head1 USECASE\n+Define many fragments for each extended element (transcript): UTR5, gene, UTR3, "inOperon" for intergenomic region between 2 genes\n+intervallsExtractorGff.pl -i CDSannotations.gff -j RNAseqTranscripts.gff  > UTRsGenesOperonsLists.gff;\n+\n+=head1 KWON BUGS\n+No disjonction of overlapping elements of the included elements (-i file).\n+In usecase, overlapping genes are fused in one long gene.\n+\n+=head1 AUTHOR\n+Claire Toffano-Nioche - sep.11\n+\n+=cut\n+#-----------------------\n+sub feedPositionTab { my ($val, $pF, $pB, @info) = @_ ;\n+\t\t#print "feedPositionTab::$#info, ", ($#info+1)/4," \\n";\n+\tfor (my $i=0 ; $i <= $#info ; $i+=4) { # for each extended element \n+\t\t\t#print "....$info[$i+2]\\n";\n+\t\tif ($info[$i+3] =~ /\\+/) {\n+\t\t\tfor (my $c = $info[$i+1] ; $c <= $info[$i+2] ; $c++) { @$pF[$c]=$val } ; # sequence Forward\n+\t\t} else {\n+\t\t\tfor (my $c = $info[$i+1] ; $c <= $info[$i+2] ; $c++) { @$pB[$c]=$val } ; # sequence Backward\n+\t\t}\n+\t}\n+\t\t#print "feedPos...:: ", join(".", @$pF[0..100]), "\\n";\n+\t\t#print "feedPos...:: ", join(".", @$pB[0..100]), "\\n";\n+}\n+#-----------------------\n+sub recupInfo {\tmy ($pInfo, @lines) = @_ ;\n+    for (my $i=0 ; $i <= ($#lines+1)*4-1 ; $i+=4) {\n+    \tmy @line = split("\\t",$lines[$i/4]);\n+\t\tpush(@$pInfo, $line[0], $line[3], $line[4], $line[6]) ; # 0=nom, 3=debut, 4=fin, 6=sens\n+\t}\n+\t#print "recupInfo::fin=", ($#lines+1)*4, "\\n" ;\n+}\n+#-----------------------\n+sub tagName { my ($seqN, $posB, $posE, $strand) = @_ ;    \n+\tmy $tagN=$seqN.$strand.$posB."..".$posE;\n+\t\t#print "tagName:",join("_",@_)," et tagName:$tagN\\n";\n+return $tagN;\n+}\n+#-----------------------\n+sub transitionAnalysis {\n+my ($pos, $seq, $s, $pdebAmont, $pfinAmont, $pdebIn, $pfinIn, $pdebAval, $pfinAval, $ptag) = @_ ;\n+\tmy $enCours = 0 ; my $precedant = 0 ;\n+\t$enCours = @$ptag[$pos] ; \n+\t$precedant = ($s =~ /\\+/?@$ptag[$pos-1]:@$ptag[$pos+1]) ; \n+    if ($enCours ne $precedant) {\n+    \t#print "transi...:: $s, $pos, $precedant, $enCours\\n";\n+    \t#print "transition::$$pdebAmont, $$pfinAmont, $$pdebIn, $$pfinIn, $$pdebAval, $$pfinAval\\n";\n+    \tSWITCH: for ($precedant.$enCours) {\n+               \t/01/ && do { $$pdebAmont = $pos ; last SWITCH 
;};\n+                /02/ && do { $$pdebIn = $pos ; last SWITCH ;};\n+                /10/ && do { $$pfinAval = ($s =~/\\+/?$pos-1:$pos+1) ; \n+                \t\tif (($s =~ /\\+/)and ($$pdebAval!=$$pfinAval)) {\n+                \t\t\tprintf "%s\\tsplit\\tutr3\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n+                \t\t\t\t$seq, $$pdebAval, $$pfinAval, $s, &tagName($seq, $$pdebAval, $$pfinAval, $s) ; \n+                \t\t\t#if ($$pdebAval==$$pfinAval) { print "transition 10 +\\n"};\n+                \t\t} elsif ($$pfinAval!=$$pdebAval) {\n+                \t\t\tprintf "%s\\tsplit\\tutr3\\t%s\\t%s\\t.\\t%s'..b'==$$pdebAmont) { print "transition 12 -\\n"} ;\n+                \t\t}\n+                \t\t$$pdebAmont = 0 ; $$pfinAmont = 0 ;\n+                \t\tlast SWITCH ;\n+                \t };\n+                /20/ && do { $$pfinIn=($s =~/\\+/?$pos-1:$pos+1) ; \n+                        if (($s =~ /\\+/) and ($$pdebIn!=$$pfinIn)) {\n+                        \tprintf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n+                \t\t\t\t$seq, $$pdebIn, $$pfinIn, $s, &tagName($seq, $$pdebIn, $$pfinIn, $s) ; \n+                \t\t} elsif ($$pfinIn!=$$pdebIn) {\n+                \t\t    printf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n+                \t\t\t\t$seq, $$pfinIn, $$pdebIn, $s, &tagName($seq, $$pfinIn, $$pdebIn, $s) ; \n+                \t\t}\n+                \t\t$$pdebIn = 0 ; $$pfinIn = 0 ;\n+                \t\tlast SWITCH ;\n+                \t };\n+                /21/ && do { $$pdebAval=$pos ; $$pfinIn=($s =~/\\+/?$pos-1:$pos+1) ; \n+                        if (($s =~ /\\+/) and ($$pdebIn!=$$pfinIn)) {\n+                        \tprintf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n+                \t\t\t\t$seq, $$pdebIn, $$pfinIn, $s, &tagName($seq, $$pdebIn, $$pfinIn, $s) ; \n+                \t\t} elsif ($$pfinIn!=$$pdebIn) {\n+                \t\t\tprintf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n+                \t\t\t\t$seq, $$pfinIn, $$pdebIn, $s, &tagName($seq, $$pfinIn, $$pdebIn, $s) ; \n+                \t\t}\n+                \t\t#$$pdebIn = 0 ; $$pfinIn = 0 ;\n+                \t\tlast SWITCH ;\n+                \t };\n+          }\n+    }\n+ }\n+#-----------------------\t\n+my ($fileNameI, $fileNameE, $strand) = ("", "", 0) ;\n+# command line check\n+foreach my $num (0 .. $#ARGV) {\n+        SWITCH: for ($ARGV[$num]) {\n+        /--input1|-i/ && do { \n+\t\t\t$fileNameI=$ARGV[$num+1]; \n+\t\t\topen ( fichierGffI, "< $fileNameI" ) or die "Can\'t open gff file: \\"$fileNameI\\"\\n" ; \n+\t\t\tlast };\n+\t/--input2|-j/ && do { \n+\t\t\t$fileNameE=$ARGV[$num+1]; \n+\t\t\topen ( fichierGffE, "< $fileNameE" ) or die "Can\'t open gff file: \\"$fileNameE\\"\\n" ; \n+\t\t\tlast };\n+        /--strand|-s/ && do { \n+\t\t\tif ($ARGV[$num+1] eq "s") { $strand=1}; \n+\t\t\tlast };\n+        /--help|-h/ && do { exec("pod2text $0\\n") ; die };\n+        }\n+}\n+# memory declarations:\n+my @infoI ; my @infoE ;\n+my $seqName ;\n+my @tagF ; my @tagB ; # Forward and Backward sequence\n+# data retrieval:\n+my @linesI = <fichierGffI> ; my @linesE = <fichierGffE> ;\n+close fichierGffI ; close fichierGffE ;\n+\t\t#print "gff files read ; number of lines : $#lines1 + $#lines2\\n";\n+\t\t# positions management\n+&recupInfo(\\@infoI, @linesI) ;\n+&recupInfo(\\@infoE, @linesE) ;\n+# treatement: \n+# transform gff lines into chromosomal position tags : 0 for nothing, 1 resp. 2 for extended resp. 
included elements\n+if (($#infoI) and ($#infoE)) { \n+\t$seqName=$infoI[0] ;\n+\t\t#print "fin : $infoE[$#infoE-1]\\n";\n+\tfor (my $i=0 ; $i <= $infoE[$#infoE-1] ; $i++) { $tagF[$i] = 0 ; $tagB[$i] = 0 ; } ; # "O" tag in all chr. positions\n+\t\t#print "seqName : $seqName\\n" ;\n+\t&feedPositionTab(1, \\@tagF, \\@tagB, @infoE) ; # "1" tag for all extended elements\n+\t&feedPositionTab(2, \\@tagF, \\@tagB, @infoI) ; # "2" tag for all included elements\n+\t\t#print join("", @tagF), "\\n";\n+\t\t#print join("", @tagB), "\\n";\n+\t# transition management:\n+\tmy ($beginUpstream, $endUpstream, $beginIncluded, $endIncluded, $beginDownstream, $endDownstream) \n+\t\t= (0, 0, 0, 0, 0, 0) ;\n+\tfor (my $i=1 ; $i <= $#tagF-1 ; $i+=1) {\n+\t\t&transitionAnalysis($i, $seqName, "+", \\$beginUpstream, \\$endUpstream, \\$beginIncluded, \\$endIncluded, \\$beginDownstream, \\$endDownstream, \\@tagF) ;\n+\t}\n+\t($beginUpstream, $endUpstream, $beginIncluded, $endIncluded, $beginDownstream, $endDownstream) = ($infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1]) ;\n+\tfor (my $i=$#tagB-1 ; $i >= 1 ; $i-=1) {\n+\t\t&transitionAnalysis($i, $seqName, "-", \\$beginUpstream, \\$endUpstream, \\$beginIncluded, \\$endIncluded, \\$beginDownstream, \\$endDownstream, \\@tagB) ;\n+\t}\n+}\n+exit(0) ;\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,21 @@
+<tool id="splitTranscriptGff" name="splitTranscriptGff">
+ <description> Defines UTRs and intergenic operonic regions from a transcript file, following a reference file </description>
+ <command interpreter="perl"> 
+ splitTranscriptGff.pl -i $referenciesFile -j $transcriptsFile > $outputFile  
+ </command>
+
+ <inputs>
+ <param name="referenciesFile" type="data" label="Referencies Input File" format="gff" />
+ <param name="transcriptsFile" type="data" label="Transcripts Input File" format="gff" />
+ </inputs>
+
+ <outputs>
+ <data format="gff3" name="outputFile" label="[splitTranscript] Output File"/>
+ </outputs>
+
+ <help>
+ Note that input files should be sorted by increasing positions and that expressed references should be included in the transcripts.
+
+ Command example: perl splitTranscriptGff.pl -i annotations.gff -j transcripts.gff > TUTag.gff3
+ </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,79 @@
+#!/usr/bin/perl -w 
+###
+# Purpose: protocol for detecting potential non-coding RNAs
+# 
+# Input: Smart gff3 mapping file
+# gff file of the genes
+# gff file of the potential cis-regulatory clusters
+#
+# Output: gff file of the ncRNA clusters
+#
+###------------------------------------------------------
+
+use vars qw($USAGE);                      
+use strict; 
+
+=head1 NAME
+
+protocol_NC_V2_CTN3.pl 
+
+=head1 SYNOPSIS
+
+% strictlyIncludeGff.pl -i toSelect.gff3 -t template.gff3 > result.gff3
+
+=head1 DESCRIPTION
+
+strictlyIncludeGff.pl - print elements strictly included in the template (gff files)
+
+    -i|--input  fileName  gff input file name
+    -t|--template fileName  gff template file name
+    [-h|--help] help mode then die                              
+
+=head1 AUTHOR - CTN - mar.11 
+(from RNA-Vibrio/protocol_NC_V2_CTN3.pl - Claire KUCHLY)
+
+=cut
+      
+#----------------------------------------------------------------------------
+# check command line :
+my $outFileName = "outSIG.gff3";
+if ($#ARGV==0) {
+ die (exec("pod2text $0\n"));
+} else {
+    foreach my $num (0 .. $#ARGV) {
+ SWITCH: for ($ARGV[$num]) {
+ /--input|-i/ && do { open(ARN,"<$ARGV[$num+1]") 
+ or die "Error: Can't open \"$ARGV[$num+1]\", $!"; 
+ last };
+ /--template|-t/ && do { open(SEED,"<$ARGV[$num+1]") 
+ or die "Error : Can't open file \"$ARGV[$num+1]\", $!";
+ last };
+ /--help|-h/ && do { exec("pod2text $0\n") ; die };
+ }
+    }
+    ##NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789
+#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
+    my @seed ;
+    my $s=0;
+    while (my $seedLine = <SEED> ) {
+ my @list = split(/\t/,$seedLine);
+ $seed[$s][0]= $list[3] ; # position begin seed
+ $seed[$s][1]= $list[4] ; # position end seed
+ $seed[$s][2]= $list[6] ; # seed sens
+ $seed[$s][3]= $list[0] ; # chromosome name
+ $s++;
+    }
+    close SEED ;
+    while(my $ligne = <ARN>){
+ $s=0;
+ my @list = split(/\t/,$ligne);
+ while (($s <= $#seed)) {
+ if (($seed[$s][3] eq $list[0]) and ($seed[$s][0] <= $list[3]) and ($seed[$s][1] >= $list[4]) and ($seed[$s][2] eq $list[6])) { # if the element is included in the seed and on the same strand
+ print "$ligne";
+ }
+ $s++;
+ }
+    }
+    close ARN ;
+    exit(0);
+}
b
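The selection criterion applied above is strict inclusion with matching chromosome and strand; a Python sketch of the same test on hypothetical (chromosome, start, end, strand) tuples, not part of the repository:

# Sketch of the strict-inclusion test from strictlyIncludeGff.pl.
def strictly_included(element, template):
    e_chrom, e_start, e_end, e_strand = element
    t_chrom, t_start, t_end, t_strand = template
    return (e_chrom == t_chrom
            and t_start <= e_start
            and t_end >= e_end
            and e_strand == t_strand)

# Hypothetical example: a small element inside a larger template interval.
print(strictly_included(("NC_011744", 120, 180, "+"),
                        ("NC_011744", 100, 200, "+")))   # True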
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.xml Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,17 @@
+<tool id="strictlyIncludeGff" name="strictly include Gff">
+  <description>Prints the elements which are strictly included in the template.</description>
+  <command interpreter="perl"> strictlyIncludeGff.pl -i $inputFile -t $template > $outputFile  
+  </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+    <param name="template" type="data" label="template File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[strictlyIncludeGff] Output File"/>
+  </outputs>
+
+  <help>
+ command example: perl strictlyIncludeGff.pl -i toSelect.gff3 -t template.gff3 > result.gff3
+  </help>
+</tool>
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,124 @@
+#! /usr/bin/env python
+
+import optparse, os, shutil
+from optparse import OptionParser
+
+
+def image(text, url):
+ return "<center>%s</center><img src='%s'>" % (text, url)
+
+
+def __main__():
+ description = "Write all results in one HTML file."
+ parser = OptionParser(description = description)
+ parser.add_option("", "--input1Gff1", dest="input1Gff3_1", action="store", type="string", help="First gff3 result in the first analyse.(TRANS detection)")
+ parser.add_option("", "--input1Gff2", dest="input1Gff3_2", action="store", type="string", help="Second gff3 result in the first analyse. (TRANS detection)")
+ parser.add_option("", "--input1PNG1", dest="input1PNG1", action="store", type="string", help="PNG (getSize) result in the first analyse. (TRANS detection)")
+ parser.add_option("", "--input1PNG2", dest="input1PNG2", action="store",type="string", help="PNG (plot) result in the first analyse. (TRANS detection)")
+ parser.add_option("", "--input2Gff1", dest="input2Gff3_1", action="store", type="string", help="First gff3 result in the second analyse. (ANTISENSE detection)")
+ parser.add_option("", "--input2Gff2", dest="input2Gff3_2", action="store", type="string", help="Second gff3 result in the second analyse. (ANTISENSE detection)")
+ parser.add_option("", "--input2PNG1", dest="input2PNG1", action="store", type="string", help="PNG (getSize) result in the second analyse. (ANTISENSE detection)")
+ parser.add_option("", "--input2PNG2", dest="input2PNG2", action="store", type="string", help="PNG (plot) result in the second analyse. (ANTISENSE detection)")
+ parser.add_option("", "--input3Gff1", dest="input3Gff3_1", action="store", type="string", help="First gff3 result in the third analyse. (CIS detection)")
+ parser.add_option("", "--input3Gff2", dest="input3Gff3_2", action="store", type="string", help="Second gff3 result in the third analyse. (CIS detection)")
+ parser.add_option("", "--input3PNG1", dest="input3PNG1", action="store", type="string", help="PNG (getSize) result in the third analyse. (CIS detection)")
+ parser.add_option("", "--input3PNG2", dest="input3PNG2", action="store", type="string", help="PNG (plot) result in the third analyse. (CIS detection)")
+ parser.add_option("", "--outHTML", dest="outHTML", action="store", type="string", help="An HTML output.")
+ parser.add_option("", "--outImgDir", dest="imgDir", action="store", type="string", help="Copy all result images into imgDir, for Galaxy option.")
+ (options, args) = parser.parse_args()
+
+
+ if not os.path.exists(options.imgDir):
+ os.makedirs(options.imgDir)
+
+ shutil.copy(options.input1PNG1, options.imgDir)
+ shutil.copy(options.input1PNG2, options.imgDir)
+ shutil.copy(options.input2PNG1, options.imgDir)
+ shutil.copy(options.input2PNG2, options.imgDir)
+ shutil.copy(options.input3PNG1, options.imgDir)
+ shutil.copy(options.input3PNG2, options.imgDir)
+
+
+ outfile=open(options.outHTML, "w")
+ #print >>outfile, "<html><head><title>The results for ncRNAs detections.</title></head><body>"
+ print >>outfile, "<h1><center>The results for ncRNAs detections.</center></h1>"
+
+ #write results for the first analysis
+ print >>outfile, "<B><center><font color=red size=4>The results of intergenic sRNAs detection.(TRANS)</font></center></B>"
+ print >>outfile, "<center><strong>The results of comparison to already known ncRNA to validate some candidates.</strong></center><p>"
+ input1Gff1 = open(options.input1Gff3_1, "r")
+ lines = input1Gff1.readlines()
+ input1Gff1.close()   
+ for line in lines:
+ print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
+ print >>outfile, "<p>"
+ print >>outfile, "<center><strong>The results of comparison to already known ncRNA to see which ncRNAs are not detected.</strong></center><p>"
+ input1Gff2 = open(options.input1Gff3_2, "r")
+ lines = input1Gff2.readlines()
+ input1Gff2.close()   
+ for line in lines:
+ print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
+ print >>outfile, "<p>"
+ img_input1PNG1 = os.path.basename(options.input1PNG1)
+ image1=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input1PNG1)
+ print >>outfile, "%s" % image1
+ print >>outfile, "<p>"
+ img_input1PNG2 = os.path.basename(options.input1PNG2)
+ image2=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input1PNG2)
+ print >>outfile, "%s" % image2
+ print >>outfile, "<BR><p>"
+
+
+ #write results for the second analysis
+ print >>outfile, "<B><center><font color=red size=4>The results of asRNA detection. (ANTISENSE)</font></center></B>"
+ print >>outfile, "<center><strong>The results of the comparison to already known ncRNAs, used to validate some candidates.</strong></center><p>"
+ input2Gff1 = open(options.input2Gff3_1, "r")
+ lines = input2Gff1.readlines()
+ input2Gff1.close()
+ for line in lines:
+     print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
+ print >>outfile, "<p>"
+ print >>outfile, "<center><strong>The results of the comparison to already known ncRNAs, showing which known ncRNAs are not detected.</strong></center><p>"
+ input2Gff2 = open(options.input2Gff3_2, "r")
+ lines = input2Gff2.readlines()
+ input2Gff2.close()
+ for line in lines:
+     print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
+ print >>outfile, "<p>"
+ img_input2PNG1 = os.path.basename(options.input2PNG1)
+ image1 = image("<strong>Resulting image: distribution of the candidate sizes.</strong>", img_input2PNG1)
+ print >>outfile, "%s" % image1
+ print >>outfile, "<p>"
+ img_input2PNG2 = os.path.basename(options.input2PNG2)
+ image2 = image("<strong>Resulting image: distribution of the candidate sizes.</strong>", img_input2PNG2)
+ print >>outfile, "%s" % image2
+ print >>outfile, "<BR><p>"
+
+
+ #write results for the third analysis
+ print >>outfile, "<B><center><font color=red size=4>The results of long 5'UTR detection. (CIS)</font></center></B>"
+ print >>outfile, "<center><strong>The results of the comparison to already known ncRNAs, used to validate some candidates.</strong></center><p>"
+ input3Gff1 = open(options.input3Gff3_1, "r")
+ lines = input3Gff1.readlines()
+ input3Gff1.close()
+ for line in lines:
+     print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
+ print >>outfile, "<p>"
+ print >>outfile, "<center><strong>The results of the comparison to already known ncRNAs, showing which known ncRNAs are not detected.</strong></center><p>"
+ input3Gff2 = open(options.input3Gff3_2, "r")
+ lines = input3Gff2.readlines()
+ input3Gff2.close()
+ for line in lines:
+     print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
+ print >>outfile, "<p>"
+ img_input3PNG1 = os.path.basename(options.input3PNG1)
+ image1 = image("<strong>Resulting image: distribution of the candidate sizes.</strong>", img_input3PNG1)
+ print >>outfile, "%s" % image1
+ print >>outfile, "<p>"
+ img_input3PNG2 = os.path.basename(options.input3PNG2)
+ image2 = image("<strong>Resulting image: distribution of the candidate sizes.</strong>", img_input3PNG2)
+ print >>outfile, "%s" % image2
+ print >>outfile, "<BR><p>"
+
+
+if __name__=="__main__": __main__()
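The calls to image(...) above use a helper that is defined elsewhere in writeResToHTML.py and is not part of this hunk; its name and signature are assumed here only from those calls. As a minimal sketch, such a helper presumably returns an HTML fragment containing the caption followed by an <img> tag that references the PNG by basename, since the PNG files are copied next to the HTML output:

# Hypothetical reconstruction of the image() helper used above; the actual
# definition lives elsewhere in writeResToHTML.py and may differ.
def image(caption, fileName):
    # Caption first, then the image itself, referenced by basename because
    # the PNG files are copied into the HTML dataset's image directory.
    return '%s<br/><img src="%s" alt="%s"/>' % (caption, fileName, fileName)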
diff -r 5677346472b5 -r 0ab839023fe4 SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+<tool id="writeResToHTML" name="writeResToHTML">
+  <description>Write all ncRNA analysis results into an HTML file (only for the ncRNA analysis pipeline).</description>
+   <command interpreter="python"> writeResToHTML.py 
+   --input1Gff1 $input1GffFile1 --input1Gff2 $input1GffFile2 --input1PNG1 $input1PNGFile1 --input1PNG2 $input1PNGFile2 
+   --input2Gff1 $input2GffFile1 --input2Gff2 $input2GffFile2 --input2PNG1 $input2PNGFile1 --input2PNG2 $input2PNGFile2
+   --input3Gff1 $input3GffFile1 --input3Gff2 $input3GffFile2 --input3PNG1 $input3PNGFile1 --input3PNG2 $input3PNGFile2
+   --outHTML $outHTML 
+   --outImgDir $outHTML.files_path 
+   2> $log </command>
+
+  <inputs>
+      <param name="input1GffFile1" type="data" label="First gff3 result in the intergenic sRNA analysis (TRANS detection)" format="gff3"/>
+      <param name="input1GffFile2" type="data" label="Second gff3 result in the intergenic sRNA analysis (TRANS detection)" format="gff3"/>
+      <param name="input1PNGFile1" type="data" label="PNG (getSize) result in the intergenic sRNA analysis (TRANS detection)" format="png"/>
+      <param name="input1PNGFile2" type="data" label="PNG (plot) result in the intergenic sRNA analysis (TRANS detection)" format="png"/>
+
+      <param name="input2GffFile1" type="data" label="First gff3 result in the asRNA analysis (ANTISENSE detection)" format="gff3"/>
+      <param name="input2GffFile2" type="data" label="Second gff3 result in the asRNA analysis (ANTISENSE detection)" format="gff3"/>
+      <param name="input2PNGFile1" type="data" label="PNG (getSize) result in the asRNA analysis (ANTISENSE detection)" format="png"/>
+      <param name="input2PNGFile2" type="data" label="PNG (plot) result in the asRNA analysis (ANTISENSE detection)" format="png"/>
+
+      <param name="input3GffFile1" type="data" label="First gff3 result in the long 5'UTR analysis (CIS detection)" format="gff3"/>
+      <param name="input3GffFile2" type="data" label="Second gff3 result in the long 5'UTR analysis (CIS detection)" format="gff3"/>
+      <param name="input3PNGFile1" type="data" label="PNG (getSize) result in the long 5'UTR analysis (CIS detection)" format="png"/>
+      <param name="input3PNGFile2" type="data" label="PNG (plot) result in the long 5'UTR analysis (CIS detection)" format="png"/>
+
+  </inputs>
+
+  <outputs>
+      <data format="html" name="outHTML" label="[writeRes2HTML] Output HTML File" help="This output file gathers all results of the ncRNA detection analysis."/>
+      <data format="txt" name="log" label="[writeRes2HTML] Output log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/REF.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/REF.fasta Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,33148 @@
+>C10HBa0111D09_LR276	15142	24441	|Longueur=9300
+[... 33,148 added lines of FASTA sequence data; content truncated in the changeset view ...]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/REF.fasta.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/REF.fasta.fai Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,198 @@
+C10HBa0111D09_LR276 9300 48 60 61
+C11HBa0029C01_LR281 10969 9552 60 61
+C11HBa0034I10_LR282 9056 20752 60 61
+C11HBa0054I23_LR283 10301 30008 60 61
+C11HBa0062I24_LR284 10050 40531 60 61
+C11HBa0064J13_LR285 9385 50797 60 61
+C11HBa0072I13_LR286 9556 60387 60 61
+C11HBa0089M02_LR287 9244 70150 60 61
+C11HBa0096D22_LR288 9184 79597 60 61
+C11HBa0107K14_LR289 9115 88983 60 61
+C11HBa0139J14_LR291 10002 98299 60 61
+C11HBa0143O06_LR374 10785 108517 60 61
+C11HBa0161D01_LR292 9057 119530 60 61
+C11HBa0168B23_LR293 9826 128787 60 61
+C11HBa0190J03_LR294 10992 138826 60 61
+C11HBa0249E07_LR279 10008 150051 60 61
+C11HBa0303G16_LR296 9430 160274 60 61
+C11HBa0323E19_LR297 9657 169910 60 61
+C11SLe0053P22_LR298 9827 179777 60 61
+C11SLm0052K14_LR376 10013 189817 60 61
+C12HBa115G22_LR301 10021 200043 60 61
+C12HBa120K4_LR313 10271 210279 60 61
+C12HBa144B17_LR302 9247 220768 60 61
+C12HBa149G24_LR381 9271 230217 60 61
+C12HBa165B12_LR303 9257 239690 60 61
+C12HBa183M6_LR379 9473 249148 60 61
+C12HBa221M9_LR377 10755 258826 60 61
+C12HBa224N6_LR382 9130 269807 60 61
+C12HBa26C13_LR299 9139 279136 60 61
+C12HBa326K10_LR306 10414 288478 60 61
+C12HBa90D9_LR311 9638 299111 60 61
+C12HBa93P12_LR312 9510 308956 60 61
+C12SLe124D18_LR385 10545 318673 60 61
+C12SLeRI72J6_LR378 9337 329441 60 61
+C12SLm103K8_LR380 10118 338981 60 61
+C01HBa0003D15_LR7 10776 349315 60 61
+C01HBa0163B20_LR10 9321 360318 60 61
+C01HBa0216G16_LR11 10332 369845 60 61
+C01HBa0256E08_LR13 9024 380399 60 61
+C01HBa0329A12_LR14 9536 389621 60 61
+BAC19_LR16 9760 399355 60 61
+C02HBa0008G02_LR67 9205 409327 60 61
+C02HBa0011O23_LR68 9399 418733 60 61
+C02HBa0016A12_LR19 9822 428336 60 61
+C02HBa0027B01_LR21 9222 438369 60 61
+C02HBa0030A21_LR22 9147 447792 60 61
+C02HBa0046M08_LR23 10763 457140 60 61
+C02HBa0072A04_LR26 9766 468130 60 61
+C02HBa0075D08_LR28 10744 478107 60 61
+C02HBa0124N09_LR31 9335 489077 60 61
+C02HBa0155D20_LR36 10743 498616 60 61
+C02HBa0155E05_LR37 10417 509587 60 61
+C02HBa0164H08_LR38 10279 520227 60 61
+C02HBa0167J21_LR39 9925 530725 60 61
+C02HBa0185P07_LR40 9818 540863 60 61
+C02HBa0190N21_LR41 10835 550895 60 61
+C02HBa0190P16_LR331 10808 561960 60 61
+C02HBa0194L19_LR42 10280 572997 60 61
+C02HBa0204A09_LR332 10029 583498 60 61
+C02HBa0204D01_LR334 9746 593743 60 61
+C02HBa0214B22_LR325 9581 603699 60 61
+C02HBa0215M12_LR319 9918 613488 60 61
+C02HBa0228I09_LR329 10933 623621 60 61
+C02HBa0236E02_LR326 9822 634785 60 61
+C02HBa0284G15_LR47 9034 644820 60 61
+C02HBa0291P19_LR48 9826 654052 60 61
+C02HBa0329G05_LR52 9637 664090 60 61
+C02SLe0010H16_LR53 10744 673936 60 61
+C02SLe0018B07_LR335 9222 684910 60 61
+C02SLe0034H10_LR327 10833 694334 60 61
+C02SLe0127J16_LR59 10965 705396 60 61
+C02SLe0132D01_LR60 10524 716591 60 61
+C02SLm0057H03_LR336 9514 727339 60 61
+C02SLm0057H03_LR64 9170 737059 60 61
+C02SLm0057H03_LR65 9532 746429 60 61
+C03HBa0012D06_LR72 10645 756168 60 61
+C03HBa0030O03_LR74 10569 767039 60 61
+C03HBa0034B23_LR76 10005 777833 60 61
+C03HBa0040F22_LR77 10227 788053 60 61
+C03HBa0054O21_LR78 9044 798497 60 61
+C03HBa0076J13_LR79 10097 807740 60 61
+C03HBa0233O20_LR82 9753 818053 60 61
+C03HBa0295I12_LR83 10258 828017 60 61
+C03HBa0318C22_LR84 10004 838495 60 61
+C03HBa0323D22_LR85 9222 848713 60 61
+C04HBa127N12_LR346 10533 858137 60 61
+C04HBa132O11_LR104 10306 868894 60 61
+C04HBa164O3_LR344 9345 879419 60 61
+C04HBa190C13_LR106 10719 888968 60 61
+C04HBa198I15_LR107 10673 899914 60 61
+C04HBa219H8_LR109 10174 910812 60 61
+C04HBa239P14_LR111 10483 921204 60 61
+C04HBa255I2_LR112 10650 931908 60 61
+C04HBa27G19_LR337 9788 942782 60 61
+C04HBa2G1_LR120 9322 952778 60 61
+C04HBa331L22_LR115 10697 962304 60 61
+C04HBa35C16_LR339 9494 973226 60 61
+C04HBa36C23_LR91 10103 982925 60 61
+C04HBa50I18_LR341 10825 993244 60 61
+C04HBa58E11_LR93 9927 1004296 60 61
+C04HBa66O12_LR94 9355 1014433 60 61
+C04HBa68N5_LR343 9886 1023989 60 61
+C04HBa6E18_LR87 9265 1034086 60 61
+C04HBa6O16_LR123 10386 1043552 60 61
+C04HBa78E4_LR98 9994 1054158 60 61
+C04HBa78J4_LR99 9165 1064363 60 61
+C04HBa80D3_LR100 9781 1073726 60 61
+C04HBa8K13_LR338 9345 1083716 60 61
+C04HBa96I8_LR101 9693 1093262 60 61
+C04SLm14G22_LR116 10306 1103164 60 61
+C04SLm39E17_LR117 9105 1113688 60 61
+C05HBa0003C20_LR126 9460 1122990 60 61
+C05HBa0006N20_LR128 10108 1132657 60 61
+C05HBa0019C24_LR143 9514 1142982 60 61
+C05HBa0042B19_LR129 10674 1152703 60 61
+C05HBa0057G22_LR130 9023 1163602 60 61
+C05HBa0058L13_LR131 9215 1172824 60 61
+C05HBa0108A18_LR132 10114 1182244 60 61
+C05HBa0131D04_LR133 9279 1192574 60 61
+C05HBa0135A02_LR134 10620 1202057 60 61
+C05HBa0138J03_LR135 10910 1212905 60 61
+C05HBa0145P19_LR136 9141 1224045 60 61
+C05HBa0261K11_LR139 9058 1233387 60 61
+C06HBa0024F02_LR152 10452 1242645 60 61
+C06HBa0036J15_LR145 9458 1253320 60 61
+C06HBa0066D13_LR353 10505 1262985 60 61
+C06HBa0066I09_LR156 9957 1273713 60 61
+C06HBa0103N18_LR158 10350 1283886 60 61
+C06HBa0106K23_LR159 10895 1294458 60 61
+C06HBa0120H21_LR161 10738 1305584 60 61
+C06HBa0144J05_LR355 10850 1316549 60 61
+C06HBa0147H20_LR146 10693 1327629 60 61
+C06HBa0197N20_LR164 9418 1338549 60 61
+C06HBa0217M17_LR166 9470 1348172 60 61
+C06HBa0222J18_LR167 9282 1357848 60 61
+C06HBa0304P16_LR358 10525 1367334 60 61
+C06SLe0093P23_LR171 10431 1378084 60 61
+C06SLm0009E16_LR172 9523 1388737 60 61
+C06SLm0022H01_LR173 9212 1398467 60 61
+C06SLm0067G18_LR359 10587 1407882 60 61
+C07HBa0002D20_LR197 9796 1418694 60 61
+C07HBa0002M15_LR175 10456 1428703 60 61
+C07HBa0012N15_LR200 9921 1439382 60 61
+C07HBa0018L21_LR201 9169 1449517 60 61
+C07HBa0116M01_LR182 9010 1458887 60 61
+C07HBa0130B18_LR183 10208 1468097 60 61
+C07HBa0140O20_LR184 9640 1478524 60 61
+C07HBa0224G23_LR186 10880 1488374 60 61
+C07HBa0229H10_LR187 10802 1499484 60 61
+C07HBa0287B22_LR188 10024 1510516 60 61
+C07HBa0308M01_LR189 9216 1520756 60 61
+C07HBa0309B15_LR190 9063 1530174 60 61
+C07HBa0309F18_LR191 9664 1539437 60 61
+C07SLe0008H22_LR192 10612 1549312 60 61
+C07SLe0099J13_LR193 10794 1560150 60 61
+C07SLe0111B06_LR194 9814 1571170 60 61
+C07SLm0119A22_LR209 10081 1581197 60 61
+C07SLm0140K05_LR206 10879 1591498 60 61
+C08HBa0006A17_LR229 10049 1602608 60 61
+C08HBa0012O06_LR211 9339 1612873 60 61
+C08HBa0018C13_LR232 9075 1622416 60 61
+C08HBa0025D10_LR235 10483 1631692 60 61
+C08HBa0025I17_LR236 10269 1642401 60 61
+C08HBa0149J12_LR217 10855 1652891 60 61
+C08HBa0165B06_LR218 10825 1663976 60 61
+C08HBa0201M14_LR220 10831 1675031 60 61
+C08HBa0239G21_LR221 9322 1686090 60 61
+C08HBa0336I24_LR223 10943 1695617 60 61
+C08SLm0118A18_LR226 10481 1706792 60 61
+C09HBa0022M02_LR247 10139 1717497 60 61
+C09HBa0036O20_LR250 9692 1727852 60 61
+C09HBa0038L16_LR251 10860 1737755 60 61
+C09HBa0049F08_LR253 10353 1748845 60 61
+C09HBa0059I05_LR254 10352 1759421 60 61
+C09HBa0099F14_LR257 10809 1769995 60 61
+C09HBa0099P03_LR258 10156 1781034 60 61
+C09HBa0100J12_LR259 9064 1791408 60 61
+C09HBa0102E23_LR260 9715 1800672 60 61
+C09HBa0109D11_LR262 10625 1810598 60 61
+C09HBa0113I06_LR360 9962 1821449 60 61
+C09HBa0116C14_LR240 9470 1831625 60 61
+C09HBa0142I14_LR265 9190 1841301 60 61
+C09HBa0165P17_LR241 10794 1850694 60 61
+C09HBa0176I09_LR266 10260 1861717 60 61
+C09HBa0191P09_LR267 9629 1872195 60 61
+C09HBa0194K19_LR362 10346 1882034 60 61
+C09HBa0203J14_LR243 10369 1892602 60 61
+C09HBa0226D21_LR244 10625 1903193 60 61
+C09SLe0068C01_LR272 9113 1914044 60 61
+C09SLe0076N09_LR363 10488 1923358 60 61
+C09SLe0085A10_LR364 9300 1934069 60 61
+C09SLe0130H12_LR273 9470 1943571 60 61
+C09SLm0008K04_LR274 10746 1953248 60 61
+C09SLm0018L06_LR366 9448 1964222 60 61
+C09SLm0037I08_LR367 9433 1973876 60 61
+C09SLm0094A22_LR246 10193 1983516 60 61
+C09SLm0129J22_LR373 9064 1993927 60 61
+C09SLm0143I09_LR365 10488 2003192 60 61
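REF.fasta.fai above is a samtools-style FASTA index: each row lists the sequence name, its length in bases, the byte offset of its first base in REF.fasta, the number of bases per sequence line (60 here) and the number of bytes per line including the newline (61 here). A minimal sketch of reading such an index (read_fai is an illustrative helper, not part of this changeset):

# Illustrative parser for a samtools-style .fai index (not part of the changeset).
def read_fai(path):
    index = {}
    with open(path) as handle:
        for line in handle:
            if not line.strip():
                continue
            # name, length in bases, byte offset, bases per line, bytes per line
            name, length, offset, line_bases, line_width = line.split()[:5]
            index[name] = (int(length), int(offset), int(line_bases), int(line_width))
    return index

# Example: read_fai("SMART/data/REF.fasta.fai")["C10HBa0111D09_LR276"] -> (9300, 48, 60, 61)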
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/SR1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/SR1.fasta Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,2500 @@
+>HWI-EAS337_3:7:1:415:1217/1
+GATGTGCAGACTTTTCACGCAGGACTACATCACTGT
+[... 2,500 added lines of FASTA reads; content truncated in the changeset view ...]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/SR1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/SR1.fastq Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5000 @@
+@HWI-EAS337_3:7:1:415:1217/1
+GATGTGCAGACTTTTCACGCAGGACTACATCACTGT
++HWI-EAS337_3:7:1:415:1217/1
+WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ
+[... 5,000 added lines of FASTQ reads; content truncated in the changeset view ...]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/SR2.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/SR2.fastq Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5000 @@
+@HWI-EAS337_3:7:1:415:1217/2
+TAAGAACTTGGCTGATCGCCTACTTACTGCTTTTAC
++HWI-EAS337_3:7:1:415:1217/2
+VWWWVWVVVVVVVVUVWVWVVWWVWVVVUVTTTTTN
+[... 5,000 added lines of FASTQ reads; content truncated in the changeset view ...]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/bamFile.bam
Binary file SMART/data/bamFile.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/dummy.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/dummy.fasta Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,6 @@
+>HWI-EAS337_3:7:1:415:1217/1
+GATGTGCAGACTTTTCACGCAGGACTACATCACTGT
+>HWI-EAS337_3:7:1:208:1489/1
+GGAAACATATGCACATAAACGTTGAAATCATGCTTA
+>HWI-EAS337_3:7:1:1154:1517/1
+GAATGTTGCAGACCTTACTCCTACCTATGAAGCACA
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/expRef.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/expRef.fasta Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,33148 @@
+>C10HBa0111D09_LR276
+[... 33,148 added lines of FASTA sequence data; content truncated in the changeset view ...]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/expRef_withoutSplit.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/expRef_withoutSplit.fasta Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,33148 @@
[33,148 added lines of FASTA data: nucleotide reference sequences, 60 bases per line, beginning with the record >C10HBa0111D09_LR276; the full sequence content is truncated in this changeset rendering.]
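As a point of reference for these test data, the following minimal Python sketch tallies the length of each record in a FASTA file of this layout, so the totals can be compared with the LN values in the @SQ headers of the SAM files added below; the path "reference.fasta" is a hypothetical stand-in for the file added here.

# Illustrative sketch: sum the sequence length of each FASTA record so the
# totals can be checked against the LN values of the @SQ lines further down.
# "reference.fasta" is a hypothetical stand-in for the FASTA file added here.
def fasta_lengths(path):
    lengths = {}
    name = None
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if line.startswith(">"):
                name = line[1:].split()[0]   # record id, e.g. C10HBa0111D09_LR276
                lengths[name] = 0
            elif name is not None:
                lengths[name] += len(line)
    return lengths

for name, length in sorted(fasta_lengths("reference.fasta").items()):
    print(name, length)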
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/output.png
Binary file SMART/data/output.png has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part0.bam
Binary file SMART/data/part0.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part0.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/part0.sam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,448 @@
[448 added lines of SAM data: @SQ header records giving the name and length of each reference sequence (e.g. SN:C10HBa0111D09_LR276 LN:9300), followed by single-end 36 bp read alignments (read names HWI-EAS337_3:7:1:*) carrying the BWA optional tags XT, NM, X0, X1, XM, XO, XG, MD and XA; the full record list is truncated in this changeset rendering.]
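For orientation, a minimal Python sketch of how the tab-separated records in a file such as SMART/data/part0.sam can be read without external libraries; the helper name parse_sam_line and the printed fields are illustrative only.

# Illustrative sketch: read a SAM file such as SMART/data/part0.sam and
# collect, for each mapped read, its reference, position and BWA tags.
def parse_sam_line(line):
    fields = line.rstrip("\n").split("\t")
    record = {
        "qname": fields[0],
        "flag": int(fields[1]),
        "rname": fields[2],
        "pos": int(fields[3]),
        "mapq": int(fields[4]),
        "cigar": fields[5],
        "seq": fields[9],
    }
    # Optional fields (e.g. XT:A:U, NM:i:0, MD:Z:36) follow column 11.
    for tag in fields[11:]:
        name, vtype, value = tag.split(":", 2)
        record[name] = int(value) if vtype == "i" else value
    return record

with open("SMART/data/part0.sam") as handle:
    for line in handle:
        if line.startswith("@"):      # skip @SQ header records
            continue
        rec = parse_sam_line(line)
        if rec["flag"] & 4:           # unmapped read (FLAG bit 0x4)
            continue
        print(rec["qname"], rec["rname"], rec["pos"], rec.get("NM"))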
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part1.bam
Binary file SMART/data/part1.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part1.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/part1.sam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,448 @@
[448 added lines of SAM data with the same @SQ header block and 36 bp BWA alignment-record layout as part0.sam; truncated in this changeset rendering.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part2.bam
Binary file SMART/data/part2.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part2.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/part2.sam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,448 @@
[448 added lines of SAM data with the same @SQ header block and 36 bp BWA alignment-record layout as part0.sam; truncated in this changeset rendering.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part3.bam
Binary file SMART/data/part3.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part3.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/part3.sam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,448 @@
[448 added lines of SAM data with the same @SQ header block and 36 bp BWA alignment-record layout as part0.sam; truncated in this changeset rendering.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part4.bam
Binary file SMART/data/part4.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/part4.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/part4.sam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,448 @@
[448 added lines of SAM data with the same @SQ header block and 36 bp BWA alignment-record layout as part0.sam; truncated in this changeset rendering.]
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/samFile.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/samFile.sam Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1448 @@\n+@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n+@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n+@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n+@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n+@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n+@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n+@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n+@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n+@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n+@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n+@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n+@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n+@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n+@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n+@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n+@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n+@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n+@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n+@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n+@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n+@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n+@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n+@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n+@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n+@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n+@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n+@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n+@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n+@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n+@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n+@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n+@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n+@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n+@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n+@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n+@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n+@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n+@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n+@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n+@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n+@SQ\tSN:BAC19_LR16\tLN:9760\n+@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n+@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n+@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n+@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n+@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n+@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n+@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n+@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n+@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n+@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n+@SQ\tSN:C02HBa0155E05_LR37\tLN:10417\n+@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n+@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n+@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n+@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n+@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n+@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n+@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n+@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n+@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n+@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n+@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n+@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n+@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n+@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n+@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n+@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n+@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n+@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n+@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n+@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n+@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n+@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n+@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n+@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n+@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n+@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n+@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n+@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n+@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n+@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n+@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n+@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n+@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n+@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n+@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n+@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n+@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n+@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n+@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n+@SQ\tSN:C04HBa239P14_L
R111\tLN:10483\n+@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n+@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n+@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n+@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n+@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n+@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n+@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n+@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n+@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n+@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n+@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n+@SQ\tSN:C04HBa6O16_LR123\tLN:10386\n+@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n+@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n+@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n+@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n+@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n+@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n+@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n+@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n+@SQ\tSN:C05HBa0006N20_LR128\tLN:101'..b'\t*\t0\t0\tCTTATTTCTCATGCAACTTTTTTTTTGAAAAGTTTC\tRUUUUSWWVWWWWWWVWWWWWWWVWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-9770,36M,0;\n+HWI-EAS337_3:7:1:5:1770\t0\tC02HBa0185P07_LR40\t1386\t37\t36M\t*\t0\t0\tGTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\tWWWWWWWWWWVWWWWWWWWWWWWWWWWWVVSUUQUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:115:1005\t0\tC11HBa0161D01_LR292\t3394\t37\t36M\t*\t0\t0\tGATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\tWWWWWWWWWVVWWWVVVVWWVVVWWWVVWVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:354:1708\t0\tC09HBa0165P17_LR241\t4985\t37\t36M\t*\t0\t0\tGCATCCGACAGTGACTTAGACGATGAGGAATACGAG\tWWWWWWWWWWWWVWWWWWVWWVWWVWWVWWUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:1639:1500\t0\tC11SLm0052K14_LR376\t2483\t37\t36M\t*\t0\t0\tGTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\tWWWWWWVVWWWWWVVWVVWUVVVVVVVVWVUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:766:243\t16\tC07HBa0308M01_LR189\t6701\t37\t36M\t*\t0\t0\tAGCAACAATCTCCAATTTATCTTCCATAGATGCCAC\tUSJUURWWVTVVVWWWWWWWVWWVVWWWWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:920:144\t0\tC05HBa0138J03_LR135\t8153\t37\t36M\t*\t0\t0\tGTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\tWWVVWWWWWVWPWVWWVWWSWVSWWHWWLVUCPUUH\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:389:268\t16\tC12SLeRI72J6_LR378\t6610\t37\t36M\t*\t0\t0\tAAATTCTTTGAGGGTGGTTGCCCTCTCTAATTGACC\tUUUUUSVVWWWWWWWWWWWWVVWWWWVWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:294:1868\t0\tC04HBa58E11_LR93\t7287\t25\t36M\t*\t0\t0\tGAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\tVWVWWWWWWWWVQWWWWOWVVWWVWVVWQWUURULU\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0T0G34\n+HWI-EAS337_3:7:1:1147:62\t0\tC02HBa0204D01_LR334\t6554\t37\t36M\t*\t0\t0\tGAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\tWWWWWWWWWWWWVWWWWWWVWWVWVVWVVWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:787:1759\t16\tC02SLe0018B07_LR335\t8378\t0\t36M\t*\t0\t0\tCAGAAAATCAGAACAGCTACCAATTCTAATAAAACC\tHUUUUUVVTVVWVUWWUWWUVWWWWWWWVWWWWVVW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C02HBa0027B01_LR21,+810,36M,0;\n+HWI-EAS337_3:7:1:425:1939\t16\tC09SLe0076N09_LR363\t1546\t0\t36M\t*\t0\t0\tAAGTTTAGCCACATAGACCCAGACACCACAATTAGC\tUUUUUUWVVVWVVWWWVVVVWWWVWWWWVWWVWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-1546,36M,0;\n+HWI-EAS337_3:7:1:187:1132\t0\tC02HBa0027B01_LR21\t10\t0\t36M\t*\t0\t0\tGTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\tWVWWW
WWWWWWWWWWWWWWWWTVWVWWWVVUUUUUU\tXT:A:R\tNM:i:1\tX0:i:2\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:21T14\tXA:Z:C02SLe0018B07_LR335,-9178,36M,1;\n+HWI-EAS337_3:7:1:1739:1840\t0\tC02HBa0072A04_LR26\t2868\t37\t36M\t*\t0\t0\tGGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\tWWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:1505:1876\t0\tC07SLe0111B06_LR194\t8673\t37\t36M\t*\t0\t0\tGAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\tWWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:447:192\t0\tC09SLm0143I09_LR365\t6957\t0\t36M\t*\t0\t0\tGACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\tWWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLe0076N09_LR363,+6957,36M,0;\n+HWI-EAS337_3:7:1:21:2019\t16\tC09SLm0037I08_LR367\t1298\t37\t36M\t*\t0\t0\tGGGCTGGAAGACAGGTTATCATCTTTTACCTCATAC\tUUURUUWWWVVQWWWWWWWWWWWWWWWVVWWVVWWV\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:1593:652\t0\tC04HBa8K13_LR338\t2175\t37\t36M\t*\t0\t0\tGTGATGAGTAAAACATCATCATATGAACTTGAAGAG\tWWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:28A7\n+HWI-EAS337_3:7:1:1254:1660\t0\tC12HBa326K10_LR306\t8100\t37\t36M\t*\t0\t0\tGAAGTTTGTAATTCCTTTTAGGATTGTGGTTAACAT\tWWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n+HWI-EAS337_3:7:1:291:629\t4\t*\t0\t0\t*\t*\t0\t0\tGTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\tWWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n'
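The SAM test data above ends with alignments whose optional fields carry BWA-style tags such as NM (edit distance), X0 (number of best hits) and MD (mismatch string); this is the same information that the GFF3 test files below carry as nbMismatches, nbOccurrences and identity. A small sketch of reading those tags from one SAM line (illustration only, not code from this changeset):

    def parse_sam_tags(line):
        # Mandatory SAM columns come first; optional TAG:TYPE:VALUE fields start at column 12.
        fields = line.rstrip("\n").split("\t")
        tags = {}
        for field in fields[11:]:
            name, tag_type, value = field.split(":", 2)
            tags[name] = int(value) if tag_type == "i" else value
        return {"query": fields[0], "reference": fields[2],
                "position": int(fields[3]), "mapq": int(fields[4]), "tags": tags}

    record = parse_sam_tags("read1\t0\tC02HBa0185P07_LR40\t1386\t37\t36M\t*\t0\t0\tACGT\tIIII\tXT:A:U\tNM:i:0\tX0:i:1")
    print(record["tags"]["NM"], record["tags"]["X0"])   # -> 0 1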
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/sortedBamFile.bam
Binary file SMART/data/sortedBamFile.bam has changed
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/test.gff.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/test.gff.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,218 @@\n+C02HBa0185P07_LR40\tSMART\ttranscript\t3889\t3924\t36\t-\t.\tName=HWI-EAS337_3:7:1:415:1217;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:415:1217;identity=100\n+C11SLe0053P22_LR298\tSMART\ttranscript\t2130\t2165\t36\t-\t.\tName=HWI-EAS337_3:7:1:1178:755;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1178:755;identity=100\n+C06HBa0144J05_LR355\tSMART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:277:1259;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:277:1259;identity=100\n+C08HBa0165B06_LR218\tSMART\ttranscript\t3619\t3654\t36\t-\t.\tName=HWI-EAS337_3:7:1:447:1231;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:447:1231;identity=100\n+C02HBa0329G05_LR52\tSMART\ttranscript\t4746\t4781\t36\t-\t.\tName=HWI-EAS337_3:7:1:1154:1517;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1154:1517;identity=100\n+C04HBa80D3_LR100\tSMART\ttranscript\t423\t458\t36\t-\t.\tName=HWI-EAS337_3:7:1:164:1869;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:164:1869;identity=100\n+C01HBa0216G16_LR11\tSMART\ttranscript\t648\t683\t36\t-\t.\tName=HWI-EAS337_3:7:1:415:1194;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:415:1194;identity=100\n+C05HBa0145P19_LR136\tSMART\ttranscript\t3686\t3721\t36\t-\t.\tName=HWI-EAS337_3:7:1:645:1892;quality=25;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=2;ID=HWI-EAS337_3:7:1:645:1892;identity=94\n+C08HBa0012O06_LR211\tSMART\ttranscript\t1768\t1803\t36\t-\t.\tName=HWI-EAS337_3:7:1:33:1446;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWI-EAS337_3:7:1:33:1446;identity=97\n+C09HBa0194K19_LR362\tSMART\ttranscript\t9168\t9203\t36\t+\t.\tName=HWI-EAS337_3:7:1:1194:1427;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1194:1427;identity=100\n+C09SLm0018L06_LR366\tSMART\ttranscript\t5034\t5069\t36\t+\t.\tName=HWI-EAS337_3:7:1:624:1913;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:624:1913;identity=100\n+C09SLe0085A10_LR364\tSMART\ttranscript\t6700\t6735\t36\t-\t.\tName=HWI-EAS337_3:7:1:437:1202;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:437:1202;identity=100\n+C09HBa0113I06_LR360\tSMART\ttranscript\t1764\t1799\t36\t-\t.\tName=HWI-EAS337_3:7:1:1386:1787;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1386:1787;identity=100\n+C11HBa0143O06_LR374\tSMART\ttranscript\t8925\t8960\t36\t-\t.\tName=HWI-EAS337_3:7:1:227:1155;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:227:1155;identity=100\n+C06HBa0066D13_LR353\tSMART\ttranscript\t6619\t6654\t36\t-\t.\tName=HWI-EAS337_3:7:1:472:1025;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:472:1025;identity=100\n+C07SLe0099J13_LR193\tSMART\ttranscript\t3528\t3563\t36\t+\t.\tName=HWI-EAS337_3:7:1:220:1482;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:220:1482;identity=100\n+C07HBa0224G23_LR186\tSMART\ttranscript\t9232\t9267\t36\t-\t.\tName=HWI-EAS337_3:7:1:1699:1966;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:1699:1966;identity=100\n+C07HBa0224G23_LR186\tSMART\ttranscript\t376
1\t3796\t36\t-\t.\tName=HWI-EAS337_3:7:1:547:1084;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:547:1084;identity=100\n+C02HBa0291P19_LR48\tSMART\ttranscript\t131\t166\t36\t+\t.\tName=HWI-EAS337_3:7:1:464:1097;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:464:1097;identity=100\n+C12SLm103K8_LR380\tSMART\ttranscript\t7346\t7381\t36\t+\t.\tName=HWI-EAS337_3:7:1:171:1480;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:171:1480;identity=100\n+C11HBa0143O06_LR374\tSMART\ttranscript\t7925\t7960\t36\t-\t.\tName'..b'=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1254:1491;identity=100\n+C12HBa93P12_LR312\tSMART\ttranscript\t4116\t4151\t36\t+\t.\tName=HWI-EAS337_3:7:1:42:1990;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:42:1990;identity=100\n+C09SLe0130H12_LR273\tSMART\ttranscript\t3257\t3292\t36\t-\t.\tName=HWI-EAS337_3:7:1:1319:766;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:1319:766;identity=100\n+C07HBa0309B15_LR190\tSMART\ttranscript\t4202\t4237\t36\t-\t.\tName=HWI-EAS337_3:7:1:1567:1795;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1567:1795;identity=100\n+C04HBa96I8_LR101\tSMART\ttranscript\t4961\t4996\t36\t+\t.\tName=HWI-EAS337_3:7:1:216:392;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:216:392;identity=100\n+C04HBa50I18_LR341\tSMART\ttranscript\t2928\t2963\t36\t-\t.\tName=HWI-EAS337_3:7:1:38:1803;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:38:1803;identity=100\n+C06HBa0217M17_LR166\tSMART\ttranscript\t4141\t4176\t36\t-\t.\tName=HWI-EAS337_3:7:1:425:1196;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:425:1196;identity=100\n+C02HBa0072A04_LR26\tSMART\ttranscript\t2348\t2383\t36\t-\t.\tName=HWI-EAS337_3:7:1:181:410;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:181:410;identity=100\n+C09SLe0130H12_LR273\tSMART\ttranscript\t4026\t4061\t36\t-\t.\tName=HWI-EAS337_3:7:1:1065:1826;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:1065:1826;identity=100\n+C02HBa0027B01_LR21\tSMART\ttranscript\t4337\t4372\t36\t-\t.\tName=HWI-EAS337_3:7:1:79:1444;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:79:1444;identity=100\n+C09SLe0085A10_LR364\tSMART\ttranscript\t607\t642\t36\t-\t.\tName=HWI-EAS337_3:7:1:1634:1526;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1634:1526;identity=100\n+C12HBa326K10_LR306\tSMART\ttranscript\t6488\t6523\t36\t-\t.\tName=HWI-EAS337_3:7:1:462:1320;quality=25;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=2;ID=HWI-EAS337_3:7:1:462:1320;identity=94\n+C04HBa50I18_LR341\tSMART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:532:1095;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:532:1095;identity=100\n+C07HBa0002M15_LR175\tSMART\ttranscript\t7167\t7202\t36\t-\t.\tName=HWI-EAS337_3:7:1:832:1960;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=0;ID=HWI-EAS337_3:7:1:832:1960;identity=100\n+C06HBa0120H21_LR161\tSMART\ttranscript\t186\t221\t36\t-\t.\tName=HWI-EAS337_3:7:1:1312:645;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS33
7_3:7:1:1312:645;identity=100\n+C12HBa326K10_LR306\tSMART\ttranscript\t5231\t5266\t36\t+\t.\tName=HWI-EAS337_3:7:1:1107:226;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1107:226;identity=100\n+C09SLm0008K04_LR274\tSMART\ttranscript\t10575\t10610\t36\t+\t.\tName=HWI-EAS337_3:7:1:274:1287;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:274:1287;identity=100\n+C06HBa0217M17_LR166\tSMART\ttranscript\t3360\t3395\t36\t-\t.\tName=HWI-EAS337_3:7:1:1704:1373;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWI-EAS337_3:7:1:1704:1373;identity=97\n+C06HBa0066I09_LR156\tSMART\ttranscript\t5444\t5479\t36\t-\t.\tName=HWI-EAS337_3:7:1:241:903;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:241:903;identity=100\n+C08HBa0239G21_LR221\tSMART\ttranscript\t4431\t4466\t36\t-\t.\tName=HWI-EAS337_3:7:1:404:1924;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:404:1924;identity=100\n+C02HBa0190P16_LR331\tSMART\ttranscript\t3279\t3314\t36\t+\t.\tName=HWI-EAS337_3:7:1:23:1455;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:23:1455;identity=100\n'
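Each record in the new test.gff.gff3 keeps its mapping metadata in the ninth GFF3 column as semicolon-separated key=value pairs (Name, quality, nbOccurrences, nbMismatches, ID, identity). A minimal sketch of turning that column into a dictionary (illustration only):

    def parse_gff3_attributes(column9):
        attributes = {}
        for pair in column9.strip().split(";"):
            if pair:
                key, _, value = pair.partition("=")
                attributes[key] = value
        return attributes

    line = ("C02HBa0185P07_LR40\tSMART\ttranscript\t3889\t3924\t36\t-\t.\t"
            "Name=HWI-EAS337_3:7:1:415:1217;nbOccurrences=1;identity=100")
    print(parse_gff3_attributes(line.split("\t")[8])["identity"])   # -> 100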
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/test_clusterize.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/test_clusterize.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,213 @@\n+C12HBa326K10_LR306\tS-MART\ttranscript\t3066\t3101\t36\t-\t.\tName=HWI-EAS337_3:7:1:263:1275;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:263:1275;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t3148\t3183\t36\t-\t.\tName=HWI-EAS337_3:7:1:1262:1508;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1262:1508;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t4561\t4596\t36\t-\t.\tName=HWI-EAS337_3:7:1:1187:1977;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=6;quality=0;ID=HWI-EAS337_3:7:1:1187:1977;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t5231\t5266\t36\t+\t.\tName=HWI-EAS337_3:7:1:1107:226;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1107:226;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t6488\t6523\t36\t-\t.\tName=HWI-EAS337_3:7:1:462:1320;nbMismatches=2;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=25;ID=HWI-EAS337_3:7:1:462:1320;identity=94\n+C07HBa0130B18_LR183\tS-MART\ttranscript\t9167\t9202\t36\t+\t.\tName=HWI-EAS337_3:7:1:65:1436;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:65:1436;identity=100\n+C09HBa0109D11_LR262\tS-MART\ttranscript\t10202\t10237\t36\t-\t.\tName=HWI-EAS337_3:7:1:172:1019;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:172:1019;identity=100\n+C04HBa50I18_LR341\tS-MART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:532:1095;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:532:1095;identity=100\n+C04HBa50I18_LR341\tS-MART\ttranscript\t2928\t2963\t36\t-\t.\tName=HWI-EAS337_3:7:1:38:1803;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:38:1803;identity=100\n+C04HBa50I18_LR341\tS-MART\ttranscript\t4171\t4206\t36\t+\t.\tName=HWI-EAS337_3:7:1:37:1418;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:37:1418;identity=100\n+C04HBa6E18_LR87\tS-MART\ttranscript\t3416\t3451\t36\t+\t.\tName=HWI-EAS337_3:7:1:4:1451;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:4:1451;identity=100\n+C04HBa6E18_LR87\tS-MART\ttranscript\t4296\t4331\t36\t-\t.\tName=HWI-EAS337_3:7:1:1526:1772;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1526:1772;identity=100\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t3107\t3142\t36\t+\t.\tName=HWI-EAS337_3:7:1:300:1184;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:300:1184;identity=100\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t3360\t3395\t36\t-\t.\tName=HWI-EAS337_3:7:1:1704:1373;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1704:1373;identity=97\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t4141\t4176\t36\t-\t.\tName=HWI-EAS337_3:7:1:425:1196;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:425:1196;identity=100\n+C06HBa0217M17_LR166\tS-MART\ttranscr
ipt\t5919\t5954\t36\t-\t.\tName=HWI-EAS337_3:7:1:498:810;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:498:810;identity=97\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t6075\t6110\t36\t-\t.\tName=HWI-EAS337_3:7:1:364:1210;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:364:1210;identity=100\n+C04HBa219H8_LR109\tS-MART\ttranscript\t8651\t8686\t36\t+\t.\tName=HWI-EAS337_3:7:1:1160:1471;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences='..b'_LR115\tS-MART\ttranscript\t10241\t10276\t36\t-\t.\tName=HWI-EAS337_3:7:1:1160:1426;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1160:1426;identity=100\n+C02SLe0132D01_LR60\tS-MART\ttranscript\t5644\t5679\t36\t+\t.\tName=HWI-EAS337_3:7:1:644:33;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:644:33;identity=100\n+C02HBa0167J21_LR39\tS-MART\ttranscript\t836\t871\t36\t-\t.\tName=HWI-EAS337_3:7:1:395:1182--HWI-EAS337_3:7:1:736:505;nbElements=2;score=36;quality=37;feature=transcript;ID=HWI-EAS337_3:7:1:395:1182\n+C02HBa0167J21_LR39\tS-MART\ttranscript\t1972\t2007\t36\t-\t.\tName=HWI-EAS337_3:7:1:647:1863;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:647:1863;identity=100\n+C02SLe0018B07_LR335\tS-MART\ttranscript\t8723\t8758\t36\t-\t.\tName=HWI-EAS337_3:7:1:1633:1841;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1633:1841;identity=100\n+C02HBa0155E05_LR37\tS-MART\ttranscript\t4273\t4308\t36\t-\t.\tName=HWI-EAS337_3:7:1:34:1851;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=23;ID=HWI-EAS337_3:7:1:34:1851;identity=97\n+C08SLm0118A18_LR226\tS-MART\ttranscript\t4396\t4431\t36\t+\t.\tName=HWI-EAS337_3:7:1:600:1107;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:600:1107;identity=100\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t1887\t1922\t36\t+\t.\tName=HWI-EAS337_3:7:1:1545:1519;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1545:1519;identity=97\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t4337\t4372\t36\t-\t.\tName=HWI-EAS337_3:7:1:79:1444;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:79:1444;identity=100\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t5114\t5149\t36\t-\t.\tName=HWI-EAS337_3:7:1:1177:1504;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1177:1504;identity=100\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t5948\t5983\t36\t-\t.\tName=HWI-EAS337_3:7:1:31:1659;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:31:1659;identity=100\n+C07SLe0111B06_LR194\tS-MART\ttranscript\t8883\t8918\t36\t+\t.\tName=HWI-EAS337_3:7:1:1477:1673;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1477:1673;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t932\t967\t36\t-\t.\tName=HWI-EAS337_3:7:1:141:1433;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quali
ty=37;ID=HWI-EAS337_3:7:1:141:1433;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t2303\t2338\t36\t-\t.\tName=HWI-EAS337_3:7:1:1254:1491;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1254:1491;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t2348\t2383\t36\t-\t.\tName=HWI-EAS337_3:7:1:181:410;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:181:410;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t6848\t6883\t36\t-\t.\tName=HWI-EAS337_3:7:1:373:2009;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:373:2009;identity=97\n+C08HBa0165B06_LR218\tS-MART\ttranscript\t3619\t3654\t36\t-\t.\tName=HWI-EAS337_3:7:1:447:1231;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:447:1231;identity=100\n+C08HBa0165B06_LR218\tS-MART\ttranscript\t8558\t8593\t36\t+\t.\tName=HWI-EAS337_3:7:1:538:1054;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:538:1054;identity=97\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/data/test_clusterize2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/data/test_clusterize2.gff3 Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,214 @@\n+C12HBa326K10_LR306\tS-MART\ttranscript\t3066\t3101\t36\t-\t.\tName=HWI-EAS337_3:7:1:263:1275;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:263:1275;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t3148\t3183\t36\t-\t.\tName=HWI-EAS337_3:7:1:1262:1508;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1262:1508;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t4561\t4596\t36\t-\t.\tName=HWI-EAS337_3:7:1:1187:1977;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=6;quality=0;ID=HWI-EAS337_3:7:1:1187:1977;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t5231\t5266\t36\t+\t.\tName=HWI-EAS337_3:7:1:1107:226;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1107:226;identity=100\n+C12HBa326K10_LR306\tS-MART\ttranscript\t6488\t6523\t36\t-\t.\tName=HWI-EAS337_3:7:1:462:1320;nbMismatches=2;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=25;ID=HWI-EAS337_3:7:1:462:1320;identity=94\n+C07HBa0130B18_LR183\tS-MART\ttranscript\t9167\t9202\t36\t+\t.\tName=HWI-EAS337_3:7:1:65:1436;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:65:1436;identity=100\n+C09HBa0109D11_LR262\tS-MART\ttranscript\t10202\t10237\t36\t-\t.\tName=HWI-EAS337_3:7:1:172:1019;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:172:1019;identity=100\n+C04HBa50I18_LR341\tS-MART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:532:1095;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:532:1095;identity=100\n+C04HBa50I18_LR341\tS-MART\ttranscript\t2928\t2963\t36\t-\t.\tName=HWI-EAS337_3:7:1:38:1803;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:38:1803;identity=100\n+C04HBa50I18_LR341\tS-MART\ttranscript\t4171\t4206\t36\t+\t.\tName=HWI-EAS337_3:7:1:37:1418;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:37:1418;identity=100\n+C04HBa6E18_LR87\tS-MART\ttranscript\t3416\t3451\t36\t+\t.\tName=HWI-EAS337_3:7:1:4:1451;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:4:1451;identity=100\n+C04HBa6E18_LR87\tS-MART\ttranscript\t4296\t4331\t36\t-\t.\tName=HWI-EAS337_3:7:1:1526:1772;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1526:1772;identity=100\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t3107\t3142\t36\t+\t.\tName=HWI-EAS337_3:7:1:300:1184;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:300:1184;identity=100\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t3360\t3395\t36\t-\t.\tName=HWI-EAS337_3:7:1:1704:1373;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1704:1373;identity=97\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t4141\t4176\t36\t-\t.\tName=HWI-EAS337_3:7:1:425:1196;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:425:1196;identity=100\n+C06HBa0217M17_LR166\tS-MART\ttranscr
ipt\t5919\t5954\t36\t-\t.\tName=HWI-EAS337_3:7:1:498:810;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:498:810;identity=97\n+C06HBa0217M17_LR166\tS-MART\ttranscript\t6075\t6110\t36\t-\t.\tName=HWI-EAS337_3:7:1:364:1210;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:364:1210;identity=100\n+C04HBa219H8_LR109\tS-MART\ttranscript\t8651\t8686\t36\t+\t.\tName=HWI-EAS337_3:7:1:1160:1471;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences='..b'_LR115\tS-MART\ttranscript\t10241\t10276\t36\t-\t.\tName=HWI-EAS337_3:7:1:1160:1426;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1160:1426;identity=100\n+C02SLe0132D01_LR60\tS-MART\ttranscript\t5644\t5679\t36\t+\t.\tName=HWI-EAS337_3:7:1:644:33;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:644:33;identity=100\n+C02HBa0167J21_LR39\tS-MART\ttranscript\t836\t871\t36\t-\t.\tName=HWI-EAS337_3:7:1:395:1182--HWI-EAS337_3:7:1:736:505;nbElements=2;score=36;quality=37;feature=transcript;ID=HWI-EAS337_3:7:1:395:1182\n+C02HBa0167J21_LR39\tS-MART\ttranscript\t1972\t2007\t36\t-\t.\tName=HWI-EAS337_3:7:1:647:1863;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:647:1863;identity=100\n+C02SLe0018B07_LR335\tS-MART\ttranscript\t8723\t8758\t36\t-\t.\tName=HWI-EAS337_3:7:1:1633:1841;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1633:1841;identity=100\n+C02HBa0155E05_LR37\tS-MART\ttranscript\t4273\t4308\t36\t-\t.\tName=HWI-EAS337_3:7:1:34:1851;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=23;ID=HWI-EAS337_3:7:1:34:1851;identity=97\n+C08SLm0118A18_LR226\tS-MART\ttranscript\t4396\t4431\t36\t+\t.\tName=HWI-EAS337_3:7:1:600:1107;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:600:1107;identity=100\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t1887\t1922\t36\t+\t.\tName=HWI-EAS337_3:7:1:1545:1519;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1545:1519;identity=97\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t4337\t4372\t36\t-\t.\tName=HWI-EAS337_3:7:1:79:1444;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:79:1444;identity=100\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t5114\t5149\t36\t-\t.\tName=HWI-EAS337_3:7:1:1177:1504;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1177:1504;identity=100\n+C02HBa0027B01_LR21\tS-MART\ttranscript\t5948\t5983\t36\t-\t.\tName=HWI-EAS337_3:7:1:31:1659;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:31:1659;identity=100\n+C07SLe0111B06_LR194\tS-MART\ttranscript\t8883\t8918\t36\t+\t.\tName=HWI-EAS337_3:7:1:1477:1673;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1477:1673;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t932\t967\t36\t-\t.\tName=HWI-EAS337_3:7:1:141:1433;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quali
ty=37;ID=HWI-EAS337_3:7:1:141:1433;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t2303\t2338\t36\t-\t.\tName=HWI-EAS337_3:7:1:1254:1491;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1254:1491;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t2348\t2383\t36\t-\t.\tName=HWI-EAS337_3:7:1:181:410;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:181:410;identity=100\n+C02HBa0072A04_LR26\tS-MART\ttranscript\t6848\t6883\t36\t-\t.\tName=HWI-EAS337_3:7:1:373:2009;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:373:2009;identity=97\n+C08HBa0165B06_LR218\tS-MART\ttranscript\t3619\t3654\t36\t-\t.\tName=HWI-EAS337_3:7:1:447:1231;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:447:1231;identity=100\n+C08HBa0165B06_LR218\tS-MART\ttranscript\t8558\t8593\t36\t+\t.\tName=HWI-EAS337_3:7:1:538:1054;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:538:1054;identity=97\n'
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/CleanTranscriptFile.xml
--- a/SMART/galaxy/CleanTranscriptFile.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/CleanTranscriptFile.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="CleanTranscriptFile" name="clean transcript file">
- <description>Clean a transcript file so that it is useable for S-MART.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="CleanTranscriptFile" name="Clean Transcript File">
+ <description>Clean a transcript file so that it is usable for S-MART.</description>
  <command interpreter="python"> ../Java/Python/CleanTranscriptFile.py -i $formatType.inputFileName 
  #if $formatType.FormatInputFileName == 'gff':
    -f gff
@@ -54,7 +51,6 @@
  <outputs>
  <data name="outputFile" format="gtf">
  <change_format>
- <when input="formatType.FormatInputFileName" value="gtf" format="gtf" />
  <when input="formatType.FormatInputFileName" value="gff" format="gff" />
  <when input="formatType.FormatInputFileName" value="gff3" format="gff3" />
  </change_format> 
@@ -70,8 +66,4 @@
     </test>
   </tests>
 
- <help>
- A GFF/GTF file (please consult http://www.sequenceontology.org/gff3.shtml to know more about the GFF3 format, and http://mblab.wustl.edu/GTF22.html for the GTF format) may contain different sources of information: chromosome size, genes, transcripts, etc. S-MART mostly works on transcripts. This scripts filters the input file to keep the information you really want, based on the feature (3rd column).
- </help>
-
 </tool>
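The help section removed above described this tool as a filter on the third (feature) column of a GFF/GTF file, keeping only the record types S-MART works on. A minimal Python sketch of that filtering idea, assuming a plain tab-separated input (illustration only, not the code of CleanTranscriptFile.py):

    def keep_feature(in_path, out_path, feature="transcript"):
        with open(in_path) as source, open(out_path, "w") as target:
            for line in source:
                if line.startswith("#"):
                    target.write(line)            # keep header/comment lines
                    continue
                columns = line.split("\t")
                if len(columns) > 2 and columns[2] == feature:
                    target.write(line)

    # keep_feature("annotation.gff3", "transcripts_only.gff3", feature="transcript")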
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/Clusterize.xml
--- a/SMART/galaxy/Clusterize.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/Clusterize.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="MergingDataClusterize" name="clusterize">
- <description>Clusterize features when their genomic intervals overlap.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="MergingDataClusterize" name="Clusterize">
+ <description>Clusterizes the reads when their genomic intervals overlap.</description>
  <command interpreter="python">
  ../Java/Python/clusterize.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -13,6 +10,8 @@
  -f gff2
  #elif $formatType.FormatInputFileName == 'gff3':
  -f gff3
+ #elif $formatType.FormatInputFileName == 'csv':
+ -f csv
  #elif $formatType.FormatInputFileName == 'sam':
  -f sam
  #elif $formatType.FormatInputFileName == 'gtf':
@@ -22,6 +21,7 @@
  $colinear
  $normalize
  -d $distance
+ $log $outputFileLog
  </command>
 
  <inputs>
@@ -31,6 +31,7 @@
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
  <option value="gff3">gff3</option>
+ <option value="csv">csv</option>
  <option value="sam">sam</option>
  <option value="gtf">gtf</option>
  </param>
@@ -46,6 +47,9 @@
  <when value="gff3">
  <param name="inputFileName" format="gff3" type="data" label="Input File"/>
  </when>
+ <when value="csv">
+ <param name="inputFileName" format="csv" type="data" label="Input File"/>
+ </when>
  <when value="sam">
  <param name="inputFileName" format="sam" type="data" label="Input File"/>
  </when>
@@ -54,20 +58,16 @@
  </when>
  </conditional>
 
- <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Only merge collinear features"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Normalize counts" help="Only works if the nbOccurrences tag is set."/>
- <param name="distance" type="text" value="0" label="merge features if their relative distance is within N nt"/>
+ <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
+ <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
+ <param name="distance" type="text" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
  </inputs>
 
  <outputs>
  <data name="outputFileGff" format="gff3"/>
+ <data name="outputFileLog" format="txt">
+ <filter>log</filter>
+ </data>
  </outputs> 
-
- <help>
-The script clusterizes the input genomic data. Two features are clusterized when their genomic intervals overlap. The output is a GFF3 file, where each element is a cluster. The number of elements in the cluster is given by the tag **nbElements**. The name of a cluster is the concatation of the names of its reads (like **read1--read2--read3**). Note that if the size of the name of the cluster exceeds 100 characters, it is truncated to the first 100 characters.
-
-Some options may clusterize the features which are closer than a given distance.
-
-By default, the tool clusterizes all features which overlap (or nearly overlap), even if they are on different strands. If you want to clusterize the features which are on the same strand only, you can specify it.
- </help>
 </tool>
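The removed help explained that clusterize.py merges features whose genomic intervals overlap (optionally only when they lie within a given distance, and only on the same strand with -c), stores the number of merged features in an nbElements tag, and names each cluster by concatenating the read names with "--", truncated to 100 characters. A rough sketch of that interval logic on one chromosome and strand (illustration only, not the tool's implementation):

    def clusterize(intervals, distance=0):
        clusters = []
        for start, end, name in sorted(intervals):
            if clusters and start <= clusters[-1]["end"] + distance:
                last = clusters[-1]                      # overlaps or is close enough: merge
                last["end"] = max(last["end"], end)
                last["names"].append(name)
            else:
                clusters.append({"start": start, "end": end, "names": [name]})
        for cluster in clusters:
            cluster["nbElements"] = len(cluster["names"])
            cluster["Name"] = "--".join(cluster["names"])[:100]
        return clusters

    print(clusterize([(3066, 3101, "r1"), (3090, 3183, "r2"), (4561, 4596, "r3")]))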
b
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/CollapseReads.xml
--- a/SMART/galaxy/CollapseReads.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/CollapseReads.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="collapseReads" name="collapse reads">
- <description>Merges two genomic features if they have exactly the same genomic coordinates.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+ <description>Merges two reads if they have exactly the same genomic coordinates.</description>
  <command interpreter="python">
  ../Java/Python/CollapseReads.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -52,16 +49,11 @@
  </when>
  </conditional>
 
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Merges features even if they are on different strands."/>
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
  </inputs>
 
  <outputs>
  <data name="outputFileGff" format="gff3"/>
  </outputs> 
 
- <help>
-Merge two input genomic coordinates iff they are exactly the same. If two or more genomic coordinates are merged, the tag **nbElements** is updated accordingly. As a consequence, all the reads which are exactly the same appear as one genomic coordinate.
-
-This is especially useful for short RNA sequencing (where you want to count the number of read per miRNA, siRNA, etc.) or 5' capped short reads.
- </help>
 </tool>
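The removed help described CollapseReads.py as merging reads that have exactly the same genomic coordinates and recording how many were merged in an nbElements tag, which is what one wants when counting reads per miRNA or other small RNA. A minimal sketch of that collapse (illustration only):

    from collections import Counter

    def collapse(reads):
        counts = Counter(reads)                      # key: (chromosome, start, end, strand)
        return [{"chrom": c, "start": s, "end": e, "strand": st, "nbElements": n}
                for (c, s, e, st), n in counts.items()]

    reads = [("C04HBa50I18_LR341", 1, 36, "+"),
             ("C04HBa50I18_LR341", 1, 36, "+"),
             ("C04HBa6E18_LR87", 3416, 3451, "+")]
    print(collapse(reads))   # the duplicated coordinate gets nbElements == 2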
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/CompareOverlappingSmallQuery.xml
--- a/SMART/galaxy/CompareOverlappingSmallQuery.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="CompareOverlappingSmallQuery" name="compare overlapping small query">
- <description>Provide the queries that overlap with a reference, when the query data set is small.</description>  
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="CompareOverlappingSmallQuery" name="Compare Overlapping Small Query">
+ <description>Provide the queries that overlap with a reference, when the query is small.</description>  
  <command interpreter="python">
  ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
  #if $formatType.FormatInputFileName1 == 'bed':  
@@ -158,8 +155,8 @@
  <when value="No">
  </when>
  </conditional>
- <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
- <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
+ <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must nested in a query"/>
+ <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must nested in a query"/>
  <conditional name="OptionCollinearOrAntiSens">
  <param name="OptionCA" type="select" label="Collinear or anti-sens">
  <option value="Collinear">Collinear</option>
@@ -173,31 +170,11 @@
  <when value="NONE">
  </when>
  </conditional>
- <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
- <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
  </inputs>
 
  <outputs>
  <data name="outputFileGff" format="gff3"/>
  </outputs> 
-
- <help>
-This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
-  
-It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
-
-Various modifiers are also available:
-
--Invert selection (report those which do not overlap).
-
--Restrict to colinear / anti-sense overlapping data.
-
--Keep the query data even if they do not strictly overlap with the reference data, but are located not further away than *n* nucleotide from some reference data.
-
--Keep the query data with are strictly included into reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.
-
-The mechanism of shrinking and extending is also useful to make a fine grain comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping you data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcript factor sites which are upstream.
-
-Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
- </help>
 </tool>
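The removed help presented this tool as the central comparison step: it keeps the elements of the query set (your data) that overlap the reference set (for example RefSeq), with options to invert the selection, restrict it to collinear or antisense overlaps, require nesting, or report non-overlapping queries with nbOverlaps set to 0. A naive sketch of the basic overlap filter, ignoring strand (illustration only; the real tool is far more efficient):

    def overlapping_queries(queries, references, invert=False):
        kept = []
        for chrom, q_start, q_end in queries:
            hits = sum(1 for r_chrom, r_start, r_end in references
                       if r_chrom == chrom and q_start <= r_end and r_start <= q_end)
            if (hits > 0) != invert:
                kept.append(((chrom, q_start, q_end), hits))
        return kept

    queries = [("C02HBa0072A04_LR26", 2348, 2383), ("C02HBa0072A04_LR26", 9000, 9035)]
    references = [("C02HBa0072A04_LR26", 2300, 2400)]
    print(overlapping_queries(queries, references))               # keeps only the first query
    print(overlapping_queries(queries, references, invert=True))  # keeps only the second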
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/CompareOverlappingSmallRef.xml
--- a/SMART/galaxy/CompareOverlappingSmallRef.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/CompareOverlappingSmallRef.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="CompareOverlappingSmallRef" name="compare overlapping small reference">
- <description>Provide the queries that overlap with a reference, when the reference dataset is small.</description>  
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="CompareOverlappingSmallRef" name="Compare Overlapping Small Reference">
+ <description>Provide the queries that overlap with a reference, when the reference is small.</description>  
  <command interpreter="python">
  ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
  #if $formatType.FormatInputFileName1 == 'bed':  
@@ -158,8 +155,8 @@
  <when value="No">
  </when>
  </conditional>
- <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
- <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
+ <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must nested in a query"/>
+ <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must nested in a query"/>
  <conditional name="OptionCollinearOrAntiSens">
  <param name="OptionCA" type="select" label="Collinear or anti-sens">
  <option value="Collinear">Collinear</option>
@@ -173,31 +170,11 @@
  <when value="NONE">
  </when>
  </conditional>
- <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
- <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
  </inputs>
 
  <outputs>
  <data name="outputFileGff" format="gff3"/>
  </outputs> 
-
- <help>
-This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
-  
-It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
-
-Various modifiers are also available:
-
--Invert selection (report those which do not overlap).
-
--Restrict to colinear / anti-sense overlapping data.
-
--Keep the query data even if they do not strictly overlap with the reference data, but are located not further away than *n* nucleotide from some reference data.
-
--Keep the query data with are strictly included into reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.
-
-The mechanism of shrinking and extending is also useful to make a fine grain comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping you data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcript factor sites which are upstream.
-
-Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
- </help>
 </tool>
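The same removed help also describes shrinking a query to a single nucleotide (for instance its transcription start site) or extending it a fixed number of nucleotides downstream before the overlap test, so that promoter or downstream regions can be compared. A strand-aware sketch of those two coordinate transformations (illustration only):

    def shrink_to_five_prime(start, end, strand):
        # Reduce the interval to its 5' extremity (the TSS for a transcript).
        return (start, start) if strand == "+" else (end, end)

    def extend_downstream(start, end, strand, length):
        # Lengthen the interval in the direction of transcription.
        return (start, end + length) if strand == "+" else (max(1, start - length), end)

    print(shrink_to_five_prime(3889, 3924, "-"))      # -> (3924, 3924)
    print(extend_downstream(3889, 3924, "+", 2000))   # -> (3889, 5924)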
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile.xml
--- a/SMART/galaxy/ConvertTranscriptFile.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/ConvertTranscriptFile.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,13 +1,12 @@
-<tool id="ConvertTranscriptFile" name="convert transcript file">
+<tool id="ConvertTranscriptFile" name="Convert transcript file">
   <description>Convert a file from a format to another.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
   <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFormatType.inputFileName 
    #if $inputFormatType.FormatInputFileName == 'gff3':
     -f gff3
    #elif $inputFormatType.FormatInputFileName == 'bed': 
    -f bed 
+   #elif $inputFormatType.FormatInputFileName == 'gff2': 
+   -f gff2
    #elif $inputFormatType.FormatInputFileName == 'bam': 
    -f blast
    #elif $inputFormatType.FormatInputFileName == 'sam': 
@@ -17,6 +16,10 @@
           #end if
    
   -g $outputFormatType.outFormat
+     #if $optionSequence.choose == 'Yes':
+    -s $optionSequence.value
+ #end if 
+
    
    -n $name
    $strand
@@ -28,6 +31,7 @@
    <param name="FormatInputFileName"  type="select" label="Input File Format">
    <option value="gff3">GFF3</option> 
    <option value="bed">BED</option> 
+   <option value="gff2">GFF2</option> 
    <option value="bam">BAM</option> 
    <option value="sam">SAM</option> 
    <option value="gtf">GTF</option> 
@@ -38,6 +42,9 @@
    <when value="bed">  
    <param name="inputFileName" format="bed" type="data" label="Input File"/>
    </when>
+   <when value="gff2">  
+   <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+   </when>
    <when value="bam">  
    <param name="inputFileName" format="bam" type="data" label="Input File"/>
    </when>
@@ -54,6 +61,7 @@
    <param name="outFormat"  type="select" label="Please choose the format that you want to convert to (corresponding to your input file format).">
    <option value="gff3">GFF3</option> 
    <option value="bed">BED</option> 
+   <option value="gff2">GFF2</option> 
    <option value="wig">WIG</option> 
    <option value="sam">SAM</option> 
    <option value="csv">CSV</option> 
@@ -63,6 +71,8 @@
    </when>
    <when value="bed">  
    </when>
+     <when value="gff2">  
+   </when>
    <when value="wig">  
    </when>
      <when value="sam">  
@@ -75,6 +85,18 @@
  
   <param name="name" type="text" value="SMART" label="name for the transcripts"/>
  
+  <conditional name="optionSequence">
+  <param name="choose" type="select" label="give the corresponding Multi-Fasta file (useful for EMBL format)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+  <param name="value" type="data" format="mfa" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+   
   <param name="strand" type="boolean" truevalue="-t" falsevalue="" checked="false" label="consider the 2 strands as different (only useful for writing WIG files)"/>
    
   </inputs>
@@ -83,7 +105,7 @@
    <data name="outputFile" format="gff3" label="$inputFormatType.FormatInputFileName to $outputFormatType.outFormat">
  <change_format>
  <when input="outputFormatType.outFormat" value="bed" format="bed" />
- <when input="outputFormatType.outFormat" value="gff" format="gff" />
+ <when input="outputFormatType.outFormat" value="gff2" format="gff2" />
  <when input="outputFormatType.outFormat" value="wig" format="wig" />
  <when input="outputFormatType.outFormat" value="sam" format="sam" />
  <when input="outputFormatType.outFormat" value="csv" format="csv" />
@@ -93,6 +115,5 @@
   </outputs>
 
   <help>
-Simple conversion tool.
   </help>
 </tool>
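The updated wrapper assembles the convertTranscriptFile.py command line from the chosen input and output formats, the transcript name, the new optional multi-FASTA file (-s) and the strand flag (-t). A sketch of the equivalent command construction, using only the flags that appear in this changeset's wrappers (file names are placeholders):

    def build_convert_command(input_file, input_format, output_file, output_format,
                              name="SMART", sequence_file=None, split_strands=False):
        parts = ["python", "../Java/Python/convertTranscriptFile.py",
                 "-i", input_file, "-f", input_format,
                 "-o", output_file, "-g", output_format,
                 "-n", name]
        if sequence_file:              # new in this changeset, useful for EMBL output
            parts += ["-s", sequence_file]
        if split_strands:              # only meaningful when writing WIG files
            parts.append("-t")
        return " ".join(parts)

    print(build_convert_command("reads.gff3", "gff3", "reads.gff2", "gff2"))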
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToCsv" name="Bed -> Csv">
+  <description>Convert Bed File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[bed -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToGff2" name="Bed -> Gff2">
+  <description>Convert Bed File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[bed -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToGff3" name="Bed -> Gff3">
+  <description>Convert Bed File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[bed -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BedToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToSam.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToSam" name="Bed -> Sam">
+  <description>Convert Bed File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[bed -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToCsv" name="Blast (-m 8) -> Csv">
+  <description>Convert Blast (-m 8) File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[blast -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToGff2" name="Blast (-m 8) -> Gff2">
+  <description>Convert Blast (-m 8) File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[blast -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToGff3" name="Blast (-m 8) -> Gff3">
+  <description>Convert Blast (-m 8) File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[blast -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToSam" name="Blast (-m 8) -> Sam">
+  <description>Convert Blast (-m 8) File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[blast -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_FastqToFasta" name="Fastq -> Fasta">
+  <description>Convert Fastq File to Fasta File.</description>
+  <command interpreter="python"> ../Java/Python/fastqToFasta.py -i $inputFile -o $outputFile 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="fastq"/>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[fastq -> fasta] Output File"/>
+    <data format="txt" name="logFile" label="[fastq -> fasta] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToCsv" name="Gff2 -> Csv">
+  <description>Convert Gff2 File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[gff2 -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[gff2 -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToGff3" name="Gff2 -> Gff3">
+  <description>Convert Gff2 File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[gff2 -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[gff2 -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToSam" name="Gff2 -> Sam">
+  <description>Convert Gff2 File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[gff2 -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[gff2 -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToCsv" name="Gff3 -> Csv">
+  <description>Convert Gff3 File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[gff3 -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToGff2" name="Gff3 -> Gff2">
+  <description>Convert Gff3 File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[gff3 -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToSam" name="Gff3 -> Sam">
+  <description>Convert Gff3 File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[gff3 -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToWig" name="Gff3 -> Wig">
+  <description>Convert Gff3 File to Wig File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g wig yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="wig" name="outputFile" label="[gff3 -> wig] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> wig] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToCsv" name="Sam -> Csv">
+  <description>Convert Sam File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="sam"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[sam -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[sam -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToGff2" name="Sam -> Gff2">
+  <description>Convert Sam File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="sam"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[sam -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[sam -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToGff3" name="Sam -> Gff3">
+  <description>Convert Sam File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="sam"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[sam -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[sam -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/CountReadGCPercent.xml
--- a/SMART/galaxy/CountReadGCPercent.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/CountReadGCPercent.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="CountReadGCPercent" name="count read GC percent">
+<tool id="CountReadGCPercent" name="count read GCpercent">
     <description>Count GC percent for each read against a genome.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
     <command interpreter="python"> ../Java/Python/CountReadGCPercent.py -i $inputFastaFile -j $inputGffFile -o $outputFile</command>
   <inputs>
       <param name="inputFastaFile" type="data" label="Input reference fasta File" format="fasta"/>
@@ -10,11 +7,10 @@
   </inputs>
 
   <outputs>
-    <data format="gff3" name="outputFile" label="[count read GC percent] Output File"/>
+    <data format="gff3" name="outputFile" label="[CountReadGCPercent] Output File"/>
    </outputs>
 
   <help>
-Count the GC% of a FASTA file.
   </help>
 </tool>
 
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/DiffExpAnal.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/DiffExpAnal.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,19 @@
+<tool id="testDiffExpAnal" name="Differential Expression Analysis">
+  <description>Differential expression analysis for sequence count data</description>
+  <command interpreter="sh"> ../DiffExpAnal/testR.sh $inputFile $columnsOfGeneName $columnsOfCondition1 $columnsOfCondition2 $outputFileCSV $outputFilePNG 2>$outputLog </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+ <param name="columnsOfGeneName" type="text" value="0" label="Please indicate the column numbers of the gene names, separated by ','. If there are no gene names, keep the default value 0."/>
+ <param name="columnsOfCondition1" type="text" value="1,2" label="Please indicate the column numbers of condition 1, separated by ','."/>
+ <param name="columnsOfCondition2" type="text" value="3,4" label="Please indicate the column numbers of condition 2, separated by ','."/>
+  </inputs>
+
+  <outputs>
+    <data format="tabular" name="outputFileCSV" label="[DiffExpAnal] Output CSV File"/>
+ <data format="png" name="outputFilePNG" label="[DiffExpAnal] Output PNG File"/>
+    <data format="tabular" name="outputLog" label="[DiffExpAnal] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
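The positional arguments of testR.sh follow the order given in the command template above; a filled-in call would look roughly like this (hypothetical dataset names, with gene names in column 0, condition 1 in columns 1-2 and condition 2 in columns 3-4):

    sh ../DiffExpAnal/testR.sh counts.tab 0 1,2 3,4 diffexp.csv diffexp.png 2> diffexp.log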
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/FindOverlaps_optim.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/FindOverlaps_optim.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,16 @@
+<tool id="findOverlaps" name="findOverlaps">
+ <description>Finds the reference reads that are overlapped by the query reads.</description>
+ <command interpreter="python">
+ ../Java/Python/FindOverlaps_optim.py -i $inputRef -j $inputQ -o $outputFileGff 
+ </command>
+
+  <inputs>
+    <param name="inputRef" type="data" label="Input Reference File" format="gff3"/>
+    <param name="inputQ" type="data" label="Input Query File" format="gff3"/>
+  </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs> 
+
+</tool>
\ No newline at end of file
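With the dataset variables substituted, the wrapper above boils down to a call such as (illustrative names):

    python ../Java/Python/FindOverlaps_optim.py -i reference.gff3 -j query.gff3 -o overlaps.gff3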
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/GetDifferentialExpression.xml
--- a/SMART/galaxy/GetDifferentialExpression.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/GetDifferentialExpression.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="GetDifferentialExpression" name="get differential expression">
  <description>Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/GetDifferentialExpression.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -54,6 +51,7 @@
  $simple
  $adjusted
 
+
  #if $optionSimplePara.simplePara == 'Yes':
  -S $optionSimplePara.paraValue
  #end if
@@ -65,6 +63,7 @@
  #if $optionFDR.FDR == 'Yes':
  -d $optionFDR.FDRValue
  #end if
+ $plot $outputFilePNG
  </command>
 
  <inputs>
@@ -155,8 +154,8 @@
  </when>
  </conditional>
 
- <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Normalize using the number of reads in each condition"/>
- <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="Normalize using the number of reads of interquartile expression region"/>
+ <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="normalize using the number of reads in each condition"/>
+ <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="normalize using the number of reads of 'mean' regions"/>
 
  <conditional name="optionSimplePara">
  <param name="simplePara" type="select" label="provide the number of reads" >
@@ -171,7 +170,7 @@
  </conditional>
 
  <conditional name="optionFixedSizeFactor">
- <param name="FSF" type="select" label="Give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization)">
+ <param name="FSF" type="select" label="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization)">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -194,21 +193,18 @@
  </when>
  </conditional>
 
+ <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="plot cloud plot"/>
+
  </inputs>
 
  <outputs>
  <data name="outputFileGff" format="gff3" label="[GetDifferentialExpression]out file"/>
+ <data name="outputFilePNG" format="png" label="[GetDifferentialExpression] PNG file">
+ <filter>plot</filter>
+ </data>
  </outputs> 
 
  <help>
-This tool compares two sets of data and find the differential expression. One very important component of the tool is the reference set. Actually, to use the tool, you need the two input sets of data, of course, and the reference set. The reference set is a set of genomic coordinates and, for each interval, it will count the number of feature on each sample and compute the differential expression. For each reference interval, it will output the direction of the regulation (up or down, with respect to the first input set), and a *p*-value from a Fisher exact test.
-
-This reference set seems boring. Why not computing the differential expression without this set? The answer is: the differential expression of what? I cannot guess it. Actually, you might want to compare the expression of genes, of small RNAs, of transposable elements, of anything... So the reference set can be a list of genes, and in this case, you can compute the differential expression of genes. But you can also compute many other things.
-
-Suppose that you cluster the data of your two input samples (you can do it with the *clusterize* and the *mergeTranscriptLists* tools). You now have a list of all the regions which are transcribed in at least one of the input samples. This can be your reference set. This reference set is interesting since you can detect the differential expression of data which is outside any annotation.
-
-Suppose now that you clusterize using a sliding window the two input samples (you can do it with the *clusterizeBySlidingWindows* and the *mergeSlidingWindowsClusters* tools). You can now select all the regions of a given size which contain at least one read in one of the two input samples (do it with *selectByTag* and the tag **nbElements**). Again, this can be an other interesting reference set.
-
-In most cases, the sizes of the two input samples will be different, so you should probably normalize the data, which is an available option. The ---rather crude--- normalization increases the number of data in the least populated sample and decreases the number of data in the most populated sample to the average number of data.
+ example: python GetDifferentialExpression.py -i input1 -f gff3 -j input2 -g gff3 -k ref -l gff3 -o output.gff3
  </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/GetFlanking.xml
--- a/SMART/galaxy/GetFlanking.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/GetFlanking.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="GetFlanking" name="get flanking">
  <description>Get the flanking regions of a set of reference.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/GetFlanking.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -55,6 +52,9 @@
  #end if
   
    -o $outputFile  
+
+
+
  </command>
 
  <inputs>
@@ -117,7 +117,7 @@
  </conditional>
 
  <conditional name="OptionUpDownStream">
- <param name="OptionUD" type="select" label="Only provide upstream/dowstream features">
+ <param name="OptionUD" type="select" label="UpStream or DownStream">
  <option value="UpStream">UpStream</option>
  <option value="DownStream">DownStream</option>
  <option value="NONE" selected="true">NONE</option>
@@ -131,8 +131,8 @@
  </conditional>
 
  <conditional name="OptionColinearOrAntiSens">
- <param name="OptionCA" type="select" label="Only provide collinear/antisens features">
- <option value="Colinear">Collinear</option>
+ <param name="OptionCA" type="select" label="Colinear or anti-sens">
+ <option value="Colinear">Colinear</option>
  <option value="AntiSens">AntiSens</option>
  <option value="NONE" selected="true">NONE</option>
  </param>
@@ -172,23 +172,8 @@
 
 
    <outputs>
-    <data format="gff3" name="outputFile" label="[get flanking] output file"/>
+    <data format="gff3" name="outputFile" label="[GetFlanking] Output File"/>
    </outputs>
 
- <help>
-This tool prints the elements from the second set of genomic intervals which are closest to (in other words, are flanking) the elements from the first set. You can also play on different parameters:
-
-- restrict the search to downstream or upstream elements, or print downstream and upstream elements,
-
-- only consider collinear flanking elements,
-
-- only consider anti-sense flanking elements,
-
-- only consider elements which are close enough (using some given distance),
-
-- only consider flanking elements which do not overlap with the reference element.
-
-Notice that elements from the second sets may be printed at most once, whether they are the flanking element of several elements from the first or not.
- </help>
 
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/SelectByTag.xml
--- a/SMART/galaxy/SelectByTag.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/SelectByTag.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,16 +1,17 @@
 <tool id="SelectByTag" name="select by tag">
- <description>Keep the genomic coordinates such that a value of a given tag.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+ <description>Keeps the genomic coordinates whose given tag matches a given value or range.</description>
  <command interpreter="python">
  ../Java/Python/SelectByTag.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
  #elif $formatType.FormatInputFileName == 'gff':
  -f gff
  #elif $formatType.FormatInputFileName == 'gff2':
  -f gff2
  #elif $formatType.FormatInputFileName == 'gff3':
  -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
  #elif $formatType.FormatInputFileName == 'gtf':
  -f gtf
  #end if
@@ -36,11 +37,16 @@
  <inputs>
  <conditional name="formatType">
  <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
  <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
  <option value="gtf">gtf</option>
  </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
  <when value="gff">
  <param name="inputFileName" format="gff" type="data" label="Input File"/>
  </when>
@@ -50,6 +56,9 @@
  <when value="gff3">
  <param name="inputFileName" format="gff3" type="data" label="Input File"/>
  </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
  <when value="gtf">
  <param name="inputFileName" format="gtf" type="data" label="Input File"/>
  </when>
@@ -58,12 +67,12 @@
  <param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>
 
  <conditional name="OptionValue">
- <param name="Value" type="select" label="given value for the tag">
+ <param name="Value" type="select" label="value of tag">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
  <when value="Yes">
- <param name="valeur" type="integer" value="1"/>
+ <param name="valeur" type="integer" value="1" help="Be careful! The value must be greater than 0."/>
  </when>
  <when value="No">
  </when>
@@ -75,7 +84,7 @@
  <option value="No" selected="true">No</option>
  </param>
  <when value="Yes">
- <param name="max" type="integer" value="1"/>
+ <param name="max" type="integer" value="1" help="Be careful! The value must be greater than 0."/>
  </when>
  <when value="No">
  </when>
@@ -87,14 +96,14 @@
  <option value="No" selected="true">No</option>
  </param>
  <when value="Yes">
- <param name="min" type="integer" value="1"/>
+ <param name="min" type="integer" value="1" help="Be careful! The value must be greater than 0."/>
  </when>
  <when value="No">
  </when>
  </conditional>
 
  <conditional name="OptionDefault">
- <param name="default" type="select" label="give this value if tag is not present">
+ <param name="default" type="select" label="gives this value if tag is not present">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -107,16 +116,7 @@
  </inputs>
 
  <outputs>
- <data name="outputFileGff" format="gff3" label="[select by tag] output file"/>
+ <data name="outputFileGff" format="gff3" label="[SelectByTag] Output File"/>
  </outputs> 
 
- <help>
-The script reads a list of genomic coordinates and output all the features with specific tag values. If you want to know more about tags, please consult the GFF format page: http://www.sequenceontology.org/gff3.shtml
-
-The tools reads the input file, and more specifically the tag that you specified. You can mention a lower and a upper bound for its value, or a specific value, and the tool will print all the features such that the tags are between the specified bounds or matches the string.
-
-A tag has to be present for each feature. If not, you can specify a default value which will be used if the tag is absent.
-
-This tool can be used to select the clusters with a minimum number of elements (the tag **nbElements** counts the number of elements per clusters) or to select the reads which have mapped less than *n* times (the tag **nbOccurrences** counts the number of mappings per read).
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/WrappGetLetterDistribution.xml
--- a/SMART/galaxy/WrappGetLetterDistribution.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/WrappGetLetterDistribution.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="getLetterDistribution1" name="get letter distribution">
-    <description>Calculate distribution for each nucleotide per position for all short reads</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="getLetterDistribution1" name="Get Letter Distribution">
+    <description>Calculate distribution for each nucleotide per position for all short reads (S-MART)</description>
     <command interpreter="python">
      WrappGetLetterDistribution.py -i $inputFileName
  #if $formatType.FormatInputFileName == 'fasta':
@@ -28,9 +25,9 @@
     </inputs>
         
     <outputs>
-                <data name="ouputFileNameCSV" format="tabular" label="[get letter distribution] CSV file"/>
-                <data name="ouputFileNamePNG1" format="png" label="[get letter distribution] PNG file 1"/>
-                <data name="ouputFileNamePNG2" format="png" label="[get letter distribution] PNG file 2"/>
+                <data name="ouputFileNameCSV" format="tabular" label="[getLetterDistribution] CSV File"/>
+                <data name="ouputFileNamePNG1" format="png" label="[getLetterDistribution] PNG File 1"/>
+                <data name="ouputFileNamePNG2" format="png" label="[getLetterDistribution] PNG File 2"/>
     </outputs>
     <tests>
      <test>
@@ -39,10 +36,5 @@
             <output name="outputFileNameCSV" file="exp_getletterdistribution_short_fastq.csv" />     
         </test>
     </tests>
+</tool>
 
- <help>
-The script gets the nucleotide distribution of the input sequence list. It outputs two files. The first file shows the nucleotide distribution of the data. More precisely, a point (*x*, *y*) on the curve **A** shows that *y* sequences have *x* % of **A**.
-  
-The second plot shows the average nucleotide distribution for each position of the read. You can use it to detect a bias in the first nucleotides, for instance. A point *x*, *y* on the curve **A** shows that at the position *x*, there are *y*% of **A**. A point (*x*, *y*) on the curve **#** tells you that *y* % of the sequences contain not less than *x* nucleotides. By definition, this latter line is a decreasing function. It usually explains why the tail of the other curves are sometimes erratic: there are few sequences.
- </help>
-</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/changeGffFeatures.xml
--- a/SMART/galaxy/changeGffFeatures.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/changeGffFeatures.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,23 +1,16 @@
-<tool id="changeGffFeatures" name="change GFF features">
- <description>Change a feature in a GFF file (the feature is the 3rd column).</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="changeGffFeatures" name="change gff Features">
+ <description>Changes one feature name to another name (the feature name is found in the 3rd column).</description>
  <command interpreter="bash">
  ../Java/Python/changeGffFeatures.sh $inputFile $inputFeature $outputFeature >$outputFile
  </command>
       <inputs>
        <param name="inputFile" type="data" label="Input File" format="gff"/>
-       <param name="inputFeature" type="text" value="exon" label="The feature you want to change"/>
-       <param name="outputFeature" type="text" value="exon" label="The new feature"/>
+       <param name="inputFeature" type="text" value="exon" label="The feature name you want to change (3rd column)."/>
+       <param name="outputFeature" type="text" value="exon" label="The new feature name (3rd column)."/>
       </inputs>
 
       <outputs>
-             <data name="outputFile" format="gff" label="[change GFF features] Output File"/>
+             <data name="outputFile" format="gff" label="[changeGffFeatures] Output File"/>
       </outputs> 
-
-   <help>
- This script changes the third column of a GFF3 file (please refer to http://www.sequenceontology.org/gff3.shtml to know more about this format).
-   </help>
 </tool>
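Since the wrapper simply redirects stdout to the output dataset, the underlying call reduces to something like (hypothetical file and feature names):

    bash ../Java/Python/changeGffFeatures.sh annotation.gff mRNA transcript > renamed.gff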
 
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/changeTagName.xml
--- a/SMART/galaxy/changeTagName.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/changeTagName.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,10 +1,9 @@
 <tool id="changeTagName" name="change tag name">
- <description>Change the name of a tag in a GFF file.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+ <description>Changes the name of a tag in a list of transcripts.</description>
  <command interpreter="python">
  ../Java/Python/changeTagName.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
  #elif $formatType.FormatInputFileName == 'gff':
  -f gff
  #elif $formatType.FormatInputFileName == 'gff2':
@@ -22,10 +21,14 @@
  <inputs>
  <conditional name="formatType">
  <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
  <option value="gff3">gff3</option>
  </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
  <when value="gff">
  <param name="inputFileName" format="gff" type="data" label="Input File"/>
  </when>
@@ -37,15 +40,15 @@
  </when>
  </conditional>
 
- <param name="Tag" type="text" label="tag option" help="The tag you want to change"/>
- <param name="name" type="text" label="name option" help="A new name for the tag"/>
+ <param name="Tag" type="text" value="None" label="tag option" help="A given tag; you must choose a tag."/>
+ <param name="name" type="text" value="None" label="name option" help="New name for the tag; you must choose a new name."/>
+
+
+
  </inputs>
 
  <outputs>
- <data name="outputFileGff" format="gff3" label="[change tag name] Output File"/>
+ <data name="outputFileGff" format="gff3" label="[changeTagName] Output File"/>
  </outputs> 
 
- <help>
- Change the name of a tag in the 9th field of a GFF3 file (please consult http://www.sequenceontology.org/gff3.shtml to know more about this format).
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/cleanGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/cleanGff.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18 @@
+<tool id="cleanGff" name="clean Gff">
+   <description>Cleans a GFF file as given by NCBI and outputs a GFF3 file.</description>
+   <command interpreter="python"> ../Java/Python/cleanGff.py -i $inputFile 
+   -t $type 
+   -o $outputFile
+   </command>
+
+       <inputs>
+       <param name="inputFile" type="data" label="Input File" format="gff"/>
+       <param name="type" type="text" value="tRNA,rRNA,ncRNA,CDS" label="type option, compulsory" help="List of comma-separated types that you want to keep. Ex: ncRNA,tRNA,rRNA,CDS"/>
+       </inputs>
+
+       <outputs>
+           <data format="gff3" name="outputFile" label="[cleanGff] Output File"/>
+       </outputs>
+
+</tool>
+
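Filled in, the command template above corresponds to a call along these lines (the input name is illustrative; the -t list uses the default value shown above):

    python ../Java/Python/cleanGff.py -i ncbi_annotation.gff -t tRNA,rRNA,ncRNA,CDS -o cleaned.gff3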
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/clusterize.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/clusterize.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,67 @@
+<tool id="MergingDataClusterize" name="Clusterize">
+ <description>Clusterizes the reads when their genomic intervals overlap.</description>
+ <command interpreter="python">
+ ../Java/Python/clusterize.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'csv':
+ -f csv
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #end if
+ -o $outputFileGff 
+ $colinear
+ $normalize
+ -d $distance
+ $log $outputFileLog
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="csv">csv</option>
+ <option value="sam">sam</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="csv">
+ <param name="inputFileName" format="csv" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only reads on the same strand"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option, for GFF3 file format only" help="This option normalizes (GFF3 input only)"/>
+ <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option creates a log file"/>
+ <param name="distance" type="integer" value="0" label="distance option" help="Maximum distance allowed between two reads"/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[clusterize] output file"/>
+ <data name="outputFileLog" format="txt" label="[clusterize] log file">
+ <filter>log</filter>
+ </data>
+ </outputs> 
+</tool>
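As a concrete reading of the template above, a GFF3 run with colinear clustering, a 50 nt merge distance and a log file would expand to roughly (illustrative names):

    python ../Java/Python/clusterize.py -i reads.gff3 -f gff3 -o clusters.gff3 -c -d 50 -l clusterize.log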
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/clusterizeBySlidingWindows.xml
--- a/SMART/galaxy/clusterizeBySlidingWindows.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/clusterizeBySlidingWindows.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="clusterizeBySlidingWindows" name="clusterize by sliding windows">
- <description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region).</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="clusterizeBySlidingWindows" name="clusterize By SlidingWindows">
+ <description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.</description>
  <command interpreter="python">
  ../Java/Python/clusterizeBySlidingWindows.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -76,16 +73,16 @@
 
  <param name="size" type="text" value="50000" label="Size option" help="Size of the regions."/>
  <param name="overlap" type="text" value="50" label="Overlap option" help="Overlap between two consecutive regions."/>
- <param name="normalize" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Normalize option for only GFF3 file format" help="(only work if the tag nbOccurrences is set)"/>
- <param name="strands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="Consider the two strands separately"/>
+ <param name="normalize" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Normalize option for only GFF3 file format" help="This option normalizes (Warning!! Only for GFF3 file!)"/>
+ <param name="strands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strands option" help="Consider the two strands separately."/>
 
  <conditional name="OptionTag">
- <param name="tag" type="select" label="Use a given tag as input (instead of summing number of features)">
+ <param name="tag" type="select" label="use a given tag as input (instead of summing number of features)">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
  <when value="Yes">
- <param name="value" type="select" label="tag name"/>
+ <param name="value" type="text" value="None" label="tag option" help="write a tag name you want to observe."/>
  </when>
  <when value="No">
  </when>
@@ -93,18 +90,12 @@
 
 
  <conditional name="OptionsOperation">
- <param name="operation" type="select" label="combine tag value with given operation">
+ <param name="operation" type="select" label="combine tag value with given operation [choice (sum, avg, med, min, max)]">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
  <when value="Yes">
- <param name="value" type="select" label="operation" help="You can ONLY choose one of following operation : sum, avg, med, min, max.">
- <option value="sum">sum</option>
- <option value="avg">average</option>
- <option value="med">median</option>
- <option value="min">minimum</option>
- <option value="max">maximum</option>
- </param>
+ <param name="value" type="text" value="None" label="operation option" help="You can ONLY choose one of the following operations: sum, avg, med, min, max."/>
  </when>
  <when value="No">
  </when>
@@ -123,19 +114,19 @@
  </when>
  </conditional>
 
+ <param name="strand" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strand option" help="This option considers the two strands separately."/>
+ <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="This option creates a png file."/>
+ <param name="excel" type="boolean" truevalue="-x" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
+
  </inputs>
 
  <outputs>
  <data name="outputFileGff" format="gff3"/>
+ <data name="excelOutput" format="csv">
+ <filter>excel</filter>
+ </data>
+ <data name="plotPng" format="png">
+ <filter>plot</filter>
+ </data>
  </outputs> 
-
- <help>
-Sliding windows are a convenient ways to clusterize data mapped on the genome. There are two important parameters of a sliding window: the size of the window and the size of the overlap.
-
-By default, sliding windows count the number of reads in each window. However, you can basically merge any information which is contained in the tags. You can compute the average, sum, median, max or min of the tags for each window. For instance, every window can contain the average cluster size, if you merge clusters instead of reads.
-
-The output file is a GFF3 file, where each element is a window. There is a special tag for each window, whose name is **nbElements** if you counted the number of transcripts per sliding window. However, if you performed a **min** (resp. **max**, **sum**, **median**, **average**) operation on the tags **value** of the transcripts, then the tag of the window will be **minValue** (resp. **maxValue**, **sumValue**, **medValue**, **avgValue**). You can also specify the name of your tag (which is actually advised: **nbReadsInSample1** will always be more informative than **nbElements**).
-
-You also have different option, which can select the *n* % highest regions, or the regions with at least *n* features in it, or even the regions with at least *n* unique features. This last option is useful when you want to cluster the reads which have mapped only once, for instance.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/compareOverlapping.xml
--- a/SMART/galaxy/compareOverlapping.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/compareOverlapping.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="CompareOverlapping" name="compare overlapping">
+<tool id="CompareOverlapping" name="Compare Overlapping">
 	<description>Print all the transcripts from a first file which overlap with the transcripts from a second file.</description>
-	<requirements>
-		<requirement type="set_environment">PYTHONPATH</requirement>
-	</requirements>
 	<command interpreter="python">
 		../Java/Python/CompareOverlapping.py -i $formatType.inputFileName1
 		#if $formatType.FormatInputFileName1 == 'bed':
@@ -110,7 +107,7 @@
 			</when>
 			<when value="gtf">
 				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-			</when>
+			                        </when>
 		</conditional>
 
 		<conditional name="formatType2">
@@ -142,54 +139,58 @@
 			</when>
 		</conditional>
 
+
+
 		<conditional name="optionNFirstFile1">
-			<param name="NFirstForFile1" type="select" label="Shrink the queries to their first N nt.">
+			<param name="NFirstForFile1" type="select" label="NFirst for file 1" help="only consider the n first nucleotides of the transcripts in file 1">
 					<option value="Yes">Yes</option>
 					<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="firstNtFile1" type="integer" value="1" label="size" />
+				<param name="firstNtFile1" type="integer" value="1" label="n first nucleotides for input file 1" />
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
 		<conditional name="optionNFirstFile2">
-			<param name="NFirstForFile2" type="select" label="Shrink the references to their first N nt.">
+			<param name="NFirstForFile2" type="select" label="NFirst for file 2" help="only consider the n first nucleotides of the transcripts in file 2">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="firstNtFile2" type="integer" value="1" label="size" />
+				<param name="firstNtFile2" type="integer" value="1" label="n first nucleotides for input file 1" />
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
 
 		<conditional name="optionNLastFile1">
-			<param name="NLastForFile1" type="select" label="Shrink the queries to their last N nt.">
+			<param name="NLastForFile1" type="select" label="NLast for file 1">
 					<option value="Yes">Yes</option>
 					<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="lastNtFile1" type="integer" value="1" label="size"/>
+				<param name="lastNtFile1" type="integer" value="1" label="n last nucleotides for input file 1" help="only consider the n last nucleotides of the transcripts in file 1"/>
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
 		<conditional name="optionNLastFile2">
-			<param name="NLastForFile2" type="select" label="Shrink the references to their last N nt.">
+			<param name="NLastForFile2" type="select" label="NLast for file 2">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="lastNtFile2" type="integer" value="1" label="size"/>
+				<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
 
+
+
 		<conditional name="optionExtentionCinqFile1">
-			<param name="extentionFile1" type="select" label="Extend the query features towards the 5' end">
+			<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">
 					<option value="Yes">Yes</option>
 					<option value="No" selected="true">No</option>
 			</param>
@@ -200,8 +201,9 @@
 			</when>
 		</conditional>
 
+
 		<conditional name="optionExtentionCinqFile2">
-			<param name="extentionFile2" type="select" label="Extend the reference features towards 5' end">
+			<param name="extent [...] ption value="No" selected="true">No</option>
 			</param>
@@ -225,7 +227,7 @@
 		</conditional>
 
 		<conditional name="optionExtentionTroisFile2">
-			<param name="extentionFile2" type="select" label="Extend the reference features towards 3' end">
+			<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
@@ -237,7 +239,7 @@
 		</conditional>
 
 		<conditional name="OptionColinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Report queries which are collinear/antisens w.r.t. a reference">
+			<param name="OptionCA" type="select" label="Colinear or anti-sens">
 				<option value="Colinear">Colinear</option>
 				<option value="AntiSens">AntiSens</option>
 				<option value="NONE" selected="true">NONE</option>
@@ -273,38 +275,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Also report queries which overlap with the introns of references, or queries such that a reference is in one of its intron"/>
-		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
-		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
+		<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>
+		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
+
 	</inputs>
-
+
 	<outputs>
 		<data name="outputFileGff" format="gff3"/>
 	</outputs> 
 
-	<help>
-This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
-
-It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
-
-Various modifiers are also available:
-
--Restrict query / reference set to the first nucleotide. Useful to check if the TSS of one set overlap with the other one.
-
--Extend query / reference set on the 5' / 3' direction. Useful to check if one set is located upstream / downstream the other one.
-
--Include introns in the comparison.
-
--Invert selection (report those which do not overlap).
-
--Restrict to colinear / anti-sense overlapping data.
-
--Keep the query data even if they do not strictly overlap with the reference data, but are located not further away than *n* nucleotide from some reference data.
-
--Keep the query data with are strictly included into reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.
-
-The mechanism of shrinking and extending is also useful to make a fine grain comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping you data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcript factor sites which are upstream.
-
-Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
-	</help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/computeCoverage.xml
--- a/SMART/galaxy/computeCoverage.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/computeCoverage.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="ComputeCoverage" name="compute coverage">
+<tool id="ComputeCoverage" name="Compute coverage">
     <description>Compute the coverage of a set with respect to another set.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
     <command interpreter="python">
         ../Java/Python/ComputeCoverage.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -103,11 +100,8 @@
         </inputs>
 
  <outputs>
- <data name="outputFileGff" format="gff3" label="[compute coverage] output file"/>
+ <data name="outputFileGff" format="gff3" label="[computeCoverage] output file"/>
  </outputs> 
 
- <help>
-This tool considers a query and a reference files, and gives the coverage of the query file by the reference. The output file is similar to the query file, where a tag **coverage** has been added.
- </help>
 </tool>
 
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/coordinatesToSequence.xml
--- a/SMART/galaxy/coordinatesToSequence.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/coordinatesToSequence.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="coordinatesToSequence" name="coordinates to sequence">
  <description>Coordinates to Sequences: Extract the sequences from a list of coordinates.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/coordinatesToSequence.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -62,7 +59,4 @@
  <data name="outputFileFasta" format="fasta" label="coordinates to sequences output"/>
  </outputs> 
 
- <help>
-You can use this tool, if you just want to convert your mapping data to genomic coordinates, without any filtering. It requires a genomic coordinates file together with its format, an output format (GFF3, BED, etc...), the genome, and prints you the corresponding file.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/findTss.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/findTss.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,59 @@
+<tool id="findTss" name="findTss">
+ <description>Find the transcription start site of a list of transcripts.</description>
+ <command interpreter="python">
+ ../Java/Python/findTss.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+
+ -o $outputFileGff 
+ $colinear
+ $normalize
+ -d $distance
+ $excel $excelOutput
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="colinear" type="boolean" truevalue="-e" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only reads on the same strand"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option, for GFF3 file format only" help="This option normalizes (GFF3 input only)"/>
+ <param name="distance" type="text" value="10" label="distance option" help="Maximum distance allowed between two reads"/>
+ <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[findTss] Output File"/>
+ <data name="excelOutput" format="csv" label="[findTss] CSV File">
+ <filter>excel</filter>
+ </data>
+ </outputs> 
+
+</tool>
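Read against the template above, a typical filled-in invocation would be approximately the following (illustrative names; -c additionally writes the CSV output):

    python ../Java/Python/findTss.py -i transcripts.gff3 -f gff3 -o tss.gff3 -d 10 -c tss.csv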
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getDifference.xml
--- a/SMART/galaxy/getDifference.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getDifference.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="getDifference" name="get difference">
- <description>Gets all the regions of the genome, except the one given in an annotation file. Alternatively, it may also give all the elements from the first set which does not ovelap with the second set (at the nucleotide level).</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+<tool id="getDifference" name="get Difference">
+ <description>Gets all the regions of the genome except the ones given, or gets all the elements from the first set which do not overlap with the second set (at the nucleotide level).</description>
  <command interpreter="python">
  ../Java/Python/getDifference.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -34,6 +31,7 @@
  -g gtf
  #end if
 
+
  $split
 
  #if $OptionSequence.option == "Yes":
@@ -104,7 +102,8 @@
  </when>
  </conditional>
 
- <param name="split" type="boolean" truevalue="-p" falsevalue="" checked="false" label="When comparing to a set of genomic coordinates, do not join into exons."/>
+ <param name="split" type="boolean" truevalue="-p" falsevalue="" checked="false" label="split option" help="When comparing to a set of genomic coordinates, do not join."/>
+
 
  <conditional name="OptionSequence">
  <param name="option" type="select" label="Compare with a reference fasta file.">
@@ -122,12 +121,7 @@
 
 
  <outputs>
- <data name="outputFileGff" format="gff3" label="[get difference] output file."/>
+ <data name="outputFileGff" format="gff3" label="[getDifference] output file"/>
  </outputs> 
 
- <help>
-This tools has two different (but similar) uses. When given two sets of transcripts, it trims the elements of the set so that they do not overlap with the second set.
-
-When only one set of transcripts is given, together with a reference genome, it produces a list of transcripts which complements the first set.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getDistance.xml
--- a/SMART/galaxy/getDistance.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getDistance.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="GetDistance" name="get distance">
- <description>Give the distances between every data from the first input set with respect to the data from the second input set.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+ <description>Give the distances between every data from the first input set and the data from the second input set</description>
  <command interpreter="python">
  ../Java/Python/getDistance.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -43,6 +40,16 @@
  -a
  #end if
 
+ #if $OptionFirstNucl5.FirstNu5 == "Yes":
+ -s $OptionFirstNucl5.first5File1
+ -S $OptionFirstNucl5.first5File2
+ #end if
+
+ #if $OptionFirstNucl3.FirstNu3 == "Yes":
+ -e $OptionFirstNucl3.first3File1
+ -E $OptionFirstNucl3.first3File2
+ #end if
+
  #if $OptionMinDistance.MinD == "Yes":
  -m $OptionMinDistance.minDistance
  #end if
@@ -70,6 +77,8 @@
  #end if
 
  -o $outputFilePng
+ $outputDistance $outputFileDistance
+
  </command>
 
  <inputs>
@@ -131,12 +140,14 @@
  </when>
  </conditional>
 
+ <param name="outputDistance" type="boolean" truevalue="-O" falsevalue="" checked="false" label="distance option" help="This option creates a GFF3 output file containing the distance for each element of the query."/>
+
  <param name="absolute" type="boolean" truevalue="-b" falsevalue="" checked="false" label="absolute value option" help="This option gives the absolute value of the distance."/>
  <param name="proportion" type="boolean" truevalue="-p" falsevalue="" checked="false" label="proportion option" help="This option gives the proportion on the y-axis instead of the number of distances."/>
 
  <conditional name="OptionColinearOrAntiSens">
- <param name="OptionCA" type="select" label="Provide distribution of distances between collinear/antisense pairs of features">
- <option value="Colinear">Collinear</option>
+ <param name="OptionCA" type="select" label="Colinear or anti-sens">
+ <option value="Colinear">Colinear</option>
  <option value="AntiSens">AntiSens</option>
  <option value="NONE" selected="true">NONE</option>
  </param>
@@ -148,8 +159,34 @@
  </when>
  </conditional>
 
+ <conditional name="OptionFirstNucl5">
+ <param name="FirstNu5" type="select" label="only consider the n first 5' nucleotides for input files">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="first5File1" type="integer" value="1" label="in file 1" help="Be careful! The value must be greater than 0."/>
+ <param name="first5File2" type="integer" value="1" label="in file 2" help="Be careful! The value must be greater than 0."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionFirstNucl3">
+ <param name="FirstNu3" type="select" label="only consider the n first 3' nucleotides for input files">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="first3File1" type="integer" value="1" label="in file 1" help="Be careful! The value must be greater than 0."/>
+ <param name="first3File2" type="integer" value="1" label="in file 2" help="Be careful! The value must be greater than 0."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
  <conditional name="OptionMinDistance">
- <param name="MinD" type="select" label="Minimum distance between two features">
+ <param name="MinD" type="select" label="minimum distance considered between two transcripts">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -161,7 +198,7 @@
  </conditional>
 
  <conditional name="OptionMaxDistance">
- <param name="MaxD" type="select" label="Maximum distance between two features">
+ <param name="MaxD" type="select" label="maximum distance considered between two transcripts">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -230,18 +267,9 @@
 
  <outputs>
  <data name="outputFilePng" format="png"/>
+ <data name="outputFileDistance" format="gff3">
+ <filter>outputDistance</filter>
+ </data>
  </outputs> 
 
- <help>
-Give the distances between every data from the first input set and the data from the second input set. It outputs the size distribution. Each point (*x*, *y*) tells you that there exists *y* pairs of elements which are separated by *x* nucleotides.
-
-The general algorithm is the following. For each element of the first input set, it finds the closest element of the second set and computes the distance between the two elements. The distance is zero if the two elements overlap. This distance may not exist if the element of the first input set is alone on its chromosome (or contig).
-
-Actually, considering an element from the first input set, the algorithm will look at the vicinity of this element (1kb by default). You can increase the size of the vicinity using the appropriate option.
-
-As in *compare overlapping*, you can shrink or extend your sets of genomic coordinates, so that you can get the distance between starts of reads and starts or genes, for instance. You can also compute the distance from elements which are on the same strand only (which is not the case by default) or on the opposite strand only.
-
-You have several options for the output plot. You can first choose the region on the *x*-axis you want to plot. You can also display histograms instead of line plot. In this case, the data are summed into buckets, whose sizes are given as an option. For instance, a bucket of size *s* at the point (*x*, *y*) means that there are *y* pairs of elements which are separated by *x* to *x + s* nucleotides.
- </help>
-
 </tool>
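The change above wires the new optional GFF3 output by pairing a boolean parameter with a <filter> element on the extra dataset: the dataset is created only when the checkbox is ticked. The same idiom is used further down for the csv/gff outputs of getDistribution.xml and the csv output of getSizes.xml. A minimal sketch of the pattern, with hypothetical parameter and dataset names (the command hunk that consumes the flag is not shown in this diff):

    <inputs>
        <param name="writeExtra" type="boolean" truevalue="-O" falsevalue=""
               checked="false" label="also write a GFF3 file with the computed distances"/>
    </inputs>
    <outputs>
        <data name="mainOutput" format="png"/>
        <!-- the dataset is added to the history only when the boolean parameter is true -->
        <data name="extraOutput" format="gff3">
            <filter>writeExtra</filter>
        </data>
    </outputs>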
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getDistribution.xml
--- a/SMART/galaxy/getDistribution.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getDistribution.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="getDistribution" name="get distribution">
- <description>Get Distribution: Get the distribution of the genomic coordinates along a genome.</description>
-    <requirements>
-        <requirement type="set_environment">PYTHONPATH</requirement>
-    </requirements>
+ <description>Get Distribution: Get the distribution of the genomic coordinates on a genome.</description>
  <command interpreter="python">
  ../Java/Python/GetDistribution.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -13,6 +10,8 @@
  -f gff2
  #elif $formatType.FormatInputFileName == 'gff3':
  -f gff3
+ #elif $formatType.FormatInputFileName == 'csv':
+ -f csv
  #elif $formatType.FormatInputFileName == 'sam':
  -f sam
  #elif $formatType.FormatInputFileName == 'gtf':
@@ -60,6 +59,8 @@
  $bothStrands
  $average
  $normalize
+ $csv $outputCSV
+ $gff $outputGFF
  -m
  -o $outputFile
 
@@ -72,6 +73,7 @@
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
  <option value="gff3">gff3</option>
+ <option value="csv">csv</option>
  <option value="sam">sam</option>
  <option value="gtf">gtf</option>
  </param>
@@ -87,6 +89,9 @@
  <when value="gff3">
  <param name="inputFileName" format="gff3" type="data" label="Input File"/>
  </when>
+ <when value="csv">
+ <param name="inputFileName" format="csv" type="data" label="Input File"/>
+ </when>
  <when value="sam">
  <param name="inputFileName" format="sam" type="data" label="Input File"/>
  </when>
@@ -98,7 +103,7 @@
  <param name="refFile" format="fasta" type="data" label="reference genome file"/>
 
  <conditional name="optionNbBin">
- <param name="Nb" type="select" label="number of points">
+ <param name="Nb" type="select" label="number of bins">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -109,20 +114,8 @@
  </when>
  </conditional>
 
- <conditional name="optionChrom">
- <param name="chrom" type="select" label="if you wish to plot only one chromosome, mention the chromosome name">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="chromValue" type="text" value="chromName" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
  <conditional name="optionStart">
- <param name="start" type="select" label="if you wish to plot only one locus, mention its start position">
+ <param name="start" type="select" label="start from a given region">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -134,7 +127,7 @@
  </conditional>
 
  <conditional name="optionEnd">
- <param name="end" type="select" label="if you wish to plot only one locus, mention its end position">
+ <param name="end" type="select" label="end from a given region">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -146,7 +139,7 @@
  </conditional>
 
  <conditional name="optionHeight">
- <param name="height" type="select" label="height of the figure">
+ <param name="height" type="select" label="height of the graphics">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -158,7 +151,7 @@
  </conditional>
 
  <conditional name="optionWidth">
- <param name="width" type="select" label="width of the figure">
+ <param name="width" type="select" label="width of the graphics">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -193,6 +186,18 @@
  </when>
  </conditional>
 
+ <conditional name="optionChrom">
+ <param name="chrom" type="select" label="plot only one given chromosome">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="chromValue" type="text" value="chromName" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
  <conditional name="optionColor">
  <param name="color" type="select" label="color of the lines (separated by commas and no space)">
  <option value="Yes">Yes</option>
@@ -205,8 +210,9 @@
  </when>
  </conditional>
 
+
  <param name="bothStrands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="plot one curve per strand"/>
- <param name="average" type="boolean" truevalue="-a" falsevalue="" checked="false" label="plot the number of element per bin (instead of sum)"/>
+ <param name="average" type="boolean" truevalue="-a" falsevalue="" checked="false" label="plot plot average (instead of sum)"/>
 
  <conditional name="optionNames">
  <param name="names" type="select" label="name for the tags (separated by commas and no space)">
@@ -220,18 +226,23 @@
  </when>
  </conditional>
 
- <param name="normalize" type="boolean" truevalue="-z" falsevalue="" checked="false" label="normalize data (when panel sizes are different)"/>
+ <param name="normalize" type="boolean" truevalue="-z" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
+ <param name="csv" type="boolean" truevalue="-x" falsevalue="" checked="false" label="write a .csv file."/>
+ <param name="gff" type="boolean" truevalue="-g" falsevalue="" checked="false" label="write a .gff file."/>
  </inputs>
 
  <outputs>
- <data name="outputFile" format="png" label="[get distribution] output PNG file"/>
+ <data name="outputFile" format="png" label="[getDistribution] out png file"/>
+ <data name="outputCSV" format="csv" label="[getDistribution] output csv file">
+ <filter>csv</filter>
+ </data>
+
+ <data name="outputGFF" format="gff" label="[getDistribution] output gff file">
+ <filter>gff</filter>
+ </data>
  </outputs> 
 
     <help>
-Print a density profile of the data for each chromosome. You have to provide the reference genome, to know the sizes of the chromosomes. You can also provide the number of points (called *bins*) you want per chromosome.
-
-By default, only one curve is plotted per chromosome, but you can plot one curve per strand and per chromosome (the minus strand will be plotted with non-positive values on the *y*-axis).
-
-If you want, you can also plot a specific region, by mentionning the chromosome, the start and the end positions of the region.
+        This script gives a .tar output file; if you want to take a look at the results, you have to download it.
     </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getExons.xml
--- a/SMART/galaxy/getExons.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getExons.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="getExons" name="get exons">
     <description>Get the exons of a set of transcripts.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
     <command interpreter="python">
  ../Java/Python/getExons.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -70,7 +67,7 @@
     </inputs>
         
     <outputs>
-        <data format="gff3" name="outputFileGff" label="[get exons] output file"/>       
+        <data format="gff3" name="outputFileGff" label="[getExons -> gff3] Output File"/>       
     </outputs> 
 <tests>
     <test>
@@ -80,8 +77,4 @@
       <output name="outputFileGff" file="exp_getExons.gff3" />
     </test>
 </tests>
-
- <help>
-Provide all the exons of an annotation file.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getIntrons.xml
--- a/SMART/galaxy/getIntrons.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getIntrons.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="getIntrons" name="get introns">
     <description>Get the introns of a set of transcripts.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
     <command interpreter="python">
  ../Java/Python/getIntrons.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -54,7 +51,7 @@
     </inputs>
         
     <outputs>
-        <data format="gff3" name="outputFileGff" label="[get introns] output file"/>       
+        <data format="gff3" name="outputFileGff" label="[getIntrons -> gff3] Output File"/>       
     </outputs> 
 <tests>
     <test>
@@ -64,8 +61,4 @@
     </test>
   </tests>
 
- <help>
-Provide all the introns of an annotation file.
- </help>
-
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getNb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getNb.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,78 @@
+<tool id="getNumber" name="get number">
+ <description>Get the distribution of exons per transcript, mappings per read, or transcripts per cluster.</description>
+ <command interpreter="python">
+ ../Java/Python/getNb.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ -o $outputFilePNG
+ -q $query
+ $barPlot
+ #if $optionXMAX.XMAX == 'Yes':
+ -x $optionXMAX.xMaxValue
+ #end if
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="query" type="text" value="None" label="compulsory option, choice (exon, transcript, cluster)" />
+ <param name="barPlot" type="boolean" truevalue="-b" falsevalue="" checked="false" label="use barplot representation"/>
+
+ <conditional name="optionXMAX">
+ <param name="XMAX" type="select" label="maximum value on the x-axis to plot ">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="xMaxValue" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFilePNG" format="png" label="[getNB]out file"/>
+ </outputs> 
+
+</tool>
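In the new getNb.xml above, the compulsory query option is declared as a free-text parameter whose help restricts it to three values. A select parameter would enforce that choice in the interface; a possible sketch (this is an alternative, not what the changeset contains):

    <param name="query" type="select" label="query [compulsory option]">
        <option value="exon">exon</option>
        <option value="transcript">transcript</option>
        <option value="cluster">cluster</option>
    </param>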
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getReadDistribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getReadDistribution.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,67 @@
+<tool id="getReadDistribution" name="get read distribution">
+ <description>Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented.</description>
+ <command interpreter="python">
+ ../Java/Python/WrappGetReadDistribution.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'fasta':
+ -f fasta
+ #elif $formatType.FormatInputFileName == 'fastq':
+ -f fastq
+ #end if
+
+ #if $optionnumber.number == 'Yes':
+ -n $optionnumber.bestNumber
+ #end if
+ #if $optionpercent.percent == 'Yes':
+ -p $optionpercent.percentage
+ #end if
+ -o $outputFile
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Sequence input File Format ">
+ <option value="fasta">fasta</option>
+ <option value="fastq">fastq</option>
+ </param>
+ <when value="fasta">
+ <param name="inputFileName" format="fasta" type="data" label="Sequence input File"/>
+ </when>
+ <when value="fastq">
+ <param name="inputFileName" format="fastq" type="data" label="Sequence input File"/>
+ </when>
+ </conditional>
+
+ <conditional name="optionnumber">
+ <param name="number" type="select" label="keep the best n">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="bestNumber" type="integer" value="0"  />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionpercent">
+ <param name="percent" type="select" label="keep the best n percentage">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="percentage" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFile" format="tar" label="[getReadDistribution] tar out file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>
+ </outputs> 
+
+    <help>
+        This script gives a .tar output file; if you want to take a look at the results, you have to download it.
+    </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getSequence.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getSequence.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,21 @@
+<tool id="getSequence" name="get sequence">
+  <description>Get a single sequence in a FASTA file.</description>
+  <command interpreter="python"> ../Java/Python/getSequence.py -i $inputFile 
+ -n $name
+   -o $outputFile  
+  
+  </command>
+  
+  
+  <inputs>
+    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
+   <param name="name" type="text" value="None" label="name of the sequence [compulsory option]"/>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[getSequence] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getSizes.xml
--- a/SMART/galaxy/getSizes.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getSizes.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="GetSizes" name="get sizes">
  <description>Get the sizes of a set of genomic coordinates.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/getSizes.py -i $formatType.inputFileName $formatType.FormatInputFileName
 
@@ -24,6 +21,7 @@
          -b $OptionY.yLabValue
  #end if
  $barPlot
+ $excel $excelOutput
  </command>
 
  <inputs>
@@ -86,7 +84,7 @@
  </conditional>
 
  <conditional name="OptionXMax">
- <param name="xMax" type="select" label="maximum x-value to plot">
+ <param name="xMax" type="select" label="maximum value on the x-axis to plot [format: int]">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -121,16 +119,17 @@
  </when>
  </conditional>
 
+
+
  <param name="barPlot" type="boolean" truevalue="-B" falsevalue="" checked="false" label="use barplot representation"/>
+
+ <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
  </inputs>
 
  <outputs>
- <data name="outputFile" format="png" label="[get sizes] output file"/>
+ <data name="outputFile" format="png" label="[Get sizes] output file"/>
+ <data name="excelOutput" format="csv">
+ <filter>excel</filter>
+ </data>
  </outputs>
-
- <help>
-Get the sequence/annotation size distribution. A point (*x*, *y*) means that *y* elements have a size of *x* nucleotides. 
-
-When your mapping include exon/intron structures, you can decide to count the size of the introns, the sizes of the exons or the size of the first exons.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getWigData.xml
--- a/SMART/galaxy/getWigData.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getWigData.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,28 +1,17 @@
-<tool id="getWigData" name="get WIG data">
+<tool id="getWigData" name="get wig data">
     <description>Compute the average data for some genomic coordinates using WIG files</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
     <command interpreter="python">
  ../Java/Python/getWigData.py -i $inputGff3File -f gff3 -w $inputWigFile -t $tagName -$strand -o $outputFile
  </command>
 
     <inputs>
-     <param name="inputGff3File" type="data" label="Input Gff3 File" format="gff3"/>
-    <param name="inputWigFile" type="data" label="Input Wig File" format="wig"/>
- <param name="tagName" type="text" value="None" label="tag option" help="choose a tag name to write the wig information to output file."/>
+     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
+    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
+ <param name="tagName" type="text" value="None" label="tag option (compulsory option)" help="choose a tag name to write the wig information to output file."/>
  <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>    
     </inputs>
         
     <outputs>
-        <data format="gff3" name="outputFile" label="[get WIG data] output file"/>       
+        <data format="gff3" name="outputFile" label="[getWigData -> gff3] Output File"/>       
     </outputs> 
-
- <help>
-Reads a transcript list, computes the average value of some WIG data (please consult http://genome.ucsc.edu/goldenPath/help/wiggle.html to know more about this format) for each transcript and adds a tag corresponding to this average value to the transcript.
-
-The script finds all the data which correspond to the genomic coordinates of a transcript, average these data and store the result into a tag. Then, the transcripts are written in an output file, together with the tag.
-
-You can then plot your data using *plotTranscriptList.py*.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getWigDistance.xml
--- a/SMART/galaxy/getWigDistance.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getWigDistance.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,28 +1,17 @@
-<tool id="getWigDistance" name="get WIG distance">
+<tool id="getWigDistance" name="get wig distance">
     <description>Compute the average data around some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
     <command interpreter="python">
  ../Java/Python/getWigDistance.py -i $inputGff3File -f gff3 -w $inputWigFile -a 0.0 -d $distance $strand -o $outputFile
  </command>
 
     <inputs>
-     <param name="inputGff3File" type="data" label="Input Gff3 File" format="gff3"/>
-    <param name="inputWigFile" type="data" label="Input Wig File" format="wig"/>
- <param name="distance" type="integer" value="1000" label="Distance around positions."/>
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Consider both strands separately."/>    
+     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
+    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
+ <param name="distance" type="integer" value="1000" label="distance option (compulsory option)" help="Distance around position.Be Careful! The value must be upper than 0"/>
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>    
     </inputs>
         
     <outputs>
-        <data name="outputFile" format="png" label="[get WIG distance] PNG output file"/>    
+        <data name="outputFile" format="png" label="[getWigDistance] PNG output File"/>    
     </outputs> 
-
- <help>
-Plots the average data contained in a set of WIG files (please consult http://genome.ucsc.edu/goldenPath/help/wiggle.html to know more about this format) around the first nucleotides of a annotation file.
-
-The tool needs an transcript list, some WIG files, and a distance. For each transcript, it collects all the values around its first nucleotide, the radius being given by the distance. Then, it computes the average value for each position. A point (*x*, *y*) means that the average value in the WIG file for a nucleotide distant by *x* nucleotides from the first nucleotide of an input transcript is *y*.
-
-You can possibly use a log scale for the *y*-axis.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/getWigProfile.xml
--- a/SMART/galaxy/getWigProfile.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/getWigProfile.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
-<tool id="getWigProfile" name="get WIG profile">
+<tool id="getWigProfile" name="get wig profile">
  <description>Compute the average profile of some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/getWigProfile.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
@@ -67,12 +64,7 @@
  </inputs>
 
  <outputs>
- <data name="outputFilePNG" format="png" label="[get WIG profile] output file"/>
+ <data name="outputFilePNG" format="png" label="[getWigProfile]out file"/>
  </outputs> 
 
- <help>
-Computes the average distribution of the WIG data (please consult http://genome.ucsc.edu/goldenPath/help/wiggle.html to know more about this format) along the transcripts given in input, and possibly before and after the transcripts.
-
-The main inputs of the functions are a file containing a list of transcripts (or any sets of genomic interval) and a directory containing a set of WIG files (one file per chromosome, or one file per chromosome and per strand). The function then computes the WIG profile of each transcript. The user can also define a region around the transcripts that should also be plotted (in this case, the profile will include the WIG values which overlap with the transcript as well as the 5' and 3' regions). Since the transcript do not necessarily have the same sizes, all profiles will be extended or shrinked to fit in a size which is given by the user. If the resulting profile is a bit bumpy, the user can also smoothen the curve by using a linear smoothing function (the size of the smoothing window is given by the user). Finally, the user may want to plot the WIG data for the opposite strand too (if the strand specific WUG data are available).
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/mapperAnalyzer.xml
--- a/SMART/galaxy/mapperAnalyzer.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/mapperAnalyzer.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="mapperAnalyzer" name="mapper analyzer">
- <description>Read the output of an aligner, print statistics and possibly translate into GFF, BED or GBrowse formats. </description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+ <description>Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. </description>
  <command interpreter="python">
  ../Java/Python/mapperAnalyzer.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -92,6 +89,7 @@
  </when>
  </conditional>
 
+
  <conditional name="optionnumber">
  <param name="number" type="select" label="max. number of occurrences of a sequence">
  <option value="Yes">Yes</option>
@@ -182,24 +180,7 @@
  </inputs>
 
  <outputs>
- <data name="outputFileGFF" format="gff3" label="[mapper analyzer] output file"/>
+ <data name="outputFileGFF" format="gff3" label="[mapperAnalyzer] out file"/>
  </outputs> 
 
- <help>
-Maybe the first program you may use. It reads a set of mapping given by the tool you have used to map your data on the reference genome and translate it to a set of genomic coordinates. You also have the possibility to extract only those that you are interested in (few matches in the genome, few errors in the mapping, etc.). You can also select those reads which map less than a given of times in the genome. Moreover, you can output the data in various different formats, which you can use to visualize them *via* UCSC genome browser or GBrowse. Unmatched reads can be written in an other file, in case you would like to try to map them with another tool (may sometimes work!).
-
-You can filter your data according to:
-
-- number of errors in the mapping
-
-- number of occurrences of the mapping in the genome
-
-- size of the read mapped
-
-- number of gaps in the mapping
-
-The script needs an input file (your mapped reads) together with its format and the read sequences file together with its format (FASTA or FASTQ). If you want, you can also append the results of this script to another GFF3 file. This is useful when the GFF3 file is the result of the mapping using another tool.
-
-By default, any gap in the alignment to the reference sequence is treated like an exon. You can decide to remove this feature by merging short introns (actually, gaps).
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/mappingToCoordinates.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/mappingToCoordinates.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,57 @@
+<tool id="mappingToCoordinates" name="mapping to coordinates">
+ <description>Converts a mapping file (given by a mapping tool) to a GFF3 file.</description>
+ <command interpreter="python">
+ ../Java/Python/mappingToCoordinates.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'blast -8':
+ -f blast
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+ -o $outputFileGff 
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="sam">sam</option>
+ <option value="blast -8">blast</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="blast -8">
+ <param name="inputFileName" format="blast" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs> 
+
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/mergeSlidingWindowsClusters.xml
--- a/SMART/galaxy/mergeSlidingWindowsClusters.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/mergeSlidingWindowsClusters.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="mergeSlidingWindowsClusters" name="merge sliding windows clusters">
  <description>Merges two files containing the results of a sliding windows clustering.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/mergeSlidingWindowsClusters.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -102,9 +99,5 @@
  <outputs>
  <data name="outputFileGff" format="gff3"/>
  </outputs> 
-
- <help>
-Sliding windows are also useful to compare two (or more!) sets of data. This can be very valuable when you want to compare differential expression in two different conditions. When you have two different sliding windows sets, this function merges them into one, where each window contains the two pieces of information. You may want to plot the data afterwards using the *plot transcript list* function.
- </help>
 
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/mergeTranscriptLists.xml
--- a/SMART/galaxy/mergeTranscriptLists.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/mergeTranscriptLists.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="mergeTranscriptLists" name="merge transcript lists">
  <description>Merge the elements of two lists of genomic coordinates.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/mergeTranscriptLists.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -48,6 +45,9 @@
  #end if
 
  -o $outputFileGff 
+
+
+
  </command>
 
  <inputs>
@@ -142,12 +142,7 @@
  </inputs>
 
  <outputs>
- <data name="outputFileGff" format="gff3" label="[merge transcript lists] output file"/>
+ <data name="outputFileGff" format="gff3" label="[mergeTranscriptLists]out file"/>
  </outputs> 
 
- <help>
-The script is similar to *compare overlapping*, except that when data of two different sets overlap, they are merged. You can use the same parameters as *compare overlapping* and use them to look for transcription on both strands, for example.
-
-Optionally, you can also add to the output all the elements from the first set which do not overlap with the second set.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/modifyFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/modifyFasta.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,49 @@
+<tool id="modifyFasta" name="modify fasta">
+  <description>Extend or shrink a list of sequences.</description>
+  <command interpreter="python"> ../Java/Python/modifyFasta.py -i $inputFile 
+   #if $OptionStart.start == "Yes":
+ -s $OptionStart.startValue
+   #end if
+  
+   #if $OptionEnd.end == "Yes":
+ -e $OptionEnd.endValue
+   #end if
+   -o $outputFile  
+  
+  </command>
+  
+  
+  <inputs>
+    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
+ <conditional name="OptionStart">
+ <param name="start" type="select" label="keep first nucleotides">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="startValue" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionEnd">
+ <param name="end" type="select" label="keep last nucleotides">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="endValue" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>   
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[modifyFasta] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/modifyGenomicCoordinates.xml
--- a/SMART/galaxy/modifyGenomicCoordinates.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/modifyGenomicCoordinates.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="modifyGenomicCoordinates" name="modify genomic coordinates">
   <description>Extend or shrink a list of genomic coordinates.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
   <command interpreter="python"> ../Java/Python/modifyGenomicCoordinates.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
  -f bed
@@ -69,7 +66,7 @@
  </conditional>
   
  <conditional name="OptionStart">
- <param name="start" type="select" label="shrink to the start of the feature">
+ <param name="start" type="select" label="restrict to the start of the transcript">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -81,7 +78,7 @@
  </conditional>
 
  <conditional name="OptionEnd">
- <param name="end" type="select" label="shrink to the end of the feature">
+ <param name="end" type="select" label="restrict to the end of the transcript">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -121,18 +118,9 @@
   </inputs>
 
   <outputs>
-    <data format="gff3" name="outputFile" label="[modify genomic coordinates] output file"/>
+    <data format="gff3" name="outputFile" label="[modifyGenomicCoordinates] Output File"/>
   </outputs>
 
   <help>
-This tool reads a list of transcripts and modifies each feature by:
-
-- shrinking it to the *n* first nucleotides or the *n* last nucleotides, or
-
-- extending it to *n* nucleotides towards the 5' direction (upstream) or the 3' direction (downstream).
-
-Note that the 5' or 3' direction depends on the orientation of the feature (the 5' end of a transcript located on the minus strand is on the right hand of this transcript!).
-
-The tool needs a transcript file, its format, and outputs a new transcript file.
   </help>
 </tool>
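The help text removed above explained that the 5' and 3' directions follow the orientation of each feature. As a concrete illustration of that rule: extending a minus-strand feature spanning positions 1000..2000 by 100 nucleotides towards its 5' end (upstream) yields 1000..2100, whereas the same operation on a plus-strand feature yields 900..2000.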
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/modifySequenceList.xml
--- a/SMART/galaxy/modifySequenceList.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/modifySequenceList.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="modifySequenceList" name="modify sequence list">
  <description>Extend or shrink a list of sequences.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
   <command interpreter="python"> ../Java/Python/modifySequenceList.py -i $inputFile -f fasta
  #if $OptionStart.Start == "Yes":
  -s $OptionStart.StartVal
@@ -43,10 +40,7 @@
   </inputs>
 
   <outputs>
-     <data format="fasta" name="outputFile" label="[modify sequence list] output file"/>
+     <data format="fasta" name="outputFile" label="[modifySequenceList] Output File"/>
   </outputs>
 
-  <help>
-  This tool reads a list of sequences (in multi-FASTA/Q format) that you provide and shrinks each sequence to the *n* first nucleotides or the *n* last nucleotides.
-  </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/plot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plot.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,71 @@
+<tool id="plot" name="Plot">
+ <description>Plot some information from a list of transcripts.</description>
+ <command interpreter="python">
+ ../Java/Python/plot.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+ -x $xLabel
+
+                -y $yLabel
+
+         -X $XVal
+                -Y $YVal
+
+         #if $optionLog.log == 'Yes' :
+     -l $optionLog.logOnAxisLabel
+                #end if
+                
+                -s $shape
+ -o $outputFile
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="xLabel" type="text" value="value1" label="x label option" help="Choose one of the tags of 9th column in GFF file to be plotted as X-axis. Warning: You can only choose the tag value is digital."/>
+                <param name="yLabel" type="text" value="value2" label="y label option" help="Choose one of the tags of 9th column in GFF file to be plotted as Y-axis. You can only choose the tag value is digital."/>
+                <param name="XVal" type="float" value="0.0" label="value for x when tag is not present "/>
+
+ <param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>
+
+
+                <conditional name="optionLog">
+ <param name="log" type="select" label="calculate log option" help="use log on x- or y-axis (write 'x', 'y' or 'xy')">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="logOnAxisLabel" type="text" value="y" label="use log on x- or y-axis (write 'x', 'y' or 'xy')"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+                <param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFile" format="png" label="[plot] Output file"/>
+ </outputs>
+</tool>
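The x and y label options of the new plot.xml above name tags stored in the ninth (attribute) column of the input GFF file, and only tags with numeric values can be plotted. For instance, with hypothetical records such as

    chr1    S-MART    transcript    1000    2000    .    +    .    ID=t1;nbReads=42;meanSize=27.5

setting the x label option to nbReads and the y label option to meanSize plots nbReads against meanSize for every feature.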
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/plotCoverage.xml
--- a/SMART/galaxy/plotCoverage.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/plotCoverage.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="plotCoverage" name="plot coverage">
  <description>Plot the coverage of the first data with respect to the second one.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/WrappPlotCoverage.py -i $formatType.inputFileName1
  #if $formatType.FormatInputFileName1 == 'bed':
@@ -109,8 +106,8 @@
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
  <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
+ <option value="gff2">sam</option>
+ <option value="gff3">gtf</option>
  </param>
  <when value="bed">
  <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
@@ -172,7 +169,7 @@
  </conditional>
 
  <conditional name="optiontitle">
- <param name="title" type="select" label="title for the figure">
+ <param name="title" type="select" label="title of the plots ">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -259,16 +256,10 @@
  </inputs>
 
  <outputs>
- <data name="outputFile" format="tar" label="[plot coverage] tar output file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>
+ <data name="outputFile" format="tar" label="[plotCoverage] tar out file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>
  </outputs> 
 
     <help>
-Plot the coverage of the first set of genomic coordinates with respect to the second set of genomic coordinates. For each element of the second set (we will suppose that they are annotated genes), it computes the number of  elements of the first set (reads, for instance) which overlap it.
-
-Alternatively, if the first file is in GFF format, and contains the **Target** file, you can omit the second file. However, a fasta file corresponding to the second file should be given (to compute the size of the reference elements).
-
-The tool produces two plots per gene. The first plot gives the coverage: a point (*x*, *y*) means that *y* reads cover the *x* th nucleotide of the gene. The second figure displays the (possibly spliced) gene in black, and the overlapping reads (blue is colinear, red is anti-sense).
-
-This script gives a .tar out file, if you want to take look at the results, you have to download it.
+        This script gives a .tar output file; if you want to take a look at the results, you have to download it.
     </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/plotGenomeCoverage.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plotGenomeCoverage.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,62 @@
+<tool id="plotGenomeCoverage" name="plot genome coverage">
+  <description>Get the coverage of a genome. </description>
+  <command interpreter="python"> ../Java/Python/plotGenomeCoverage.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+
+  -r $reference
+   -o $outputFile  
+  </command>
+  
+  
+  <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="reference" type="data" label="reference Fasta File" format="fasta"/> 
+  </inputs>
+
+  <outputs>
+    <data format="png" name="outputFile" label="[plotGenomeCoverage] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/plotRepartition.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plotRepartition.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,59 @@
+<tool id="plotRepartition" name="plot repartition">
+ <description>Plot the repartition of different data over a whole genome. (This tool uses only one input file; the different values are stored in the tags.)</description>
+ <command interpreter="python">
+ ../Java/Python/WrappPlotRepartition.py -i $inputFileName
+ -n $names
+ $normalize
+ #if $optionColor.Color == 'Yes':
+ -c $optionColor.colValue
+ #end if
+ -f $format
+
+ #if $optionLog.log == 'Yes':
+ -l $optionLog.logVal
+ #end if
+
+ -o $outputFilePNG
+ </command>
+
+ <inputs>
+ <param name="inputFileName" type="data" label="Input Gff3 File" format="gff3"/>
+ <param name="names" type="text" value="None" label="name for the tags (separated by commas and no space) [compulsory option]"/>
+ <param name="normalize" type="boolean" truevalue="-r" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
+ <param name="format" type="text" value="png" label="format of the output file[default: png]"/>
+
+ <conditional name="optionColor">
+ <param name="Color" type="select" label="scolor of the lines (separated by commas and no space) ">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="colValue" type="text" value="None"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionLog">
+ <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="logVal" type="text" value=" "/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFilePNG" format="tar" label="[plotRepartition]out file"/>
+ </outputs> 
+
+ <help>
+        This script gives a .tar output file; if you want to take a look at the results, you have to download it.
+    </help>
+
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/plotTranscriptList.xml
--- a/SMART/galaxy/plotTranscriptList.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/plotTranscriptList.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,16 +1,17 @@
 <tool id="plotTranscriptList" name="plot transcript list">
  <description>Plot some information from a list of transcripts. </description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/plotTranscriptList.py  -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'gff':
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
  -f gff
  #elif $formatType.FormatInputFileName == 'gff2':
  -f gff2
  #elif $formatType.FormatInputFileName == 'gff3':
  -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
  #elif $formatType.FormatInputFileName == 'gtf':
  -f gtf
  #end if
@@ -32,7 +33,10 @@
  -m $optionyLab.labVal
  #end if
 
- $log
+ #if $optionyLog.log == 'Yes':
+ -l $optionyLog.logVal
+ #end if
+
  -s $shape
  -b $bucket
 
@@ -42,11 +46,16 @@
  <inputs>
  <conditional name="formatType">
  <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
  <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
  <option value="gtf">gtf</option>
  </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
  <when value="gff">
  <param name="inputFileName" format="gff" type="data" label="Input File"/>
  </when>
@@ -56,13 +65,16 @@
  <when value="gff3">
  <param name="inputFileName" format="gff3" type="data" label="Input File"/>
  </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
  <when value="gtf">
  <param name="inputFileName" format="gtf" type="data" label="Input File"/>
  </when>
  </conditional>
 
- <param name="xVal" type="text" value="None" label="tag for the x value"/>
- <param name="yVal" type="text" value="None" label="tag for the y value"/>
+ <param name="xVal" type="text" value="None" label="tag for the x value [compulsory option]"/>
+ <param name="yVal" type="text" value="None" label="tag for the y value [compulsory option]"/>
 
  <conditional name="optionz">
  <param name="z" type="select" label="tag for the z value ">
@@ -80,7 +92,7 @@
 
  <param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>
 
- <param name="ZVal" type="float" value="0.0" label="value for z when tag is not present (if applicable)"/>
+ <param name="ZVal" type="float" value="0.0" label="value for z when tag is not present"/>
 
  <conditional name="optionxLab">
  <param name="xLab" type="select" label="label on the x-axis ">
@@ -105,12 +117,17 @@
  </when>
  </conditional>
 
- <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
- <option value="" selected="true">No</option>
- <option value="-l x">log on the x-axis</option>
- <option value="-l y">log on the y-axis</option>
- <option value="-l xy">log on the x- and y-axis</option>
- </param>
+ <conditional name="optionyLog">
+ <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="logVal" type="text" value=" "/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
 
  <param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
  <param name="bucket" type="float" value="1.0" label="bucket size (for the line plot)"/>
@@ -118,18 +135,7 @@
  </inputs>
 
  <outputs>
- <data name="outputFilePNG" format="png" label="[plot transcript list] output file"/>
+ <data name="outputFilePNG" format="png" label="[plotTranscriptList]out file"/>
  </outputs> 
 
- <help>
-Plot the data attached as tags in a transcript list. This can be used for displaying the comparison of different sets of sliding windows, for instance.
-  
-The tool reads the tags of a transcript file (actually, a GFF3 file). It considers more specifically the tag names that you specify as parameter. If you use only one tag name, you can display a line plot. In this case, you have to specify a bucket size *s* (which is by defaut 1) and a point (*x*, *y*) tells you that there are *y* transcripts with tag values *x* to *x + s*.
-
-You can display could plots if you use two tag names. Each point represents the values of the two tags of a transcript. If you use three variables, the third variable will be the color of the point. You can also use a log scale and name the axes of the plot.
-
-Each transcript must contain the tags which are specified. If not, you should provide a default value, which is used when the tag is not present.
-
-If you use a cloud plot, you can compute the Spearman's rho to quantify a correlation between your two tag values.
- </help>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/qualToFastq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/qualToFastq.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+<tool id="qualToFastq" name="qual -> Fastq">
+  <description>Convert a file in FASTA/Qual format to FastQ format.</description>
+  <command interpreter="python"> ../Java/Python/qualToFastq.py -f $inputFastaFile -q $inputQualFile -o $outputFile </command>
+  <inputs>
+    <param name="inputFastaFile" type="data" label="Input fasta File" format="fasta"/>
+    <param name="inputQualFile" type="data" label="Input qual File" format="txt"/>
+  </inputs>
+
+  <outputs>
+    <data format="fastq" name="outputFile" label="[qual -> Fastq] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/removeExonLines.xml
--- a/SMART/galaxy/removeExonLines.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/removeExonLines.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,15 +1,12 @@
 <tool id="removeExonLines" name="remove exon lines">
   <description>Removes the lines containing Exon.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
   <command interpreter="sh"> ../Java/Python/removeExonLines.sh $inputFile > $outputFile  </command>
   <inputs>
     <param name="inputFile" type="data" label="Input File" format="gff3"/>
   </inputs>
 
   <outputs>
-    <data format="gff3" name="outputFile" label="[remove exon line] output file"/>
+    <data format="gff3" name="outputFile" label="[removeExonLine] Output File"/>
   </outputs>
 
   <help>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/restrictFromSize.xml
--- a/SMART/galaxy/restrictFromSize.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/restrictFromSize.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,13 +1,8 @@
 <tool id="restrictFromSize" name="restrict from size">
  <description>Select the elements of a list of sequences or transcripts with a given size.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
  <command interpreter="python">
  ../Java/Python/restrictFromSize.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'fasta':
- -f fasta
- #elif $formatType.FormatInputFileName == 'bed':
+ #if $formatType.FormatInputFileName == 'bed':
  -f bed
  #elif $formatType.FormatInputFileName == 'gff':
  -f gff
@@ -34,7 +29,6 @@
  <inputs>
  <conditional name="formatType">
  <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="fasta">fasta</option>
  <option value="bed">bed</option>
  <option value="gff">gff</option>
  <option value="gff2">gff2</option>
@@ -42,9 +36,6 @@
  <option value="sam">sam</option>
  <option value="gtf">gtf</option>
  </param>
- <when value="fasta">
- <param name="inputFileName" format="fasta" type="data" label="Input File"/>
- </when>
  <when value="bed">
  <param name="inputFileName" format="bed" type="data" label="Input File"/>
  </when>
@@ -92,11 +83,11 @@
  </inputs>
 
  <outputs>
- <data name="outputFileGff" format="gff3" label="[restrict from size] output file"/>
+ <data name="outputFileGff" format="gff3" label="[restrictFromSize] Output File"/>
  </outputs> 
 
  <help>
-Reads a list of sequences or genomic coordinates and outputs those which are longer and / or shorter than a given size ---which you provide.
+ command example: restrictFromSize.py -i cis_e10_cluster20InSeed2515_nbEUp10.gff3 -f gff -o cis_e10_cluster20InSeed2515_nbEUp10_lgUp50 -m 50
  </help>
 
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/restrictSequenceList.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/restrictSequenceList.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,16 @@
+<tool id="restrictSequenceList" name="restrict sequence list">
+  <description>Keep the elements of a list of sequences whose name is mentioned in a given file.</description>
+  <command interpreter="python"> ../Java/Python/restrictSequenceList.py -i $inputFile -f fasta -n $name -o $outputFile </command>
+  
+  <inputs>
+ <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
+ <param name="name" type="data" label="The txt file contains the names of the transcripts." format="txt"/> 
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[restrictSequenceList] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/restrictTranscriptList.xml
--- a/SMART/galaxy/restrictTranscriptList.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/restrictTranscriptList.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="restrictTranscriptList" name="restrict transcript list">
-  <description>Select the features which are located in a given locus.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+  <description>Keep the coordinates which are located within a given region.</description>
   <command interpreter="python"> ../Java/Python/restrictTranscriptList.py -i $formatType.inputFileName
  #if $formatType.FormatInputFileName == 'bed':
  -f bed
@@ -78,7 +75,7 @@
  </conditional>
   
  <conditional name="OptionStart">
- <param name="start" type="select" label="start region of the locus">
+ <param name="start" type="select" label="restrict to the start of the transcript">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -90,7 +87,7 @@
  </conditional>
 
  <conditional name="OptionEnd">
- <param name="end" type="select" label="end region of the locus">
+ <param name="end" type="select" label="restrict to the end of the transcript">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -103,11 +100,10 @@
   </inputs>
 
   <outputs>
-    <data format="gff3" name="outputFile" label="[restrict transcript list] output file"/>
+    <data format="gff3" name="outputFile" label="[restrictTranscriptList] Output File"/>
   </outputs>
 
   <help>
-Reads a list of genomic coordinates and outputs those which on a given chromosome and / or between two given positions.
   </help>
 <tests>
     <test>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/test/CollapseReads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/test/CollapseReads.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,49 @@
+<tool id="collapseReads" name="collapseReads">
+ <description>Merges two reads if they have exactly the same genomic coordinates.</description>
+ <command interpreter="python">
+ ../Java/Python/CollapseReads.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+ -$strand
+ -o $outputFileGff 
+ --galaxy
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs> 
+
+</tool>
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,91 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from SMART.galaxy.WrappGetLetterDistribution import WrappGetLetterDistribution
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+SMART_DATA = SMART_PATH + "/data"
+
+class Test_F_WrappGetLetterDistribution(unittest.TestCase):
+
+
+    def setUp(self):
+        self._dirTest = "%s/galaxy/test" % SMART_PATH
+        self._iwrappFastq = WrappGetLetterDistribution()
+        self._iwrappFasta = WrappGetLetterDistribution()
+        self._expOutputCSV = "expOutputTomate.csv" 
+       
+    def test_wrappFasta(self):
+        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
+        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
+        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
+        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
+        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
+        self._iwrappFasta._inputFileFormat = "fasta"
+        self._iwrappFasta._csv = True
+        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
+            self._iwrappFasta.wrapp()
+            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
+            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG))    
+            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNameCSV))
+            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFasta._outputFileNameCSV,self._expOutputCSV))
+        else:
+            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
+         
+            
+#    def test_wrappFasta_withoutCSV_Opt(self):
+#        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
+#        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
+#        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
+#        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
+#        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
+#        self._iwrappFasta._inputFileFormat = "fasta"
+#        self._iwrappFasta._csv = False
+#        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
+#            self._iwrappFasta.wrapp()
+#            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
+#            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG)) 
+#        else:            
+#            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
+#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
+#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH) 
+#        
+#                
+#    def test_wrappFastq(self):
+#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
+#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
+#        self._iwrappFastq._inputFileFormat = "fastq"
+#        self._iwrappFastq._csv = True
+#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
+#            self._iwrappFastq.wrapp()
+#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
+#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG))    
+#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNameCSV))
+#            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFastq._outputFileNameCSV,self._expOutputCSV))
+#        else:
+#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq   
+#          
+#        
+#    def test_wrappFastq_withoutCSV_Opt(self):
+#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
+#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
+#        self._iwrappFastq._inputFileFormat = "fastq"
+#        self._iwrappFastq._csv = False
+#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
+#            self._iwrappFastq.wrapp()
+#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
+#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG)) 
+#        else:            
+#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq
+#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
+#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH)
+       
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/testArgum.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/testArgum.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,24 @@
+<tool id="test_argument" name="test_argu" version="1.0.0">
+  <description>To test the arguments from shell.</description>
+  <command> 
+../testArgu.sh $test_out 
+#for $i in $replicate_groups
+#for $j in $i.replicates
+$j.bam_alignment:#slurp
+#end for
+#end for
+    >> $Log_File </command>
+  <inputs>
+ <param format="gff3" name="anno_input_selected" type="data" label="Genome annotation in GFF3 file" help="A tab delimited format for storing sequence features and annotations"/>
+   <repeat name="replicate_groups" title="Replicate group" min="2">
+     <repeat name="replicates" title="Replicate">
+      <param format="fastq" name="bam_alignment" type="data" label="BAM alignment file" help="BAM alignment file. Can be generated from SAM files using the SAM Tools."/>
+     </repeat>
+   </repeat>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="test_out" label="DESeq result"/>
+ <data format="txt" name="Log_File" label="DESeq result"/>
+  </outputs>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/testR.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/testR.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,19 @@
+<tool id="testDiffExpAnal" name="Differential Expression Analysis">
+  <description>Differential expression analysis for sequence count data (DESeq)</description>
+  <command interpreter="sh"> ../DiffExpAnal/testR.sh $inputFile $columnsOfGeneName $columnsOfCondition1 $columnsOfCondition2 $outputFileCSV $outputFilePNG 2>$outputLog </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+ <param name="columnsOfGeneName" type="text" value="0" label="Please indicate the column numbers of gene names with ',' separator. If There are not gene names, default value is 0."/>
+ <param name="columnsOfCondition1" type="text" value="1,2" label="Please indicate the column numbers of condition1 with ',' separator."/>
+ <param name="columnsOfCondition2" type="text" value="3,4" label="Please indicate the column numbers of condition2 with ',' separator."/>
+  </inputs>
+
+  <outputs>
+    <data format="tabular" name="outputFileCSV" label="[DiffExpAnal] Output CSV File"/>
+ <data format="png" name="outputFilePNG" label="[DiffExpAnal] Output PNG File"/>
+    <data format="tabular" name="outputLog" label="[DiffExpAnal] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/tool_conf.xml
--- a/SMART/galaxy/tool_conf.xml Mon Apr 29 03:45:52 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-  <section id="s_mart" name="S-MART" version="">
-    <label id="Smart_Comparison" text="Comparison Tools" version=""/>
-      <tool file="s_mart/CompareOverlappingSmallQuery.xml"/>
-      <tool file="s_mart/CompareOverlappingSmallRef.xml"/>
-      <tool file="s_mart/compareOverlapping.xml"/>
-      <tool file="s_mart/getDifference.xml"/>
-      <tool file="s_mart/computeCoverage.xml"/>
-      <tool file="s_mart/GetFlanking.xml"/>
-      <tool file="s_mart/GetDifferentialExpression.xml"/>
-    <label id="Smart_Merge" text="Merge Tools" version=""/>
-      <tool file="s_mart/clusterize.xml"/>
-      <tool file="s_mart/mergeTranscriptLists.xml"/>
-      <tool file="s_mart/CollapseReads.xml"/>
-      <tool file="s_mart/clusterizeBySlidingWindows.xml"/>
-      <tool file="s_mart/mergeSlidingWindowsClusters.xml"/>
-    <label id="Smart_Visualization" text="Visualization Tools" version=""/>
-      <tool file="s_mart/getDistribution.xml"/>
-      <tool file="s_mart/getDistance.xml"/>
-      <tool file="s_mart/getSizes.xml"/>
-      <tool file="s_mart/plotCoverage.xml"/>
-      <tool file="s_mart/WrappGetLetterDistribution1.xml"/>
-      <tool file="s_mart/plotTranscriptList.xml"/>
-    <label id="Smart_Sequence" text="Sequence Tools" version=""/>
-      <tool file="s_mart/CountReadGCPercent.xml"/>
-    <label id="Smart_Modification" text="Modification Tools" version=""/>
-      <tool file="s_mart/modifyGenomicCoordinates.xml"/>
-      <tool file="s_mart/modifySequenceList.xml"/>
-      <tool file="s_mart/trimSequences.xml"/>
-    <label id="Smart_Selection" text="Selection Tools" version=""/>
-      <tool file="s_mart/getExons.xml"/>
-      <tool file="s_mart/getIntrons.xml"/>
-      <tool file="s_mart/restrictFromSize.xml"/>
-      <tool file="s_mart/restrictTranscriptList.xml"/>
-    <label id="Smart_Conversion" text="Conversion Tools" version=""/>
-      <tool file="s_mart/ConvertTranscriptFile.xml"/>
-      <tool file="s_mart/coordinatesToSequence.xml"/>
-      <tool file="s_mart/mapperAnalyzer.xml"/>
-    <label id="Smart_WIG" text="WIG Manipulation Tools" version=""/>
-      <tool file="s_mart/getWigData.xml"/>
-      <tool file="s_mart/getWigDistance.xml"/>
-      <tool file="s_mart/getWigProfile.xml"/>
-    <label id="Smart_GFF" text="GFF Manipulation Tools" version=""/>
-      <tool file="s_mart/CleanTranscriptFile.xml"/>
-      <tool file="s_mart/changeTagName.xml"/>
-      <tool file="s_mart/changeGffFeatures.xml"/>
-      <tool file="s_mart/removeExonLines.xml"/>
-      <tool file="s_mart/SelectByTag.xml"/>
-  </section>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/tool_dependencies.xml
--- a/SMART/galaxy/tool_dependencies.xml Mon Apr 29 03:45:52 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <set_environment version="1.0">
-        <environment_variable name="PYTHONPATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
-    </set_environment>
-</tool_dependency>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/trimAdaptor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/trimAdaptor.xml Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,47 @@
+<tool id="trimAdaptor" name="trim adaptors">
+  <description>Remove the 3' adaptor of a list of reads.</description>
+  <command interpreter="python"> ../Java/Python/trimAdaptor.py -i $inputFile -f fastq
+   -a $adaptor
+   #if $OptionError.Error == "Yes":
+ -e $OptionError.ErrorVal
+ #end if
+   $noAdaptor $noAdaptorFile
+   -o $outputFile  
+  </command>
+  
+  
+  <inputs>
+    <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>
+ <param name="adaptor" type="text" value="None" label="adaptor [compulsory option]"/> 
+ <conditional name="OptionError">
+ <param name="Error" type="select" label="number of errors in percent">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="ErrorVal" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <param name="noAdaptor" type="boolean" truevalue="-n" falsevalue="" checked="false" label="log option" help="file name where to print sequences with no adaptor"/>
+  </inputs>
+
+  <outputs>
+    <data format="fastq" name="outputFile" label="[trimAdaptor] Output File"/>
+ <data name="noAdaptorFile" format="fastq" label="[trimAdaptor] Log File">
+ <filter>noAdaptor</filter>
+ </data>
+  </outputs>
+  <tests>
+    <test>
+      <param name="inputFile" value="short_fastq.fastq" />
+      <param name="adaptor" value="AAAA" />
+      <param name ="Error" value="No"/>
+      <param name ="noAdaptor" value="False"/>
+      <output name="outputFile" file="exp_trimadaptator_short_fastq.fastq" />     
+    </test>
+  </tests>
+  <help>
+  </help>
+</tool>
diff -r 5677346472b5 -r 0ab839023fe4 SMART/galaxy/trimSequences.xml
--- a/SMART/galaxy/trimSequences.xml Mon Apr 29 03:45:52 2013 -0400
+++ b/SMART/galaxy/trimSequences.xml Tue Apr 30 14:33:21 2013 -0400
@@ -1,8 +1,5 @@
 <tool id="trimSequences" name="trim sequences">
-  <description>Remove the 5' and/or 3' adapters of a list of reads.</description>
- <requirements>
- <requirement type="set_environment">PYTHONPATH</requirement>
- </requirements>
+  <description>Remove the 5' and/or 3' adaptors of a list of reads.</description>
   <command interpreter="python"> ../Java/Python/trimSequences.py -i $inputFile -f fastq
    #if $OptionFPADP.FPADP == "Yes":
  -5 $OptionFPADP.fivePAdaptor
@@ -10,7 +7,10 @@
   #if $OptionTPADP.TPADP == "Yes":
  -3 $OptionTPADP.threePAdaptor
  #end if
- -e $errors
+   #if $OptionError.Error == "Yes":
+ -e $OptionError.ErrorVal
+ #end if
+
  $indels
    $noAdaptor5p $noAdaptorFile5p
    $noAdaptor3p $noAdaptorFile3p
@@ -23,7 +23,7 @@
     <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>
 
  <conditional name="OptionFPADP">
- <param name="FPADP" type="select" label="5' adapter">
+ <param name="FPADP" type="select" label="5'adaptor">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -35,7 +35,7 @@
  </conditional>
 
  <conditional name="OptionTPADP">
- <param name="TPADP" type="select" label="3' adapter">
+ <param name="TPADP" type="select" label="3'adaptor">
  <option value="Yes">Yes</option>
  <option value="No" selected="true">No</option>
  </param>
@@ -46,37 +46,49 @@
  </when>
  </conditional>
 
- <param name="errors" type="integer" label="number of errors in percent" value="0" />
+ <conditional name="OptionError">
+ <param name="Error" type="select" label="number of errors in percent">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="ErrorVal" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
  <param name="indels" type="boolean" truevalue="-d" falsevalue="" checked="false" label="indels option" help="also accept indels"/>
- <param name="noAdaptor5p" type="boolean" truevalue="-n" falsevalue="" checked="false" label="noAdaptor 5' option" help="file name where to print sequences with no 5' adapter "/>
- <param name="noAdaptor3p" type="boolean" truevalue="-m" falsevalue="" checked="false" label="noAdaptor 3' option" help="file name where to print sequences with no 3' adapter "/>
+ <param name="noAdaptor5p" type="boolean" truevalue="-n" falsevalue="" checked="false" label="noAdaptor 5' option" help="file name where to print sequences with no 5' adaptor "/>
+ <param name="noAdaptor3p" type="boolean" truevalue="-m" falsevalue="" checked="false" label="noAdaptor 3' option" help="file name where to print sequences with no 3' adaptor "/>
+
+
 
   </inputs>
 
   <outputs>
-    <data format="fastq" name="outputFile" label="[trim sequences] output file"/>
- <data name="noAdaptorFile5p" format="fastq" label="[trim sequences] noAdapter5p file">
+    <data format="fastq" name="outputFile" label="[trimSequences] Output File"/>
+ <data name="noAdaptorFile5p" format="fastq" label="[trimSequences] noAdaptor5p File">
  <filter>noAdaptor5p</filter>
  </data>
- <data name="noAdaptorFile3p" format="fastq" label="[trim sequences] noAdapter3p file">
+ <data name="noAdaptorFile3p" format="fastq" label="[trimSequences] noAdaptor3p File">
  <filter>noAdaptor3p</filter>
  </data>
   </outputs>
 
   <help>
-This function removes the adaptor from the 5' or 3' end of your reads. It can even recognize the adaptators which are partially present. You can specify whether you are ready to accept indels or not.
   </help>
   <tests>
  <test>
   <param name="inputFile" value="short_fastq.fastq" />
   <param name="FPADP" value="Yes"/>
- <param name="fivePAdaptor" value="AAAA" />
+       <param name="fivePAdaptor" value="AAAA" />
  <param name="TPADP" value="No"/>
- <param name="errors" value="1"/>
+       <param name ="Error" value="No"/>
  <param name="indels" value="False"/>
- <param name="noAdaptor5p" value="False"/>
+       <param name ="noAdaptor5p" value="False"/>
  <param name= "noAdaptor3p" value="False"/>
- <output name="outputFile" file="exp_trimsequences_short_fastq.fastq" />
+       <output name="outputFile" file="exp_trimsequences_short_fastq.fastq" />
  </test>
   </tests>
 </tool>
diff -r 5677346472b5 -r 0ab839023fe4 commons/__init__.pyc
Binary file commons/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/LoggerFactory.pyc
Binary file commons/core/LoggerFactory.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/__init__.pyc
Binary file commons/core/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/AbstractChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/AbstractChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,61 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.checker.IChecker import IChecker
+from commons.core.LoggerFactory import LoggerFactory
+
+
+## Enable a Logger in your Checker.
+#
+#  Subclasses of AbstractChecker already have a logger enabled (referenced by the self._log attribute). Subclasses also already implement IChecker.
+#  All you have to do is to call __init__() method in your own constructor.
+class AbstractChecker( IChecker ):
+    
+    ## Constructor 
+    #
+    # @param logFileName name of log file where logger outputs
+    #
+    def __init__(self, logFileName):
+        self._log = LoggerFactory.createLogger(logFileName)
+        
+        
+    ## Set (change) default logger
+    #
+    # @param logger a new logger
+    # 
+    def setLogger(self, logger):
+        self._log = logger
+        
+        
+    ## Return the logger instance
+    #
+    def getLogger(self):
+        return self._log
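
The doc comment above describes the intended usage: subclass AbstractChecker, call its __init__() from your own constructor, and use the logger stored in self._log. A minimal sketch of such a subclass follows; the class name, log file name and check logic are hypothetical and not part of this changeset.

import os
from commons.core.checker.AbstractChecker import AbstractChecker
from commons.core.checker.CheckerException import CheckerException

class FastaFileChecker(AbstractChecker):
    # Hypothetical checker: verifies that an input file exists.

    def __init__(self):
        # AbstractChecker.__init__ creates the logger available as self._log
        AbstractChecker.__init__(self, "fastaFileChecker.log")

    def check(self, arg=""):
        # 'arg' is the collecting parameter defined by IChecker.check()
        if not os.path.exists(arg):
            self._log.error("input file '%s' not found" % arg)
            raise CheckerException("input file '%s' not found" % arg)
        self._log.info("input file '%s' found" % arg)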
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/CheckerException.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/CheckerException.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,52 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Exception raised during check 
+#
+# This class wraps Exception class
+#
+class CheckerException( Exception ):
+    
+    ## Constructor
+    #
+    # @param msg  message embedded in Exception class   
+    def __init__(self,msg=""):
+        self.messages = []
+        self.msg = msg
+        Exception.__init__(self, msg)
+        
+        
+    def setMessages(self,lMessages):
+        self.messages = lMessages
+        
+        
+    def getMessages(self):
+        return self.messages
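
Beyond the single message passed to Exception, the class can carry a list of messages via setMessages()/getMessages(). A small hedged example of that pattern; the validation logic and names below are illustrative only.

from commons.core.checker.CheckerException import CheckerException

def checkNames(lNames):
    # Illustrative check: reject names containing spaces
    lBadNames = [name for name in lNames if " " in name]
    if lBadNames:
        exception = CheckerException("some names are invalid")
        exception.setMessages(lBadNames)
        raise exception

try:
    checkNames(["chr1_contig", "bad name"])
except CheckerException, e:
    for msg in e.getMessages():
        print msg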
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/CheckerUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/CheckerUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,316 @@
[new file commons/core/checker/CheckerUtils.py (316 lines): a set of static check helpers, including isBlastNameNotInBlastValues, isNotTRUEisNotFALSE, isRessourceNotExits, isNotAeValueWithOneDigit2DecimalsAtLeast, isNotANumber, checkSectionInConfigFile, checkOptionInSectionInConfigFile, checkConfigVersion, getVersionFromChangelogFile, checkHeaders and isOptionInSectionInConfig; the diff body is garbled and truncated in the source]
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/ConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/ConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,226 @@
[new file commons/core/checker/ConfigChecker.py (226 lines): defines the Rule and ConfigRules classes (addRuleSection, addRuleOption, isSectionMandatory, isOptionMandatory, getRule) and a ConfigChecker class (checkMandatorySections, checkMandatoryOptions, getSectionNamesAccordingPatternRules, getOptionsNamesAccordingPatternRules, extendConfigRulesWithPatternRules, getConfig); the diff body is garbled and truncated in the source]
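
From the class and method names listed above, a plausible usage sketch. The ConfigChecker constructor arguments are an assumption inferred from the _configFileName and _iConfigRules attributes visible in the fragment, not confirmed by the changeset; the section, option and file names are illustrative.

from commons.core.checker.ConfigChecker import ConfigRules, ConfigChecker
from commons.core.checker.RepetException import RepetException

# Declare which sections and options the configuration file must provide
iRules = ConfigRules("myTool")
iRules.addRuleSection("project", mandatory=True)
iRules.addRuleOption("project", "project_name", mandatory=True)

try:
    iChecker = ConfigChecker("myTool.cfg", iRules)   # assumed constructor signature
    iConfig = iChecker.getConfig()                   # checks file, sections and options
    print iConfig.get("project", "project_name")
except RepetException, e:
    print e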
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/ConfigException.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/ConfigException.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,53 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.checker.RepetException import RepetException
+
+##  An exception raised by the check() method of class ConfigChecker
+#
+# This class allows storage of multiple messages (see the messages attribute).
+# Example: use one instance of the ConfigException class for one section of the configuration file.
+# All messages relative to this section are stored in the messages attribute.
+class ConfigException( RepetException ):
+    
+    ## Constructor
+    #
+    # @param msg message embedded in Exception class   
+    #
+    def __init__(self, msg, messages = []):
+        RepetException.__init__(self, msg)
+        self.messages = messages
+        
+    def getMessages(self):
+        return self.messages
+        
+    def setMessages(self, messages):
+        self.messages = messages
+        
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/ConfigValue.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/ConfigValue.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,70 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+class ConfigValue(object):
+    
+    def __init__(self):
+        self.dOptionsValues4Sections={}
+        
+    def has_section(self,sectionName):
+        return self.dOptionsValues4Sections.has_key(sectionName)
+    
+    def has_option(self, sectionName, optionName):
+        isOptionExist = False
+        if self.has_section(sectionName):
+            isOptionExist = self.dOptionsValues4Sections[sectionName].has_key(optionName)
+        return isOptionExist
+        
+    def sections(self):    
+        lSectionsKeys = self.dOptionsValues4Sections.keys()
+        return lSectionsKeys
+    
+    def options(self, sectionName):
+        lOptionsKeys = [] 
+        if self.has_section(sectionName):
+            lOptionsKeys = self.dOptionsValues4Sections[sectionName].keys()
+        return lOptionsKeys
+    
+    def get(self, sectionName, optionName):   
+        if self.has_option(sectionName, optionName):
+            return self.dOptionsValues4Sections[sectionName][optionName]
+        return None
+    
+    def set(self, sectionName, optionName, optionValue):   
+        if not (self.has_section(sectionName)):
+            self.dOptionsValues4Sections[sectionName] = {}
+        self.dOptionsValues4Sections[sectionName][optionName] = optionValue
+        
+    def setdOptionsValues4Sections(self, dOptionsValues4Sections):
+        self.dOptionsValues4Sections = dOptionsValues4Sections
+        
+    def __eq__(self, o):
+        return self.dOptionsValues4Sections == o.dOptionsValues4Sections
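
ConfigValue is a plain in-memory store of option values keyed by section. A minimal illustration of the accessors defined above; the section and option names are made up for the example.

from commons.core.checker.ConfigValue import ConfigValue

iConfig = ConfigValue()
iConfig.set("project", "project_name", "demo")
print iConfig.has_section("project")           # True
print iConfig.options("project")               # ['project_name']
print iConfig.get("project", "project_name")   # 'demo'
print iConfig.get("project", "missing")        # None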
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/IChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/IChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,45 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for a checker
+#
+# This class emulates an interface for a checker.
+#
+# All checkers are subclasses of IChecker. 
+#
+class IChecker( object ):
+    
+    ## Perform the check; raise a CheckerException if an error occurred
+    #
+    # @param arg a collecting parameter: put here all you need to perform check
+    # 
+    def check(self, arg=""):
+        pass
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/OldConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/OldConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,101 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import ConfigParser
+from ConfigParser import NoOptionError
+from commons.core.checker.IChecker import IChecker
+from commons.core.checker.ConfigException import ConfigException
+
+
+## A checker for a configuration file
+#
+#
+# A configuration file is formatted as follow:
+#
+# [section1]
+#
+# option_name1: option_value1
+# 
+# option_name2: option_value2
+#
+# option_name3: option_value3
+#
+# [section2]
+# 
+# ...
+#
+# 
+# This class performs 3 checks on a configuration file:
+#
+# (i) check if file exists
+#
+# (ii) check if section exists
+#
+# (iii) check if option exists
+#
+class ConfigChecker( IChecker ):
+    
+    ## Constructor of a checker for a configuration file.
+    #
+    # @param  sectionName name of section to check in configuration file
+    # @param  optionsDict dictionary with option(s) to check as keys and empty strings ("") as values
+    def __init__ (self, sectionName, optionsDict):
+        self._sectionName = sectionName
+        self._optionsDict = optionsDict
+        
+        
+    ## Perform 3 checks : file exists, sections exists, option exists
+    # 
+    # @param configFile configuration file to check
+    # @exception ConfigException with a list of messages
+    def check (self, configFile):
+        config = ConfigParser.ConfigParser()
+        msg = []
+        try:
+            config.readfp( open(configFile) )
+        except IOError, e:
+            msg.append("CONFIG FILE not found - " + e.message)
+            raise ConfigException("", msg) 
+
+        if not (config.has_section(self._sectionName)):
+            msg.append("[" + self._sectionName + "]" + " section not found - ")
+            raise ConfigException("", msg)
+         
+        isExceptionOccured = False        
+        for key in self._optionsDict.keys():
+            try:
+                self._optionsDict[key] = config.get(self._sectionName, key) 
+            except NoOptionError, e:
+                msg.append("[" + self._sectionName + "]" + " - " + e.message)
+                isExceptionOccured = True
+        
+        if (isExceptionOccured):
+            raise ConfigException("", msg)
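
A short usage sketch of this older ConfigChecker, following the constructor and check() method shown above; the section, option and file names are illustrative only.

from commons.core.checker.OldConfigChecker import ConfigChecker
from commons.core.checker.ConfigException import ConfigException

iChecker = ConfigChecker("section1", {"option_name1": "", "option_name2": ""})
try:
    iChecker.check("pipeline.cfg")
except ConfigException, e:
    for msg in e.getMessages():
        print msg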
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/RepetException.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/RepetException.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,51 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+class RepetException(Exception):
+    
+    def __init__(self, msg):
+        Exception.__init__(self)
+        self._message = msg
+    
+    def __str__(self):
+        return self._message
+               
+    def getMessage(self):
+        return self._message
+    
+    def setMessage(self, msg):
+        self._message = msg
+
+
+class RepetDataException(RepetException):
+    
+    def __init__(self, msg):
+        RepetException.__init__(self, msg)
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/RepetException.pyc
Binary file commons/core/checker/RepetException.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/__init__.pyc
Binary file commons/core/checker/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/test/TestSuite_Checker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/TestSuite_Checker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import sys
+import unittest
+import Test_CheckerUtils
+import Test_ConfigChecker
+
+
+def main():
+    
+    TestSuite_Checker = unittest.TestSuite()
+    
+    TestSuite_Checker.addTest( unittest.makeSuite( Test_CheckerUtils.Test_CheckerUtils, "test" ) )     
+    TestSuite_Checker.addTest( unittest.makeSuite( Test_ConfigChecker.Test_ConfigChecker, "test" ) )    
+    
+    runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+    runner.run( TestSuite_Checker )
+    
+if __name__ == "__main__":                 
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/test/Test_CheckerUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_CheckerUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,535 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import ConfigParser\n+from commons.core.checker.CheckerUtils import CheckerUtils\n+from commons.core.checker.CheckerException import CheckerException\n+from ConfigParser import NoOptionError\n+from ConfigParser import NoSectionError\n+\n+class Test_CheckerUtils( unittest.TestCase ):\n+    \n+    def setUp(self):\n+        self.queueFileName = "queueName.txt"\n+        self.configFileName = "dummyConfig.cfg"\n+    \n+    def tearDown(self):\n+        if os.path.exists(self.queueFileName):\n+            os.remove(self.queueFileName)\n+        if os.path.exists(self.configFileName):\n+            os.remove(self.configFileName)\n+    \n+    def test_isBlastNameInBlastValues( self ):\n+        correctValueList = [ "blastn", "blastp", "blastx", "tblastn", "tblastx" ]\n+        for value in correctValueList:\n+            self.assertFalse( CheckerUtils.isBlastNameNotInBlastValues( value ) )\n+            \n+        incorrectValueList = [ "badbalst", "wublast" ]\n+        for value in incorrectValueList:\n+            self.assertTrue( CheckerUtils.isBlastNameNotInBlastValues( value ) )\n+            \n+    def test_isNotTRUEisNotFALSE( self ):\n+        correctValueList = [ "TRUE", "FALSE" ]\n+        for value in correctValueList:\n+            self.assertFalse( CheckerUtils.isNotTRUEisNotFALSE( value ) )\n+            \n+        incorrectValueList = [ "True", "False" ]\n+        for value in incorrectValueList:\n+            self.assertTrue( CheckerUtils.isNotTRUEisNotFALSE( value ) )\n+            \n+    def test_isRessourceNotExists( self ):\n+        fileName = "dummyFile.txt"\n+        self.assertTrue( CheckerUtils.isRessourceNotExits( fileName ) )\n+        os.system( "touch %s" % ( fileName ) )\n+        self.assertFalse( CheckerUtils.isRessourceNotExits( fileName ) )\n+        os.remove( fileName )\n+        \n+    def 
test_isNotAeValueWithOneDigit2DecimalsAtLeast( self ):\n+        correctEValueList = [ "5e-32", "7e-45", "1e-2122", "9e-32" ]\n+        for value in correctEValueList:\n+            self.assertFalse( CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast( value ) )\n+            \n+        incorrecEValueStr = [ "10e-32", "2e-3", "2e-2", "1", "cxhhe" ]\n+        for value in incorrecEValueStr:\n+            self.assertTrue( CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast( value ) )\n+            \n+    def test_isNotADigit( self ):\n'..b'CACCTTCAAA\\n")\n+        fastaFileHandler.write(">DmelC:hr4_Blas-ter_Piler_1.0_Map_9\\n")\n+        fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n+        fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n+        fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n+        fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n+        fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n+        fastaFileHandler.close()\n+    \n+    def _writeFastaFile_with_pipe(self, fastaFileName):    \n+        fastaFileHandler = open(fastaFileName, "w")\n+        fastaFileHandler.write(">DmelChr4_Blaster_Piler_0.0_Map_3\\n")\n+        fastaFileHandler.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\\n")\n+        fastaFileHandler.write("TTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAGA\\n")\n+        fastaFileHandler.write("GCTGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATAAAGTGATAGCAGACAACTGTA\\n")\n+        fastaFileHandler.write("TGTGTGCACACGTGTGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCACCTTCAAA\\n")\n+        fastaFileHandler.write(">DmelC|hr4_Blas-ter_Piler_1.0_Map_9\\n")\n+        fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n+        fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n+        fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n+        fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n+        fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n+        fastaFileHandler.close()\n+    \n+    def _writeFastaFile_with_equal(self, fastaFileName):    \n+        fastaFileHandler = open(fastaFileName, "w")\n+        fastaFileHandler.write(">DmelChr4_Blaster_Piler_0.0_Map_3\\n")\n+        fastaFileHandler.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\\n")\n+        fastaFileHandler.write("TTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAGA\\n")\n+        fastaFileHandler.write("GCTGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATAAAGTGATAGCAGACAACTGTA\\n")\n+        fastaFileHandler.write("TGTGTGCACACGTGTGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCACCTTCAAA\\n")\n+        fastaFileHandler.write(">DmelC:hr4_Blas=ter_Piler_1.0_Map_9\\n")\n+        fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n+        fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n+        fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n+        fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n+        fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n+ 
       fastaFileHandler.close()\n+\n+    def _writeChangeLogFile(self, changeLogFileName ):\n+        changeLogFileHandler = open(changeLogFileName, "w")\n+        changeLogFileHandler.write("ChangeLog of REPET\\n")\n+        changeLogFileHandler.write("\\n")\n+        changeLogFileHandler.write("\\n")\n+        changeLogFileHandler.write("\\n")\n+        changeLogFileHandler.write("REPET release 1.3.6\\n")\n+        changeLogFileHandler.write("(release date XX/XX/2010)\\n")\n+        changeLogFileHandler.write("\\n")\n+        changeLogFileHandler.close()\n+\n+    def _writeConfigFile(self, lineVersion):\n+        configFileHandler = open(self.configFileName, "w")\n+        configFileHandler.write("[repet_env]\\n")\n+        configFileHandler.write(lineVersion)\n+        configFileHandler.write("repet_host: <your_MySQL_host>\\n")\n+        configFileHandler.close()\n+        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_CheckerUtils ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
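The hunk above is shown truncated by the viewer; from the assertions that are visible, the static helpers it exercises can be summarised with this sketch (return values as asserted in the tests; the original spelling of isRessourceNotExits is kept):

    from commons.core.checker.CheckerUtils import CheckerUtils

    CheckerUtils.isBlastNameNotInBlastValues("blastn")      # False for blastn/blastp/blastx/tblastn/tblastx
    CheckerUtils.isNotTRUEisNotFALSE("TRUE")                # False: only the strings TRUE and FALSE pass
    CheckerUtils.isRessourceNotExits("dummyFile.txt")       # True as long as the file does not exist
    CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast("5e-32")   # False: accepted e-value format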
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/test/Test_ConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_ConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,569 @@\n+from commons.core.checker.ConfigChecker import ConfigChecker \n+from commons.core.checker.ConfigChecker import ConfigRules\n+from commons.core.checker.RepetException import RepetException\n+import os\n+import unittest\n+\n+class Test_ConfigChecker(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self._configFileName = "testConfigChecker.cfg"\n+        self._iMock = MockConfig()\n+     \n+    def test_checkIfExistsConfigFile_file_exist(self):\n+        f=open(self._configFileName, "w")\n+        f.close()\n+        \n+        doesFileExists = True\n+        iConfigRules = ConfigRules()\n+        try:\n+            iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n+            iConfigChecker.checkIfExistsConfigFile()\n+        except RepetException:\n+            doesFileExists = False\n+        os.remove(self._configFileName)        \n+        self.assertTrue(doesFileExists)\n+        \n+    def test_checkIfExistsConfigFile_file_not_exist(self):\n+        iConfigRules = ConfigRules()\n+        expMsg ="CONFIG FILE not found - \'%s\'" %self._configFileName\n+        doesFileExists = True\n+        try:\n+            iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)   \n+            iConfigChecker.checkIfExistsConfigFile()     \n+        except RepetException, re:\n+            doesFileExists = False\n+        self.assertFalse(doesFileExists)\n+        self.assertEqual(expMsg, re.getMessage())\n+        \n+    def test_readConfigFile(self):\n+        self._iMock.write_config(self._configFileName)\n+        iConfigRules = ConfigRules()\n+        expDictRawConfigValues = {"dir_name" : {"work_dir":"toto"},\n+                                  "organism" : {"abbreviation":"T.aestivum",\n+                                                "genus":"triticum",\n+                                                "species":"aestivum",\n+                                                "common_name":"wheat",\n+                                                "comment":""},\n+                                  \'analysis1\': {\'description\': \'\',\n+                                                \'gff_name\': \'BLASTX.gff2\',\n+                                                \'name\': \'BLASTXWheat2\',\n+                                                \'program\': \'BLASTX2\',\n+                                                \'programversion\': \'3.32\',\n+                                                \'sourcename\': \'dummyDesc_BLASTX2\'}\n+                                 }\n+        isNoExceptionRaised = True\n+        try: \n+            iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n+            iConfig = iConfigChecker.readConfigFile()\n+            iConfigChecker.setRawConfig(iConfig)\n+            obsDictRawConfigValues = iConfigChecker._iRawConfig.dOptionsValues4Sections\n+        except RepetException:\n+            isNoExceptionRaised = False\n+        os.remove(self._configFileName)\n+        self.assertTrue(isNoExceptionRaised)\n+        self.assertEquals(obsDictRawConfigValues, expDictRawConfigValues)\n+        \n+    def test_readConfigFile_section_define_twice(self):\n+        self._iMock.write_case_section_define_twice(self._configFileName)\n+        iConfigRules = ConfigRules()\n+        expMsg = "Duplicate section exist in config file %s"  %self._configFileName\n+        expDictRawConfigValues = {"dir_name": {"work_dir":"toto"},\n+                                  "analysis1" : {"name": 
"BLASTXWheat2",\n+                                                 "program" : "BLASTX2",\n+                                                 "programversion" : "3.32",\n+                                                 "sourcename" :"dummyDesc_BLASTX2",\n+                                                 "description" : "",\n+                                                 "gff_name" :"BLASTX.gff2"}\n+                                 }\n+        doesNoExceptionRaised = True\n+        try:\n+            iConfigChecker = ConfigChecker(self._configFileName, iConfigRu'..b'      configF.write( "sourcename: dummyDesc_BLASTX\\n")\n+        configF.write( "program: BLASTX2\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: BLASTX.gff\\n")\n+        configF.write( "\\n")\n+        configF.write( "\\n")\n+        configF.close()\n+        \n+    #configuration file with section with option depends on presence of other options\n+    def write_with_one_option_depends_of_an_other_one(self, configFileName ):\n+        configF = open(configFileName, "w" )\n+        configF.write( "[dir_name]\\n")\n+        configF.write( "work_dir : toto\\n") \n+        configF.write( "\\n")\n+        configF.write( "[organism]\\n")\n+        configF.write( "abbreviation: T.aestivum\\n")\n+        configF.write( "genus: Triticum\\n")\n+        configF.write( "species: aestivum\\n")\n+        configF.write( "common_name: wheat\\n")\n+        configF.write( "comment: \\n")\n+        configF.write( "\\n")\n+        configF.write( "[analysis1]\\n")\n+        configF.write( "name: BLASTXWheat\\n")\n+        configF.write( "program: BLASTX\\n")\n+        configF.write( "programversion: 3.3\\n")\n+        configF.write( "sourcename: src_BLASTX\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: BLASTX.gff\\n")\n+        configF.write( "\\n")\n+        configF.write( "[analysis2]\\n")\n+        configF.write( "name: GMHMMWheat\\n")\n+        configF.write( "program: GMHMM\\n")\n+        configF.write( "programversion: 4.3\\n")\n+        configF.write( "sourcename: src_GMHMM\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: GMHMM.gff\\n")\n+        configF.write( "\\n")\n+        configF.write( "[target]\\n")\n+        configF.write( "target_used: yes\\n")\n+        configF.write( "target_used_list: target.lst\\n")\n+        configF.close()\n+        \n+    def write_case_pattern_rule(self, configFileName ):\n+        configF = open(configFileName, "w" )\n+        configF.write( "[dir_name]\\n")\n+        configF.write( "work_dir : toto\\n" ) \n+        configF.write( "\\n")\n+        configF.write( "[organism]\\n")\n+        configF.write( "abbreviation: T.aestivum\\n")\n+        configF.write( "genus: Triticum\\n")\n+        configF.write( "species: aestivum\\n")\n+        configF.write( "common_name: wheat\\n")\n+        configF.write( "comment: \\n")\n+        configF.write( "\\n")\n+        configF.write( "[analysis1]\\n")\n+        configF.write( "name: BLASTXWheat\\n")\n+        configF.write( "program: BLASTX\\n")\n+        configF.write( "programversion: 3.3\\n")\n+        configF.write( "sourcename: src_BLASTX\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: BLASTX.gff\\n")\n+        configF.write( "\\n")\n+        configF.write( "[analysis2]\\n")\n+        configF.write( "name: GMHMMWheat\\n")\n+        configF.write( "program: GMHMM\\n")\n+        configF.write( "programversion: 
4.3\\n")\n+        configF.write( "sourcename: src_GMHMM\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: GMHMM.gff\\n")\n+        configF.write( "\\n")\n+        configF.write( "[target]\\n")\n+        configF.write( "target_used: yes\\n")\n+        configF.write( "target_used_list: target.lst\\n")\n+        configF.write( "\\n")\n+        configF.write( "[section_with_option_pattern]\\n")\n+        configF.write( "option1: value1\\n")\n+        configF.write( "option2: value2\\n")\n+        configF.write( "[second_section_with_option_pattern]\\n")\n+        configF.write( "option1: value1\\n")\n+        configF.write( "option2: value2\\n")\n+        configF.close()\n+        \n+    def write_config_case(self, configFileName):\n+        configF = open(configFileName, "w" )\n+        configF.write( "[dir_name]\\n")\n+        configF.write( "work_dir : toto \\n") \n+        configF.write( "\\n")\n+        configF.write( "[organism]\\n")\n+        configF.write( "min_SSR_coverage: 0.50\\n")\n+        configF.write( "\\n")\n+        configF.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/test/Test_ConfigValue.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_ConfigValue.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,217 @@\n+import unittest\n+from commons.core.checker.ConfigValue import ConfigValue\n+\n+class Test_ConfigValue(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self._iConfigValue = ConfigValue()\n+        \n+    def test__eq__True(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                                        }\n+        iConfigValue1 = ConfigValue()                         \n+        iConfigValue1.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                                }\n+        \n+        self.assertEqual(self._iConfigValue, iConfigValue1)\n+        \n+    def test__eq__False_not_same_section(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organisms" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                                        }\n+        iConfigValue1 = ConfigValue()                         \n+        iConfigValue1.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                                }\n+        \n+        self.assertNotEqual(self._iConfigValue, iConfigValue1)\n+                                                \n+                                                \n+    def test__eq__False_not_same_option(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "family":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                                        }\n+        iConfigValue1 = ConfigValue()                         \n+        iConfigValue1.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                                }\n+     
   \n+        self.assertNotEqual(self._iConfigValue, iConfigValue1)\n+        \n+    def test__eq__False_not_same_value(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"vitis",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n'..b'+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                 }\n+        \n+        obsOptionExist = self._iConfigValue.has_option("organism","toto")\n+        self.assertFalse(obsOptionExist)\n+        obsOptionExist = self._iConfigValue.has_option("toto","genus")\n+        self.assertFalse(obsOptionExist)\n+\n+    def test_sections(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                 }\n+        expListSections = ["dir_name", "organism"]\n+        obsListSections = self._iConfigValue.sections()\n+        self.assertEquals(expListSections, obsListSections)\n+        \n+    def test_sections_empty_config(self):\n+        self._iConfigValue.dOptionsValues4Sections = {}\n+        expListSections = []\n+        obsListSections = self._iConfigValue.sections()\n+        self.assertEquals(expListSections, obsListSections)\n+\n+    def test_options(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                 }\n+        expListOptions = [\'abbreviation\', \'common_name\', \'genus\', \'species\', \'comment\']\n+        obsListOptions = self._iConfigValue.options("organism")\n+        self.assertEquals(expListOptions, obsListOptions)\n+   \n+        expListOptions = ["work_dir"]\n+        obsListOptions = self._iConfigValue.options("dir_name")\n+        self.assertEquals(expListOptions, obsListOptions)\n+             \n+    def test_options_empty_config(self):\n+        self._iConfigValue.dOptionsValues4Sections = {}\n+        expListOptions = []\n+        obsListOptions = self._iConfigValue.options("toto")\n+        self.assertEquals(expListOptions, obsListOptions)\n+\n+    def test_set(self):\n+        self._iConfigValue.dOptionsValues4Sections = {}\n+        expDictOptionsValue = {"dir_name" : {"work_dir":"toto"}}\n+        self._iConfigValue.set("dir_name", "work_dir", "toto")\n+        obsDictOptionsValue = 
self._iConfigValue.dOptionsValues4Sections\n+        self.assertEquals(expDictOptionsValue, obsDictOptionsValue)\n+        \n+    def test_get(self):\n+        self._iConfigValue.dOptionsValues4Sections = {\n+                    "dir_name" : {"work_dir":"toto"},\n+                    "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""}\n+                                 }\n+        expValue = "aestivum"\n+        obsValue = self._iConfigValue.get("organism", "species")\n+        self.assertEquals(expValue, obsValue)\n+        expValue = None\n+        obsValue = self._iConfigValue.get("toto", "species")\n+        self.assertEquals(expValue, obsValue)\n+        expValue = None\n+        obsValue = self._iConfigValue.get("organism", "dummyopt")\n+        self.assertEquals(expValue, obsValue)       \n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
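This hunk is also truncated, but the visible tests document ConfigValue as a small in-memory stand-in for ConfigParser. A sketch of the accessors exercised above (values mirror the test fixture):

    from commons.core.checker.ConfigValue import ConfigValue

    iConfigValue = ConfigValue()
    iConfigValue.set("dir_name", "work_dir", "toto")
    iConfigValue.set("organism", "genus", "triticum")
    iConfigValue.sections()                        # list of section names
    iConfigValue.options("organism")               # option names of one section
    iConfigValue.get("organism", "genus")          # "triticum"; None if section or option is missing
    iConfigValue.has_option("organism", "genus")   # True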
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/test/Test_F_ConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_F_ConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,214 @@\n+from commons.core.checker.ConfigChecker import ConfigChecker \n+from commons.core.checker.ConfigChecker import ConfigRules\n+from commons.core.checker.ConfigValue import ConfigValue\n+from commons.core.checker.RepetException import RepetException\n+import unittest\n+import os\n+\n+class Test_F_ConfigChecker(unittest.TestCase):\n+    \n+    #TODO: AJouter test (wrong type, etc..)\n+    def setUp(self):\n+        self._configFileName = "test_conf_checker"\n+        \n+    def tearDown(self):\n+        os.remove(self._configFileName)\n+     \n+    def test_run(self):\n+        iMock = MockConfig()\n+        iMock.write_config(self._configFileName)\n+        \n+        iConfigRules = ConfigRules()\n+        iConfigRules.addRuleSection(section="dir_name", mandatory=True)\n+        iConfigRules.addRuleOption(section="dir_name", option ="work_dir", mandatory=True)\n+        iConfigRules.addRuleSection(section="organism", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="comment")\n+        iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="name", mandatory=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="program", mandatory=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="sourcename", mandatory=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="description")\n+        iConfigRules.addRuleOption(section="analysis", option ="gff_name")\n+        \n+        iConfigChecker = ConfigChecker(self._configFileName,iConfigRules)\n+        \n+        obsValidatedConfig = iConfigChecker.getConfig()\n+        \n+        expValidatedConfig = ConfigValue()\n+        d = {"dir_name" : {"work_dir":"toto"},\n+             "organism" : {"abbreviation":"T.aestivum",\n+                                  "genus":"triticum",\n+                                  "species":"aestivum",\n+                                  "common_name":"wheat",\n+                                  "comment":""},\n+                           \'analysis1\': {\'description\': \'\',\n+                                  \'gff_name\': \'BLASTX.gff2\',\n+                                  \'name\': \'BLASTXWheat2\',\n+                                  \'program\': \'BLASTX2\',\n+                                  \'programversion\': \'3.32\',\n+                                  \'sourcename\': \'dummyDesc_BLASTX2\'}\n+                                 }\n+        expValidatedConfig.setdOptionsValues4Sections(d)\n+        \n+        self.assertEquals(expValidatedConfig, obsValidatedConfig)\n+        \n+        \n+    def test_run_exception_section_missing(self):\n+        iMock = MockConfig()\n+        iMock.write_config_section_missing(self._configFileName)\n+        \n+        iConfigRules = ConfigRules()\n+        iConfigRules.addRuleSection(section="dir_name", mandatory=True)\n+        iConfigRules.addRuleOption(section="dir_name", option ="work_dir", mandatory=True)\n+        iConfigRules.addRuleSection(section="organism", mandatory=True)\n+        
iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="comment")\n+        iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n+        iConfigRules.addRuleOption('..b'on ="work_dir", mandatory=True)\n+        iConfigRules.addRuleSection(section="organism", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n+        iConfigRules.addRuleOption(section="organism", option ="comment")\n+        iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="name", mandatory=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="program", mandatory=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="sourcename", mandatory=True)\n+        iConfigRules.addRuleOption(section="analysis", option ="description")\n+        iConfigRules.addRuleOption(section="analysis", option ="gff_name")\n+        \n+        iConfigChecker = ConfigChecker(self._configFileName,iConfigRules)\n+        \n+        expMessage = "Error in configuration file %s, following options are missing: \\n - [organism]: abbreviation\\n"% self._configFileName\n+        \n+        try :\n+            obsValidatedConfig = iConfigChecker.getConfig()\n+        except RepetException, e:\n+            obsMessage = e.getMessage()\n+\n+        self.assertEquals(expMessage, obsMessage)\n+            \n+class MockConfig (object):\n+   \n+    def write_config(self, configFileName):\n+        configF = open(configFileName, "w" )\n+        configF.write( "[dir_name]\\n")\n+        configF.write( "work_dir : toto \\n") \n+        configF.write( "\\n")\n+        configF.write( "[organism]\\n")\n+        configF.write( "abbreviation: T.aestivum\\n")\n+        configF.write( "genus: triticum\\n")\n+        configF.write( "species: aestivum\\n")\n+        configF.write( "common_name: wheat\\n")\n+        configF.write( "comment: \\n")\n+        configF.write( "[analysis1]\\n")\n+        configF.write( "name: BLASTXWheat2\\n")\n+        configF.write( "program: BLASTX2\\n")\n+        configF.write( "programversion: 3.32\\n")\n+        configF.write( "sourcename: dummyDesc_BLASTX2\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: BLASTX.gff2\\n")\n+        configF.write( "\\n")\n+        configF.close()\n+        \n+    def write_config_section_missing(self, configFileName):\n+        configF = open(configFileName, "w" )\n+        configF.write( "[dir_name]\\n")\n+        configF.write( "work_dir : toto \\n") \n+        configF.write( "\\n")\n+        configF.write( "[analysis1]\\n")\n+        configF.write( "name: BLASTXWheat2\\n")\n+        configF.write( "program: BLASTX2\\n")\n+        configF.write( "programversion: 3.32\\n")\n+        configF.write( "sourcename: 
dummyDesc_BLASTX2\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: BLASTX.gff2\\n")\n+        configF.write( "\\n")\n+        configF.close()\n+        \n+    def write_config_option_missing(self, configFileName):\n+        configF = open(configFileName, "w" )\n+        configF.write( "[dir_name]\\n")\n+        configF.write( "work_dir : toto \\n") \n+        configF.write( "\\n")\n+        configF.write( "[organism]\\n")\n+        configF.write( "genus: triticum\\n")\n+        configF.write( "species: aestivum\\n")\n+        configF.write( "common_name: wheat\\n")\n+        configF.write( "comment: \\n")\n+        configF.write( "[analysis1]\\n")\n+        configF.write( "name: BLASTXWheat2\\n")\n+        configF.write( "program: BLASTX2\\n")\n+        configF.write( "programversion: 3.32\\n")\n+        configF.write( "sourcename: dummyDesc_BLASTX2\\n")\n+        configF.write( "description: \\n")\n+        configF.write( "gff_name: BLASTX.gff2\\n")\n+        configF.write( "\\n")\n+        configF.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
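This functional test is the clearest visible documentation of the new rule-based checker. Distilled from the parts of the hunk shown above, a minimal sketch (the .cfg file name is a placeholder):

    from commons.core.checker.ConfigChecker import ConfigChecker, ConfigRules
    from commons.core.checker.RepetException import RepetException

    iConfigRules = ConfigRules()
    iConfigRules.addRuleSection(section="dir_name", mandatory=True)
    iConfigRules.addRuleOption(section="dir_name", option="work_dir", mandatory=True)
    iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)
    iConfigRules.addRuleOption(section="analysis", option="name", mandatory=True)

    iConfigChecker = ConfigChecker("pipeline.cfg", iConfigRules)
    try:
        iConfig = iConfigChecker.getConfig()           # validated ConfigValue instance
        workDir = iConfig.get("dir_name", "work_dir")
    except RepetException as e:
        print(e.getMessage())                          # e.g. lists missing sections/options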
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/checker/test/Test_OldConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_OldConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,104 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import os
+from commons.core.checker.OldConfigChecker import ConfigChecker
+from commons.core.checker.ConfigException import ConfigException
+
+class Test_ConfigChecker( unittest.TestCase ):
+    
+    def setUp(self):
+        pass
+    
+    def tearDown(self):
+        pass
+    
+    
+    def testFileNotFound(self):
+        exceptionExpected = None
+        configChecker = ConfigChecker("",{})
+        try :
+            configChecker.check("noExistsFile.cfg")
+        except ConfigException, ce:
+            exceptionExpected = ce
+        
+        self.assertTrue(exceptionExpected != None)
+        msg = exceptionExpected.messages[0]
+        self.assertTrue(msg.startswith("CONFIG FILE not found - "))
+        
+        
+    def testNoSectionInConfigFile (self):
+        exceptionExpected = None
+        dummyFile = open("dummyFile.cfg", "w")
+        configChecker = ConfigChecker("dummySection",{})
+        try :
+            configChecker.check("dummyFile.cfg")
+        except ConfigException, ce:
+            exceptionExpected = ce
+        
+        self.assertTrue(exceptionExpected != None)
+        msg = exceptionExpected.messages[0]
+        self.assertTrue(msg.startswith("[dummySection]" + " section not found - "))
+        
+        os.remove("dummyFile.cfg")
+        
+        
+    def testNoOptionInConfigFile (self):
+        exceptionExpected = None
+        MockConfigFile("dummyConfig.cfg",{})
+        configChecker = ConfigChecker("blaster_config",{"dummy":""})
+        try :
+            configChecker.check("dummyConfig.cfg")
+        except ConfigException, ce:
+            exceptionExpected = ce
+        
+        self.assertTrue(exceptionExpected != None)
+        msg = exceptionExpected.messages[0]
+        self.assertTrue(msg.startswith("[blaster_config] - No option 'dummy' in section: 'blaster_config'"))
+        os.remove("dummyConfig.cfg")
+        
+        
+class MockConfigFile:
+    
+    def __init__ (self, fileName, optionsDict):
+        self._fileName = fileName
+        config = open(fileName, "w");
+        config.write("[blaster_config]\n")
+        for key in optionsDict.keys():
+            config.write(key + ":" + optionsDict[key] + "\n")
+        config.close()
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_ConfigChecker ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/Align.pyc
Binary file commons/core/coord/Align.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/Map.pyc
Binary file commons/core/coord/Map.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/Range.pyc
Binary file commons/core/coord/Range.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/__init__.pyc
Binary file commons/core/coord/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/TestSuite_coord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/TestSuite_coord.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_Align
+import Test_AlignUtils
+import Test_Map
+import Test_MapUtils
+import Test_Match
+import Test_MatchUtils
+import Test_Path
+import Test_PathUtils
+import Test_Range
+import Test_Set
+import Test_SetUtils
+
+
+def main():
+    
+    TestSuite_coord = unittest.TestSuite() 
+    
+    TestSuite_coord.addTest( unittest.makeSuite( Test_Align.Test_Align, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_AlignUtils.Test_AlignUtils, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_Map.Test_Map, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_MapUtils.Test_MapUtils, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_Match.Test_Match, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_MatchUtils.Test_MatchUtils, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_Path.Test_Path, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_PathUtils.Test_PathUtils, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_Range.Test_Range, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_Set.Test_Set, "test" ) )
+    TestSuite_coord.addTest( unittest.makeSuite( Test_SetUtils.Test_SetUtils, "test" ) )
+    
+    runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+    runner.run( TestSuite_coord )
+    
+    
+if __name__ == "__main__":
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_Align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Align.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,518 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Map import Map\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Range import Range\n+\n+\n+class Test_Align( unittest.TestCase ):\n+    \n+    def setUp(self):\n+        self._align = Align()\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n+        \n+    def tearDown(self):\n+        self._align = None\n+        \n+    def test_isEmpty_True(self):\n+        alignInstance = Align()\n+        \n+        self.assertTrue(alignInstance.isEmpty())\n+        \n+    def test_isEmpty_True_query_is_empty(self):\n+        alignInstance = Align()\n+        line = "\\t-1\\t-1\\tTE2\\t3\\t10\\t1e-20\\t30\\t90.2\\n"\n+        alignInstance.setFromString(line)\n+        \n+        self.assertTrue(alignInstance.isEmpty())\n+        \n+    def test_isEmpty_True_subject_is_empty(self):\n+        alignInstance = Align()\n+        line = "chr1\\t2\\t20\\t\\t-1\\t-1\\t1e-20\\t30\\t90.2\\n"\n+        alignInstance.setFromString(line)\n+        \n+        self.assertTrue(alignInstance.isEmpty())\n+        \n+    def test_isEmpty_False(self):\n+        alignInstance = Align()\n+        line = "chr1\\t2\\t20\\tTE2\\t3\\t10\\t1e-20\\t30\\t90.2\\n"\n+        alignInstance.setFromString(line)\n+        \n+        self.assertFalse(alignInstance.isEmpty())\n+        \n+    def test_read(self):\n+        line = "chr2\\t1\\t10\\tTE3\\t11\\t17\\t1e-20\\t30\\t90.2\\n"\n+        expReturn = 1\n+\n+        dummyMockAlignFile = "dummyMockAlignFile"\n+        mockAlignFileHandle = open(dummyMockAlignFile, "w")\n+        mockAlignFileHandle.write(line)\n+        mockAlignFileHandle.close()\n+        \n+        expAlignInstance = Align()\n+        expAlignInstance.setFromString(line)\n+\n+        mockAlignFileHandle = 
open(dummyMockAlignFile, "r")\n+        obsAlignInstance = Align()\n+        obsReturn = obsAlignInstance.read(mockAlignFileHandle)\n+        \n+        mockAlignFileHandle.close()\n+        os.remove(dummyMockAlignFile)   \n+        \n+        self.assertEquals(expAlignInstance, obsAlignInstance)    \n+        self.assertEquals(expReturn, obsReturn)    \n+        \n+    def test_read_empty_file(self):\n+        expReturn = 0\n+         \n+        dummyMockAlignFile = "dummyMockAlignFile"\n+        mockAlignFileHandle = open(dummyMockAlignFile, "w")\n+        mockAlignFileHandle.close'..b'ject.setFromTuple( ( "repet", "sbj1", "1", "100" ) )\n+        \n+        obsMapQuery, obsMapSubject = self._align.getMapsOfQueryAndSubject()\n+        \n+        self.assertEqual( expMapQuery, obsMapQuery )\n+        self.assertEqual( expMapSubject, obsMapSubject )\n+        \n+    def test_getBin_bin_level_9(self):\n+        tuple = ("chr1","190000000","390000000","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        expRes = 100000000.0\n+        obsRes = self._align.getBin()\n+        self.assertEquals(expRes, obsRes)\n+\n+    def test_getBin_bin_level_8(self):\n+        tuple = ("chr1","19000000","39000000","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        expRes = 100000000.0\n+        obsRes = self._align.getBin()\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_7(self):\n+        tuple = ("chr1","1900000","3900000","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        expRes = 10000000.0\n+        obsRes = self._align.getBin()\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_6(self):\n+        tuple = ("chr1","190000","390000","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        obsRes = self._align.getBin()\n+        expRes = 1000000.0\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_5(self):\n+        tuple = ("chr1","19000","39000","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        obsRes = self._align.getBin()\n+        expRes = 100000.0\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_4(self):\n+        tuple = ("chr1","1900","3900","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        obsRes = self._align.getBin()\n+        expRes = 10000.0\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_3(self):\n+        tuple = ("chr1","190","390","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        obsRes = self._align.getBin()\n+        expRes = 1000.0\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_2(self):\n+        tuple = ("chr1","19","39","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        obsRes = self._align.getBin()\n+        expRes = 1000.0\n+        self.assertEquals(expRes, obsRes)\n+        \n+    def test_getBin_bin_level_1(self):\n+        tuple = ("chr1","1","3","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple(tuple)\n+        obsRes = self._align.getBin()\n+        expRes = 1000.0\n+        self.assertEquals(expRes, obsRes)\n+        \n+        \n+    def test_switchQuerySubject_directS( self ):\n+        tuple = 
("chr1","1","3","TE2","11","17","1e-20","30","90.2")\n+        self._align.setFromTuple( tuple )\n+        exp = Align( Range("TE2","11","17"), Range("chr1","1","3"), "1e-20", "30", "90.2" )\n+        self._align.switchQuerySubject()\n+        self.assertEquals( exp, self._align )\n+        \n+        \n+    def test_switchQuerySubject_reverseS( self ):\n+        tuple = ("chr1","1","3","TE2","17","11","1e-20","30","90.2")\n+        self._align.setFromTuple( tuple )\n+        exp = Align( Range("TE2","11","17"), Range("chr1","3","1"), "1e-20", "30", "90.2" )\n+        self._align.switchQuerySubject()\n+        self.assertEquals( exp, self._align )\n+        \n+        \n+    def test_toStringAsGff( self ):\n+        self._align.setFromString( "chr1\\t1\\t10\\tTE3\\t11\\t17\\t1e-20\\t30\\t85.2\\n" )\n+        exp = "chr1\\tREPET\\tmatch\\t1\\t10\\t1e-20\\t+\\t.\\tID=23;Target=TE3 11 17"\n+        obs = self._align.toStringAsGff( ID="23" )\n+        self.assertEqual( obs, exp )\n+        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Align ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_AlignUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_AlignUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,777 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+import shutil\n+from commons.core.coord.AlignUtils import AlignUtils\n+from commons.core.coord.Align import Align\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Range import Range\n+\n+\n+class Test_AlignUtils( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n+        \n+        \n+    def tearDown( self ):\n+        self._uniqId = ""\n+        \n+        \n+    def test_getAlignListFromFile( self ):\n+        a1 = Align()\n+        a1.setFromTuple( ( "chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26" ) )\n+        a2 = Align()\n+        a2.setFromTuple( ( "chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13" ) )\n+        \n+        inFileName = "dummyFile_%s" % ( self._uniqId )\n+        inFileHandler = open( inFileName, "w" )\n+        a1.write( inFileHandler )\n+        a2.write( inFileHandler )\n+        inFileHandler.close()\n+        \n+        lExp = [ a1, a2 ]\n+        lObs = AlignUtils.getAlignListFromFile( inFileName )\n+        \n+        self.assertEqual( lExp, lObs )\n+        \n+        if os.path.exists( inFileName ):\n+            os.remove( inFileName )\n+            \n+            \n+    def test_getListOfScores( self ):\n+        a1 = Align()\n+        a1.setFromTuple( ( "chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26" ) )\n+        a2 = Align()\n+        a2.setFromTuple( ( "chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13" ) )\n+        lAligns = [ a1, a2 ]\n+        \n+        lExp = [ 89, 95 ]\n+        lObs = AlignUtils.getListOfScores( lAligns )\n+        \n+        self.assertEqual( lExp, lObs )\n+        \n+        \n+    def test_getScoreListFromFile( self ):\n+        alignFile = "dummyAlignFile"\n+        
alignFileHandler = open( alignFile, "w" )\n+        alignFileHandler.write( "chr3\\t1\\t100\\tchr5\\t11\\t110\\t1e-52\\t133\\t87.2\\n" )\n+        alignFileHandler.write( "chr7\\t1\\t200\\tchr2\\t11\\t210\\t1e-78\\t235\\t98.9\\n" )\n+        alignFileHandler.close()\n+        \n+        lExp = [ 133, 235 ]\n+        lObs = AlignUtils.getScoreListFromFile( alignFile )\n+        self.assertEqual( lExp, lObs )\n+        \n+        os.remove( alignFile )\n+        \n+        \n+    def test_getScoreListFromFile_empty_file( self ):\n+        alignFile = "dummyAlignFile"\n+    '..b' iAlign2 = Align( Range("chr1",51,80), Range("TE1",161,190), 1e-20, 90.2, 30 )\n+        self.assertFalse( iAlign1.isOverlapping( iAlign2 ) )\n+        \n+        \n+    def test_mergeList( self ):\n+        iAlign1 = Align( Range("chr1",81,120), Range("TE1",91,130), 1e-20, 90.2, 30 )\n+        iAlign2 = Align( Range("chr2",51,80), Range("TE1",61,90), 1e-20, 90.2, 30 )  # different query\n+        iAlign3 = Align( Range("chr1",1,100), Range("TE1",11,110), 1e-20, 90.2, 30 )  # to be merged with 1st line\n+        iAlign4 = Align( Range("chr1",1,200), Range("TE2",11,210), 1e-20, 90.2, 30 )  # different subject\n+        iAlign5 = Align( Range("chr1",1,100), Range("TE1",501,600), 1e-20, 90.2, 30 )  # non-overlapping subject\n+        lAligns = [ iAlign1, iAlign2, iAlign3, iAlign4, iAlign5 ]\n+        \n+        iAlign6 = Align( Range("chr1",1,120), Range("TE1",11,130), 1e-20, 90.2, 30 )\n+        lExp = [ iAlign6, iAlign5, iAlign4, iAlign2 ]\n+        \n+        lObs = AlignUtils.mergeList( lAligns )\n+        \n+        self.assertEquals( lExp, lObs )\n+        \n+        \n+    def test_mergeFile_empty( self ):\n+        inFile = "dummyInFile.align"\n+        inF = open( inFile, "w" )\n+        inF.close()\n+        \n+        expFile = "dummyExpFile.align"\n+        expF = open( expFile, "w" )\n+        expF.close()\n+        \n+        obsFile = "dummyObsFile.align"\n+        AlignUtils.mergeFile( inFile, obsFile )\n+        \n+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ inFile, expFile, obsFile ]:\n+            os.remove( f )\n+            \n+            \n+    def test_mergeFile( self ):\n+        iAlign = Align()\n+        \n+        inFile = "dummyInFile.align"\n+        inF = open( inFile, "w" )\n+        iAlign.setFromString( "chr1\\t81\\t120\\tTE1\\t91\\t130\\t1e-20\\t30\\t90.2\\n" )\n+        iAlign.write( inF )\n+        iAlign.setFromString( "chr2\\t51\\t80\\tTE1\\t61\\t90\\t1e-20\\t30\\t90.2\\n" )  # different query\n+        iAlign.write( inF )\n+        iAlign.setFromString( "chr1\\t1\\t100\\tTE1\\t11\\t110\\t1e-20\\t30\\t90.2\\n" )  # to be merged with 1st line\n+        iAlign.write( inF )\n+        iAlign.setFromString( "chr1\\t1\\t200\\tTE2\\t11\\t210\\t1e-20\\t30\\t90.2\\n" )  # different subject\n+        iAlign.write( inF )\n+        inF.close()\n+        \n+        expFile = "dummyExpFile.align"\n+        expF = open( expFile, "w" )\n+        iAlign.setFromString( "chr1\\t1\\t120\\tTE1\\t11\\t130\\t1e-20\\t30\\t90.2\\n" )\n+        iAlign.write( expF )\n+        iAlign.setFromString( "chr1\\t1\\t200\\tTE2\\t11\\t210\\t1e-20\\t30\\t90.2\\n" )\n+        iAlign.write( expF )\n+        iAlign.setFromString( "chr2\\t51\\t80\\tTE1\\t61\\t90\\t1e-20\\t30\\t90.2\\n" )\n+        iAlign.write( expF )\n+        expF.close()\n+        \n+        obsFile = "dummyObsFile.align"\n+        AlignUtils.mergeFile( inFile, obsFile )\n+        \n+        
self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ inFile, expFile, obsFile ]:\n+            os.remove( f )\n+            \n+            \n+    def test_updateScoresInFile( self ):\n+        iAlign = Align()\n+        \n+        inFile = "dummyInFile.align"\n+        inHandler = open( inFile, "w" )\n+        iAlign.setFromString( "query1\\t1\\t100\\tsubject1\\t1\\t95\\t1e-180\\t230\\t90.2\\n" )\n+        iAlign.write( inHandler )\n+        inHandler.close()\n+        \n+        expFile = "dummyExpFile.align"\n+        expHandler = open( expFile, "w" )\n+        iAlign.setFromString( "query1\\t1\\t100\\tsubject1\\t1\\t95\\t1e-180\\t%i\\t90.2\\n" % ( ( 100 - 1 + 1 ) * 90.2 / 100.0 ) )\n+        iAlign.write( expHandler )\n+        expHandler.close()\n+        \n+        obsFile = "dummyObsFile.align"\n+        AlignUtils.updateScoresInFile( inFile, obsFile )\n+        \n+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ inFile, expFile, obsFile ]:\n+            os.remove( f )\n+            \n+            \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
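For reference, the *.align format exercised by the tests in this hunk is nine tab-separated columns per line: query name, query start, query end, subject name, subject start, subject end, E-value, score and identity. In the first test line above ("chr3 ... 1e-52 133 87.2") the score is therefore 133, which is what getScoreListFromFile is expected to return, and test_updateScoresInFile expects the score to be recomputed as query length * identity / 100. A minimal standalone sketch of that column layout (read_align_scores is a hypothetical helper for illustration, not the AlignUtils implementation):

    def read_align_scores(align_path):
        """Return the score column (8th field) of each *.align line."""
        scores = []
        with open(align_path) as handler:
            for line in handler:
                fields = line.rstrip("\n").split("\t")
                if len(fields) < 9:
                    continue  # an empty file simply yields [], as the empty-file test expects
                scores.append(int(float(fields[7])))
        return scores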
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_ConvCoord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_ConvCoord.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,205 @@\n+import unittest\n+import os\n+import time\n+from commons.core.coord.ConvCoord import ConvCoord\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.coord.Map import Map\n+\n+class Test_ConvCoord( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._i = ConvCoord()\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        self._inData = "dummyInData_%s" % ( self._uniqId )\n+        self._mapData = "dummyMapData_%s" % ( self._uniqId )\n+        self._expData = "dummyExpData_%s" % ( self._uniqId )\n+        self._obsData = "dummyObsData_%s" % ( self._uniqId )\n+        self._iDb = DbFactory.createInstance()\n+        self._i._iDb = self._iDb\n+        \n+    def tearDown( self ):\n+        self._iDb.close()\n+\n+#TODO: handle duplicated matchs for path\n+#    def test_convCoordsChkToChrFromFile_duplicated_matchs( self ):\n+#        dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),\n+#                             "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }\n+#        tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId \n+#        self._writePathFileCoordOnChunk(tmpPathFileName)\n+#        \n+#        expPathFile = "dummyExpPathFile_%s" % self._uniqId\n+#        self._writePathFileCoordOnChrWithOutDoublons(expPathFile)\n+#        \n+#        outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)\n+#        \n+#        obsPathFile = "dummyObsPathFile_%s" % self._uniqId\n+#        self._iDb.exportDataToFile(outTableName, obsPathFile)\n+#        \n+#        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n+#        \n+#        for f in [ expPathFile, obsPathFile, tmpPathFileName ]:\n+#            os.remove( f )\n+#        self._iDb.dropTable(outTableName)\n+ \n+#TODO: handle matchs out of chunk overlap ? For one side (=> path 128, remove path 152) ? 
For two sides (path 129, fusion with path 154) ?\n+#    def test_convCoordsChkToChrFromFile_matchs_out_of_overlap( self ):\n+#        dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),\n+#                             "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }\n+#        tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId \n+#        self._writePathFileCoordOnChunk_outOfOverlap(tmpPathFileName)\n+#        \n+#        expPathFile = "dummyExpPathFile_%s" % self._uniqId\n+#        self._writePathFileCoordOnChrWithOutDoublons_outOfOverlap(expPathFile)\n+#        \n+#        outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)\n+#        \n+#        obsPathFile = "dummyObsPathFile_%s" % self._uniqId\n+#        self._iDb.exportDataToFile(outTableName, obsPathFile)\n+#        \n+#        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n+#        \n+#        for f in [ expPathFile, obsPathFile, tmpPathFileName ]:\n+#            os.remove( f )\n+#        self._iDb.dropTable(outTableName)\n+        \n+    def test_mergeCoordsOnChunkOverlaps( self ):\n+        dChunks2CoordMaps = { "chunk1": Map( "chunk1", "chromosome1", 1, 100 ),\n+                             "chunk2": Map( "chunk2", "chromosome1", 91, 190 ),\n+                             "chunk3": Map( "chunk3", "chromosome2", 1, 100 ) }\n+        tmpPathTable = "dummyTmpPathTable"\n+        linesToProcess = [\n+                          "1" + "\\t" + "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.8" + "\\n",  # hit within the 1st chunk\n+                          "3" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n+                          "2" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" '..b'tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n+        pathFile.write("152\\tchunk2\\t3866\\t3889\\tCR1-19_HM_1p:classI:LINE\\t898\\t891\\t5e-21\\t4\\t34.98\\n")\n+        pathFile.write("153\\tchunk2\\t3951\\t4343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n+        pathFile.write("154\\tchunk2\\t3866\\t3889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+        pathFile.write("155\\tchunk2\\t3102\\t3199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n+        pathFile.close()\n+        \n+#    def _writePathFileCoordOnChunk_outOfOverlap(self, pathFileName):\n+#        pathFile = open( pathFileName, "w" )\n+#        pathFile.write("123\\tchunk1\\t108397\\t108531\\tMariner2_AG_1p:classII:TIR\\t53\\t97\\t8e-19\\t28\\t35.56\\n")\n+#        pathFile.write("123\\tchunk1\\t108545\\t109120\\tMariner2_AG_1p:classII:TIR\\t102\\t333\\t8e-19\\t87\\t27.97\\n")\n+#        pathFile.write("124\\tchunk1\\t59607\\t59714\\tLINER1-2_NVi_2p:classI:?\\t502\\t537\\t3e-20\\t30\\t36.11\\n")\n+#        pathFile.write("124\\tchunk1\\t59695\\t60156\\tLINER1-2_NVi_2p:classI:?\\t533\\t725\\t3e-20\\t90\\t36.79\\n")\n+#        pathFile.write("125\\tchunk1\\t193027\\t193101\\tCR1-8_AG_1p:classI:LINE\\t470\\t448\\t1e-27\\t11\\t28.57\\n")\n+#        pathFile.write("126\\tchunk1\\t102131\\t102178\\tTc1-1_TCa_1p:classII:TIR\\t288\\t274\\t5e-29\\t18\\t52.5\\n")\n+#        
pathFile.write("127\\tchunk1\\t59520\\t59606\\tNotoAg1_2p:classI:?\\t482\\t508\\t1e-13\\t14\\t30.61\\n")\n+#        pathFile.write("128\\tchunk1\\t183866\\t193889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n+#        pathFile.write("129\\tchunk1\\t183866\\t200000\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+#        pathFile.write("150\\tchunk2\\t21176\\t21250\\tTc1-1_TCa_1p:classII:TIR\\t135\\t109\\t8e-32\\t21\\t41.57\\n")\n+#        pathFile.write("151\\tchunk2\\t116603\\t116698\\tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n+#        pathFile.write("152\\tchunk2\\t1\\t3889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n+#        pathFile.write("153\\tchunk2\\t3951\\t4343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n+#        pathFile.write("154\\tchunk2\\t1\\t13889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+#        pathFile.write("155\\tchunk2\\t3102\\t3199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n+#        pathFile.close()\n+#        \n+#    def _writePathFileCoordOnChrWithOutDoublons_outOfOverlap(self, pathFileName):\n+#        file = open( pathFileName, "w" )\n+#        file.write("123\\tdmel_chr4\\t868397\\t868531\\tMariner2_AG_1p:classII:TIR\\t53\\t97\\t8e-19\\t28\\t35.56\\n")\n+#        file.write("123\\tdmel_chr4\\t868545\\t869120\\tMariner2_AG_1p:classII:TIR\\t102\\t333\\t8e-19\\t87\\t27.97\\n")\n+#        file.write("124\\tdmel_chr4\\t819607\\t819714\\tLINER1-2_NVi_2p:classI:?\\t502\\t537\\t3e-20\\t30\\t36.11\\n")\n+#        file.write("124\\tdmel_chr4\\t819695\\t820156\\tLINER1-2_NVi_2p:classI:?\\t533\\t725\\t3e-20\\t90\\t36.79\\n")\n+#        file.write("125\\tdmel_chr4\\t953027\\t953101\\tCR1-8_AG_1p:classI:LINE\\t470\\t448\\t1e-27\\t11\\t28.57\\n")\n+#        file.write("126\\tdmel_chr4\\t862131\\t862178\\tTc1-1_TCa_1p:classII:TIR\\t288\\t274\\t5e-29\\t18\\t52.5\\n")\n+#        file.write("127\\tdmel_chr4\\t819520\\t819606\\tNotoAg1_2p:classI:?\\t482\\t508\\t1e-13\\t14\\t30.61\\n")\n+#        file.write("128\\tdmel_chr4\\t943866\\t953889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n+#        file.write("129\\tdmel_chr4\\t943866\\t963889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+#        file.write("150\\tdmel_chr4\\t971176\\t971250\\tTc1-1_TCa_1p:classII:TIR\\t135\\t109\\t8e-32\\t21\\t41.57\\n")\n+#        file.write("151\\tdmel_chr4\\t1066603\\t1066698\\tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n+#        file.write("153\\tdmel_chr4\\t953951\\t954343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n+#        file.write("155\\tdmel_chr4\\t953102\\t953199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n+#        file.close()\n+       \n+if __name__ == "__main__":\n+        unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_F_ConvCoord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_F_ConvCoord.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,213 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.coord.ConvCoord import ConvCoord\n+import time\n+import subprocess\n+import os\n+import unittest\n+\n+class Test_F_ConvCoord(unittest.TestCase):\n+    \n+    def setUp( self ):\n+        self._i = ConvCoord()\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        self._inData = "dummyInData_%s" % ( self._uniqId )\n+        self._mapData = "dummyMapData_%s" % ( self._uniqId )\n+        self._expData = "dummyExpData_%s" % ( self._uniqId )\n+        self._obsData = "dummyObsData_%s" % ( self._uniqId )\n+        self._iDb = DbFactory.createInstance()\n+        self._i._iDb = self._iDb\n+        \n+    def tearDown( self ):\n+        self._iDb.close()\n+        \n+    def test_run_as_script_alignFile_query( self ):\n+        configFile = "%s/dummyConfigFile_%s" % ( os.getcwd(), self._uniqId )\n+        configF = open( configFile, "w" )\n+        configF.write( "[repet_env]\\n" )\n+        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+        configF.close()\n+        self._writeMapFile( self._mapData )\n+        \n+        linesToProcess = [ "chunk1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.800000" + "\\n",  # hit within the 1st chunk\n+                           "chunk1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",   # hit included within the chunk overlap, on the 1st chunk\n+                           "chunk2" + "\\t" + "2" + "\\t" + "9" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n+                           "chunk2" + "\\t" + "51" + "\\t" + "58" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",     # hit inside the 2nd chunk\n+                           "chunk2" + "\\t" + "51" + "\\t" + "70" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"     # subject on reverse strand\n+                           ]\n+        FileUtils.writeLineListInFile( self._inData, linesToProcess )\n+        \n+        refLines = [ "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.800000" + "\\n",\n+                     "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",\n+                     "chromosome1" + "\\t" + "141" + "\\t" + "148" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",\n+                     "chromosome1" + "\\t" + "141" + "\\t" + "160" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"\n+                     ]\n+        FileUtils.writeLineListInFile( self._expData, refLines )\n+        \n+     
   cmd = "ConvCoord.py"\n+        cmd += " -i %s" % ( self._inData )\n+        cmd += " -f %s" % ( "align" )\n+        cmd += " -c %s" % ( "q" )\n+        cmd += " -m %s" % ( self._mapData )\n+        cmd += " -o %s" % ( self._obsData )\n+        cmd += " -C %s" % ( configFile )\n+        process = subprocess.Popen(cmd, shell = True)\n+        process.communicate()\n+        \n+        self.assertTrue( FileUtils.are2FilesIdentical( self._expData, self._obsData ) )\n+        \n+        os.remove( self._inData )\n+        os.remove(configFile)\n+        os.remove( self._mapData )\n+        os.r'..b'"8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",   # hit included within the chunk overlap, on the 1st chunk\n+                           "3" + "\\t" + "chunk2" + "\\t" + "2" + "\\t" + "9" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n+                           "4" + "\\t" + "chunk2" + "\\t" + "51" + "\\t" + "58" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",     # hit inside the 2nd chunk\n+                           "5" + "\\t" + "chunk2" + "\\t" + "51" + "\\t" + "70" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"     # subject on reverse strand\n+                           ]\n+        FileUtils.writeLineListInFile( self._inData, linesToProcess )\n+        self._iDb.createTable( self._inData, "path", self._inData, True )\n+        os.remove( self._inData )\n+        \n+        refLines = [ "1" + "\\t" + "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.8" + "\\n",\n+                     "2" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",\n+                     "3" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n+                     "4" + "\\t" + "chromosome1" + "\\t" + "141" + "\\t" + "148" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",\n+                     "5" + "\\t" + "chromosome1" + "\\t" + "141" + "\\t" + "160" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n"\n+                     ]\n+        FileUtils.writeLineListInFile( self._expData, refLines )\n+        \n+        cmd = "ConvCoord.py"\n+        cmd += " -i %s" % ( self._inData )\n+        cmd += " -f %s" % ( "path" )\n+        cmd += " -c %s" % ( "q" )\n+        cmd += " -m %s" % ( self._mapData )\n+        cmd += " -M %s" % ( "no" )\n+        cmd += " -o %s" % ( self._obsData )\n+        process = subprocess.Popen(cmd, shell = True)\n+        process.communicate()\n+        \n+        self._iDb.exportDataToFile( self._obsData, self._obsData )\n+        self.assertTrue( FileUtils.are2FilesIdentical( self._expData, self._obsData ) )\n+        \n+        os.remove( self._obsData )\n+        os.remove( self._expData )\n+        self._iDb.dropTable( self._mapData )\n+        self._iDb.dropTable( self._inData )\n+        self._iDb.dropTable( self._expData )\n+        self._iDb.dropTable( self._obsData 
)\n+\n+    def test_run(self):\n+        inFileName = "DmelChr4_chk.align.not_over.filtered"\n+        expFileName = "%s/Tools/DmelChr4_chr.align.not_over.filtered" % os.environ["REPET_DATA"]\n+        obsFileName = "obs.align"\n+        os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName), inFileName)\n+        iConvCoord = ConvCoord()\n+        iConvCoord.setInputData(inFileName)\n+        iConvCoord.setMapData("%s/Tools/DmelChr4_chunks.map" % os.environ["REPET_DATA"])\n+        iConvCoord.setCoordinatesToConvert("qs")\n+        iConvCoord.setMergeChunkOverlaps(False)\n+        iConvCoord.setOutputData(obsFileName)\n+        iConvCoord.run()\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        \n+        os.remove(inFileName)\n+        os.remove(obsFileName)\n+        \n+    def _writeMapFile( self, mapFile ):\n+        mapF = open( mapFile, "w" )\n+        mapF.write( "chunk1\\tchromosome1\\t1\\t100\\n" )\n+        mapF.write( "chunk2\\tchromosome1\\t91\\t190\\n" )\n+        mapF.write( "chunk3\\tchromosome2\\t1\\t100\\n" )\n+        mapF.close()\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
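The chunk-to-chromosome conversion these functional tests expect is plain offset arithmetic: with the map file written by _writeMapFile (chunk1 = chromosome1:1-100, chunk2 = chromosome1:91-190, chunk3 = chromosome2:1-100), a position p on a chunk lands at chunkStart + p - 1 on the chromosome, which is why the hit at 51-58 on chunk2 is expected at 141-148 on chromosome1. A minimal sketch of that rule using a plain dict rather than ConvCoord (chunk_to_chromosome is a hypothetical name, for illustration only):

    def chunk_to_chromosome(chunk_maps, chunk_name, position):
        # chunk_maps maps a chunk name to (chromosome name, start on chromosome, end on chromosome)
        chromosome, start, _end = chunk_maps[chunk_name]
        return chromosome, start + position - 1

    maps = {"chunk1": ("chromosome1", 1, 100), "chunk2": ("chromosome1", 91, 190)}
    assert chunk_to_chromosome(maps, "chunk2", 51) == ("chromosome1", 141)
    assert chunk_to_chromosome(maps, "chunk2", 58) == ("chromosome1", 148)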
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_Map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Map.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,183 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import os
+from commons.core.coord.Map import Map
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_Map( unittest.TestCase ):
+    
+    def setUp(self):
+        self._map = Map()
+        
+    def test_setFromString(self):
+        line = "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"   # test with '\t' separator
+        self._map.setFromString(line)
+        self.assertEqual( self._map.name, "MbQ12Gr2Cl2" )
+        self.assertEqual( self._map.seqname, "consensus1" )
+        self.assertEqual( self._map.start, 51 )
+        self.assertEqual( self._map.end, 1230 )
+        line = "MbQ12Gr2Cl2;consensus1;51;1230"   # test with ';' separator
+        self._map.setFromString(line,";")
+        self.assertEqual( self._map.name, "MbQ12Gr2Cl2" )
+        self.assertEqual( self._map.seqname, "consensus1" )
+        self.assertEqual( self._map.start, 51 )
+        self.assertEqual( self._map.end, 1230 )
+    
+    def test___eq__(self):
+        self._map.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n" )
+        o = Map()
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n" )
+        self.assertEqual( self._map, o )   # same data
+        o.setFromString( "MbQ12Gr2Cl1\tconsensus1\t51\t1230\n" )
+        self.assertNotEqual( self._map, o )   # different name
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus2\t51\t1230\n" )
+        self.assertNotEqual( self._map, o )   # different seqname
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t15\t1230\n" )
+        self.assertNotEqual( self._map, o )   # different start
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t123000\n" )
+        self.assertNotEqual( self._map, o )   # different end
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t1230\t51\n" )
+        self.assertNotEqual( self._map, o )   # same start/end but in different order
+        
+    def test_setFromTuple(self):
+        tuple = ("MbQ12Gr2Cl2", "consensus1","51","1230")
+        self._map.setFromTuple(tuple)
+
+        expMap = Map("MbQ12Gr2Cl2", "consensus1",51,1230)
+        obsMap = self._map
+        
+        self.assertEquals(expMap, obsMap)
+    
+    def test_read_empty_file(self):
+        
+        fileName = "dummyFile"
+        os.system("touch " + fileName) 
+        fileHandle = open(fileName, "r")
+        
+        obsResult = self._map.read(fileHandle)
+        expResult = 0
+         
+        fileHandle.close()
+        os.remove(fileName) 
+        
+        self.assertEquals(expResult, obsResult)
+    
+    def test_read_uncompleted_line( self):
+        uncompletedLine = "MbQ12Gr2Cl2\tconsensus1\t51"
+        fileName = "dummyFile"
+
+        fileHandle = open(fileName, "w")
+        fileHandle.write(uncompletedLine)
+        fileHandle.close()
+
+        fileHandle = open(fileName, "r")
+       
+        obsResult = self._map.read(fileHandle)
+        expResult = 0
+
+        fileHandle.close()
+        os.remove(fileName)
+
+        self.assertEquals(obsResult, expResult)
+
+    def test_read(self):
+        line =  "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"
+        fileName = "dummyFile"
+
+        fileHandle = open(fileName, "w")
+        fileHandle.write(line)
+        fileHandle.close()
+
+        fileHandle = open(fileName, "r")
+        self._map.read(fileHandle)
+        obsResult = self._map
+        
+        expResult = Map()
+        expResult.setFromString(line) 
+
+        fileHandle.close()
+        os.remove(fileName)
+
+        self.assertEquals(obsResult, expResult) 
+     
+    def test_write(self):
+        line =  "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"
+        expFileName = "expFileName"
+
+        fileHandle = open(expFileName, "w")
+        fileHandle.write(line)
+        fileHandle.close()
+        
+        obsFileName = "obsFileName"
+        fileHandle = open(obsFileName, "w")
+        self._map.setFromString(line)
+        self._map.write(fileHandle)
+        fileHandle.close()
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        
+        os.remove(obsFileName)
+        os.remove(expFileName)
+        
+    def test_diff1(self):
+        map1 = Map("seq1","DmelChr4", 190000, 390000)
+        map2 = Map("seq2","DmelChr4", 290000, 590000)
+        
+        expMap1 = Map("seq1", "DmelChr4", 190000, 289999)
+        expReturnedMap = Map()
+        
+        obsReturnedMap = map1.diff(map2)
+        obsMap1 = map1
+        
+        self.assertEquals(expMap1, obsMap1)
+        self.assertEquals(expReturnedMap, obsReturnedMap)
+        
+    def test_diff2(self):
+        map1 = Map("seq1","DmelChr4", 190000, 590000)
+        map2 = Map("seq2","DmelChr4", 290000, 390000)
+
+        expMap1 = Map("seq1", "DmelChr4", 190000, 289999)
+        expReturnedMap = Map("seq1", "DmelChr4", 390001, 590000)
+        
+        obsReturnedMap = map1.diff(map2)
+        obsMap1 = map1
+        
+        self.assertEquals(expMap1, obsMap1)
+        self.assertEquals(expReturnedMap, obsReturnedMap)
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_Map ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
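The two diff tests above pin down the Map.diff contract for the overlap configurations they cover: the overlap with the other map is removed from self, the part to the left of the overlap stays in self, and the part to the right of the overlap (if any) is returned, otherwise an empty Map comes back. A small sketch of that rule on plain (start, end) tuples, covering only the two cases exercised by test_diff1 and test_diff2 (interval_diff is an illustrative helper, not the Map implementation):

    def interval_diff(self_iv, other_iv):
        # Remove the overlap with other_iv; return (trimmed self, right-hand remainder or None).
        start, end = self_iv
        o_start, o_end = other_iv
        left = (start, min(end, o_start - 1))              # kept in place
        right = (o_end + 1, end) if o_end < end else None  # returned when other_iv splits self_iv
        return left, right

    assert interval_diff((190000, 390000), (290000, 590000)) == ((190000, 289999), None)                # test_diff1
    assert interval_diff((190000, 590000), (290000, 390000)) == ((190000, 289999), (390001, 590000))    # test_diff2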
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_MapUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_MapUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,384 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import sys\n+from commons.core.coord.MapUtils import MapUtils\n+from commons.core.coord.Map import Map\n+from commons.core.coord.Set import Set\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_MapUtils( unittest.TestCase ):\n+    \n+    def test_getMapListSortedByIncreasingMinThenMax( self ):\n+        iMap1 = Map("name1", "chr1", 1, 350)\n+        iMap2 = Map("name2", "chr1", 1, 100)\n+        iMap3 = Map("name3", "chr1", 50, 350)\n+        iMap4 = Map("name4", "chr1", 5, 450)\n+        lMaps = [ iMap1, iMap2, iMap3, iMap4 ]\n+        \n+        expLMaps = [ iMap2, iMap1, iMap4, iMap3 ]\n+        \n+        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+        \n+        self.assertEquals( expLMaps, obsLMaps )\n+        \n+        \n+    def test_getMapListSortedByIncreasingMinThenMax_ordered( self ):\n+        iMap1 = Map("name1", "chr1", 1, 100)\n+        iMap2 = Map("name2", "chr1", 1, 350)\n+        \n+        lMaps = [ iMap1, iMap2 ]\n+        expLMaps = [ iMap1, iMap2 ]\n+        \n+        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+        \n+        self.assertEquals( expLMaps, obsLMaps )\n+        \n+        \n+    def test_getMapListSortedByIncreasingMinThenMax_unordered( self ):\n+        iMap1 = Map("name1", "chr1", 1, 350)\n+        iMap2 = Map("name2", "chr1", 1, 100)\n+        \n+        lMaps = [ iMap1, iMap2 ]\n+        expLMaps = [ iMap2, iMap1 ]\n+        \n+        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+        \n+        self.assertEquals( expLMaps, obsLMaps )\n+        \n+        \n+    def test_getMapListSortedByIncreasingMinThenMax_nonOverlapping( self ):\n+        iMap1 = Map("name1", "chr1", 1, 350)\n+        iMap2 = Map("name2", "chr1", 400, 600)\n+        \n+        lMaps = [ iMap2, iMap1 ]\n+   
     expLMaps = [ iMap1, iMap2 ]\n+        \n+        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+        \n+        self.assertEquals( expLMaps, obsLMaps )\n+        \n+        \n+    def test_getMapListSortedByIncreasingMinThenMax_sameMinThreeMaps( self ):\n+        iMap1 = Map("name1", "chr1", 350, 1)\n+        iMap2 = Map("name2", "chr1", 400, 1)\n+        iMap3 = Map("name3", "chr1", 500, 1)\n+        \n+        lMaps = [ iMap2, iMap1, iMap3 ]\n+        expLMaps = [ iMap1, iMap2, iMap3 ]\n+        \n+        obsLM'..b'SetFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n+        expSetFileHandler.write( "3\\tseq40\\tchr2\\t600\\t700\\n" )\n+        expSetFileHandler.write( "4\\tseq2\\tchr3\\t301\\t500\\n" )\n+        expSetFileHandler.close()\n+        \n+        obsFile = "dummyObsFile"\n+        \n+        MapUtils.convertMapFileIntoSetFile( mapInputFile, obsFile )\n+        \n+        self.assertTrue( FileUtils.are2FilesIdentical( expSetFile, obsFile ) )\n+        \n+        for f in [ expSetFile, mapInputFile, obsFile ]:\n+            os.remove( f )\n+\n+    def test_convertMapFileIntoSetFile_one_line(self):\n+        mapInputFile = "dummyExpFile"\n+        mapFileHandler = open( mapInputFile, "w" )\n+        mapFileHandler.write( "seq31\\tchr1\\t151\\t250\\n" )\n+        mapFileHandler.close()\n+\n+        expSetFile = "dummyexpSetFile"\n+        expSetFileHandler = open( expSetFile, "w" )\n+        expSetFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n+        expSetFileHandler.close()\n+        \n+        obsFile = "dummyObsFile"\n+        \n+        MapUtils.convertMapFileIntoSetFile( mapInputFile, obsFile )\n+        \n+        self.assertTrue( FileUtils.are2FilesIdentical( expSetFile, obsFile ) )\n+        \n+        for f in [ expSetFile, mapInputFile, obsFile ]:\n+            os.remove( f )\n+\n+    def test_convertMapFileIntoSetFile_empty_file(self):\n+        mapInputFile = "dummyFile.map"\n+        mapFileHandler = open( mapInputFile, "w" )\n+        mapFileHandler.close()\n+        \n+        expFile = "dummyExpFile.map.set"\n+        expFileHandler = open( expFile, "w" )\n+        expFileHandler.close()\n+        \n+        obsFile = "dummyFile.map.set"\n+        \n+        MapUtils.convertMapFileIntoSetFile( mapInputFile )\n+        \n+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ expFile, mapInputFile, obsFile ]:\n+            os.remove( f )\n+            \n+    def test_writeListInFile_empty_list(self):\n+        lMaps = [ ]\n+        expFileName = "expFileName"\n+        fileHandle = open(expFileName, "w")\n+        fileHandle.close()\n+ \n+        obsFileName = "obsFileName"\n+        fileHandle = open(obsFileName, "w")\n+        MapUtils.writeListInFile(lMaps, obsFileName, "w")\n+        fileHandle.close()\n+         \n+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )\n+        \n+        os.remove(obsFileName)\n+        os.remove(expFileName)\n+        \n+    def test_writeListInFile_list_one_set(self):\n+        lMaps = [ Map( "map1", "map1seq", 1, 10 ) ]\n+        line =  "map1\\tmap1seq\\t1\\t10\\n"\n+       \n+        expFileName = "expFileName"\n+ \n+        fileHandle = open(expFileName, "w")\n+        fileHandle.write(line)\n+        fileHandle.close()\n+ \n+        obsFileName = "obsFileName"\n+        fileHandle = open(obsFileName, "w")\n+        MapUtils.writeListInFile(lMaps, obsFileName, "w")\n+        
fileHandle.close()\n+         \n+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )\n+        \n+        os.remove(obsFileName)\n+        os.remove(expFileName)\n+\n+    def test_getMinLengthOfMapFile(self):\n+        mapFileName = "%s/Gnome_tools/Vein_v4_scaffold_00001.fa.Nstretch.map" % os.environ["REPET_DATA"]\n+        expMinLengthofMapFile = 20\n+        iMap = MapUtils()\n+        obsMinLengthofMapFile = iMap.getMinLengthOfMapFile(mapFileName)\n+        self.assertEquals(expMinLengthofMapFile, obsMinLengthofMapFile)\n+       \n+    def test_getMaxLengthOfMapFile(self):\n+        mapFileName = "%s/Gnome_tools/Vein_v4_scaffold_00001.fa.Nstretch.map" % os.environ["REPET_DATA"]\n+        expMinLengthofMapFile = 6344\n+        iMap = MapUtils()\n+        obsMinLengthofMapFile = iMap.getMaxLengthOfMapFile(mapFileName)\n+        self.assertEquals(expMinLengthofMapFile, obsMinLengthofMapFile)\n+       \n+\n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_MapUtils ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
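The convertMapFileIntoSetFile tests above describe a one-column extension of the map format: each map line (name, sequence, start, end) gains a leading integer identifier assigned in file order, and when no output path is given the result is written next to the input as <input>.set (that is what the empty-file test checks with dummyFile.map / dummyFile.map.set). A minimal sketch of that conversion (map_to_set is a hypothetical name, not MapUtils itself):

    def map_to_set(map_path, set_path=None):
        set_path = set_path or map_path + ".set"
        with open(map_path) as src, open(set_path, "w") as dest:
            for identifier, line in enumerate(src, start=1):
                dest.write("%i\t%s" % (identifier, line))  # e.g. "seq31 chr1 151 250" -> "1 seq31 chr1 151 250"
        return set_path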
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_Match.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Match.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,363 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+from commons.core.coord.Match import Match\n+from commons.core.coord.Path import Path\n+\n+\n+class Test_Match( unittest.TestCase ):\n+    \n+    def test_eq_match_equals( self ):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertEquals( match1, match2 )\n+        \n+    def test_eq_match_not_equals_query_name( self ):\n+        tuple1 = ("Name", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertNotEquals( match1, match2 )\n+        \n+    def test_eq_match_not_equals_query_start( self ):\n+        tuple1 = ("QName", 2, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertNotEquals( match1, match2 )\n+        \n+    def test_eq_match_not_equals_query_end( self ):\n+        tuple1 = ("QName", 1, 6, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertNotEquals( match1, match2 )\n+        \n+    def 
test_eq_match_not_equals_query_length( self ):\n+        tuple1 = ("QName", 1, 5, 6, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertNotEquals( match1, match2 )\n+        \n+    def test_eq_match_not_equals_query_length_perc( self ):\n+        tuple1 = ("QName", 1, 5, 5, 0.15, 0.2, "SName'..b'ple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match = Match()\n+        match.setFromTuple(tuple)\n+        expString = "QName\\t1\\t5\\t5\\t%f\\t%f\\tSName\\t5\\t25\\t20\\t%f\\t%g\\t15\\t%f\\t1" % (0.1,0.2,0.15,1e-20, 87.2)\n+        obsString = match.toString()\n+        self.assertEquals(expString, obsString)\n+        \n+    def test_getPathInstance( self ):\n+        tuple = ( "QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1 )\n+        match = Match()\n+        match.setFromTuple( tuple )\n+        tuple = ( 1, "QName", 1, 5, "SName", 5, 25, 1e-20, 15, 87.2 )\n+        exp = Path()\n+        exp.setFromTuple( tuple )\n+        obs = match.getPathInstance()\n+        self.assertEqual( exp, obs )\n+        \n+    def test_getQryIsIncluded(self):\n+        tuple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match = Match()\n+        match.setFromTuple(tuple)\n+        expString = "query QName (50 bp: 1-5) is contained in subject SName (133 bp: 5-25): id=87.20 - 0.100 - 0.200 - 0.150"\n+        obsString = match.getQryIsIncluded()\n+        self.assertEquals(expString, obsString)\n+        \n+    def test_isDoublonWith_Matchs_equals(self):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertTrue(match1.isDoublonWith(match2))\n+        \n+    def test_isDoublonWith_Matchs_unequals_on_MatchNumbers(self):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 86.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertFalse(match1.isDoublonWith(match2))\n+        \n+    def test_isDoublonWith_Matchs_unequals_on_SeqNames(self):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "Name", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertFalse(match1.isDoublonWith(match2))\n+        \n+    def test_isDoublonWith_Matchs_unequals_on_Coordinates(self):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("QName", 1, 6, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertFalse(match1.isDoublonWith(match2))\n+      
  \n+    def test_isDoublonWith_Reversed_Matchs_equals(self):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("SName", 5, 25, 20, 0.15, 0.2, "QName", 1, 5, 5, 0.1, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertTrue(match1.isDoublonWith(match2))\n+        \n+    def test_isDoublonWith_Reversed_Matchs_unequals(self):\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        match1 = Match()\n+        match1.setFromTuple(tuple1)\n+        tuple2 = ("SName", 5, 25, 20, 0.15, 0.2, "QName", 1, 6, 5, 0.1, 1e-20, 15, 87.2, 1)\n+        match2 = Match()\n+        match2.setFromTuple(tuple2)\n+        self.assertFalse(match1.isDoublonWith(match2))\n+        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Match ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
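test_getPathInstance above fixes how a 15-field match tuple collapses into a 10-field path tuple: the trailing path number becomes the leading identifier, and only the query coordinates, subject coordinates, E-value, score and identity are kept. A small sketch of that projection on plain tuples (match_tuple_to_path_tuple is an illustrative helper, not Match.getPathInstance):

    def match_tuple_to_path_tuple(m):
        # m follows the field order used by Match.setFromTuple in the tests above.
        return (m[14], m[0], m[1], m[2], m[6], m[7], m[8], m[11], m[12], m[13])

    m = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
    assert match_tuple_to_path_tuple(m) == (1, "QName", 1, 5, "SName", 5, 25, 1e-20, 15, 87.2)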
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_MatchUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_MatchUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,439 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.MatchUtils import MatchUtils\n+from commons.core.coord.Match import Match\n+from commons.core.seq.BioseqDB import BioseqDB\n+\n+\n+class Test_MatchUtils( unittest.TestCase ):\n+    \n+    def test_getMatchListFromFile( self ):\n+        inFile = "dummyInFile"\n+        inFileHandler = open( inFile, "w" )\n+        inFileHandler.write( "query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n" )\n+        m1 = Match()\n+        m1.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        m1.write( inFileHandler )\n+        m2 = Match()\n+        m2.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        m2.write( inFileHandler )\n+        inFileHandler.close()\n+        \n+        lExp = [ m1, m2 ]\n+        \n+        lObs = MatchUtils.getMatchListFromFile( inFile )\n+        \n+        self.assertEquals( lExp, lObs )\n+        \n+        os.remove( inFile )\n+        \n+    def test_getDictOfListsWithSubjectAsKey( self ):\n+        m1 = Match()\n+        m1.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        m2 = Match()\n+        m2.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        lMatch = [ m1, m2 ]\n+        \n+        dExp = { "SName1": [ m1 ], "SName2": [ m2 ] }\n+        \n+        dObs = MatchUtils.getDictOfListsWithSubjectAsKey( lMatch )\n+        \n+        self.assertEquals( dExp, dObs )\n+        \n+    def test_getDictOfListsWithQueryAsKey( self ):\n+        m1 = Match()\n+        m1.setFromTuple( 
("QName1", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        m2 = Match()\n+        m2.setFromTuple( ("QName2", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        m3 = Match()\n+        m3.setFromTuple( ("QName1", 1, 5, 5, 0.1, 0.2, "SName3", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+        lMatch = [ m1, m2, m3 ]\n+        \n+        dExp = { "QName1": [ m1, m3 ], "QName2": [ m2 ] }\n+        \n+        dObs = MatchUtils.getDictOfListsWithQueryAsKey( lMatch )\n+        \n+        self.assertEquals'..b'TTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGAATAAAAAATGATTATTTG\\n")\n+        f.write("CATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGGGATTCGTTTCATTCACCG\\n")\n+        f.close()\n+\n+    def _writeMatchFile2(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("header2\\t1\\t120\\t120\\t1\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+        f.close()\n+        \n+    def _writeMatchFile3(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+        f.write("header3\\t1\\t120\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-68\\t238\\t100\\t1\\n")\n+        f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t67\\t1\\n")\n+        f.close()\n+        \n+    def _writeMatchFile4(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+        f.write("header3\\t120\\t220\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100\\t1\\n")\n+        f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90\\t1\\n")\n+        f.close()\n+        \n+    def _writeExpAlignFile(self,fileName):\n+        f = open(fileName, "w")\n+        f.write("header2\\t1\\t120\\tBS31790\\t19\\t138\\t3e-68\\t238.0\\t100.0\\n")\n+        f.write("header3\\t120\\t220\\tBS31790\\t19\\t138\\t3e-65\\t238.0\\t100.0\\n")\n+        f.write("header4\\t1\\t120\\tBS31790\\t19\\t138\\t3e-67\\t244.0\\t90.0\\n")\n+        f.close()\n+        \n+    def _writeMatchFile5(self,fileName):\n+        f = open(fileName, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+        
f.write("header2\\t124\\t144\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+        f.write("header3\\t120\\t220\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100\\t1\\n")\n+        f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90\\t1\\n")\n+        f.close()\n+        \n+    def _writeExpMatchFile(self,fileName):\n+        f = open(fileName, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100.000000\\t1\\n")\n+        f.write("header2\\t124\\t144\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100.000000\\t1\\n")\n+        f.write("header3\\t120\\t220\\t120\\t0.990000\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100.000000\\t2\\n")\n+        f.write("header4\\t1\\t120\\t120\\t1.000000\\t0.941570\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90.000000\\t3\\n")\n+        f.close()\n+    \n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_MatchUtils ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_MergedRange.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_MergedRange.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,119 @@
+import unittest
+from commons.core.coord.MergedRange import MergedRange
+from commons.core.coord.Match import Match
+
+class Test_MergedRange(unittest.TestCase):
+    
+    def test_eq_True(self):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([1], 6, 10)
+        self.assertEquals(mr1, mr2)
+    
+    def test_eq_different_list(self):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([1, 2], 6, 10)
+        self.assertNotEquals(mr1, mr2)
+    
+    def test_eq_different_start(self):
+        mr1 = MergedRange([1], 5, 10)
+        mr2 = MergedRange([1], 6, 10)
+        self.assertNotEquals(mr1, mr2)
+    
+    def test_eq_different_end(self):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([1], 6, 11)
+        self.assertNotEquals(mr1, mr2)
+
+    def test_isOverlapping_no( self ):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 16, 20)
+        exp = False
+        obs = mr1.isOverlapping( mr2 )
+        self.assertEquals( exp, obs )
+        
+    def test_isOverlapping_yes( self ):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 5, 20)
+        exp = True
+        obs = mr1.isOverlapping( mr2 )
+        self.assertEquals( exp, obs )
+
+    def test_isOverlapping_range1_before_range2( self ):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 8, 15)
+        exp = True
+        obs = mr1.isOverlapping( mr2 )
+        self.assertEquals( exp, obs )
+        
+    def test_isOverlapping_range1_after_range2( self ):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 1, 8)
+        exp = True
+        obs = mr1.isOverlapping( mr2 )
+        self.assertEquals( exp, obs )
+        
+    def test_isOverlapping_range1_equal_range2( self ):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 6, 10)
+        exp = True
+        obs = mr1.isOverlapping( mr2 )
+        self.assertEquals( exp, obs )
+    
+    def test_merge_mr1_with_mr2(self):
+        otherMergedRange = MergedRange()
+        otherMergedRange._lId.append(3)
+        otherMergedRange._start = 1
+        otherMergedRange._end = 10
+        
+        mr1 = MergedRange()
+        mr1._lId.append(1)
+        mr1._start = 6
+        mr1._end = 10
+        
+        mr2 = MergedRange([2], 1, 15)
+        mr1.merge(mr2)
+        
+        exp = MergedRange([1, 2], 1, 15)
+        self.assertEquals(exp, mr1)
+        
+    def test_merge_mr2_with_mr1(self):
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 1, 15)
+        mr2.merge(mr1)
+        exp = MergedRange([1, 2], 1, 15)
+        self.assertEquals(exp, mr2)
+        
+    def test_setFromMatch(self):
+        tuple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
+        iMatch = Match()
+        iMatch.setFromTuple(tuple)
+        
+        expMergedRange = MergedRange([1], 1, 5)
+        obsMergedRange = MergedRange()
+        obsMergedRange.setFromMatch(iMatch)
+        
+        self.assertEquals(expMergedRange, obsMergedRange)
+    
+    def test_getMergedRangeListFromMatchList(self):
+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
+        iMatch1 = Match()
+        iMatch1.setFromTuple(tuple1)
+        tuple2 = ("QName", 10, 15, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 2)
+        iMatch2 = Match()
+        iMatch2.setFromTuple(tuple2)
+        lMatch = [iMatch1, iMatch2]
+        
+        explMergedRange = [MergedRange([1], 1, 5), MergedRange([2], 10, 15)]
+        obslMergedRange = MergedRange.getMergedRangeListFromMatchList(lMatch)
+
+        self.assertEquals(explMergedRange, obslMergedRange)
+    
+    def test_getMergedRangeListFromMatchList_empty_list(self):
+        lMatch = []
+        explMergedRange = []
+        obslMergedRange = MergedRange.getMergedRangeListFromMatchList(lMatch)
+
+        self.assertEquals(explMergedRange, obslMergedRange)
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
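The merge tests above define the MergedRange combination rule: the two id lists are concatenated and the resulting range spans from the smaller start to the larger end, so MergedRange([1], 6, 10) merged with MergedRange([2], 1, 15) gives MergedRange([1, 2], 1, 15). A one-function sketch of that rule on plain data (merge_ranges is an illustrative helper, not the MergedRange class):

    def merge_ranges(ids1, span1, ids2, span2):
        # Concatenate the id lists and take the enclosing (start, end) span.
        return ids1 + ids2, (min(span1[0], span2[0]), max(span1[1], span2[1]))

    assert merge_ranges([1], (6, 10), [2], (1, 15)) == ([1, 2], (1, 15))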
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_Path.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Path.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,146 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+from commons.core.coord.Path import Path
+from commons.core.coord.Align import Align
+from commons.core.coord.Set import Set
+
+
+class Test_Path( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._path = Path()
+        
+    def test_setFromTuple( self ):
+        line = "1\tchr1\t1\t10\tTE2\t11\t17\t1e-20\t30\t90.2"
+        self._path.setFromTuple( line.split("\t") )
+        self.assertEqual( self._path.id, 1 )
+        self.assertEqual( self._path.range_query.seqname, "chr1" )
+        self.assertEqual( self._path.range_query.start, 1 )
+        self.assertEqual( self._path.range_query.end, 10 )
+        self.assertEqual( self._path.range_subject.seqname, "TE2" )
+        self.assertEqual( self._path.range_subject.start, 11 )
+        self.assertEqual( self._path.range_subject.end, 17 )
+        self.assertEqual( self._path.e_value, float("1e-20") )
+        self.assertEqual( self._path.score, float("30") )
+        self.assertEqual( self._path.identity, float("90.2") )
+        
+    def test___eq__( self ):
+        self._path.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
+        o = Path()
+        o.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
+        self.assertEqual( self._path,  o )
+        o.setFromString( "2\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
+        self.assertNotEqual( self._path,  o )
+        o.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t3000000\t90.2\n" )
+        self.assertNotEqual( self._path,  o )
+        
+    def test_canMerge( self ):
+        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        tuple = ("2", "chr1","2", "9","TE2","10","13","1e-20","30","90.2")
+        o = Path()
+        o.setFromTuple(tuple)
+        self.assertTrue(self._path.canMerge(o))
+        
+    def test_canMerge_on_same_id ( self ): 
+        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        tuple = ("1", "chr1","2", "9","TE2","10","13","1e-20","30","90.2")
+        o = Path()
+        o.setFromTuple(tuple)
+        self.assertFalse(self._path.canMerge(o))
+        
+    def test_canMerge_on_same_chr( self ):     
+        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        tuple = ("2", "chr2","2", "9","TE2","10","13","1e-20","30","90.2")
+        o = Path()
+        o.setFromTuple(tuple)
+        self.assertFalse(self._path.canMerge(o))
+        
+    def test_canMerge_on_diff_subj( self ):      
+        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        tuple = ("2", "chr1","2", "9","TE3","10","13","1e-20","30","90.2")
+        o = Path()
+        o.setFromTuple(tuple)
+        self.assertFalse(self._path.canMerge(o)) 
+        
+    def test_canMerge_on_queries_that_do_not_overlap( self ):
+        tuple = ("1", "chr1","5", "11","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        tuple = ("2", "chr1","1", "4","TE2","10","13","1e-20","30","90.2")
+        o = Path()
+        o.setFromTuple(tuple)
+        self.assertFalse(self._path.canMerge(o)) 
+        
+    def test_canMerge_on_subjects_that_do_not_overlap( self ):    
+        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        tuple = ("2", "chr1","2", "9","TE2","1","10","1e-20","30","90.2")
+        o = Path()
+        o.setFromTuple(tuple)
+        self.assertFalse(self._path.canMerge(o))
+        
+    def test_getSubjectAsSetOfQuery( self ):
+        tuple = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        exp = Set(1,"TE2","chr1",1,10)
+        obs = self._path.getSubjectAsSetOfQuery()
+        self.assertEqual( exp, obs )
+        
+    def test_getSubjectAsSetOfQuery_on_neg_strand( self ):
+        tuple = ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2")
+        self._path.setFromTuple(tuple)
+        exp = Set(1,"TE2","chr1",10,1)
+        obs = self._path.getSubjectAsSetOfQuery()
+        self.assertEqual( exp, obs )
+        
+    def test_toString( self ):
+        self._path.setFromString( "1\tchr1\t1\t10\tTE3\t11\t17\t1e-20\t30\t85.2\n" )
+        exp = "1\tchr1\t1\t10\tTE3\t11\t17\t%g\t30\t%f" % ( 1e-20, 85.2 )
+        obs = self._path.toString()
+        self.assertEqual( obs, exp )
+        
+    def test_getAlignInstance( self ):
+        self._path.setFromTuple( ( "2", "chr3", "250", "151", "seq5", "1", "100", "1e-32", "147", "87.9" ) )
+        expAlign = Align()
+        expAlign.setFromTuple( ( "chr3", "151", "250", "seq5", "100", "1", "1e-32", "147", "87.9" ) )
+        obsAlign = self._path.getAlignInstance()
+        self.assertEqual( expAlign, obsAlign )
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_Path ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
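The tab-separated records parsed by setFromString/setFromTuple above have ten columns: path id, query name, query start, query end, subject name, subject start, subject end, e-value, score and identity. Below is a small parsing sketch under that assumption; parse_path_line is an illustrative helper, not the repository's Path class.

def parse_path_line(line):
    # illustrative parser for the 10-column path record shown in the tests
    fields = line.rstrip("\n").split("\t")
    return {"id": int(fields[0]),
            "query": (fields[1], int(fields[2]), int(fields[3])),
            "subject": (fields[4], int(fields[5]), int(fields[6])),
            "e_value": float(fields[7]),
            "score": float(fields[8]),
            "identity": float(fields[9])}

record = parse_path_line("1\tchr1\t1\t10\tTE2\t11\t17\t1e-20\t30\t90.2")
assert record["query"] == ("chr1", 1, 10) and record["e_value"] == 1e-20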
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_PathUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_PathUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,1667 @@
[New file; the diff body is garbled and truncated in this changeset view. Recoverable content: the standard CeCILL license header, imports of PathUtils, Path, Set, FileUtils, Range and Align, and a Test_PathUtils unittest class covering, among others, getSetListFromQueries, getRangeListFromSubjects, removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName and getPathListWithoutDuplicatesOnQueryCoord, plus helper methods for building Path lists and a test_suite runner.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_Range.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Range.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,698 @@
[New file; the diff body is garbled and truncated in this changeset view. Recoverable content: the standard CeCILL license header and a Test_Range unittest class covering setFromString (tab and custom separators), setFromTuple, __eq__, getMin/getMax, getBin at several bin levels, the module-level getBin and getIdx functions, findIdx, and a test_suite runner.]
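From the recoverable setFromString tests, a Range line is "seqname&lt;sep&gt;start&lt;sep&gt;end" with a tab as the default separator. A standalone sketch of that parsing follows; it is an illustration only, not commons.core.coord.Range:

def parse_range(line, sep="\t"):
    # keep only the first three fields: sequence name, start, end
    seqname, start, end = line.strip().split(sep)[:3]
    return seqname, int(start), int(end)

assert parse_range("chunk1\t190000\t390000\n") == ("chunk1", 190000, 390000)
assert parse_range("chunk1;190000;390000", ";") == ("chunk1", 190000, 390000)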
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_Set.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Set.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,282 @@
[New file; the diff body is garbled and truncated in this changeset view. Recoverable content: the standard CeCILL license header and a Test_Set unittest class covering __eq__, setFromTuple, read from file, merge (including different seqnames), diff in several overlap configurations, set2map, and a test_suite runner.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_SetUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_SetUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,1689 @@
[New file; the diff body is garbled and truncated in this changeset view. Recoverable content: the standard CeCILL license header and a Test_SetUtils unittest class covering changeIdInList, getOverlapLengthBetweenLists, getSetListFromFile, convertSetFileIntoMapFile, getDictOfListsWithSeqnameAsKey, filterOnLength, getListOfNames, getDictOfDictsWithNamesThenIdAsKeyFromFile, and a test_suite runner.]
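One of the recoverable tests builds a dictionary of Set lists keyed by sequence name (getDictOfListsWithSeqnameAsKey). A minimal sketch of that grouping, using plain dicts in place of Set instances, for illustration only:

def group_by_seqname(sets):
    # group records by their "seqname" field, preserving input order per key
    grouped = {}
    for s in sets:
        grouped.setdefault(s["seqname"], []).append(s)
    return grouped

sets = [{"name": "TE3", "seqname": "chr2"},
        {"name": "gene74", "seqname": "chr1"},
        {"name": "TE1", "seqname": "chr1"}]
assert sorted(group_by_seqname(sets).keys()) == ["chr1", "chr2"]
assert len(group_by_seqname(sets)["chr1"]) == 2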
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/coord/test/Test_SlidingWindow.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_SlidingWindow.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,109 @@
+import unittest
+from commons.core.coord.SlidingWindow import SlidingWindow
+from commons.core.coord.SlidingWindow import SlidingWindowToCountMatchingBases
+from commons.core.coord.Set import Set
+
+class Test_SlidingWindow( unittest.TestCase ):
+        
+    def test_slideWindowOnce( self ):
+        expStart = 91 
+        expEnd = 190
+        self.sw = SlidingWindow(100, 10)
+        self.sw.slideWindowOnce()
+        obsStart = self.sw._start
+        obsEnd = self.sw._end
+        
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+        
+    def test_slideWindowOnceFourTime( self ):
+        expStart = 201 
+        expEnd = 300
+        self.sw = SlidingWindow(100, 50)
+        i = 0
+        for i in range(4):
+            self.sw.slideWindowOnce()
+            i += 1
+        obsStart = self.sw._start
+        obsEnd = self.sw._end
+        
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+    
+        
+class Test_SlidingWindowToCountMatchingBases(unittest.TestCase):
+        
+    def test_getSetLengthOnWindow_featureIncluded( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 1)
+        iSet = Set( 1, "TE3", "chr1", 21, 30 )
+        exp = 10
+        obs = self.sw.getSetLengthOnWindow( iSet)
+        self.assertEqual( exp, obs )
+        
+    def test_getSetLengthOnWindow_windowIncluded( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 10)
+        self.sw.slideWindowOnce()
+        iSet = Set( 1, "TE3", "chr1", 21, 530 )
+        exp = 100
+        obs = self.sw.getSetLengthOnWindow( iSet)
+        self.assertEqual( exp, obs )
+        
+    def test_getSetLengthOnWindow_featureOverlapLeft( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 10)
+        self.sw.slideWindowOnce()
+        iSet = Set( 1, "TE3", "chr1", 21, 130 )
+        exp = 40
+        obs = self.sw.getSetLengthOnWindow( iSet)
+        self.assertEqual( exp, obs )
+        
+    def test_getSetLengthOnWindow_featureOverlapRight( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 10)
+        self.sw.slideWindowOnce()
+        iSet = Set( 1, "TE3", "chr1", 121, 230 )
+        exp = 70
+        obs = self.sw.getSetLengthOnWindow( iSet)
+        self.assertEqual( exp, obs )
+        
+    def test_getCoordSetOnWindow_featureIncluded( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 1)
+        iSet = Set( 1, "TE3", "chr1", 21, 30 )
+        expStart = 21
+        expEnd = 30
+        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
+        self.assertEqual( expStart, obsStart )
+        self.assertEqual( expEnd, obsEnd )
+        
+    def test_getCoordSetOnWindow_windowIncluded( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 10)
+        self.sw.slideWindowOnce()
+        iSet = Set( 1, "TE3", "chr1", 21, 530 )
+        expStart = 91
+        expEnd = 190
+        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
+        self.assertEqual( expStart, obsStart )
+        self.assertEqual( expEnd, obsEnd )
+        
+    def test_getCoordSetOnWindow_featureOverlapLeft( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 10)
+        self.sw.slideWindowOnce()
+        iSet = Set( 1, "TE3", "chr1", 21, 130 )
+        expStart = 91
+        expEnd = 130
+        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
+        self.assertEqual( expStart, obsStart )
+        self.assertEqual( expEnd, obsEnd )
+        
+    def test_getCoordSetOnWindow_featureOverlapRight( self ):
+        self.sw = SlidingWindowToCountMatchingBases(100, 10)
+        self.sw.slideWindowOnce()
+        iSet = Set( 1, "TE3", "chr1", 121, 230 )
+        expStart = 121
+        expEnd = 190
+        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
+        self.assertEqual( expStart, obsStart )
+        self.assertEqual( expEnd, obsEnd )
+
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_SlidingWindow ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file
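The assertions above imply a window of fixed length whose start and end advance by (length - overlap) at each slide, starting from position 1. A standalone sketch under that reading, not the repository's SlidingWindow class:

class SlidingWindowSketch(object):
    def __init__(self, length, overlap):
        self._start, self._end = 1, length      # first window is [1, length]
        self._step = length - overlap           # each slide advances by this much

    def slide_once(self):
        self._start += self._step
        self._end += self._step

sw = SlidingWindowSketch(100, 10)
sw.slide_once()
assert (sw._start, sw._end) == (91, 190)

sw = SlidingWindowSketch(100, 50)
for _ in range(4):
    sw.slide_once()
assert (sw._start, sw._end) == (201, 300)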
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/JobScriptTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/JobScriptTemplate.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+    newDir = None
+    print os.uname()
+    beginTime = time.time()
+    print 'beginTime=%f' % beginTime
+    print "work in dir '@@tmpDir@@'"
+    sys.stdout.flush()
+    if not os.path.exists( "@@tmpDir@@" ):
+        raise IOError("ERROR: temporary directory '@@tmpDir@@' doesn't exist")
+
+    minFreeGigaInTmpDir = 1
+    freeSpace = os.statvfs("@@tmpDir@@")
+    if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+        raise RepetException("ERROR: less than %iG of free space in '@@tmpDir@@'" % minFreeGigaInTmpDir)
+
+    os.chdir("@@tmpDir@@")
+    newDir = "@@groupId@@_@@jobName@@_@@time@@"
+    if os.path.exists(newDir):
+        shutil.rmtree(newDir)
+    os.mkdir(newDir)
+    os.chdir(newDir)
+
+    iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "running")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    @@cmdStart@@
+    if log != 0:
+        raise RepetException("ERROR: job returned %i" % log)
+    else:
+        print "job finished successfully"
+        sys.stdout.flush()
+    @@cmdFinish@@
+
+    os.chdir("..")
+    shutil.rmtree(newDir)
+
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "finished")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    endTime = time.time()
+    print 'endTime=%f' % endTime
+    print 'executionTime=%f' % (endTime - beginTime)
+    print os.uname()
+    sys.stdout.flush()
+
+except IOError, e :
+    print e
+    iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
+
+except Exception, e :
+    print "tmpDir is : @@tmpDir@@"
+    print "cDir is : @@cDir@@"
+    print e
+    if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("@@cDir@@/%s" % newDir):
+        os.chdir("..")
+        shutil.move(newDir, "@@cDir@@/%s" % newDir)
+    iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
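The script above is a template, not directly runnable: each @@name@@ token (tmpDir, groupId, jobName, time, launcher, jobTableName, cmdStart, cmdFinish, cDir) is replaced before submission, in the repository by the WriteScript class used from Launcher.py. A hedged sketch of that kind of substitution follows; the function name and behaviour are illustrative, not the actual WriteScript API:

def fill_template(template_text, values):
    # naive @@key@@ substitution; the real WriteScript class is not shown here
    for key, value in values.items():
        template_text = template_text.replace("@@%s@@" % key, str(value))
    return template_text

filled = fill_template("work in dir '@@tmpDir@@' for job @@jobName@@",
                       {"tmpDir": "/tmp/repet", "jobName": "job_1"})
assert filled == "work in dir '/tmp/repet' for job job_1"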
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/JobScriptTemplateLight.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/JobScriptTemplateLight.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+try:
+    newDir = None
+    print os.uname()
+    beginTime = time.time()
+    print 'beginTime=%f' % beginTime
+    print "work in dir '@@tmpDir@@'"
+    sys.stdout.flush()
+    if not os.path.exists( "@@tmpDir@@" ):
+        raise IOError("ERROR: temporary directory '@@tmpDir@@' doesn't exist")
+
+    minFreeGigaInTmpDir = 1
+    freeSpace = os.statvfs("@@tmpDir@@")
+    if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+        raise RepetException("ERROR: less than %iG of free space in '@@tmpDir@@'" % minFreeGigaInTmpDir)
+
+    os.chdir("@@tmpDir@@")
+    newDir = "@@groupId@@_@@jobName@@_@@time@@"
+    if os.path.exists(newDir):
+        shutil.rmtree(newDir)
+    os.mkdir(newDir)
+    os.chdir(newDir)
+
+    @@cmdStart@@
+    if log != 0:
+        raise RepetException("ERROR: job returned %i" % log)
+    else:
+        print "job finished successfully"
+        sys.stdout.flush()
+    @@cmdFinish@@
+
+    os.chdir("..")
+    shutil.rmtree(newDir)
+    endTime = time.time()
+    print 'endTime=%f' % endTime
+    print 'executionTime=%f' % (endTime - beginTime)
+    print os.uname()
+    sys.stdout.flush()
+
+except IOError, e :
+    print e
+    sys.stdout.flush()
+    sys.exit(1)
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/JobScriptWithFilesCopyTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/JobScriptWithFilesCopyTemplate.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+    newDir = None
+    print os.uname()
+    beginTime = time.time()
+    print 'beginTime=%f' % beginTime
+    print "work in dir '@@tmpDir@@'"
+    sys.stdout.flush()
+    if not os.path.exists("@@tmpDir@@"):
+        raise IOError("ERROR: temporary directory '@@tmpDir@@' doesn't exist")
+
+    fileSize = 0
+    if not os.path.exists("@@groupId@@"):
+        @@cmdSize@@
+    freeGigaNeededInTmpDir = float(1 + fileSize)
+    freeSpace = os.statvfs("@@tmpDir@@")
+    if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < freeGigaNeededInTmpDir):
+        raise RepetException("ERROR: less than %.2fG of free space in '@@tmpDir@@'" % freeGigaNeededInTmpDir)
+
+    os.chdir("@@tmpDir@@")
+    if not os.path.exists("@@groupId@@"):
+        try:
+            os.mkdir("@@groupId@@")
+        except OSError, e :
+            if e.args[0] != 17:
+                raise RepetException("ERROR: can't create '@@groupId@@'")
+        os.chdir("@@groupId@@")
+        @@cmdCopy@@
+    else:
+        os.chdir("@@groupId@@")
+
+    newDir = "@@groupId@@_@@jobName@@_@@time@@"
+    if os.path.exists(newDir):
+        shutil.rmtree(newDir)
+    os.mkdir(newDir)
+    os.chdir(newDir)
+
+    iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "running")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    @@cmdStart@@
+    if log != 0:
+        raise RepetException("ERROR: job returned %i" % log)
+    else:
+        print "job finished successfully"
+        sys.stdout.flush()
+    @@cmdFinish@@
+
+    os.chdir("..")
+    shutil.rmtree(newDir)
+
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "finished")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    endTime = time.time()
+    print 'endTime=%f' % endTime
+    print 'executionTime=%f' % (endTime - beginTime)
+    print os.uname()
+    sys.stdout.flush()
+
+except IOError, e :
+    print e
+    iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
+
+except Exception, e :
+    print "tmpDir is : @@tmpDir@@"
+    print "cDir is : @@cDir@@"
+    print e
+    if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("@@cDir@@/%s" % newDir):
+        os.chdir("..")
+        shutil.move(newDir, "@@cDir@@/%s" % newDir)
+    iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
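All three templates guard against a full scratch directory with the same statvfs arithmetic. For clarity, here it is extracted into a small standalone helper (illustrative name, Unix-only since os.statvfs is used):

import os

def free_gigabytes(path):
    # same arithmetic as the templates: available blocks * block size, in GiB
    st = os.statvfs(path)
    return (st.f_bavail * st.f_frsize) / 1073741824.0

if free_gigabytes("/tmp") < 1:
    print("less than 1G of free space in /tmp")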
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/Launcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/Launcher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,229 @@
[New file; the diff body is garbled and truncated in this changeset view. Recoverable content: a Launcher class that wraps a job table adaptator (via TableJobAdaptatorFactory or an existing RepetJob), writes per-job launch scripts with WriteScript, submits and waits for jobs (beginRun, runSingleJob, endRun), cleans launcher and stdout/stderr files, prepares indented command strings (prepareCommands and prepareCommands_withoutIndentation), optionally cleans cluster nodes, and parses the configured queue into a queue name plus resource list (getQueueNameAndResources).]
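Note: the launcher's prepareCommands helper above concatenates each command list with a trailing newline plus one tab (two tabs for the size/copy blocks), so the commands drop into the tab-indented job-script templates. A minimal, self-contained sketch of that join (re-implemented here for illustration, not an import of the real class):

# Simplified illustration of the prepareCommands concatenation.
def prepare_commands(l_cmds, l_cmd_start=(), l_cmd_finish=(), l_cmd_size=(), l_cmd_copy=()):
    cmd_start = "".join("%s\n\t" % c for c in list(l_cmd_start) + list(l_cmds))
    cmd_finish = "".join("%s\n\t" % c for c in l_cmd_finish)
    cmd_size = "".join("%s\n\t\t" % c for c in l_cmd_size)
    cmd_copy = "".join("%s\n\t\t" % c for c in l_cmd_copy)
    return cmd_start, cmd_finish, cmd_size, cmd_copy

start, finish, size, copy = prepare_commands(
    ['log = os.system("touch file")'],
    l_cmd_start=['os.symlink("../Yufei_chunks.fa", "Yufei_chunks.fa")'])
# start == 'os.symlink("../Yufei_chunks.fa", "Yufei_chunks.fa")\n\tlog = os.system("touch file")\n\t'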
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/Launcher2.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/Launcher2.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,294 @@\n+from commons.tools.CleanClusterNodesAfterRepet import CleanClusterNodesAfterRepet\n+from commons.core.stat.Stat import Stat\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.sql.Job import Job\n+import stat\n+import os\n+import re\n+import sys\n+import time\n+import glob\n+\n+class LauncherParameter(object):\n+\n+    def __init__(self, jobDB):\n+        self._jobDB = jobDB\n+    \n+    def getJobDB(self):\n+        return self._jobDB\n+\n+    def setQuery(self, query):\n+        self._query = query\n+\n+    def setSubject(self, subject):\n+        self._subject = subject\n+        \n+    def setParam(self, param):\n+        self._param = param\n+    \n+    def setCurrentDir(self, currentDir):\n+        self._currentDir = currentDir\n+    \n+    def getCurrentDir(self):\n+        return self._currentDir    \n+\n+    def setTempDir(self, tempDir):\n+        self._tempDir = tempDir\n+    \n+    def getTempDir(self):\n+        return self._tempDir\n+        \n+    def setJobTable(self, jobTable):\n+        self._jobTable = jobTable\n+        \n+    def setQueue(self, queue):\n+        self._queue = queue\n+    \n+    def getQueue(self):\n+        return self._queue\n+        \n+    def setGroupId(self, groupId):\n+        self._groupId = groupId\n+    \n+    def getGroupId(self):\n+        return self._groupId\n+    \n+    def setAcronym(self, acronym):\n+        self._acronym = acronym\n+    \n+    def getAcronym(self):\n+        return self._acronym\n+   \n+    @staticmethod\n+    def createParameter(jobdb, groupid, acronym):\n+\tlauncherParameter = LauncherParameter(jobdb)\n+        launcherParameter.setQuery(os.getcwd())\n+        launcherParameter.setSubject("")\n+        launcherParameter.setParam("")\n+        launcherParameter.setCurrentDir(os.getcwd())\n+        launcherParameter.setTempDir(os.getcwd())\n+        launcherParameter.setJobTable("")\n+        launcherParameter.setQueue("")\n+        launcherParameter.setGroupId(groupid)\n+        launcherParameter.setAcronym(acronym)\n+\treturn launcherParameter       \n+\n+        \n+class Launcher2(object):\n+\n+    #TODO: remove unused parameters : query="", subject="", param="", job_table=""\n+    def __init__(self, iLauncherParameter):\n+        jobdb = iLauncherParameter.getJobDB()\n+        cdir = iLauncherParameter.getCurrentDir()\n+        if jobdb.__class__.__name__ == "RepetJob":\n+            self.jobdb = TableJobAdaptatorFactory.createInstance(jobdb, "jobs")\n+        else:\n+            self.jobdb = jobdb\n+        self.jobdb.checkJobTable()\n+        if cdir == "":\n+            cdir = os.getcwd()\n+        self.cdir = cdir\n+        self.tmpdir = iLauncherParameter.getTempDir()\n+        self.groupid = iLauncherParameter.getGroupId()\n+        self.acronyme = iLauncherParameter.getAcronym()\n+        self._chooseTemplateWithCopy = False\n+        self._chooseTemplateLight = False\n+        self.queue, self.lResources = self.getQueueNameAndResources(iLauncherParameter.getQueue())\n+        self._createJobInstance()\n+        self._nbJobs = 0\n+        \n+    def getQueueNameAndResources(self, configQueue):\n+        tokens = configQueue.replace("\'","").split(" ")\n+        queueName = ""\n+        lResources = []\n+        if tokens[0] != "":\n+            if re.match(".*\\.q", tokens[0]):\n+                queueName = tokens[0]\n+                lResources = tokens[1:]\n+            
else:\n+                lResources = tokens\n+        return queueName, lResources\n+\n+    def createGroupidIfItNotExist(self):\n+        if self.groupid == "":\n+            self.job.groupid = str(os.getpid())\n+        else:\n+            self.job.groupid = self.groupid\n+\n+    def beginRun( self ):\n+        self.createGroupidIfItNotExist()\n+        if self.jobdb.hasUnfinishedJob(self.job.groupid):\n+            self.jobdb.waitJobGroup(self.job.groupid)\n+        else:\n+            self.jobdb.cleanJobGroup(self.job.groupid)\n+\n+    ## Launch one job in parallel\n+    #\n+'..b'()\n+        return stat     \n+\n+    def clean( self, acronyme = "", stdout = True, stderr = True ):\n+        lFileToRemove = []\n+        if acronyme == "":\n+            acronyme = self.acronyme  \n+        pattern = "ClusterLauncher*%s*.py" % ( acronyme )\n+        lFileToRemove.extend(glob.glob( pattern ))\n+        if stdout:\n+            pattern = "%s*.o*" % ( acronyme )\n+            lFileToRemove.extend(glob.glob( pattern ))        \n+        if stderr:\n+            pattern = "%s*.e*" % ( acronyme )\n+            lFileToRemove.extend(glob.glob( pattern ))                   \n+        for file in lFileToRemove:\n+            os.remove(file)\n+    \n+    #TODO: handle of nodesMustBeCleaned => class attribute ?\n+    def runLauncherForMultipleJobs(self, acronymPrefix, lCmdsTuples, cleanMustBeDone = True, nodesMustBeCleaned = False):\n+        self.beginRun()\n+        print "submitting job(s) with groupid \'%s\' (%s)" % (self.job.groupid,  time.strftime("%Y-%m-%d %H:%M:%S"))\n+        for cmdsTuple in lCmdsTuples:\n+            self._nbJobs += 1\n+            self.acronyme = "%s_%s" % (acronymPrefix, self._nbJobs)\n+            self.job.jobname = self.acronyme\n+            if len(cmdsTuple) == 2:\n+                self.runSingleJob(cmdsTuple[0], cmdsTuple[1])\n+            else:\n+                self.runSingleJob(cmdsTuple[0], cmdsTuple[1], cmdsTuple[2], cmdsTuple[3])\n+                self._createJobInstance()\n+                self.createGroupidIfItNotExist()\n+        self.acronyme = acronymPrefix\n+        self.endRun(nodesMustBeCleaned)\n+        if cleanMustBeDone:\n+            self.clean("%s_" % acronymPrefix)\n+        self.jobdb.close()\n+\n+    def prepareCommands(self, lCmds, lCmdStart = [], lCmdFinish = [], lCmdSize = [], lCmdCopy = []):\n+        cmdStart = ""\n+        for cmd in lCmdStart:\n+            cmdStart += "%s\\n\\t" % cmd\n+        for cmd in lCmds:\n+            cmdStart += "%s\\n\\t" % cmd\n+        cmdFinish = ""\n+        for cmd in lCmdFinish:\n+            cmdFinish += "%s\\n\\t" % cmd\n+        cmdSize = ""\n+        for cmd in lCmdSize:\n+            cmdSize += "%s\\n\\t\\t" % cmd\n+        cmdCopy = ""\n+        for cmd in lCmdCopy:\n+            cmdCopy += "%s\\n\\t\\t" % cmd\n+        return (cmdStart, cmdFinish, cmdSize, cmdCopy)\n+\n+    #TODO: to remove when refactoring is done\n+    def prepareCommands_withoutIndentation(self, lCmds, lCmdStart = [], lCmdFinish = [], lCmdSize = [], lCmdCopy = []):\n+        cmdStart = ""\n+        for cmd in lCmdStart:\n+            cmdStart += "%s\\n" % cmd\n+        for cmd in lCmds:\n+            cmdStart += "%s\\n" % cmd\n+        cmdFinish = ""\n+        for cmd in lCmdFinish:\n+            cmdFinish += "%s\\n" % cmd\n+        cmdSize = ""\n+        for cmd in lCmdSize:\n+            cmdSize += "%s\\n\\t\\t" % cmd\n+        cmdCopy = ""\n+        for cmd in lCmdCopy:\n+            cmdCopy += "%s\\n\\t\\t" % cmd\n+        
return (cmdStart, cmdFinish, cmdSize, cmdCopy)\n+    \n+    def getSystemCommand(self, prg, lArgs):\n+        systemCmd = "log = os.system(\\"" + prg \n+        for arg in lArgs:\n+            systemCmd += " " + arg\n+        systemCmd += "\\")"\n+        return systemCmd\n+\n+    def cleanNodes(self):\n+        iCleanClusterNodeAfterRepet = CleanClusterNodesAfterRepet()\n+        iCleanClusterNodeAfterRepet.setLNodes(self.jobdb.getNodesListByGroupId(self.groupid))\n+        iCleanClusterNodeAfterRepet.setTempDirectory(self.tmpdir)\n+        iCleanClusterNodeAfterRepet.setPattern("%s*" % self.groupid)\n+        iCleanClusterNodeAfterRepet.run()\n+\n+    #TODO: to remove when refactoring is done\n+    def _indentCmd(self, cmd):\n+        lCmd = cmd.split("\\n")\n+        cmd_Tab = "%s\\n" % lCmd[0]\n+        for line in lCmd[1:-1]:\n+            cmd_Tab += "\\t%s\\n" % line\n+        return cmd_Tab\n+    \n+    def _createJobInstance(self):\n+        if self.lResources == []:\n+            #To have mem_free=1G:\n+            self.job = Job(queue=self.queue)\n+        else:\n+            self.job = Job(queue=self.queue, lResources=self.lResources)\n'
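Note: the queue string handed to Launcher2 (and Launcher) mixes a queue name and scheduler resources in one whitespace-separated value; getQueueNameAndResources splits it on the ".q" suffix convention. A standalone sketch of that parsing, mirroring the expectations in the tests further below:

import re

# Simplified stand-in for Launcher2.getQueueNameAndResources(): a first token
# ending in ".q" is taken as the queue name, everything else as resources.
def parse_queue(config_queue):
    tokens = config_queue.replace("'", "").split(" ")
    queue_name, resources = "", []
    if tokens[0] != "":
        if re.match(r".*\.q", tokens[0]):
            queue_name, resources = tokens[0], tokens[1:]
        else:
            resources = tokens
    return queue_name, resources

assert parse_queue("main.q mem_free=3G") == ("main.q", ["mem_free=3G"])
assert parse_queue("") == ("", [])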
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/LauncherUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/LauncherUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,31 @@
+class LauncherUtils(object):
+
+    @staticmethod
+    def createHomogeneousSizeList(lStringSizeTuples, maxSize):
+        lStringSizeTuplesSorted = sorted(lStringSizeTuples, key=lambda stringSizeTuple:(stringSizeTuple[1], stringSizeTuple[0]), reverse = True)
+        lStringSizeList = []
+        lStringSize = []
+        sumTupleSize = 0
+        iteratorFromBegin = 0
+        iteratorFromEnd = len(lStringSizeTuplesSorted) - 1
+        for tuple in lStringSizeTuplesSorted:
+            if sumTupleSize + tuple[1] < maxSize:
+                lStringSize.append(tuple[0])
+                sumTupleSize += tuple[1]
+            elif tuple[1] >= maxSize:
+                lStringSizeList.append([tuple[0]])
+            else:
+                tupleFromEnd = lStringSizeTuplesSorted[iteratorFromEnd]
+                while sumTupleSize + tupleFromEnd[1] < maxSize and iteratorFromBegin < iteratorFromEnd:
+                    lStringSize.append(tupleFromEnd[0])
+                    sumTupleSize += tupleFromEnd[1]
+                    del lStringSizeTuplesSorted[iteratorFromEnd]
+                    iteratorFromEnd -= 1
+                    tupleFromEnd = lStringSizeTuplesSorted[iteratorFromEnd]
+                lStringSizeList.append(lStringSize)
+                lStringSize = [tuple[0]]
+                sumTupleSize = tuple[1]
+            iteratorFromBegin += 1
+        if lStringSize:
+            lStringSizeList.append(lStringSize)
+        return lStringSizeList      
\ No newline at end of file
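Note: createHomogeneousSizeList is a greedy bin-packing helper: tuples are sorted by decreasing size, each batch is filled up to maxSize, small items from the tail of the sorted list top up a batch before it is closed, and items at or above maxSize get a batch of their own. Assuming the repository root is on PYTHONPATH, the 4-item case from Test_LauncherUtils behaves like this:

from commons.core.launcher.LauncherUtils import LauncherUtils

batches = LauncherUtils.createHomogeneousSizeList(
    [("h1", 100), ("h2", 200), ("h3", 10), ("h4", 400)], 500)
# Per Test_LauncherUtils.test_createHomogeneousSizeList_4items:
# batches == [["h4", "h3"], ["h2", "h1"]]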
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/WriteScript.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/WriteScript.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,76 @@
+import os
+import time
+
+class WriteScript(object):
+
+    def __init__(self, job = None, jobdb = None, cdir = "", tmpdir = "", chooseTemplateWithCopy = False, chooseTemplateLight = False):
+        self._iJob = job
+        self._iJobdb = jobdb
+        self._cDir = cdir
+        self._tmpDir = tmpdir
+        self._chooseTemplateWithCopy = chooseTemplateWithCopy
+        self._chooseTemplateLight = chooseTemplateLight
+
+    def run(self, cmdStart, cmdFinish, pyFileName, cmdSize = "", cmdCopy = ""):
+        if self._chooseTemplateLight:
+            d = self.createJobScriptLightDict(cmdStart, cmdFinish, cmdSize, cmdCopy)
+        else:
+            d = self.createJobScriptDict(cmdStart, cmdFinish, cmdSize, cmdCopy)
+        self.fillTemplate(pyFileName, d)
+    
+    def fillTemplate(self, outputFileName, dict):
+        if self._chooseTemplateWithCopy:
+            inputFileName = "%s/commons/core/launcher/JobScriptWithFilesCopyTemplate.py" % os.environ["REPET_PATH"]
+        else:
+            inputFileName = "%s/commons/core/launcher/JobScriptTemplate.py" % os.environ["REPET_PATH"]
+
+        if self._chooseTemplateLight:
+            inputFileName = "%s/commons/core/launcher/JobScriptTemplateLight.py" % os.environ["REPET_PATH"]
+            
+        input = open(inputFileName, "r")
+        data = input.read()
+        input.close()
+        for key, value in dict.items():
+            data = data.replace("@@%s@@" % key, value)
+        output = open(outputFileName, "w")
+        output.write(data)
+        output.close()
+    
+    def createJobScriptDict(self, cmdStart, cmdFinish, cmdSize, cmdCopy):
+        dict = {
+         "tmpDir" : self._tmpDir,
+         "jobTableName" : self._iJobdb._table,
+         "groupId" : self._iJob.groupid,
+         "jobName" : self._iJob.jobname,
+         "launcher" : self._iJob.launcher,
+         "time" : time.strftime("%Y%m%d-%H%M%S"),
+         "repet_path" : os.environ["REPET_PATH"],
+         "repet_host" : os.environ["REPET_HOST"],
+         "repet_user" : os.environ["REPET_USER"],
+         "repet_pw" : os.environ["REPET_PW"],
+         "repet_db" : os.environ["REPET_DB"],
+         "repet_port" : os.environ["REPET_PORT"],
+         "cmdStart" : cmdStart,
+         "cmdFinish" : cmdFinish,
+         "cDir" : self._cDir,
+         "cmdSize" : cmdSize,
+         "cmdCopy" : cmdCopy
+            }      
+        return dict
+    
+    def createJobScriptLightDict(self, cmdStart, cmdFinish, cmdSize, cmdCopy):
+        dict = {
+         "tmpDir" : self._tmpDir,
+         "jobTableName" : self._iJobdb._table,
+         "groupId" : self._iJob.groupid,
+         "jobName" : self._iJob.jobname,
+         "launcher" : self._iJob.launcher,
+         "time" : time.strftime("%Y%m%d-%H%M%S"),
+         "repet_path" : os.environ["REPET_PATH"],
+         "cmdStart" : cmdStart,
+         "cmdFinish" : cmdFinish,
+         "cDir" : self._cDir,
+         "cmdSize" : cmdSize,
+         "cmdCopy" : cmdCopy
+            }      
+        return dict
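Note: WriteScript.fillTemplate performs plain string substitution: every @@key@@ marker in the chosen template is replaced by the corresponding dictionary value. A minimal sketch of that mechanism (the real method additionally picks one of the JobScriptTemplate*.py files under $REPET_PATH and writes the result to disk):

# Minimal illustration of the @@key@@ substitution used by fillTemplate().
def fill_template(template_text, values):
    for key, value in values.items():
        template_text = template_text.replace("@@%s@@" % key, value)
    return template_text

# Values below are illustrative only.
template = 'os.chdir("@@tmpDir@@")\n@@cmdStart@@'
print(fill_template(template, {"tmpDir": "/tmp/scratch",
                               "cmdStart": 'log = os.system("touch file")'}))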
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/Test_Launcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_Launcher.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,330 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.stat.Stat import Stat\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.Job import Job\n+import unittest\n+import os\n+import shutil\n+import time\n+import stat\n+\n+#TODO: Test_F_Launcher.py : to execute prepareCommands() and runSingleJob()\n+#                            to test runLauncherForMultipleJobs()\n+#TODO: check clean of "Test_runSingleJob"\n+#TODO: refactoring => choose between "self._queue" or "lResources" to set resources\n+class Test_Launcher(unittest.TestCase):\n+\n+    SARUMAN_NAME = "compute-2-46.local"\n+    \n+    def setUp(self):\n+        self._cDir = os.getcwd()\n+        self._tmpDir = self._cDir\n+        self._groupid = "test"\n+        self._jobTable = "dummyJobTable"\n+        self._iDb = DbFactory.createInstance()\n+        self._iDb.createTable(self._jobTable, "jobs", overwrite = True)\n+        self._jobdb = TableJobAdaptatorFactory.createInstance(self._iDb, self._jobTable)\n+        self._queue = ""\n+        self._configFileName = "dummyConfigFile"\n+    \n+    def tearDown(self):\n+        self._iDb.dropTable(self._jobTable)\n+        self._iDb.close()\n+        FileUtils.removeFilesByPattern(\'*.e*\')\n+        FileUtils.removeFilesByPattern(\'*.o*\')\n+        FileUtils.removeFilesByPattern(\'launcherFileTest_BeginRun.py\')\n+        FileUtils.removeFilesByPattern(self._configFileName)\n+        FileUtils.removeFilesByPattern(\'ClusterLauncher_*\')\n+        \n+    def test__init__wrong_fields_for_job_table(self):\n+        self._iDb.dropTable(self._jobTable)\n+        sqlCmd = "CREATE TABLE " + self._jobTable \n+        sqlCmd += " ( jobid INT UNSIGNED"\n+        sqlCmd += ", jobname VARCHAR(255)"\n+        sqlCmd += ", groupid VARCHAR(255)"\n+        sqlCmd += ", command TEXT"\n+        sqlCmd += ", launcher VARCHAR(1024)"\n+        sqlCmd += ", queue VARCHAR(255)"\n+        sqlCmd += ", status VARCHAR(255)"\n+        sqlCmd += ", time DATETIME"\n+        sqlCmd += ", node VARCHAR(255) )"\n+        self._iDb.execute(sqlCmd)\n+        acronym = "Test__init__"\n+        iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", self._queue, self._groupid, acronym)\n+        lExpFields = sorted(["jobid", "jobname", "groupid", "launcher", "queue", "resources", "status", "time", "node"])\n+        lObsFields = sorted(self._iDb.getFieldList(self._jobTable))\n+        self.assertEquals(lExpFields, lObsFields)\n+        expJob = Job(queue = self._queue)\n+        obsJob = iLauncher.job\n+        self.assertEquals(expJob, obsJob)\n+        \n+    def test__init__withResources(self):\n+        queue = "main.q mem_free=3G"\n+        acronym = "Test__init__"\n+        expQueue = "main.q"\n+        explResources = [\'mem_free=3G\']\n+        expJob = Job(queue = expQueue, lResources = explResources)\n+        iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", queue, self._groupid, acronym)\n+        obsJob = iLauncher.job\n+        self.assertEquals(expJob, obsJob)\n+\n+    def test_createGroupidIfItNotExist(self):\n+        acronym = "checkGroupID"\n+        iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", self._queue, self._groupid, acronym)\n+        
iLauncher.createGroupidIfItNotExist()\n+        obsGroupid = iLauncher.job.groupid\n+        self.assertEquals(self._groupid, obsGroupid)\n+\n+    def test_createGroupidIfItNotExist_without_groupid(self):\n+        groupid = ""\n+        acronym = "checkGroupID"\n+        iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", self._queue, groupid, acronym)\n+        iLauncher.createGroupidIfItNotExist()\n+        obsGroupid = iLauncher.job.groupid\n+        self.assertTrue(obsGroupid != "")\n+        \n+    def test_begi'..b'd)\n+        time.sleep(20)\n+        jobStatus = self._jobdb.getJobStatus(iLauncher.job)\n+        os.chdir(self._cDir)\n+        shutil.rmtree(acronym)\n+        self.assertEqual(jobStatus, "finished")\n+        \n+    def test_runSingleJob_catch_error_wrong_tmpDir(self):\n+        acronym = "Test_runSingleJob_catch_error"\n+        os.mkdir(acronym)\n+        os.chdir(acronym)\n+        iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", os.getcwd(), "%s/toto" % self._tmpDir, "", self._queue, self._groupid, acronym)\n+        iLauncher.job.groupid = self._groupid\n+        iLauncher.job.jobname = acronym\n+        iLauncher.job.queue = self._queue\n+        if Test_Launcher.SARUMAN_NAME == os.getenv("HOSTNAME"):\n+            iLauncher.job.lResources = ["test=TRUE"]\n+        cmd = "log = os.system(\\"touch \'YuFei\'\\")\\n"\n+        iLauncher.runSingleJob(cmd)\n+        time.sleep(20)\n+        jobStatus = self._jobdb.getJobStatus(iLauncher.job) \n+        os.chdir(self._cDir)\n+        shutil.rmtree(acronym)\n+        self.assertEqual(jobStatus, "error")\n+        \n+    def test_runSingleJob_catch_error_wrong_cmd(self):\n+        acronym = "Test_runSingleJob_catch_error"\n+        os.mkdir(acronym)\n+        os.chdir(acronym)\n+        iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", os.getcwd(), self._tmpDir, "", self._queue, self._groupid, acronym)\n+        iLauncher.job.groupid = self._groupid\n+        iLauncher.job.jobname = acronym\n+        iLauncher.job.queue = self._queue\n+        if Test_Launcher.SARUMAN_NAME == os.getenv("HOSTNAME"):\n+            iLauncher.job.lResources = ["test=TRUE"]\n+        cmd = "log = os.system(\\"truc -i toto\\")\\n"\n+        iLauncher.runSingleJob(cmd)\n+        time.sleep(20)\n+        jobStatus = self._jobdb.getJobStatus(iLauncher.job) \n+        self._jobdb.cleanJobGroup(self._groupid)\n+        os.chdir(self._cDir)\n+        shutil.rmtree(acronym)\n+        self.assertEqual(jobStatus, "error")\n+\n+    def test_prepareCommands(self):\n+        expCmdStart = "os.symlink(\\"../Yufei_chunks.fa\\", \\"Yufei_chunks.fa\\")\\n\\tos.symlink(\\"../Yufei_chunks.fa_cut\\", \\"Yufei_chunks.fa_cut\\")\\n\\tlog = os.system(\\"touch file\\")\\n\\t" \n+        expCmdFinish = "if os.path.exists(\\"yufei.align\\"):\\n\\t\\tshutil.move(\\"yufei.align\\", \\"yufeiLuo/.\\" )\\n\\t"\n+        expCmdSize = "fileSize = 3.2\\n\\t\\t"\n+        expCmdCopy = "shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa\\", \\".\\")\\n\\t\\tshutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa_cut\\", \\".\\")\\n\\t\\t"\n+        \n+        lCmdStart = []\n+        lCmdStart.append("os.symlink(\\"../Yufei_chunks.fa\\", \\"Yufei_chunks.fa\\")")\n+        lCmdStart.append("os.symlink(\\"../Yufei_chunks.fa_cut\\", \\"Yufei_chunks.fa_cut\\")")\n+        lCmds = []\n+        lCmds.append("log = os.system(\\"touch file\\")")\n+        lCmdFinish = []\n+        lCmdFinish.append("if os.path.exists(\\"yufei.align\\"):")\n+        
lCmdFinish.append("\\tshutil.move(\\"yufei.align\\", \\"yufeiLuo/.\\" )") \n+        lCmdSize = []\n+        lCmdSize.append("fileSize = 3.2")    \n+        lCmdCopy = []\n+        lCmdCopy.append("shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa\\", \\".\\")")\n+        lCmdCopy.append("shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa_cut\\", \\".\\")")\n+\n+        iLauncher = Launcher(self._jobdb)\n+        obsCmdStart, obsCmdFinish, obsCmdSize, obsCmdCopy = iLauncher.prepareCommands(lCmds, lCmdStart, lCmdFinish, lCmdSize, lCmdCopy)         \n+        \n+        self.assertEquals(expCmdStart, obsCmdStart)\n+        self.assertEquals(expCmdFinish, obsCmdFinish)      \n+        self.assertEquals(expCmdSize, obsCmdSize)\n+        self.assertEquals(expCmdCopy, obsCmdCopy)\n+        \n+    def test_getSystemCommand(self):\n+        prg = "touch"\n+        lArgs = []\n+        lArgs.append("file")\n+        expCmd = "log = os.system(\\"touch file\\")"\n+        iLauncher = Launcher(self._jobdb)\n+        obsCmd = iLauncher.getSystemCommand(prg, lArgs)\n+        self.assertEquals(expCmd, obsCmd)\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
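Note: getSystemCommand simply wraps a program and its arguments into the os.system call that ends up inside the generated job script; test_getSystemCommand above expects the literal string shown here. A standalone equivalent:

# Standalone equivalent of Launcher.getSystemCommand(): builds the line
# that the generated job script will execute via os.system().
def get_system_command(prg, l_args):
    return 'log = os.system("%s")' % " ".join([prg] + list(l_args))

assert get_system_command("touch", ["file"]) == 'log = os.system("touch file")'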
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/Test_Launcher2.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_Launcher2.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,356 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.launcher.Launcher2 import Launcher2\n+from commons.core.launcher.Launcher2 import LauncherParameter\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.stat.Stat import Stat\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.Job import Job\n+import unittest\n+import os\n+import shutil\n+import time\n+import stat\n+\n+#TODO: Test_F_Launcher2.py : to execute prepareCommands() and runSingleJob()\n+#                            to test runLauncher2ForMultipleJobs()\n+#TODO: check clean of "Test_runSingleJob"\n+#TODO: refactoring => choose between "self._queue" or "lResources" to set resources\n+class Test_Launcher2(unittest.TestCase):\n+\n+    SARUMAN_NAME = "compute-2-46.local"\n+    \n+    def setUp(self):\n+        self._cDir = os.getcwd()\n+        self._tmpDir = self._cDir\n+        self._groupid = "test"\n+        self._jobTable = "dummyJobTable"\n+        self._iDb = DbFactory.createInstance()\n+        self._iDb.createTable(self._jobTable, "jobs", overwrite = True)\n+        self._jobdb = TableJobAdaptatorFactory.createInstance(self._iDb, self._jobTable)\n+        self._queue = ""\n+        self._configFileName = "dummyConfigFile"\n+    \n+    def tearDown(self):\n+        self._iDb.dropTable(self._jobTable)\n+        self._iDb.close()\n+        FileUtils.removeFilesByPattern(\'*.e*\')\n+        FileUtils.removeFilesByPattern(\'*.o*\')\n+        FileUtils.removeFilesByPattern(\'Launcher2FileTest_BeginRun.py\')\n+        FileUtils.removeFilesByPattern(self._configFileName)\n+        FileUtils.removeFilesByPattern(\'ClusterLauncher2_*\')\n+        \n+    def test__init__wrong_fields_for_job_table(self):\n+        self._iDb.dropTable(self._jobTable)\n+        sqlCmd = "CREATE TABLE " + self._jobTable \n+        sqlCmd += " ( jobid INT UNSIGNED"\n+        sqlCmd += ", jobname VARCHAR(255)"\n+        sqlCmd += ", groupid VARCHAR(255)"\n+        sqlCmd += ", command TEXT"\n+        sqlCmd += ", Launcher2 VARCHAR(1024)"\n+        sqlCmd += ", queue VARCHAR(255)"\n+        sqlCmd += ", status VARCHAR(255)"\n+        sqlCmd += ", time DATETIME"\n+        sqlCmd += ", node VARCHAR(255) )"\n+        self._iDb.execute(sqlCmd)\n+        acronym = "Test__init__"\n+\tlauncherParameter =  LauncherParameter.createParameter(self._jobdb, self._groupid, acronym)\n+        iLauncher2 = Launcher2(launcherParameter)\n+\n+\n+        lExpFields = sorted(["jobid", "jobname", "groupid", "launcher", "queue", "resources", "status", "time", "node"])\n+        lObsFields = sorted(self._iDb.getFieldList(self._jobTable))\n+        self.assertEquals(lExpFields, lObsFields)\n+        expJob = Job(queue = self._queue)\n+        obsJob = iLauncher2.job\n+        self.assertEquals(expJob, obsJob)\n+        \n+    def test__init__withResources(self):\n+        queue = "main.q mem_free=3G"\n+        acronym = "Test__init__"\n+        expQueue = "main.q"\n+        explResources = [\'mem_free=3G\']\n+        expJob = Job(queue = expQueue, lResources = explResources)\n+        \n+\tlauncherParameter =  LauncherParameter.createParameter(self._jobdb, self._groupid, acronym);\n+\tlauncherParameter.setQueue(queue)\n+        iLauncher2 = Launcher2(launcherParameter)\n+\n+        obsJob = iLauncher2.job\n+        self.assertEquals(expJob, obsJob)\n+\n+    def test_createGroupidIfItNotExist(self):\n+        
acronym = "checkGroupID"\n+\t\n+\tlauncherParameter =  LauncherParameter.createParameter(self._jobdb, self._groupid, acronym);\n+        iLauncher2 = Launcher2(launcherParameter)\n+        iLauncher2.createGroupidIfItNotExist()\n+        obsGroupid = iLauncher2.job.groupid\n+        self.assertEquals(self._groupid, obsGroupid)\n+\n+    def test_createGroupidIfItNotExist_without_groupid(self):\n+        groupid = ""\n+        acronym = "checkGroupID"\n+\tlauncherParameter =  LauncherParameter.createParameter(self._jobdb, self._groupid, acronym);\n+        iLa'..b'   def test_runSingleJob_catch_error_wrong_tmpDir(self):\n+#        acronym = "Test_runSingleJob_catch_error"\n+#        os.mkdir(acronym)\n+#        os.chdir(acronym)\n+#        iLauncher2= Launcher2(self._jobdb, os.getcwd(), "", "", os.getcwd(), "%s/toto" % self._tmpDir, "", self._queue, self._groupid, acronym)\n+#        iLauncher2.job.groupid = self._groupid\n+#        iLauncher2.job.jobname = acronym\n+#        iLauncher2.job.queue = self._queue\n+#        if Test_Launcher2.SARUMAN_NAME == os.getenv("HOSTNAME"):\n+#            iLauncher2.job.lResources = ["test=TRUE"]\n+#        cmd = "log = os.system(\\"touch \'YuFei\'\\")\\n"\n+#        iLauncher2.runSingleJob(cmd)\n+#        time.sleep(20)\n+#        jobStatus = self._jobdb.getJobStatus(iLauncher2.job) \n+#        os.chdir(self._cDir)\n+#        shutil.rmtree(acronym)\n+#        self.assertEqual(jobStatus, "error")\n+#        \n+#    def test_runSingleJob_catch_error_wrong_cmd(self):\n+#        acronym = "Test_runSingleJob_catch_error"\n+#        os.mkdir(acronym)\n+#        os.chdir(acronym)\n+#        iLauncher2 = Launcher2(self._jobdb, os.getcwd(), "", "", os.getcwd(), self._tmpDir, "", self._queue, self._groupid, acronym)\n+#        iLauncher2.job.groupid = self._groupid\n+#        iLauncher2.job.jobname = acronym\n+#        iLauncher2.job.queue = self._queue\n+#        if Test_Launcher2.SARUMAN_NAME == os.getenv("HOSTNAME"):\n+#            iLauncher2.job.lResources = ["test=TRUE"]\n+#        cmd = "log = os.system(\\"truc -i toto\\")\\n"\n+#        iLauncher2.runSingleJob(cmd)\n+#        time.sleep(20)\n+#        jobStatus = self._jobdb.getJobStatus(iLauncher2.job) \n+#        self._jobdb.cleanJobGroup(self._groupid)\n+#        os.chdir(self._cDir)\n+#        shutil.rmtree(acronym)\n+#        self.assertEqual(jobStatus, "error")\n+#\n+#    def test_prepareCommands(self):\n+#        expCmdStart = "os.symlink(\\"../Yufei_chunks.fa\\", \\"Yufei_chunks.fa\\")\\n\\tos.symlink(\\"../Yufei_chunks.fa_cut\\", \\"Yufei_chunks.fa_cut\\")\\n\\tlog = os.system(\\"touch file\\")\\n\\t" \n+#        expCmdFinish = "if os.path.exists(\\"yufei.align\\"):\\n\\t\\tshutil.move(\\"yufei.align\\", \\"yufeiLuo/.\\" )\\n\\t"\n+#        expCmdSize = "fileSize = 3.2\\n\\t\\t"\n+#        expCmdCopy = "shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa\\", \\".\\")\\n\\t\\tshutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa_cut\\", \\".\\")\\n\\t\\t"\n+#        \n+#        lCmdStart = []\n+#        lCmdStart.append("os.symlink(\\"../Yufei_chunks.fa\\", \\"Yufei_chunks.fa\\")")\n+#        lCmdStart.append("os.symlink(\\"../Yufei_chunks.fa_cut\\", \\"Yufei_chunks.fa_cut\\")")\n+#        lCmds = []\n+#        lCmds.append("log = os.system(\\"touch file\\")")\n+#        lCmdFinish = []\n+#        lCmdFinish.append("if os.path.exists(\\"yufei.align\\"):")\n+#        lCmdFinish.append("\\tshutil.move(\\"yufei.align\\", \\"yufeiLuo/.\\" )") \n+#        lCmdSize = []\n+#        lCmdSize.append("fileSize = 3.2")    \n+#    
    lCmdCopy = []\n+#        lCmdCopy.append("shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa\\", \\".\\")")\n+#        lCmdCopy.append("shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa_cut\\", \\".\\")")\n+#\n+#        iLauncher2 = Launcher2(self._jobdb)\n+#        obsCmdStart, obsCmdFinish, obsCmdSize, obsCmdCopy = iLauncher2.prepareCommands(lCmds, lCmdStart, lCmdFinish, lCmdSize, lCmdCopy)         \n+#        \n+#        self.assertEquals(expCmdStart, obsCmdStart)\n+#        self.assertEquals(expCmdFinish, obsCmdFinish)      \n+#        self.assertEquals(expCmdSize, obsCmdSize)\n+#        self.assertEquals(expCmdCopy, obsCmdCopy)\n+#        \n+#    def test_getSystemCommand(self):\n+#        prg = "touch"\n+#        lArgs = []\n+#        lArgs.append("file")\n+#        expCmd = "log = os.system(\\"touch file\\")"\n+#        iLauncher2 = Launcher2(self._jobdb)\n+#        obsCmd = iLauncher2.getSystemCommand(prg, lArgs)\n+#        self.assertEquals(expCmd, obsCmd)\n+\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Launcher2 ) )\n+if __name__ == "__main__":\n+        unittest.TextTestRunner(verbosity=2).run( test_suite )    \n'
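Note: Launcher2 replaces Launcher's long positional constructor with a LauncherParameter object, built in the tests above through the createParameter factory. A usage sketch, assuming a configured REPET environment (DbFactory credentials and an existing jobs table); the group id and acronym values are illustrative:

from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
from commons.core.launcher.Launcher2 import Launcher2, LauncherParameter

# Assumes REPET_* environment variables and a "jobs"-style table are available.
iDb = DbFactory.createInstance()
jobdb = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
param = LauncherParameter.createParameter(jobdb, "myGroupId", "myAcronym")
param.setQueue("main.q mem_free=3G")    # queue name plus scheduler resources
launcher = Launcher2(param)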
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/Test_LauncherUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_LauncherUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,102 @@
+import unittest
+from commons.core.launcher.LauncherUtils import LauncherUtils
+
+class Test_LauncherUtils(unittest.TestCase):
+        
+    def test_createHomogeneousSizeList_empty(self):
+        lHeadersSizeTuples = []
+        maxSize = 500
+        expLHeadersList = []
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_one_item_upper_mean(self):
+        lHeadersSizeTuples = [("h1", 300)]
+        maxSize = 500
+        expLHeadersList = [["h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_one_item_under_mean(self):
+        lHeadersSizeTuples = [("h1", 100)]
+        maxSize = 500
+        expLHeadersList = [["h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_3items(self):
+        lHeadersSizeTuples = [("h1", 250),
+                              ("h2", 250),
+                              ("h3", 300)]
+        maxSize = 500
+        expLHeadersList = [["h3"], ["h2"], ["h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+
+    def test_createHomogeneousSizeList_4items(self):
+        lHeadersSizeTuples = [("h1", 100),
+                              ("h2", 200),
+                              ("h3", 10),
+                              ("h4", 400)]
+        maxSize = 500
+        expLHeadersList = [["h4", "h3"], ["h2", "h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_5items(self):
+        lHeadersSizeTuples = [("h1", 300),
+                              ("h2", 300),
+                              ("h3", 250),
+                              ("h4", 100),
+                              ("h5", 90)]
+        maxSize = 500
+        expLHeadersList = [["h2", "h5","h4"], ["h1"], ["h3"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_all_upper_max(self):
+        lHeadersSizeTuples = [("h1", 600),
+                              ("h2", 500),
+                              ("h3", 700),
+                              ("h4", 900),
+                              ("h5", 500)]
+        maxSize = 500
+        expLHeadersList = [["h4"], ["h3"], ["h1"], ["h5"], ["h2"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_all_upper_mean(self):
+        lHeadersSizeTuples = [("h1", 300),
+                              ("h2", 300),
+                              ("h3", 300),
+                              ("h4", 300),
+                              ("h5", 300)]
+        maxSize = 500
+        expLHeadersList = [["h5"], ["h4"], ["h3"], ["h2"], ["h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_all_under_mean(self):
+        lHeadersSizeTuples = [("h1", 100),
+                              ("h2", 100),
+                              ("h3", 100),
+                              ("h4", 100),
+                              ("h5", 100)]
+        maxSize = 500
+        expLHeadersList = [["h5", "h4", "h3", "h2"], ["h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+        
+    def test_createHomogeneousSizeList_floats(self):
+        lHeadersSizeTuples = [("h1", 99.1),
+                              ("h2", 100.7),
+                              ("h3", 100.1),
+                              ("h4", 100.1),
+                              ("h5", 100)]
+        maxSize = 500
+        expLHeadersList = [['h2', 'h4', 'h3', 'h5'], ["h1"]]
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        self.assertEquals(expLHeadersList, obsLHeadersList)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/Test_WriteScript.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_WriteScript.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,365 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.sql.Job import Job\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+import unittest\n+import os\n+import shutil\n+import time\n+import threading\n+\n+class Test_WriteScript(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._testDir = os.getcwd()\n+        self._acronym = "dummyAcronym"\n+        self._jobTable = "dummyJobsTable"\n+        self._iDb = DbFactory.createInstance()\n+        self._iDb.createTable(self._jobTable, "jobs", overwrite = True)\n+        self._jobdb = TableJobAdaptatorFactory.createInstance(self._iDb, self._jobTable)\n+        self._job = Job()\n+        self._job.groupid = "groupid"\n+        self._job.jobname = self._acronym\n+        self._job.launcher = "ClusterLauncher"\n+        self._jobdb.recordJob(self._job)\n+        self._dummyScratch = "dummyScratch"\n+        os.mkdir(self._dummyScratch)\n+        os.chdir(self._dummyScratch)\n+        self._tmpDir = os.getcwd()\n+        self._iScriptWriter = WriteScript(self._job, self._jobdb, self._testDir, self._tmpDir)\n+        \n+    def tearDown(self):\n+        self._iDb.dropTable(self._jobTable)\n+        self._iDb.close()\n+        if FileUtils.isRessourceExists(self._dummyScratch):\n+            shutil.rmtree(self._dummyScratch)\n+\n+    def test_run(self):\n+        isScriptAsRun = False\n+        fileToCreate = \'dummyFile\'\n+        cmdStart = "log = os.system( \\"touch %s\\" )\\n" % fileToCreate\n+        cmdFinish = "os.system(\\"mv %s %s\\" )\\n" % (fileToCreate, self._testDir)\n+        pyFileName = "%s/ClusterLauncher_%s.py" % (os.getcwd(), self._acronym)       \n+        \n+        self._iScriptWriter.run(cmdStart, cmdFinish, pyFileName)\n+        os.system("python %s" % pyFileName)\n+\n+        os.chdir(self._testDir)\n+        if FileUtils.isRessourceExists(fileToCreate):\n+            os.remove(fileToCreate)\n+            isScriptAsRun = True\n+        expJobStatus = "finished"    \n+        obsJobStatus = self._jobdb.getJobStatus(self._job)\n+            \n+        self.assertTrue(isScriptAsRun)\n+        self.assertEquals(expJobStatus, obsJobStatus)\n+        \n+    def test_run_with_cmdSize_and_cmdCopy(self):\n+        isScriptAsRun = False\n+        fileToCreate = \'dummyFile\'\n+        fileSize = 0.5\n+        cmdSize = "fileSize = %f\\n" % fileSize\n+        cmdCopy = "os.system(\\"touch bank.fa\\")\\n"\n+        cmdStart = "log = os.system(\\"touch %s\\")\\n" % fileToCreate\n+        cmdFinish = "shutil.move(\\"%s\\", \\"%s\\")" % (fileToCreate, self._testDir)\n+        pyFileName = "%s/ClusterLauncher_%s.py" % (os.getcwd(), self._acronym)       \n+        \n+        iWriteScript = WriteScript(self._job, self._jobdb, self._testDir, self._tmpDir, True)\n+        iWriteScript.run(cmdStart, cmdFinish, pyFileName, cmdSize, cmdCopy)\n+        os.system("python %s" % pyFileName)\n+\n+        os.chdir(self._testDir)\n+        if FileUtils.isRessourceExists(fileToCreate):\n+            os.remove(fileToCreate)\n+            isScriptAsRun = True\n+        expJobStatus = "finished"    \n+        obsJobStatus = self._jobdb.getJobStatus(self._job)\n+            \n+        self.assertTrue(isScriptAsRun)\n+        self.assertEquals(expJobStatus, obsJobStatus)\n+\n+#TODO: how to test ?\n+#    def test_run_2_jobs_trying_to_create_same_groupIdDir(self):\n+#       
 fileToCreate1 = \'dummyFile1\'\n+#        fileToCreate2 = \'dummyFile2\'\n+#        flagFileOSError = "osErrorRaised"\n+#        \n+#        fileSize = 0.5\n+#        cmd_checkSize = ""\n+#        cmd_checkSize += "if not os.path.exists( \\"%s\\" ):\\n" % self._job.groupid\n+#        cmd_checkSize += "\\tfileSize = %f\\n" % fileSize\n+#        \n+#        cmd_checkGroupidDir1 = ""\n+#        cmd_checkGroupidDir1 += "if not os.path.exists(\\"%s\\"):\\n" % self._job.groupid\n+#        cmd_checkGroupidDir1 += "\\ttry:\\n"\n+#        cmd_checkGroupidDir1 += "\\t\\ttime.sleep('..b'JobsTable",\n+             "groupId" : "groupid",\n+             "jobName" : "job1",\n+             "launcher" : "ClusterLauncher",\n+             "time" : "20110505-105353",\n+             "repet_path" : "/home/user/workspace/repet_pipe",\n+             "cmdStart" : "log = os.system(\\"touch dummyFile1\\")",\n+             "cmdFinish" : "shutil.move(\\"dummyFile1\\", \\"/home/user/workspace/repet_pipe/commons/core/launcher/test\\")",\n+             "cDir" : "/home/user/workspace/repet_pipe/commons/core/launcher/test/",\n+             "cmdSize" : "fileSize = 0.500000",\n+             "cmdCopy" : "os.system(\\"touch bank.fa\\")"\n+             }\n+        expFileName = "expFiles/expJobScriptTemplateLight.py"\n+        obsFileName = "obs.py"\n+        \n+        iWS = WriteScript(chooseTemplateLight = True)\n+        iWS.fillTemplate(obsFileName, d)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(obsFileName)\n+        \n+    def test_createJobScriptDict(self):\n+        os.chdir("..")\n+        cmd_start = "log = os.system(\\"touch dummyFile1\\")"\n+        cmd_finish = "shutil.move(\\"dummyFile1\\", \\"/home/user/workspace/repet_pipe/commons/core/launcher/test\\")"\n+        cmd_size = ""\n+        cmd_copy = ""\n+        expDict = {\n+             "tmpDir" : self._tmpDir,\n+             "jobTableName" : self._jobTable,\n+             "groupId" : self._job.groupid,\n+             "jobName" : self._acronym,\n+             "launcher" : self._job.launcher,\n+             "time" : time.strftime("%Y%m%d-%H%M%S"),\n+             "repet_path" : os.environ["REPET_PATH"],\n+             "repet_host" : os.environ["REPET_HOST"],\n+             "repet_user" : os.environ["REPET_USER"],\n+             "repet_pw" : os.environ["REPET_PW"],\n+             "repet_db" : os.environ["REPET_DB"],\n+             "repet_port" : os.environ["REPET_PORT"],\n+             "cmdStart" : cmd_start,\n+             "cmdFinish" : cmd_finish,\n+             "cDir" : self._testDir,\n+             "cmdSize" : cmd_size,\n+             "cmdCopy" : cmd_copy\n+             }\n+        obsDict = self._iScriptWriter.createJobScriptDict(cmd_start, cmd_finish, cmd_size, cmd_copy)\n+        self.assertEquals(expDict, obsDict)\n+        \n+    def test_createJobScriptDict_with_cmdSize_and_cmdCopy(self):\n+        os.chdir("..")\n+        cmd_start = "log = os.system(\\"touch dummyFile1\\")"\n+        cmd_finish = "shutil.move(\\"dummyFile1\\", \\"/home/user/workspace/repet_pipe/commons/core/launcher/test\\")"\n+        cmd_size = "fileSize = 0.500000"\n+        cmd_copy = "os.system(\\"touch bank.fa\\")"\n+        expDict = {\n+             "tmpDir" : self._tmpDir,\n+             "jobTableName" : self._jobTable,\n+             "groupId" : self._job.groupid,\n+             "jobName" : self._acronym,\n+             "launcher" : self._job.launcher,\n+             "time" : time.strftime("%Y%m%d-%H%M%S"),\n+        
     "repet_path" : os.environ["REPET_PATH"],\n+             "repet_host" : os.environ["REPET_HOST"],\n+             "repet_user" : os.environ["REPET_USER"],\n+             "repet_pw" : os.environ["REPET_PW"],\n+             "repet_db" : os.environ["REPET_DB"],\n+             "repet_port" : os.environ["REPET_PORT"],\n+             "cmdStart" : cmd_start,\n+             "cmdFinish" : cmd_finish,\n+             "cDir" : self._testDir,\n+             "cmdSize" : cmd_size,\n+             "cmdCopy" : cmd_copy\n+             }\n+        obsDict = self._iScriptWriter.createJobScriptDict(cmd_start, cmd_finish, cmd_size, cmd_copy)\n+        self.assertEquals(expDict, obsDict)\n+        \n+class CreateFileThread(threading.Thread):\n+\n+    def __init__(self, pyFileName):\n+        threading.Thread.__init__(self)\n+        self._pyFileName = pyFileName\n+        \n+    def run(self):\n+        os.system("python %s" % self._pyFileName)\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_WriteScript ) )\n+if __name__ == "__main__":\n+        unittest.TextTestRunner(verbosity=2).run( test_suite )    \n'
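Note: Test_WriteScript.test_run above shows the intended end-to-end flow: generate the job script with WriteScript.run(), execute it with the Python interpreter, then read the job status back from the job table. A condensed sketch of that flow, under the same assumptions as the test (a recorded Job instance `job` and its TableJobAdaptator `jobdb` as created in setUp()):

import os
from commons.core.launcher.WriteScript import WriteScript

# Assumes `job` and `jobdb` are prepared as in Test_WriteScript.setUp().
writer = WriteScript(job, jobdb, cdir=os.getcwd(), tmpdir=os.getcwd())
pyFileName = "%s/ClusterLauncher_%s.py" % (os.getcwd(), job.jobname)
writer.run('log = os.system("touch dummyFile")\n',
           'shutil.move("dummyFile", "%s")\n' % os.getcwd(),
           pyFileName)
os.system("python %s" % pyFileName)      # execute the generated job script
print jobdb.getJobStatus(job)            # "finished" if the payload returned 0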
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/expFiles/expJobScriptSQLiteWithFilesCopyTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptSQLiteWithFilesCopyTemplate.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbMySql import DbMySql
+from commons.core.sql.DbSQLite import DbSQLite
+from commons.core.sql.Job import Job
+
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ fileSize = 0
+ if not os.path.exists("groupid"):
+ fileSize = 0.500000
+ freeGigaNeededInTmpDir = float(1 + fileSize)
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < freeGigaNeededInTmpDir):
+ raise RepetException("ERROR: less than %.2fG of input file in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % freeGigaNeededInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if not os.path.exists("groupid"):
+ try:
+ os.mkdir("groupid")
+ except OSError, e :
+ if e.args[0] != 17:
+ raise RepetException("ERROR: can't create 'groupid'")
+ os.chdir("groupid")
+ os.system("touch bank.fa")
+ else:
+ os.chdir("groupid")
+
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ queue = "main.q"
+ iJob = Job("jobs", jobname = "job1", groupid = "groupid", queue = queue, node = os.getenv("HOSTNAME"))
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+
+except IOError, e :
+ print e
+ queue = "main.q"
+ iJob = Job("jobs", jobname = "job1", groupid = "groupid", queue = queue, node = os.getenv("HOSTNAME"))
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+ sys.exit(1)
+
+except Exception, e :
+ print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+ print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+ print e
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+ queue = "main.q"
+ iJob = Job("jobs", jobname = "job1", groupid = "groupid", queue = queue, node = os.getenv("HOSTNAME"))
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+ sys.exit(1)
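Note: the disk-space guard in the script above converts os.statvfs output to gigabytes with the 1073741824.0 (2^30) divisor and compares it against 1 GB of head room plus the size of the files to be copied. As a standalone helper:

import os

def free_gigabytes(path):
    # Same computation as the generated job scripts: available blocks
    # times fragment size, expressed in GiB.
    st = os.statvfs(path)
    return (st.f_bavail * st.f_frsize) / 1073741824.0

file_size = 0.5                      # GiB of input files to copy, as in the test fixture
needed = 1 + file_size               # the scripts raise RepetException below this threshold
assert free_gigabytes("/tmp") >= 0   # illustrative call on a hypothetical scratch path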
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/expFiles/expJobScriptTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptTemplate.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists( "/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch" ):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ minFreeGigaInTmpDir = 1
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+ raise RepetException("ERROR: less than %iG of free space in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % minFreeGigaInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+except IOError, e :
+ print e
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
+
+except Exception, e :
+ print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+ print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+ print e
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
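Note: the generated scripts above all follow the same contract: mark the job "running" before the payload, "finished" on success, and "error" (then exit non-zero) from the IOError/Exception handlers. A schematic of that contract, with the database update abstracted behind a plain callback so the snippet stands alone (the real scripts call TableJobAdaptator.changeJobStatus and also record the node, timings and scratch-directory cleanup):

import sys

def run_job(payload, set_status):
    # set_status stands in for TableJobAdaptator.changeJobStatus().
    try:
        set_status("running")
        payload()
        set_status("finished")
    except Exception, e:
        print e
        set_status("error")
        sys.exit(1)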
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/expFiles/expJobScriptTemplateLight.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptTemplateLight.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+try:
+    newDir = None
+    print os.uname()
+    beginTime = time.time()
+    print 'beginTime=%f' % beginTime
+    print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+    sys.stdout.flush()
+    if not os.path.exists( "/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch" ):
+        raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+    minFreeGigaInTmpDir = 1
+    freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+    if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+        raise RepetException("ERROR: less than %iG of free space in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % minFreeGigaInTmpDir)
+
+    os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+    newDir = "groupid_job1_20110505-105353"
+    if os.path.exists(newDir):
+        shutil.rmtree(newDir)
+    os.mkdir(newDir)
+    os.chdir(newDir)
+
+    log = os.system("touch dummyFile1")
+    if log != 0:
+        raise RepetException("ERROR: job returned %i" % log)
+    else:
+        print "job finished successfully"
+        sys.stdout.flush()
+    shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+    os.chdir("..")
+    shutil.rmtree(newDir)
+    endTime = time.time()
+    print 'endTime=%f' % endTime
+    print 'executionTime=%f' % (endTime - beginTime)
+    print os.uname()
+    sys.stdout.flush()
+
+except IOError, e :
+    print e
+    sys.stdout.flush()
+    sys.exit(1)
\ No newline at end of file
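Note: which template a WriteScript instance fills is driven by its two boolean flags; JobScriptTemplateLight.py (the expected output above) wins whenever chooseTemplateLight is set, otherwise chooseTemplateWithCopy selects the file-copy variant. A sketch of the selection, mirroring the branching at the top of WriteScript.fillTemplate():

def pick_template(with_copy, light):
    # Mirrors WriteScript.fillTemplate(): the "light" flag overrides the others.
    if light:
        return "JobScriptTemplateLight.py"
    return "JobScriptWithFilesCopyTemplate.py" if with_copy else "JobScriptTemplate.py"

assert pick_template(True, True) == "JobScriptTemplateLight.py"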
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/expFiles/expJobScriptTemplate_cmdWith2Lines.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptTemplate_cmdWith2Lines.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+    newDir = None
+    print os.uname()
+    beginTime = time.time()
+    print 'beginTime=%f' % beginTime
+    print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+    sys.stdout.flush()
+    if not os.path.exists( "/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch" ):
+        raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+    minFreeGigaInTmpDir = 1
+    freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+    if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+        raise RepetException("ERROR: less than %iG of free space in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % minFreeGigaInTmpDir)
+
+    os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+    newDir = "groupid_job1_20110505-105353"
+    if os.path.exists(newDir):
+        shutil.rmtree(newDir)
+    os.mkdir(newDir)
+    os.chdir(newDir)
+
+    iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "running")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    print "Hello Yufei"
+    log = os.system("touch dummyFile1")
+    if log != 0:
+        raise RepetException("ERROR: job returned %i" % log)
+    else:
+        print "job finished successfully"
+        sys.stdout.flush()
+    shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+    os.chdir("..")
+    shutil.rmtree(newDir)
+
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "finished")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    endTime = time.time()
+    print 'endTime=%f' % endTime
+    print 'executionTime=%f' % (endTime - beginTime)
+    print os.uname()
+    sys.stdout.flush()
+
+except IOError, e :
+    print e
+    iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
+
+except Exception, e :
+    print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+    print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+    print e
+    if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+        os.chdir("..")
+        shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+    iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
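
This variant wraps the two-line command with the same status bookkeeping before and after it: open a connection, print the current status, record the transition, flush and close. A small helper capturing that repeated pattern, built only from the TableJobAdaptator and DbFactory calls exercised above (the helper itself, setJobStatus, is illustrative and not part of the commit):

    import sys
    from commons.core.sql.TableJobAdaptator import TableJobAdaptator
    from commons.core.sql.DbFactory import DbFactory

    def setJobStatus(iJob, status, tableName = "dummyJobsTable"):
        # Open a fresh connection, record the transition and close again,
        # as the generated script does for "running", "finished" and "error".
        iDb = DbFactory.createInstance()
        iTJA = TableJobAdaptator(iDb, tableName)
        print "current status: %s" % iTJA.getJobStatus(iJob)
        iTJA.changeJobStatus(iJob, status)
        print "updated status: %s" % iTJA.getJobStatus(iJob)
        sys.stdout.flush()
        iDb.close()
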
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/launcher/test/expFiles/expJobScriptWithFilesCopyTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptWithFilesCopyTemplate.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+    newDir = None
+    print os.uname()
+    beginTime = time.time()
+    print 'beginTime=%f' % beginTime
+    print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+    sys.stdout.flush()
+    if not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"):
+        raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+    fileSize = 0
+    if not os.path.exists("groupid"):
+        fileSize = 0.500000
+    freeGigaNeededInTmpDir = float(1 + fileSize)
+    freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+    if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < freeGigaNeededInTmpDir):
+        raise RepetException("ERROR: less than %.2fG of free space in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % freeGigaNeededInTmpDir)
+
+    os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+    if not os.path.exists("groupid"):
+        try:
+            os.mkdir("groupid")
+        except OSError, e :
+            if e.args[0] != 17:
+                raise RepetException("ERROR: can't create 'groupid'")
+        os.chdir("groupid")
+        os.system("touch bank.fa")
+    else:
+        os.chdir("groupid")
+
+    newDir = "groupid_job1_20110505-105353"
+    if os.path.exists(newDir):
+        shutil.rmtree(newDir)
+    os.mkdir(newDir)
+    os.chdir(newDir)
+
+    iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "running")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    log = os.system("touch dummyFile1")
+    if log != 0:
+        raise RepetException("ERROR: job returned %i" % log)
+    else:
+        print "job finished successfully"
+        sys.stdout.flush()
+    shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+    os.chdir("..")
+    shutil.rmtree(newDir)
+
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "finished")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+
+    endTime = time.time()
+    print 'endTime=%f' % endTime
+    print 'executionTime=%f' % (endTime - beginTime)
+    print os.uname()
+    sys.stdout.flush()
+
+except IOError, e :
+    print e
+    iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
+
+except Exception, e :
+    print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+    print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+    print e
+    if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+        os.chdir("..")
+        shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+    iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+    iDb = DbFactory.createInstance()
+    iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+    print "current status: %s" % iTJA.getJobStatus(iJob)
+    iTJA.changeJobStatus(iJob, "error")
+    print "updated status: %s" % iTJA.getJobStatus(iJob)
+    sys.stdout.flush()
+    iDb.close()
+    sys.exit(1)
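
The group-directory creation above tolerates a concurrent job having created "groupid" first: an OSError with errno 17 (EEXIST) is ignored, anything else aborts the job. A self-contained sketch of that idiom with a hypothetical directory name:

    import errno
    import os

    def makeDirIfMissing(dirName):
        # Several jobs of the same group may race to create this directory;
        # "already exists" (errno 17, EEXIST) is harmless, other errors are not.
        try:
            os.mkdir(dirName)
        except OSError, e:
            if e.args[0] != errno.EEXIST:
                raise

    makeDirIfMissing("groupid")
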
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/AxtParser.py
--- a/commons/core/parsing/AxtParser.py Mon Apr 29 03:45:52 2013 -0400
+++ b/commons/core/parsing/AxtParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -93,17 +93,20 @@
 
         m = re.search(r"^\s*\d+\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+\d+\s*$", line)
         if m != None:
+            #sys.exit("\nLine %d '%s' does not have an AXT format" % (self.currentLineNb, line))
+
             mapping = Mapping()
             subMapping = SubMapping()
     
+            offset = -1 if m.group(7) == "-" else 0
             subMapping.queryInterval.setName(m.group(4))
-            subMapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6))))
-            subMapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6))))
+            subMapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6)))-1)
+            subMapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6)))-1)
             subMapping.queryInterval.setDirection(m.group(7))
     
             subMapping.targetInterval.setChromosome(m.group(1))
-            subMapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))))
-            subMapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))))
+            subMapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))) + offset)
+            subMapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))) + offset)
             subMapping.targetInterval.setDirection(1)
     
             subMapping.setSize(min(subMapping.targetInterval.getSize(), subMapping.queryInterval.getSize()))
@@ -113,28 +116,39 @@
     
             mapping.setDirection(m.group(7))
             mapping.targetInterval.setChromosome(m.group(1))
-            mapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))))
-            mapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))))
+            mapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))) + offset)
+            mapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))) + offset)
     
             mapping.queryInterval.setName(m.group(4))
-            mapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6))))
-            mapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6))))
+            mapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6)))-1)
+            mapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6)))-1)
     
             mapping.setSize(min(mapping.targetInterval.getSize(), mapping.queryInterval.getSize()))
     
+            for line in self.handle:
+                string1 = line.strip()
+                self.currentLineNb += 1
+                break
+            for line in self.handle:
+                string2 = line.strip()
+                self.currentLineNb += 1
+                break
+            mapping.setNbMismatches(Utils.getHammingDistance(string1, string2))
+            mapping.setNbGaps(0)
+    
             self.currentMapping = mapping
-            return None
-        if self.queryLine == None:
-            self.queryLine = line
-            return None
-        self.subjectLine = line
-        seqLen = float(len(self.subjectLine))
-        dist = float(getHammingDistance(self.queryLine, self.subjectLine))
-        self.currentMapping.setNbMismatches(getHammingDistance(self.queryLine, self.subjectLine))
-        self.currentMapping.setNbGaps(0)
-        self.queryLine = None
-        self.subjectLine = None
-        return self.currentMapping
+        else:
+            if self.queryLine == None:
+                self.queryLine = line
+            else:
+                self.subjectLine = line
+                seqLen = float(len(self.subjectLine))
+                dist = float(getHammingDistance(self.queryLine, self.subjectLine))
+                identity = ((seqLen-dist)/seqLen) *100
+                self.currentMapping.setIdentity(identity)
+                self.queryLine = None
+                self.subjectLine = None
+                return self.currentMapping
             
 
 
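
The reworked else-branch above derives the percent identity of a hit from the Hamming distance between the aligned query and subject lines. A standalone illustration of that computation, using a local hamming() stand-in instead of the module's getHammingDistance and two made-up aligned strings:

    def hamming(s1, s2):
        # Number of positions at which the two aligned strings differ.
        return sum(1 for a, b in zip(s1, s2) if a != b)

    query   = "ACGT-ACGTACGT"
    subject = "ACGTTACGTACCT"
    seqLen  = float(len(subject))
    dist    = float(hamming(query, subject))
    identity = ((seqLen - dist) / seqLen) * 100   # same formula as the new parser code
    print "identity = %.2f%%" % identity          # 84.62 for this toy pair
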
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/AxtParser.pyc
Binary file commons/core/parsing/AxtParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/BamParser.pyc
Binary file commons/core/parsing/BamParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/BedParser.pyc
Binary file commons/core/parsing/BedParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/BlastParser.pyc
Binary file commons/core/parsing/BlastParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/BowtieParser.pyc
Binary file commons/core/parsing/BowtieParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/CoordsParser.pyc
Binary file commons/core/parsing/CoordsParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/ElandParser.pyc
Binary file commons/core/parsing/ElandParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/ExoParser.pyc
Binary file commons/core/parsing/ExoParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/FastaParser.py
--- a/commons/core/parsing/FastaParser.py Mon Apr 29 03:45:52 2013 -0400
+++ b/commons/core/parsing/FastaParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -80,7 +80,7 @@
         if self.currentLine != None:
             if self.currentLine[0] != ">":
                 raise Exception("First line is weird: %s" % (self.currentLine))
-            name = self.currentLine[1:].split()[0].replace("|", "_").replace(".", "_")
+            name = self.currentLine[1:].split()[0]
             self.currentLine = None
 
         for line in self.handle:
@@ -89,7 +89,7 @@
                 pass
             elif line[0] == ">":
                 if name == None:
-                    name = line[1:].split()[0].replace("|", "_").replace(".", "_")
+                    name = line[1:].split()[0]
                 else:
                     self.currentLine = line
                     return Sequence(name, string)
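
The effect of this change is that FASTA identifiers are no longer rewritten: only the first whitespace still truncates the name, while "|" and "." are kept. A before/after illustration on a hypothetical header line:

    header = ">gi|240254421.1 hypothetical chromosome 1"

    # Old behaviour: "|" and "." were replaced by "_" in the sequence name.
    oldName = header[1:].split()[0].replace("|", "_").replace(".", "_")
    # New behaviour: the first whitespace-separated token is kept verbatim.
    newName = header[1:].split()[0]

    print oldName   # gi_240254421_1
    print newName   # gi|240254421.1
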
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/FastaParser.pyc
Binary file commons/core/parsing/FastaParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/FastqParser.pyc
Binary file commons/core/parsing/FastqParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/GffParser.pyc
Binary file commons/core/parsing/GffParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/GtfParser.pyc
Binary file commons/core/parsing/GtfParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/MapParser.pyc
Binary file commons/core/parsing/MapParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/MapperParser.pyc
Binary file commons/core/parsing/MapperParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/MaqParser.pyc
Binary file commons/core/parsing/MaqParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/NCListParser.pyc
Binary file commons/core/parsing/NCListParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/ParserChooser.pyc
Binary file commons/core/parsing/ParserChooser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/PklParser.pyc
Binary file commons/core/parsing/PklParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/PslParser.pyc
Binary file commons/core/parsing/PslParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/RmapParser.pyc
Binary file commons/core/parsing/RmapParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/SamParser.pyc
Binary file commons/core/parsing/SamParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/SeqmapParser.pyc
Binary file commons/core/parsing/SeqmapParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/SequenceListParser.pyc
Binary file commons/core/parsing/SequenceListParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/ShrimpParser.pyc
Binary file commons/core/parsing/ShrimpParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/Soap2Parser.pyc
Binary file commons/core/parsing/Soap2Parser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/SoapParser.pyc
Binary file commons/core/parsing/SoapParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/TranscriptListParser.pyc
Binary file commons/core/parsing/TranscriptListParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/WigParser.pyc
Binary file commons/core/parsing/WigParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/__init__.pyc
Binary file commons/core/parsing/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_BedParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BedParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,58 @@
+import unittest, os
+from commons.core.parsing.BedParser import BedParser
+
+
+class Test_BedParser(unittest.TestCase):
+    
+    def test_Parser(self):
+        parser = BedParser("data/testBedParser1.bed")
+
+        self.assertEqual(parser.getNbTranscripts(), 1)
+
+        for transcript in parser.getIterator():
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getName(), "test1.1")
+            self.assertEqual(transcript.getStart(), 1000)
+            self.assertEqual(transcript.getEnd(), 2999)
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 2)
+            exons = transcript.getExons()
+            self.assertEqual(exons[0].getChromosome(), "arm_X")
+            self.assertEqual(exons[0].getStart(), 1000)
+            self.assertEqual(exons[0].getEnd(), 1099)
+            self.assertEqual(exons[0].getDirection(), 1)
+            self.assertEqual(exons[1].getChromosome(), "arm_X")
+            self.assertEqual(exons[1].getStart(), 2000)
+            self.assertEqual(exons[1].getEnd(), 2999)
+            self.assertEqual(exons[1].getDirection(), 1)
+
+    def test_Parser_short(self):
+        tmpFileName = "tmpFile.bed"
+        tmpHandle   = open(tmpFileName, "w")
+        tmpHandle.write("""X\t554748\t554904\texon
+X\t554748\t554904\tCDS
+X\t554748\t554750\tstart_codon
+""")
+        tmpHandle.close()
+        parser = BedParser(tmpFileName)
+        self.assertEqual(parser.getNbTranscripts(), 3)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getNbExons(), 1)
+            self.assertEqual(transcript.getChromosome(), "X")
+            self.assertEqual(transcript.getStart(), 554748)
+            if cpt == 0:
+                self.assertEqual(transcript.getEnd(), 554903)
+                self.assertEqual(transcript.getName(), "exon")
+            elif cpt == 1:
+                self.assertEqual(transcript.getEnd(), 554903)
+                self.assertEqual(transcript.getName(), "CDS")
+            elif cpt == 2:
+                self.assertEqual(transcript.getEnd(), 554749)
+                self.assertEqual(transcript.getName(), "start_codon")
+        os.remove(tmpFileName)
+
+
+
+if __name__ == '__main__':
+        unittest.main()
+
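
The expected values in test_Parser_short pin down the coordinate convention: the BED start column is taken over unchanged, while the exclusive BED end becomes an inclusive end of end-1 (554904 is reported as 554903). A small illustration of that conversion on one fixture line, without going through the parser:

    line = "X\t554748\t554904\texon"
    chromosome, start, end, name = line.split("\t")

    inclusiveStart = int(start)      # 554748, as asserted above
    inclusiveEnd   = int(end) - 1    # 554903: BED end coordinates are exclusive
    print chromosome, name, inclusiveStart, inclusiveEnd
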
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_BlatFileParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatFileParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,61 @@
+import unittest
+from commons.core.parsing.BlatFileParser import BlatFileParser
+
+
+class Test_BlatFileParser(unittest.TestCase):
+
+
+    def test_parseBlatFile(self):
+        fileName = "dummayBlat.psl"
+        self._writeBlatInputFile(fileName)
+        blatFileParser = BlatFileParser(fileName)
+        blatFileParser.parseBlatFile()
+        obsNbHits = len(blatFileParser.getListsOfHits())
+        self.assertTrue(obsNbHits == 10)
+        obsQueries = blatFileParser.getDictOfQueries()
+        expQueries = {'5:574_1:574_539_5:1:G/C': 1, '3:574_1:574_539_5:1:G/C': 1, '5:574_2:574_433_5:1:G/C': 1,"3:574_2:574_433_5:1:G/C":1, "5:574_5:574_607_5:1:G/C": 1, "3:574_5:574_607_5:1:G/C": 1}
+        self.assertEquals(expQueries, obsQueries)
+        
+    def test_parseBlatFileByQueries(self):
+        fileName = "dummayBlat.psl"
+        self._writeBlatInputFile(fileName)
+        blatFileParser = BlatFileParser(fileName)
+        blatFileParser.parseBlatFileByQueries()
+        obsDict = blatFileParser.getDictOfBlatHitsByQueries()
+        obs1 = len(obsDict["5:574_1:574_539_5:1:G/C"])
+        obs2 = len(obsDict["3:574_1:574_539_5:1:G/C"])
+        obs3 = len(obsDict["5:574_2:574_433_5:1:G/C"])
+        obs4 = len(obsDict["3:574_2:574_433_5:1:G/C"])
+        obs5 = len(obsDict["5:574_5:574_607_5:1:G/C"])
+        obs6 = len(obsDict["3:574_5:574_607_5:1:G/C"])
+        self.assertTrue(obs1 == 1)
+        self.assertTrue(obs2 == 1)
+        self.assertTrue(obs3 == 1)
+        self.assertTrue(obs4 == 5)
+        self.assertTrue(obs5 == 1)
+        self.assertTrue(obs6 == 1)
+        obsQueries = blatFileParser.getDictOfQueries()
+        expQueries = {'5:574_1:574_539_5:1:G/C': 1, '3:574_1:574_539_5:1:G/C': 1, '5:574_2:574_433_5:1:G/C': 1,"3:574_2:574_433_5:1:G/C":1, "5:574_5:574_607_5:1:G/C": 1, "3:574_5:574_607_5:1:G/C": 1}
+        self.assertEquals(expQueries, obsQueries)
+        
+    def _writeBlatInputFile(self, fileName):
+        file = open(fileName, "w")
+        file.write("psLayout version 3\n")
+        file.write("\n")
+        file.write("match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n")
+        file.write("     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n")
+        file.write("---------------------------------------------------------------------------------------------------------------------------------------------------------------\n")
+        file.write("246\t0\t0\t4\t0\t0\t0\t0\t-\t5:574_1:574_539_5:1:G/C\t250\t0\t250\ttaecs3B_RPH7\t3109948\t1065213\t1065463\t1\t250,\t0,\t1065213,\n")
+        file.write("247\t0\t0\t2\t0\t0\t0\t0\t-\t3:574_1:574_539_5:1:G/C\t250\t1\t250\ttaecs3B_RPH7\t3109948\t1064962\t1065211\t1\t249,\t0,\t1064962,\n")
+        file.write("249\t0\t0\t1\t0\t0\t0\t0\t-\t5:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH7\t3109948\t1065319\t1065569\t1\t250,\t0,\t1065319,\n")
+        file.write("245\t0\t0\t5\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH8\t3109948\t1065068\t1065318\t1\t250,\t0,\t1065068,\n")
+        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH8\t3109948\t1065310\t1065560\t1\t250,\t0,\t1065310,\n")
+        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1065059\t1065309\t1\t250,\t0,\t1065059,\n")
+        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1064805\t1065055\t1\t250,\t0,\t1064805,\n")
+        file.write("68\t0\t0\t1\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t69\t0\t69\ttaecs3B_RPH9\t3109948\t1064733\t1064802\t1\t69,\t0,\t1064733,\n")
+        file.write("245\t0\t0\t5\t0\t0\t0\t0\t-\t5:574_5:574_607_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1065145\t1065395\t1\t250,\t0,\t1065145,\n")
+        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_5:574_607_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1064894\t1065144\t1\t250,\t0,\t1064894,\n")
+        file.close()
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
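
Both tests run the same fixture through the parser's two entry points; condensed, the usage they exercise looks like this (the file name is the fixture written by _writeBlatInputFile):

    from commons.core.parsing.BlatFileParser import BlatFileParser

    # Flat view: one entry per PSL line (10 in the fixture).
    parser = BlatFileParser("dummayBlat.psl")
    parser.parseBlatFile()
    print len(parser.getListsOfHits())                    # 10

    # Grouped view: hits keyed by query name; the query placed five times
    # in the fixture yields a list of length 5.
    parser = BlatFileParser("dummayBlat.psl")
    parser.parseBlatFileByQueries()
    hitsByQuery = parser.getDictOfBlatHitsByQueries()
    print len(hitsByQuery["3:574_2:574_433_5:1:G/C"])     # 5
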
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_BlatParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,445 @@
[445 new lines; diff truncated by the changeset viewer. The new file Test_BlatParser.py adds unit tests for BlatParser: setAttributesFromString parsed against a sample PSL line with every getter checked, the same parse with an empty query name, and three __eq__ cases (identical objects, objects differing only in query name, and objects with different target fields). The file ends without a trailing newline.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_BlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatToGff.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,28 @@
+from commons.core.parsing.BlatToGff import BlatToGff
+import unittest
+
+
+class Test_BlatToGff(unittest.TestCase):
+
+
+    def test_convertBlatObjectToGffLine(self):
+        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
+        nbLine = 15
+        iBlatToGff = BlatToGff()
+        BlatToGff._methodName = ''
+        obsGffLine = iBlatToGff.convertBlatObjectToGffLine(blatLine, nbLine)
+        expGffLine = 'chr16\tBlatToGff\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n'
+        self.assertEquals(expGffLine, obsGffLine)
+
+    def test_convertBlatObjectToGffLine_with_methodName(self):
+        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
+        nbLine = 15
+        iBlatToGff = BlatToGff()
+        BlatToGff._methodName = 'Test'
+        obsGffLine = iBlatToGff.convertBlatObjectToGffLine(blatLine, nbLine)
+        expGffLine = 'chr16\tBlatToGff\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n'
+        self.assertEquals(expGffLine, obsGffLine)
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
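
The expected GFF line is assembled from a handful of PSL columns: the target name becomes the seqid, target start/end become the feature coordinates, and the query name is reused for both ID and Name. A sketch of that mapping for the fixture line, illustrative only and not the BlatToGff implementation (note that the expected outputs in these tests always carry "+" in the strand column, whatever the PSL strand says):

    blatLine = "315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,"
    fields = blatLine.split("\t")

    qName, tName, tSize = fields[9], fields[13], fields[14]
    tStart, tEnd = fields[15], fields[16]
    attributes = "ID=%s;Name=%s;bes_start=%s;bes_end=%s;bes_size=%s" % (qName, qName, tStart, tEnd, tSize)
    print "%s\tBlatToGff\tBES\t%s\t%s\t.\t+\t.\t%s" % (tName, tStart, tEnd, attributes)
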
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_BlatToGffForBesPaired.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatToGffForBesPaired.py Tue Apr 30 14:33:21 2013 -0400
b"@@ -0,0 +1,292 @@\n+import unittest, os\n+from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired\n+\n+\n+class Test_BlatToGffForBesPaired(unittest.TestCase):\n+\n+\n+    def test_convertBlatObjectToGffLine(self):\n+        blatLine = '315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n'\n+        nbLine = 15\n+        besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']\n+        self._writeBesSequences(besFastaFileName)\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        iBlatToGffForBesPaired._methodName = ''\n+        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName\n+        obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)\n+        expGffLine = 'chr16\\tBlatToGffForBesPaired\\tBES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\\n'\n+        expBesName = 'MRRE1H001H13FM1'\n+        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'\n+        expBesType = 'FM'\n+        self.assertEquals(expGffLine, obsGffLine)\n+        self.assertEquals(expBesName, obsBesName)\n+        self.assertEquals(expBesSeq, obsBesSeq)\n+        self.assertEquals(expBesType, obsBesType)\n+        os.remove(besFastaFileName)\n+\n+    def test_convertBlatObjectToGffLine_with_methodName(self):\n+        blatLine = '315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n'\n+        nbLine = 15\n+        besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']\n+        self._writeBesSequences(besFastaFileName)\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        iBlatToGffForBesPaired._methodName = 'Test'\n+        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName\n+        obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)\n+        expGffLine = 'chr16\\tBlatToGffForBesPaired\\tTest:BES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\\n'\n+        expBesName = 'MRRE1H001H13FM1'\n+        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'\n+        expBesType = 'FM'\n+        self.assertEquals(expGffLine, obsGffLine)\n+        self.assertEquals(expBesName, obsBesName)\n+        self.assertEquals(expBesSeq, obsBesSeq)\n+        self.assertEquals(expBesType, obsBesType)\n+        os.remove(besFastaFileName)\n+    \n+    def test_getBesName(self):\n+        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\\n'\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        obsBesName = iBlatToGffForBesPaired.getBesName(col9)\n+        expBesName = 'machin1'\n+        self.assertEquals(expBesName, obsBesName)\n+                \n+    def test_checkBesNames_OK(self):\n+        besName1 = 'MRRE1H001H13FM8'\n+        besName2 = 
'MRRE1H001H13RM2'\n+        line = 10\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        self.assertTrue(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))\n+        \n+    def test_checkBesNames_NOK(self):\n+        besName1 = 'MRRE1H001H13FM1'\n+        besName2 = 'TOTORM2'\n+        line = 10\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        self.assertFalse(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))\n+        \n+    def test_checkBesPositions_OK1(self):\n+        tBes1 = ('chr16', 25, 150)\n+        tBes2 "..b'ommons/core/parsing/test/sequence.fasta\' % os.environ[\'REPET_PATH\']\n+        fastaFile = open(fastaFileName, \'w\')\n+        fastaFile.write(\'>seq1\\n\')\n+        fastaFile.write(\'ATCGATCGATCGATCGATACGTCAGCGATCGAT\\n\')\n+        fastaFile.write(\'TACGTACGTACGATCGATCGATCGATCGATCGG\\n\')\n+        fastaFile.write(\'TACGTACGTACGATCGACGATCGATGCCGATCG\\n\')\n+        fastaFile.write(\'ATCGAC\\n\')\n+        fastaFile.write(\'>seq2\\n\')\n+        fastaFile.write(\'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\\n\')\n+        fastaFile.write(\'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\\n\')\n+        fastaFile.write(\'ACTGACACTGTACGTAC\\n\')\n+        fastaFile.write(\'>seq3\\n\')\n+        fastaFile.write(\'ACTCGATCGATCG\\n\')\n+        fastaFile.close()\n+        \n+        seqName = \'seq4\'\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        iBlatToGffForBesPaired._inputFileFasta = fastaFileName\n+        obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)\n+        expSeq = \'NA\'\n+        self.assertEquals(expSeq, obsSeq)\n+        os.remove(fastaFileName)\n+        \n+    def test_getBesFmAndRmNamesAndSequences_case1(self):\n+        nameBes1 = \'MRRE1H0072T1FM1\'\n+        seqBes1 = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+        typeBes1 = \'FM\'\n+        nameBes2 = \'MRRE1H0072T1RM3\'\n+        seqBes2 = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+        typeBes2 = \'RM\'\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)\n+        expNameBesFM = \'MRRE1H0072T1FM1\'\n+        expNameBesRM = \'MRRE1H0072T1RM3\'\n+        expSeqBesFM = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+        expSeqBesRM = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+        self.assertEquals(expNameBesFM, obsNameBesFM)\n+        self.assertEquals(expNameBesRM, obsNameBesRM)\n+        self.assertEquals(expSeqBesFM, obsSeqBesFM)\n+        self.assertEquals(expSeqBesRM, obsSeqBesRM)\n+        \n+    def test_getBesFmAndRmNamesAndSequences_case2(self):\n+        nameBes1 = \'MRRE1H0072T1RM1\'\n+        seqBes1 = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+        typeBes1 = \'RM\'\n+        nameBes2 = \'MRRE1H0072T1FM3\'\n+        seqBes2 = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+        typeBes2 = \'FM\'\n+        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)\n+        expNameBesFM = \'MRRE1H0072T1FM3\'\n+        expNameBesRM = \'MRRE1H0072T1RM1\'\n+        expSeqBesFM = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+        expSeqBesRM = 
\'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+        self.assertEquals(expNameBesFM, obsNameBesFM)\n+        self.assertEquals(expNameBesRM, obsNameBesRM)\n+        self.assertEquals(expSeqBesFM, obsSeqBesFM)\n+        self.assertEquals(expSeqBesRM, obsSeqBesRM)\n+        \n+    def _writeBesSequences(self, fileName):\n+        file = open(fileName, \'w\')\n+        file.write(\'>MRRE1H001H13RM1\\n\')\n+        file.write(\'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\\n\')\n+        file.write(\'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\\n\')\n+        file.write(\'ATCGATCGATCGATCGACATCGTACG\\n\')\n+        file.write(\'>MRRE1H001H13FM1\\n\')\n+        file.write(\'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\\n\')\n+        file.write(\'CTAGCTAGCTAGCTAGCTAGCTAGC\\n\')\n+        file.write(\'>MRRE2H007A13FM3\\n\')\n+        file.write(\'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\\n\')\n+        file.write(\'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\\n\')\n+        file.write(\'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\\n\')\n+        file.write(\'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\\n\')\n+        file.close()\n+        \n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_BowtieParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BowtieParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,57 @@
+from commons.core.parsing.BowtieParser import BowtieParser
+import unittest, os
+
+
+class Test_BlatParser(unittest.TestCase):
+
+
+    def test_simple(self):
+        fileName = "tmpFile.bowtie"
+        handle   = open(fileName, "w")
+        handle.write("HWI-EAS179_0053:2:1:1365:7879#0/2\t+\tchrXHet\t191698\tACCGCTGAACCACTTTCATNCNTGGGATTGTGAACTGAAACTGTTCACATGAACTTGGAATTCCCAGTAAGTGTGA\tLcaYcacLaTdd`dacacYBaBTa^^TL^M`]`^aa`Tca`LaLTUa]a_bcLcTMMMMa^a^`bT`ccT_UbM_B\t0\t19:G>N,21:T>N\n")
+        handle.write("HWI-EAS179_0053:2:1:1365:7879#0/1\t-\tchrXHet\t191803\tCCCCTTGTACACACCGCCCGTCGCTACTACCGATTGAATTATGTAGTGAGGTCTCCGGACGTGATCACTGTGACGC\tBBBBBBBBB`O`DS]]aYabaaa[ULYLY]^b`^a^aZZZ_LLLca_a_b^^aYdbd``d^ccaY`_caccc^acc\t0\t33:T>G,72:T>C\n")
+        handle.write("HWI-EAS179_0053:2:1:1371:11420#0/2\t+\tchr3L\t16569206\tTATGAGCGCCAATTTTGCANTTTTATTTTTGTACAAGCCAAGGGTTTTGCAACATTCACAGCGCTTGCCACTTGTC\tcY^bcYLcaL]`]]`aaTaBaab^_ZZ__R[`[cYccc^Ybb^_L`L`Y`aM_a_TcTcc`LL]]MYaYabbTY`^\t0\t19:G>N\n")
+        handle.write("HWI-EAS179_0053:2:1:1371:11420#0/1\t-\tchr3L\t16569298\tAATGAACCATTGTAATTACCCACAACACATACAGTCACACACGAGATGCACACAAGTCGGAAACGGAAGCGAGACG\tBBBBBBBBBBBBBBBBBBBBBB^T`]Y^`KZY__LY_a]^T^ccYaYY__YT]VZbL]`b^cLT^a^caccYbT^b\t0\n")
+        handle.close()
+
+        parser = BowtieParser("tmpFile.bowtie", 0)
+        for cpt, mapping in enumerate(parser.getIterator()):
+            transcript = mapping.getTranscript()
+            if cpt == 0:
+                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1365:7879#0/2")
+                self.assertEquals(transcript.getChromosome(), "chrXHet")
+                self.assertEquals(transcript.getDirection(), 1)
+                self.assertEquals(transcript.getStart(), 191699)
+                self.assertEquals(transcript.getEnd(), 191774)
+                self.assertEquals(transcript.getTagValue("nbMismatches"), 2)
+            elif cpt == 1:
+                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1365:7879#0/1")
+                self.assertEquals(transcript.getChromosome(), "chrXHet")
+                self.assertEquals(transcript.getDirection(), -1)
+                self.assertEquals(transcript.getStart(), 191804)
+                self.assertEquals(transcript.getEnd(), 191879)
+                self.assertEquals(transcript.getTagValue("nbMismatches"), 2)
+            elif cpt == 2:
+                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1371:11420#0/2")
+                self.assertEquals(transcript.getChromosome(), "chr3L")
+                self.assertEquals(transcript.getDirection(), 1)
+                self.assertEquals(transcript.getStart(), 16569207)
+                self.assertEquals(transcript.getEnd(), 16569282)
+                self.assertEquals(transcript.getTagValue("nbMismatches"), 1)
+            elif cpt == 3:
+                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1371:11420#0/1")
+                self.assertEquals(transcript.getChromosome(), "chr3L")
+                self.assertEquals(transcript.getDirection(), -1)
+                self.assertEquals(transcript.getStart(), 16569299)
+                self.assertEquals(transcript.getEnd(), 16569374)
+                self.assertEquals(transcript.getTagValue("nbMismatches"), 0)
+            else:
+                self.fail()
+
+        os.remove(fileName)
+        
+        
+
+if __name__ == "__main__":
+    unittest.main()
+
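
The assertions above fix how the legacy Bowtie format is interpreted: the 0-based offset in column 4 becomes a 1-based start, the end is the offset plus the read length, and the mismatch count is the number of comma-separated entries in the trailing descriptor column (0 when that column is absent or empty). A standalone check against the first fixture line, without using BowtieParser:

    offset = 191698     # column 4 of the first fixture line (0-based)
    read   = "ACCGCTGAACCACTTTCATNCNTGGGATTGTGAACTGAAACTGTTCACATGAACTTGGAATTCCCAGTAAGTGTGA"
    mismatchField = "19:G>N,21:T>N"       # last column; empty for a perfect match

    start = offset + 1                    # 191699, as asserted above
    end   = offset + len(read)            # 191774
    nbMismatches = len(mismatchField.split(",")) if mismatchField else 0
    print start, end, nbMismatches        # 191699 191774 2
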
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_CoordsParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_CoordsParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,105 @@
+import unittest
+from commons.core.parsing.CoordsParser import CoordsParser
+from SMART.Java.Python.mappingToCoordinates import MappingToCoordinates
+
+
+class Test_CoordsParser(unittest.TestCase):
+    
+
+    def test_Parser(self):
+        parser = CoordsParser("data/testCoordsParser.coords")
+        
+        cpt = 0
+        for mapping in parser.getIterator():
+            transcript = mapping.getTranscript()
+            cpt += 1
+            if cpt == 1:
+                self.assertEqual(transcript.getChromosome(), "scaffold_1")
+                self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
+                self.assertEqual(transcript.getStart(), 1)
+                self.assertEqual(transcript.getEnd(), 6251)
+                self.assertEqual(transcript.getDirection(), -1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                self.assertEqual(transcript.getTagValue("identity"), 89.030000000000001)
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "scaffold_1")
+                self.assertEqual(exons[0].getStart(), 1)
+                self.assertEqual(exons[0].getEnd(), 6251)
+                self.assertEqual(exons[0].getDirection(), -1)
+                self.assertEqual(transcript.getSize(), 6251)
+            elif cpt == 2:
+                self.assertEqual(transcript.getChromosome(), "scaffold_1")
+                self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
+                self.assertEqual(transcript.getStart(), 9127)
+                self.assertEqual(transcript.getEnd(), 11947)
+                self.assertEqual(transcript.getDirection(), -1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                self.assertEqual(transcript.getTagValue("identity"), 90.450000000000003)
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "scaffold_1")
+                self.assertEqual(exons[0].getStart(), 9127)
+                self.assertEqual(exons[0].getEnd(), 11947)
+                self.assertEqual(exons[0].getDirection(), -1)
+                self.assertEqual(transcript.getSize(), 2821)
+            if cpt == 3:
+                self.assertEqual(transcript.getChromosome(), "scaffold_1")
+                self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
+                self.assertEqual(transcript.getStart(), 12201)
+                self.assertEqual(transcript.getEnd(), 12953)
+                self.assertEqual(transcript.getDirection(), -1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "scaffold_1")
+                self.assertEqual(exons[0].getStart(), 12201)
+                self.assertEqual(exons[0].getEnd(), 12953)
+                self.assertEqual(exons[0].getDirection(), -1)
+                self.assertEqual(transcript.getSize(), 753)
+            
+    def test_Parser_showcoord(self):
+        parser = CoordsParser("data/testCoordsParser_showcoord.coords")
+        expTranscriptCount = 1
+        obsTranscriptCount = 0
+        
+        for mapping in parser.getIterator():
+            transcript = mapping.getTranscript()
+            obsTranscriptCount += 1
+            self.assertEqual(transcript.getChromosome(), "mivi_sl_A1_scaffold00001")
+            self.assertEqual(transcript.getName(), "mivi_sl_A2_scaffold00003")
+            self.assertEqual(transcript.getStart(), 296)
+            self.assertEqual(transcript.getEnd(), 2292)
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getTagValue("identity"), 98.30)
+            self.assertEqual(transcript.getTagValue("target_pident"), 98.30)                
+            self.assertEqual(transcript.getTagValue("target_pcover"), 3.32)
+            self.assertEqual(transcript.getTagValue("target_length"), 60273)                
+            self.assertEqual(transcript.getTagValue("Target"), "mivi_sl_A2_scaffold00003 1 2001")
+            self.assertEqual(transcript.getSize(), 1997)
+                
+        self.assertEquals(expTranscriptCount, obsTranscriptCount)
+                            
+    def test_Parser_showcoord_promer(self):
+        parser = CoordsParser("data/testCoordsParser_showcoord_promer.coords")
+        expTranscriptCount = 1
+        obsTranscriptCount = 0
+        
+        for mapping in parser.getIterator():
+            transcript = mapping.getTranscript()
+            obsTranscriptCount += 1
+            self.assertEqual(transcript.getChromosome(), "mivi_sl_A1_scaffold00001")
+            self.assertEqual(transcript.getName(), "mivi_sl_A2_scaffold00003")
+            self.assertEqual(transcript.getStart(), 291)
+            self.assertEqual(transcript.getEnd(), 1229)
+            self.assertEqual(transcript.getDirection(), -1)
+            self.assertEqual(transcript.getTagValue("identity"), 94.25)
+            self.assertEqual(transcript.getTagValue("target_pident"), 94.25)                
+            self.assertEqual(transcript.getTagValue("target_pcover"), 1.56)
+            self.assertEqual(transcript.getTagValue("target_length"), 60273)                
+            self.assertEqual(transcript.getTagValue("Target"), "mivi_sl_A2_scaffold00003 939 1")
+            self.assertEqual(transcript.getSize(), 939)
+                
+        self.assertEquals(expTranscriptCount, obsTranscriptCount)
+        

+if __name__ == '__main__':
+        unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,70 @@
+from commons.core.parsing.CrossSsrAndBesMappedByBlatToGff import CrossSsrAndBesMappedByBlatToGff
+from commons.core.parsing.SsrParser import SsrParser
+
+import unittest
+import os
+
+
+class Test_CrossSsrAndBesMappedByBlatToGff(unittest.TestCase):
+
+
+    def test_createDictOfSsrParser(self):
+        obsDictSsrParser = {}
+        
+        ssrFileName = 'input_SSR_Resuts.tab'
+        SSRFile = open(ssrFileName, 'w')
+        SSRFile.write('BES_name\tBES_redundancy\tSSR_di/tri/tetranucleotide\tSSR_Motif\tSSR_Motif_number\tSSR_start\tSSR_end\tBES_size\n')
+        SSRFile.write('MRRE1H001A12RM1\t1\t4\tttta\t6\t272\t295\t724\n')
+        SSRFile.write('MRRE1H001B01RM1\t1\t3\taat\t8\t264\t287\t683\n')
+        SSRFile.write('MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734\n')
+        SSRFile.write('MRRE1H001B07RM1\t2\t2\taata\t25\t83\t90\t734\n')
+        SSRFile.close()
+        
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        iCrossSsrAndBesMappedByBlatToGff._inputFileSSR = ssrFileName
+        obsDictSsrParser = iCrossSsrAndBesMappedByBlatToGff.createDictOfSsrParser(obsDictSsrParser)
+        
+        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        SsrParser2 = SsrParser('MRRE1H001B01RM1', '1', '3', 'aat', '8', '264', '287', '683')
+        SsrParser3 = SsrParser('MRRE1H001B07RM1', '1', '2', 'ta', '19', '153', '190', '734')
+        SsrParser4 = SsrParser('MRRE1H001B07RM1', '2', '2', 'aata', '25', '83', '90', '734')
+        
+        expDictSsrParser = {
+                         'MRRE1H001A12RM1': [SsrParser1], 
+                         'MRRE1H001B01RM1': [SsrParser2],
+                         'MRRE1H001B07RM1': [SsrParser3, SsrParser4]
+                        }
+        
+        self.assertEquals(expDictSsrParser, obsDictSsrParser)
+        os.remove(ssrFileName)
+        
+    def test_convertSSRPositionsToBlatPositions_strand_FW(self):
+        ssrPos = 75
+        blatPosStart = 10501475
+        blatPosEnd = 10501985
+        strand = '+'
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        obsNewPos = iCrossSsrAndBesMappedByBlatToGff.convertSSRPositionsToChromPositions(ssrPos, blatPosStart, blatPosEnd, strand)
+        expNewPos = 10501549
+        self.assertEquals(expNewPos, obsNewPos)
+        
+    def test_convertSSRPositionsToBlatPositions_strand_RV(self):
+        ssrPos = 75
+        blatPosStart = 10501475
+        blatPosEnd = 10501985
+        strand = '-'
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        obsNewPos = iCrossSsrAndBesMappedByBlatToGff.convertSSRPositionsToChromPositions(ssrPos, blatPosStart, blatPosEnd, strand)
+        expNewPos = 10501911
+        self.assertEquals(expNewPos, obsNewPos)
+        
+    def test_getSsrMotif(self):
+        ssrMotif = 'atg'
+        ssrNbMotif = 4
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        obsSsrSeq = iCrossSsrAndBesMappedByBlatToGff.getSsrSeq(ssrMotif, ssrNbMotif)
+        expSsrSeq = 'atgatgatgatg'
+        self.assertEquals(expSsrSeq, obsSsrSeq)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
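
The two conversion tests above imply a simple rule for projecting an SSR position onto the chromosome: blatPosStart + ssrPos - 1 on the forward strand and blatPosEnd - ssrPos + 1 on the reverse strand. A worked check against the fixture values (the formula is inferred from the expected results, not read from the implementation):

    ssrPos, blatPosStart, blatPosEnd = 75, 10501475, 10501985

    forward = blatPosStart + ssrPos - 1   # 10501549, the '+' strand expectation
    reverse = blatPosEnd - ssrPos + 1     # 10501911, the '-' strand expectation
    print forward, reverse
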
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_F_BlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_BlatToGff.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,77 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_F_BlatToGff(unittest.TestCase):
+
+
+    def test_run(self):
+        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
+        self._writeBlatInputFile(blatInputFileName)
+        
+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
+        cmd = 'python %s/commons/core/parsing/BlatToGff.py -i %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, obsOutputFileName)
+        os.system(cmd)
+        
+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
+        self._writeExpOutputFile(expOutputFileName)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        os.remove(blatInputFileName)
+        os.remove(obsOutputFileName)
+        os.remove(expOutputFileName)
+
+    def test_run_with_methodName(self):
+        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
+        self._writeBlatInputFile(blatInputFileName)
+        
+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
+        cmd = 'python %s/commons/core/parsing/BlatToGff.py -i %s -o %s -n Test_F' % (os.environ['REPET_PATH'], blatInputFileName, obsOutputFileName)
+        os.system(cmd)
+        
+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
+        self._writeExpOutputFile_with_methodName(expOutputFileName)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        os.remove(blatInputFileName)
+        os.remove(obsOutputFileName)
+        os.remove(expOutputFileName)
+    
+    def _writeBlatInputFile(self, blatInputFileName):
+        file = open(blatInputFileName, 'w')
+        file.write('psLayout version 3\n')
+        file.write('\n')
+        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
+        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
+        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
+        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
+        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tmachin1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
+        file.write('554\t26\t0\t0\t1\t16\t1\t17\t-\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
+        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tmachin2\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
+        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tmachin3\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
+        file.close()
+        
+    def _writeExpOutputFile(self, expOutputFileName):
+        file = open(expOutputFileName, 'w')
+        file.write('##gff-version 3\n')
+        file.write('chr16\tBlatToGff\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n')
+        file.write('chr16\tBlatToGff\tBES\t21736364\t21737069\t.\t+\t.\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n')
+        file.write('chr11\tBlatToGff\tBES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\n')
+        file.write('chr11\tBlatToGff\tBES\t3794984\t3795627\t.\t+\t.\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\n')
+        file.write('chr18\tBlatToGff\tBES\t12067347\t12067719\t.\t+\t.\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\n')
+        file.close()
+        
+    def _writeExpOutputFile_with_methodName(self, expOutputFileName):
+        file = open(expOutputFileName, 'w')
+        file.write('##gff-version 3\n')
+        file.write('chr16\tBlatToGff\tTest_F:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n')
+        file.write('chr16\tBlatToGff\tTest_F:BES\t21736364\t21737069\t.\t+\t.\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n')
+        file.write('chr11\tBlatToGff\tTest_F:BES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\n')
+        file.write('chr11\tBlatToGff\tTest_F:BES\t3794984\t3795627\t.\t+\t.\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\n')
+        file.write('chr18\tBlatToGff\tTest_F:BES\t12067347\t12067719\t.\t+\t.\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\n')
+        file.close()
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py Tue Apr 30 14:33:21 2013 -0400
b"@@ -0,0 +1,117 @@\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_F_BlatToGffForBesPaired(unittest.TestCase):\n+\n+\n+    def test_run(self):\n+        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']\n+        self._writeBlatInputFileName(blatInputFileName)\n+        fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']\n+        self._writeFastaInputFile(fastaInputFileName)\n+        \n+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']\n+        cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)\n+        os.system(cmd)\n+        \n+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']\n+        self._writeExpOutputFileName(expOutputFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))\n+        os.remove(blatInputFileName)\n+        os.remove(fastaInputFileName)\n+        os.remove(expOutputFileName)\n+        os.remove(obsOutputFileName)\n+        \n+    def test_run_with_methodName(self):\n+        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']\n+        self._writeBlatInputFileName(blatInputFileName)\n+        fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']\n+        self._writeFastaInputFile(fastaInputFileName)\n+        \n+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']\n+        cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s -n TestF' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)\n+        os.system(cmd)\n+        \n+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']\n+        self._writeExpOutputFileName_with_methodName(expOutputFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))\n+        os.remove(blatInputFileName)\n+        os.remove(fastaInputFileName)\n+        os.remove(expOutputFileName)\n+        os.remove(obsOutputFileName)\n+\n+    def _writeBlatInputFileName(self, blatInputFileName):\n+        file = open(blatInputFileName, 'w')\n+        file.write('psLayout version 3\\n')\n+        file.write('\\n')\n+        file.write('match    mis-     rep.     
N\\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\\n')\n+        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\\n')\n+        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n')\n+        file.write('315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n')\n+        file.write('690\\t11\\t0\\t0\\t1\\t3\\t2\\t4\\t-\\tMRRE1H001H13RM1\\t704\\t0\\t704\\tchr16\\t22053297\\t21736364\\t21737069\\t3\\t40,647,14,\\t0,43,690,\\t21736364,21736406,21737055,\\n')\n+        file.write('554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMACHINFM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n')\n+        file.write('620\\t23\\t0\\t0\\t0\\t0\\t0\\t0\\t-\\tBIDULERM1\\t643\\t0\\t643\\tchr11\\t19818926\\t3794984\\t3795627\\t1\\t643,\\t0,\\t3794984,\\n')\n+        file.write('554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMRRE1H032F08FM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n')\n+        file.write('620\\t"..b'ATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\\n\')\n+        file.write(\'chr16\\tBlatToGffForBesPaired\\tTestF:BES\\t21736364\\t21737069\\t.\\t+\\t.\\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\\n\')\n+        file.write(\'chr16\\tBlatToGffForBesPaired\\tTestF:BAC\\t21686950\\t21737069\\t.\\t.\\t.\\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\\n\')\n+        file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BES\\t3725876\\t3726473\\t.\\t+\\t.\\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\\n\')\n+        file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BES\\t3794984\\t3795627\\t.\\t+\\t.\\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\\n\')\n+        file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BAC\\t3725876\\t3795627\\t.\\t.\\t.\\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\\n\')\n+        
file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BES\\t12067347\\t12067719\\t.\\t+\\t.\\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\\n\')\n+        file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BES\\t11978635\\t11979338\\t.\\t+\\t.\\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n+        file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BAC\\t11978635\\t12067719\\t.\\t.\\t.\\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n+        file.close()\n+        \n+    def _writeFastaInputFile(self, fileName):\n+        file = open(fileName, \'w\')\n+        file.write(\'>MRRE1H001H13FM1\\n\')\n+        file.write(\'ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\\n\')\n+        file.write(\'CTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGC\\n\')\n+        file.write(\'ACTGCTAGCTACG\\n\')\n+        file.write(\'>MRRE1H001H13RM1\\n\')\n+        file.write(\'ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCG\\n\')\n+        file.write(\'ACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGT\\n\')\n+        file.write(\'ACTGATCGACTGATCGACTGC\\n\')\n+        file.write(\'>MRRE1H032F08FM1\\n\')\n+        file.write(\'TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGAT\\n\')\n+        file.write(\'ATCGATCG\\n\')\n+        file.write(\'>MRRE1H032F08RM1\\n\')\n+        file.write(\'ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTG\\n\')\n+        file.write(\'TACGTACGTAC\\n\')\n+        file.write(\'>MRRE1B072N12FM1\\n\')\n+        file.write(\'ATCGTACGTACGATCGATCGCATGACTACGT\\n\')\n+        file.write(\'>MRRE1B072N12RM1\\n\')\n+        file.write(\'TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n+        file.write(\'>MACHINFM1\\n\')\n+        file.write(\'ATCGTACGCTAGCTAGTCGATCGATCGATCGATCG\\n\')\n+        file.write(\'>BIDULERM1\\n\')\n+        file.write(\'ACTCGATCGACTACGTACGTAGACTG\\n\')\n+        file.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,66 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_CrossSsrAndBesMappedByBlatToGff(unittest.TestCase):
+
+
+    def test_run(self):
+        ssrInputFileName = '%s/commons/core/parsing/test/ssrInputFile.tab' % os.environ['REPET_PATH']
+        self._writeSsrInputFile(ssrInputFileName)
+        blatInputFileName = '%s/commons/core/parsing/test/blatInputFile.tab' % os.environ['REPET_PATH']
+        self._writeBlatInputFile(blatInputFileName)
+        
+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
+        cmd = 'python %s/commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py -s %s -b %s -o %s' % (os.environ['REPET_PATH'], ssrInputFileName, blatInputFileName, obsOutputFileName)
+        os.system(cmd)
+        
+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
+        self._writeExpOutputFile(expOutputFileName)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        os.remove(ssrInputFileName)
+        os.remove(blatInputFileName)
+        os.remove(obsOutputFileName)
+        os.remove(expOutputFileName)
+    
+    def _writeBlatInputFile(self, blatInputFileName):
+        file = open(blatInputFileName, 'w')
+        file.write('psLayout version 3\n')
+        file.write('\n')
+        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
+        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
+        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
+        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
+        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tmachin1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
+        file.write('554\t26\t0\t0\t1\t16\t1\t17\t-\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
+        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tmachin2\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
+        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tmachin3\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
+        file.close()
+
+    def _writeSsrInputFile(self, ssrInputFileName):
+        file = open(ssrInputFileName, 'w')
+        file.write('BES_name    BES_redundancy    SSR_di/tri/tetranucleotide    SSR_Motif    SSR_Motif_number    SSR_start    SSR_end    BES_size\n')
+        file.write('truc1\t1\t4\tttta\t6\t272\t295\t724\n')
+        file.write('truc2\t1\t3\taat\t8\t264\t287\t683\n')
+        file.write('MRRE1H001H13FM1\t1\t2\tta\t19\t153\t190\t378\n')
+        file.write('truc3\t2\t4\taaag\t8\t518\t549\t734\n')
+        file.write('MRRE1H032F08FM1\t1\t4\taaat\t7\t544\t571\t606\n')
+        file.write('MRRE1H032F08FM1\t2\t2\tag\t10\t587\t606\t606\n')
+        file.write('truc4\t1\t2\tat\t16\t519\t550\t672\n')
+        file.write('truc5\t1\t3\ttct\t8\t205\t228\t752\n')
+        file.write('truc6\t1\t2\tat\t33\t287\t352\t569\n')
+        file.close()
+        
+    def _writeExpOutputFile(self, expOutputFileName):
+        file = open(expOutputFileName, 'w')
+        file.write('##gff-version 3\n')
+        file.write('chr16\tCrossSsrAndBesAlignedByBlat\tSSR\t21687102\t21687139\t.\t+\t.\tID=SSR_MRRE1H001H13FM1_1;Name=SSR_MRRE1H001H13FM1_1;bes_name=MRRE1H001H13FM1;bes_size=378;bes_matchstart=0;bes_matchend=345;bes_redundancy=1;ssr_type=2;ssr_motif=ta;ssr_motif_number=19;ssr_start=153;ssr_end=190;muscadine_seq=tatatatatatatatatatatatatatatatatatata\n')
+        file.write('chr11\tCrossSsrAndBesAlignedByBlat\tSSR\t3725930\t3725903\t.\t-\t.\tID=SSR_MRRE1H032F08FM1_1;Name=SSR_MRRE1H032F08FM1_1;bes_name=MRRE1H032F08FM1;bes_size=606;bes_matchstart=10;bes_matchend=606;bes_redundancy=1;ssr_type=4;ssr_motif=aaat;ssr_motif_number=7;ssr_start=544;ssr_end=571;muscadine_seq=aaataaataaataaataaataaataaat\n')
+        file.write('chr11\tCrossSsrAndBesAlignedByBlat\tSSR\t3725887\t3725868\t.\t-\t.\tID=SSR_MRRE1H032F08FM1_2;Name=SSR_MRRE1H032F08FM1_2;bes_name=MRRE1H032F08FM1;bes_size=606;bes_matchstart=10;bes_matchend=606;bes_redundancy=2;ssr_type=2;ssr_motif=ag;ssr_motif_number=10;ssr_start=587;ssr_end=606;muscadine_seq=agagagagagagagagagag\n')
+        file.close()
+
+if __name__ == "__main__":
+    unittest.main()
+    
+        
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_F_VarscanToVCF.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_VarscanToVCF.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,173 @@\n+import unittest\n+import os\n+from commons.core.parsing.VarscanToVCF import VarscanToVCF\n+from commons.core.utils.FileUtils import FileUtils\n+\n+class Test_F_VarscanToVCF(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self.emptyVarscanFileName = "emptyfile.varscan"\n+        self.varscanFileName = "%s/commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan" % os.environ["REPET_PATH"]\n+        self.expVCFFileName = "expVCF.vcf"\n+        self.obsVCFFileName = "obsVCF.vcf"\n+        \n+    def tearDown(self):\n+        if os.path.exists(self.emptyVarscanFileName):\n+            os.remove(self.emptyVarscanFileName)\n+        os.remove(self.expVCFFileName)\n+        os.remove(self.obsVCFFileName)\n+        \n+    def test_VarscanToVCF(self):\n+        self._writeExpOutputFile()\n+        iVarscanFile = VarscanToVCF(self.varscanFileName, self.obsVCFFileName, doClean = True)\n+        iVarscanFile.run()\n+        self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName))\n+       \n+    def test_VarscanToVCF_empty(self):\n+        self._writeInputFile_empty()\n+        self._writeExpOutputFile_empty()\n+        iVarscanFile = VarscanToVCF(self.emptyVarscanFileName, self.obsVCFFileName, doClean = True)\n+        iVarscanFile.run()\n+        self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName))\n+           \n+    def _writeInputFile_empty(self):\n+        with open(self.emptyVarscanFileName, "w") as varscanFileName:\n+            varscanFileName.write("Chrom\\tPosition\\tRef\\tCons\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n")\n+            \n+    def _writeExpOutputFile_empty(self):\n+        with open(self.expVCFFileName, "w") as vcfFileName:   \n+            vcfFileName.write("##fileformat=VCFv4.1\\n")\n+            vcfFileName.write("#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\n")\n+   \n+    def _writeExpOutputFile(self):\n+        with open(self.expVCFFileName, "w") as vcfFileName:\n+            vcfFileName.write("##fileformat=VCFv4.1\\n")\n+            vcfFileName.write("#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\n")\n+            vcfFileName.write("chr1\\t10759\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=47\\n")\n+            vcfFileName.write("chr1\\t12438\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=62;ABQ=42\\n")\n+            vcfFileName.write("chr1\\t17432\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=55;ABQ=37\\n")\n+            vcfFileName.write("chr1\\t20391\\t.\\tA\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=56;ABQ=37\\n")\n+            vcfFileName.write("chr1\\t21207\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=55;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t26057\\t.\\tT\\tG\\t0.087739243\\t.\\tAF=0.1538;DP=13;RBQ=60;ABQ=37\\n")\n+            vcfFileName.write("chr1\\t36838\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=61;ABQ=36\\n")\n+            vcfFileName.write("chr1\\t37751\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=42\\n")\n+            vcfFileName.write("chr1\\t43500\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t50481\\t.\\tA\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=60;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t106849\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=40\\n")\n+            
vcfFileName.write("chr1\\t108726\\t.\\tT\\tA\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=50;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t114204\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=6;RBQ=60;ABQ=39\\n")\n+            vcfFileName.write("chr1\\t115030\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=57;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t116173\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.2222;DP=9;RBQ=58;ABQ=39\\n")\n+            vcfFileName.write("chr1\\t118433\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.1429;DP=7;RBQ=64;ABQ=50\\n")\n+            vcfFileName.write("chr1\\t119042\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=55;ABQ=51\\n")\n+            vcfFi'..b'me.write("chr1\\t498962\\t.\\tC\\tA\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t510532\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.2000;DP=5;RBQ=64;ABQ=53\\n")\n+            vcfFileName.write("chr1\\t516369\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=34;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t523631\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=64;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t524680\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t525898\\t.\\tT\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=62;ABQ=49\\n")\n+            vcfFileName.write("chr1\\t526118\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=61;ABQ=50\\n")\n+            vcfFileName.write("chr1\\t535762\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=42\\n")\n+            vcfFileName.write("chr1\\t543235\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=45;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t550086\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=50;ABQ=41\\n")\n+            vcfFileName.write("chr1\\t550508\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=55;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t551143\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=39\\n")\n+            vcfFileName.write("chr1\\t552924\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=8;RBQ=62;ABQ=38\\n")\n+            vcfFileName.write("chr1\\t553541\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.1250;DP=8;RBQ=65;ABQ=52\\n")\n+            vcfFileName.write("chr1\\t560806\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=65;ABQ=49\\n")\n+            vcfFileName.write("chr1\\t562736\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=64;ABQ=52\\n")\n+            vcfFileName.write("chr1\\t563224\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=51;ABQ=39\\n")\n+            vcfFileName.write("chr1\\t564217\\t.\\tT\\tA\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=62;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t567288\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t569652\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=48;ABQ=42\\n")\n+            vcfFileName.write("chr1\\t570280\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.1250;DP=8;RBQ=60;ABQ=53\\n")\n+            vcfFileName.write("chr1\\t582185\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=63;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t582453\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=65;ABQ=38\\n")\n+            vcfFileName.write("chr1\\t583477\\t.\\tT\\tG\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=62;ABQ=39\\n")\n+            vcfFileName.write("chr1\\t584179\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=41\\n")\n+            
vcfFileName.write("chr1\\t589074\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=36\\n")\n+            vcfFileName.write("chr1\\t596641\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=40\\n")\n+            vcfFileName.write("chr1\\t599263\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=60;ABQ=38\\n")\n+                     \n+##fileDate=20090805\n+##source=myImputationProgramV3.1\n+##reference=1000Gchr1    10759    .    C    T    0.087739243    .    AF=33.33%;DP=3enomesPilot-NCBI36\n+##phasing=partial\n+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">\n+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">\n+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">\n+##FILTER=<ID=q10,Description="Quality below 10">\n+##FILTER=<ID=s50,Description="Less than 50% of samples have data">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">\n+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">\n+            \n+if __name__ == "__main__":\n+    unittest.main()\n+    \n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_FastaParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_FastaParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,75 @@
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.structure.Sequence import Sequence
+import unittest
+
+class Test_FastaParser(unittest.TestCase):
+
+    def test_getSubsequence(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "1"
+        expSeq = Sequence("1:1-20 (1)", "CCTAAGCCATTGCTTGGTGA")
+        obsSeq = parser.getSubSequence(chromosome, 1, 20, 1)
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        expSeq = Sequence("subsequence", "TGAAGA")
+        obsSeq = parser.getSubSequence(chromosome, 55, 60, 1, "subsequence")
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence_inside_and_outside(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        expSeq = Sequence("subsequence", "TTA")
+        obsSeq = parser.getSubSequence(chromosome, 137, 151, 1, "subsequence")
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence_last_letter(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        expSeq = Sequence("subsequence", "A")
+        obsSeq = parser.getSubSequence(chromosome, 139, 151, 1, "subsequence")
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence_totally_outside(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        isSysExit = False
+        try:
+            parser.getSubSequence(chromosome, 140, 151, 1, "subsequence")
+        except:
+            isSysExit = True
+        self.assertTrue(isSysExit)
+        
+    def test_setTags(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        parser.setTags()
+        expTags = {"1" : 0,
+                   "2" : 54}
+        obsTags = parser.getTags()
+        self.assertEquals(expTags, obsTags)
+        
+    def _writeInputFastaFile(self, fastaFile):
+        myHandler = open(fastaFile, 'w')
+        myHandler.write(">1\n")
+        myHandler.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAAT\n")
+        myHandler.write(">2\n")
+        myHandler.write("TATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCG\n")
+        myHandler.write("GACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGC\n")
+        myHandler.write("TGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTA\n")
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_FindRep.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_FindRep.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,108 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from xml.sax import make_parser
+from xml.sax.handler import feature_namespaces
+from commons.core.parsing.FindRep import FindRep
+
+
+class Test_FindRep(unittest.TestCase):
+    def setUp(self):
+        self._mrepsOuputFileName = "output.xml"
+        self._obsSetFileName = "obsOuput.set"
+        self._expSetFileName = "expOuput.set"
+        self._writeExpSet(self._expSetFileName)
+        self._writeMrepsOutput(self._mrepsOuputFileName)
+
+    def tearDown(self):
+        os.remove(self._expSetFileName)
+        os.remove(self._obsSetFileName)
+        os.remove(self._mrepsOuputFileName)
+    
+    def test_parse(self):
+        xmlParser = make_parser()
+        xmlParser.setFeature( feature_namespaces, 0 )
+        xmlParser.setContentHandler( FindRep( self._obsSetFileName,0,  0 ) )
+        xmlParser.parse( self._mrepsOuputFileName )
+        self.assertTrue(FileUtils.are2FilesIdentical(self._obsSetFileName, self._expSetFileName))  
+    
+    def _writeExpSet(self, fileName):
+        f = open(fileName, "w")
+        f.write("1\t(tatt)3\tseq1\t4\t16\n")
+        f.write("2\t(tatt)3\tseq1\t23\t35\n")
+        f.write("3\t(tatt)3\tseq1\t42\t54\n")
+        f.close()
+        
+    def _writeMrepsOutput(self, fileName):
+        f = open(fileName, "w")
+        f.write("<?xml version='1.0' encoding='UTF-8' ?>\n")
+        f.write("<mreps>\n")
+        f.write("<time>Thu Dec  1 17:25:54 2011\n")
+        f.write("</time>\n")
+        f.write("<parameters>\n")
+        f.write("    <type-of-input>file in fasta format</type-of-input>\n")
+        f.write("    <err>3</err>\n")
+        f.write("    <from>1</from>\n")
+        f.write("    <to>-1</to>\n")
+        f.write("    <win>-1</win>\n")
+        f.write("    <minsize>1</minsize>\n")
+        f.write("    <maxsize>-1</maxsize>\n")
+        f.write("    <minperiod>1</minperiod>\n")
+        f.write("   <maxperiod>-1</maxperiod>\n")
+        f.write("   <minexponent>3.00</minexponent>\n")
+        f.write("</parameters>\n")
+        f.write("<results>\n")
+        f.write("<sequence-name>seq1</sequence-name>\n")
+        f.write("<repetitions>\n")
+        f.write("<window>\n")
+        f.write("<windowstart>1</windowstart>\n")
+        f.write("<windowend>60</windowend>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>4</start>\n")
+        f.write("        <end>16</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("       <period>4</period>\n")
+        f.write("       <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>23</start>\n")
+        f.write("        <end>35</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("        <period>4</period>\n")
+        f.write("        <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>42</start>\n")
+        f.write("       <end>54</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("        <period>4</period>\n")
+        f.write("        <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("<nbofreps>3</nbofreps>\n")
+        f.write("</window>\n")
+        f.write("</repetitions>\n")
+        f.write("</results>\n")
+        f.write("<errorcode>0</errorcode>\n")
+        f.write("</mreps>\n")
+        f.close()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_GffParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_GffParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,62 @@
+import unittest
+from commons.core.parsing.GffParser import GffParser
+
+
+class Test_GffParser(unittest.TestCase):
+    
+
+    def test_Parser(self):
+        parser = GffParser("data/testGffParser1.gff3")
+
+        self.assertEqual(parser.getNbTranscripts(), 3)
+
+        cpt = 0
+        for transcript in parser.getIterator():
+            cpt += 1
+            if cpt == 1:
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getName(), "test1")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                self.assertEqual(transcript.getTagValue("field"), "value1")
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "arm_X")
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(exons[0].getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 1001)
+            elif cpt == 2:
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getName(), "test2")
+                self.assertEqual(transcript.getStart(), 10000)
+                self.assertEqual(transcript.getEnd(), 20000)
+                self.assertEqual(transcript.getDirection(), -1)
+                self.assertEqual(transcript.getNbExons(), 2)
+                self.assertEqual(transcript.getTagValue("field"), "value2")
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "arm_X")
+                self.assertEqual(exons[0].getStart(), 10000)
+                self.assertEqual(exons[0].getEnd(), 10100)
+                self.assertEqual(exons[0].getDirection(), -1)
+                self.assertEqual(transcript.getSize(), 9602)
+            if cpt == 3:
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getName(), "test1.1")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                self.assertEqual(transcript.getTagValue("ID"), "test1.1-1")
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "arm_X")
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(exons[0].getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 1001)
+
+
+if __name__ == '__main__':
+        unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_MapParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_MapParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,53 @@
+import unittest
+import os
+from commons.core.parsing.MapParser import MapParser
+
+class Test_MapParser(unittest.TestCase):
+
+    
+    def setUp(self):
+        self.inputMapFileName = "testMapParser.map"
+        self._writeInputMapFile()
+        
+    def tearDown(self):
+        if os.path.exists(self.inputMapFileName):
+            os.remove(self.inputMapFileName)
+    
+    def test_Parser(self):
+        parser = MapParser(self.inputMapFileName)
+        
+        cpt = 0
+        for transcript in parser.getIterator():
+            cpt += 1
+            if cpt == 1:
+                self.assertEqual(transcript.getChromosome(), "dmel_chr4")
+                self.assertEqual(transcript.getName(), "aagatgcgtaacggccatac_17")
+                self.assertEqual(transcript.getStart(), 4380)
+                self.assertEqual(transcript.getEnd(), 4400)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 21)
+            elif cpt == 10:
+                self.assertEqual(transcript.getChromosome(), "dmel_chr4")
+                self.assertEqual(transcript.getName(), "aacggccatacattggtttg_12")
+                self.assertEqual(transcript.getStart(), 4389)
+                self.assertEqual(transcript.getEnd(), 4409)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 21)
+                
+                
+    def _writeInputMapFile(self):
+        inputFile = open(self.inputMapFileName,'w')
+        inputFile.write("aagatgcgtaacggccatac_17\tdmel_chr4\t4380\t4400\n")
+        inputFile.write("agatgcgtaacggccataca_16\tdmel_chr4\t4381\t4401\n")
+        inputFile.write("gatgcgtaacggccatacat_16\tdmel_chr4\t4382\t4402\n")
+        inputFile.write("atgcgtaacggccatacatt_15\tdmel_chr4\t4383\t4403\n")
+        inputFile.write("tgcgtaacggccatacattg_15\tdmel_chr4\t4384\t4404\n")
+        inputFile.write("gcgtaacggccatacattgg_15\tdmel_chr4\t4385\t4405\n")
+        inputFile.write("cgtaacggccatacattggt_14\tdmel_chr4\t4386\t4406\n")
+        inputFile.write("gtaacggccatacattggtt_14\tdmel_chr4\t4387\t4407\n")
+        inputFile.write("taacggccatacattggttt_14\tdmel_chr4\t4388\t4408\n")
+        inputFile.write("aacggccatacattggtttg_12\tdmel_chr4\t4389\t4409\n")
+        inputFile.close()
+
+if __name__ == '__main__':
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_MrepsToSet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_MrepsToSet.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,105 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.parsing.MrepsToSet import MrepsToSet
+
+class Test_MrepsToSet(unittest.TestCase):
+    def setUp(self):
+        self._mrepsInputFileName = "mrepsInput.fa"
+        self._mrepsOuputFileName = "mrepsOutput.xml"
+        self._obsSetFileName = "obsOuput.set"
+        self._expSetFileName = "expOuput.set"
+        
+        self._writeExpSet(self._expSetFileName)
+        self._writeMrepsOutput(self._mrepsOuputFileName)
+
+    def tearDown(self):
+        os.remove(self._expSetFileName)
+        os.remove(self._obsSetFileName)
+        os.remove(self._mrepsOuputFileName)
+    
+    def test_convert(self):
+        iMrepsToSet = MrepsToSet(self._mrepsInputFileName, self._mrepsOuputFileName, self._obsSetFileName)
+        iMrepsToSet.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._obsSetFileName, self._expSetFileName))  
+    
+    def _writeExpSet(self, fileName):
+        f = open(fileName, "w")
+        f.write("1\t(tatt)3\tseq1\t4\t16\n")
+        f.write("2\t(tatt)3\tseq1\t23\t35\n")
+        f.write("3\t(tatt)3\tseq1\t42\t54\n")
+        f.close()
+        
+    def _writeMrepsOutput(self, fileName):
+        f = open(fileName, "w")
+        f.write("<?xml version='1.0' encoding='UTF-8' ?>\n")
+        f.write("<mreps>\n")
+        f.write("<time>Thu Dec  1 17:25:54 2011\n")
+        f.write("</time>\n")
+        f.write("<parameters>\n")
+        f.write("    <type-of-input>file in fasta format</type-of-input>\n")
+        f.write("    <err>3</err>\n")
+        f.write("    <from>1</from>\n")
+        f.write("    <to>-1</to>\n")
+        f.write("    <win>-1</win>\n")
+        f.write("    <minsize>1</minsize>\n")
+        f.write("    <maxsize>-1</maxsize>\n")
+        f.write("    <minperiod>1</minperiod>\n")
+        f.write("   <maxperiod>-1</maxperiod>\n")
+        f.write("   <minexponent>3.00</minexponent>\n")
+        f.write("</parameters>\n")
+        f.write("<results>\n")
+        f.write("<sequence-name>seq1</sequence-name>\n")
+        f.write("<repetitions>\n")
+        f.write("<window>\n")
+        f.write("<windowstart>1</windowstart>\n")
+        f.write("<windowend>60</windowend>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>4</start>\n")
+        f.write("        <end>16</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("       <period>4</period>\n")
+        f.write("       <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>23</start>\n")
+        f.write("        <end>35</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("        <period>4</period>\n")
+        f.write("        <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>42</start>\n")
+        f.write("       <end>54</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("        <period>4</period>\n")
+        f.write("        <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("<nbofreps>3</nbofreps>\n")
+        f.write("</window>\n")
+        f.write("</repetitions>\n")
+        f.write("</results>\n")
+        f.write("<errorcode>0</errorcode>\n")
+        f.write("</mreps>\n")
+        f.close()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_Multifasta2SNPFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_Multifasta2SNPFile.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1786 @@\n+import os\n+import shutil\n+import unittest\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile\n+from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.seq.BioseqDB import BioseqDB\n+from smac_pipe.tests.Utils4Test import Utils4Test\n+\n+\n+class Test_Multifasta2SNPFile(unittest.TestCase):\n+# TODO TEST LOGFILE\n+    def setUp(self):\n+        os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])\n+        self._inFileName = "multifasta_input.fasta"\n+        \n+        self._expSubSNPFileName = "%s/commons/core/parsing/test/expSubSNP.csv" % os.environ["REPET_PATH"]\n+        self._expAlleleFileName = "%s/commons/core/parsing/test/expAllele.csv" % os.environ["REPET_PATH"]\n+        \n+        self._expIndividualFileName = "%s/commons/core/parsing/test/expIndividual.csv" % os.environ["REPET_PATH"]\n+        self._expSequenceFSAFileName = "%s/commons/core/parsing/test/expSequences.fsa" % os.environ["REPET_PATH"]\n+        self._expSequenceCSVFileName = "%s/commons/core/parsing/test/expSequences.csv" % os.environ["REPET_PATH"]\n+        self._expBatchFileName = "%s/commons/core/parsing/test/expBatch.txt" % os.environ["REPET_PATH"]\n+        self._expBatchLineFileName = "%s/commons/core/parsing/test/expBatchLine.csv" % os.environ["REPET_PATH"]\n+        \n+        self._realInputFileName = "data/real_multifasta_input.fasta"\n+        self._realExpSubSNPFileName = "data/realExpSubSNP.csv"\n+        self._realExpSequenceFSAFileName = "data/realExpSequences.fsa"\n+        self._realExpBatchLineFileName = "data/realExpBatchLine.csv"\n+        self._realExpIndividualFileName = "data/realExpIndividual.csv"\n+        \n+        self._inputDirSeveralBatches = "%s/commons/core/parsing/test/severalBatchDir" % os.environ["REPET_PATH"]\n+        \n+        self._obsSubSNPFileName = "SubSNP.csv"\n+        self._obsAlleleFileName = "Allele.csv"\n+        self._obsIndividualFileName = "Individual.csv"\n+        self._obsSequenceFSAFileName = "Sequences.fsa"\n+        self._obsSequenceCSVFileName = "Sequences.csv"\n+        self._obsBatchFileName = "Batch.txt"\n+        self._obsBatchLineFileName = "BatchLine.csv"\n+        \n+        self._fileUtils = FileUtils()\n+\n+    def tearDown(self):\n+        os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])\n+        logFileName = "multifasta2SNP.log"\n+        if self._fileUtils.isRessourceExists(self._inFileName):\n+            os.remove(self._inFileName)\n+        if self._fileUtils.isRessourceExists(self._obsSubSNPFileName):\n+            os.remove(self._obsSubSNPFileName)\n+        if self._fileUtils.isRessourceExists(self._obsSubSNPFileName + "_filtered"):\n+            os.remove(self._obsSubSNPFileName + "_filtered")\n+        if self._fileUtils.isRessourceExists(self._obsAlleleFileName):\n+            os.remove(self._obsAlleleFileName)\n+        if self._fileUtils.isRessourceExists(self._obsIndividualFileName):\n+            os.remove(self._obsIndividualFileName)\n+        if self._fileUtils.isRessourceExists(self._obsSequenceFSAFileName):\n+            os.remove(self._obsSequenceFSAFileName)\n+        if self._fileUtils.isRessourceExists(self._obsSequenceCSVFileName):\n+            os.remove(self._obsSequenceCSVFileName)\n+        if self._fileUtils.isRessourceExists(self._obsBatchFileName):\n+            
os.remove(self._obsBatchFileName)\n+        if self._fileUtils.isRessourceExists(self._obsBatchLineFileName):\n+            os.remove(self._obsBatchLineFileName)\n+\n+        if self._fileUtils.isRessourceExists(self._expSubSNPFileName):        \n+            os.remove(self._expSubSNPFileName)\n+        if self._fileUtils.isRessourceExists(self._realExpSubSNPFileName + "_filtered"):        \n+            os.remove(self._realExpSubSNPFileName + "_filtered")\n+        if self._fileUtils.isRessourceExists(self._expAlleleFileName):\n+     '..b'   \n+    def _writeInputFileSeveralBatches(self):\n+        if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n+            os.mkdir(self._inputDirSeveralBatches)\n+         \n+            inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n+            inFileHandle.write(">Sequence_de_Reference1\\n")\n+            inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+            inFileHandle.write(">Line1\\n")\n+            inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+            inFileHandle.write(">Line2\\n")\n+            inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+            inFileHandle.close()\n+            \n+            inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n+            inFileHandle2.write(">Sequence_de_Reference2\\n")\n+            inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+            inFileHandle2.write(">Line1\\n")\n+            inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+            inFileHandle2.write(">Line2\\n")\n+            inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+            inFileHandle2.close()\n+            \n+    def _writeInputFileSeveralBatches_different_lines_between_files(self):\n+        if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n+            os.mkdir(self._inputDirSeveralBatches)\n+         \n+            inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n+            inFileHandle.write(">Sequence_de_Reference1\\n")\n+            inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+            inFileHandle.write(">Line1\\n")\n+            inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+            inFileHandle.write(">Line2\\n")\n+            inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+            inFileHandle.close()\n+            \n+            inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n+            inFileHandle2.write(">Sequence_de_Reference2\\n")\n+            inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+            inFileHandle2.write(">Line3\\n")\n+            inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+            inFileHandle2.write(">Line4\\n")\n+            inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+            inFileHandle2.close()\n+        \n+    def _writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self): \n+        if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n+            os.mkdir(self._inputDirSeveralBatches)\n+         \n+            inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n+            
inFileHandle.write(">Sequence_de_Reference1\\n")\n+            inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+            inFileHandle.write(">Line1\\n")\n+            inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+            inFileHandle.write(">Line2\\n")\n+            inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+            inFileHandle.close()\n+            \n+            inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n+            inFileHandle2.write(">Sequence_de_Reference1\\n")\n+            inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+            inFileHandle2.write(">Line3\\n")\n+            inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+            inFileHandle2.write(">Line4\\n")\n+            inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+            inFileHandle2.close()\n+    \n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,292 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFileWriter\n+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile\n+from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper\n+from commons.core.LoggerFactory import LoggerFactory\n+import os\n+import logging\n+import unittest\n+\n+class Test_Multifasta2SNPFileWriter(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._obsSubSNPFile = "SubSNP.csv"\n+        self._expSubSNPFile = "ExpSubSNP.csv"\n+        \n+        self._obsAlleleFile = "Allele.csv"\n+        self._expAlleleFile = "ExpAllele.csv"\n+        \n+        self._obsIndividualFile = "Individual.csv"\n+        self._expIndividualFile = "ExpIndividual.csv"\n+        \n+        self._obsSequenceFSAFile = "Sequences.fsa"\n+        self._expSequenceFSAFile = "ExpSequences.fsa"\n+        \n+        self._obsSequenceCSVFile = "Sequences.csv"\n+        self._expSequenceCSVFile = "ExpSequences.csv"\n+        \n+        self._obsBatchFile = "Batch.txt"\n+        self._expBatchFile = "ExpBatch.txt"\n+        \n+        self._obsBatchLineFile = "BatchLine.csv"\n+        self._expBatchLineFile = "ExpBatchLine.csv"\n+        \n+        self._logFileName = "Test_Multifasta2SNPWriter.log"\n+        \n+        self._inputFileName = "multifasta.fsa"\n+        \n+        self._lSNPResult = []\n+        self._dAlleleResult = {}\n+        self._lIndividualResult = []\n+        self._refSeq = Bioseq()\n+        self._seqDb= BioseqDB()\n+        \n+        self._logFile = LoggerFactory.createLogger(self._logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s")\n+        self._lSequenceWrapper = ReferenceBioseqAndLinesBioseqDBWrapper(self._refSeq, self._seqDb,  self._logFile, self._inputFileName)\n+        self._lBatchLineResults = []\n+        \n+        self._Multifasta2SNPFileWriter = Multifasta2SNPFileWriter()\n+        \n+        self._inFileName = "multifasta.txt"\n+        self._taxon = "Arabidopsis thaliana"\n+\n+    def tearDown(self):\n+        if FileUtils.isRessourceExists(self._inFileName):\n+            os.remove(self._inFileName)\n+        if FileUtils.isRessourceExists("multifasta2SNP.log"):\n+            os.remove("multifasta2SNP.log")\n+        if FileUtils.isRessourceExists("Test_Multifasta2SNPWriter.log"):\n+            os.remove("Test_Multifasta2SNPWriter.log")\n+            \n+        if FileUtils.isRessourceExists(self._obsSubSNPFile):\n+            os.remove(self._obsSubSNPFile)\n+        if FileUtils.isRessourceExists(self._expSubSNPFile):\n+            os.remove(self._expSubSNPFile)\n+            \n+        if FileUtils.isRessourceExists(self._obsAlleleFile):\n+            os.remove(self._obsAlleleFile)\n+        if FileUtils.isRessourceExists(self._expAlleleFile):\n+            os.remove(self._expAlleleFile)\n+            \n+        if FileUtils.isRessourceExists(self._obsIndividualFile):\n+            os.remove(self._obsIndividualFile)\n+        if FileUtils.isRessourceExists(self._expIndividualFile):\n+            os.remove(self._expIndividualFile)\n+            \n+        if FileUtils.isRessourceExists(self._obsSequenceFSAFile):\n+            os.remove(self._obsSequenceFSAFile)\n+        if FileUtils.isRessourceExists(self._expSequenceFSAFile):\n+            os.remove(self._expSequenceFSAFile)\n+            \n+        if 
FileUtils.isRessourceExists(self._obsSequenceCSVFile):\n+            os.remove(self._obsSequenceCSVFile)\n+        if FileUtils.isRessourceExists(self._expSequenceCSVFile):\n+            os.remove(self._expSequenceCSVFile)\n+\n+        if FileUtils.isRessourceExists(self._obsBatchFile):\n+            FileUtils.removeFilesByPattern(self._obsBatchFile)\n+        if FileUtils.isRessourceExists(self._expBatchFile):\n+            FileUtils.removeFilesByPattern(self._expBatchFile)\n+        \n+        if FileUtils.isRessourceExists(self._ob'..b'File))\n+        self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFile))\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFile, self._obsBatchFile))\n+        self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFile))\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFile, self._obsBatchLineFile))      \n+    \n+    def _writeExpSubSNPFile(self):\n+        expFile = open(self._expSubSNPFile, "w")\n+        expFile.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\\n")\n+        expFile.write("SubSNP1;A;SNP;1;A;T;1;1;1;Sequence;;;1\\n")\n+        expFile.write("SubSNP2;A;SNP;10;T;A;1;1;1;Sequence;;;2\\n")\n+        expFile.write("SubSNP3;A;SNP;20;T;A;1;1;2;Sequence;;;3\\n")\n+        expFile.close()\n+        \n+    def _writeExpAlleleFile(self):\n+        expFile = open(self._expAlleleFile, "w")\n+        expFile.write("AlleleNumber;Value;Motif;NbCopy;Comment\\n")\n+        expFile.write("1;A;;;\\n")\n+        expFile.write("2;C;;;\\n")\n+        expFile.write("3;T;;;\\n")\n+        expFile.close()        \n+        \n+        \n+    def _writeExpIndividualFile(self):\n+        expFile = open(self._expIndividualFile, "w")\n+        expFile.write("IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\\n")\n+        expFile.write("1;Individual1;;;;;;;;;;Arabidopsis thaliana;;;;;\\n")\n+        expFile.write("2;Individual2;;;;;;;;;;Arabidopsis thaliana;;;;;\\n")\n+        expFile.close()        \n+\n+    def _writeInputFile(self):\n+        inFileHandle = open(self._inFileName, "w")\n+        inFileHandle.write(">Sequence_de_Reference\\n")\n+        inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+        inFileHandle.write(">Line1\\n")\n+        inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+        inFileHandle.write(">Line2\\n")\n+        inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+        inFileHandle.close()\n+        \n+    def _writeExpSequenceFiles(self):\n+        SequenceFSAFileHandle = open(self._expSequenceFSAFile, "w")\n+        SequenceFSAFileHandle.write(">Sequence_de_Reference\\n")\n+        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+        SequenceFSAFileHandle.close()\n+        SequenceCSVFileHandle = open(self._expSequenceCSVFile, "w")\n+        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\\n")\n+        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\\n")\n+        SequenceCSVFileHandle.close()\n+        \n+    def _writeExpBatchFile(self):\n+        
BatchFileHandle = open(self._expBatchFile, "w")\n+        BatchFileHandle.write("BatchNumber: 1\\n")\n+        BatchFileHandle.write("BatchName: batch1\\n")\n+        BatchFileHandle.write("GeneName: gene1\\n")\n+        BatchFileHandle.write("Description: \\n")\n+        BatchFileHandle.write("ContactNumber: \\n")\n+        BatchFileHandle.write("ProtocolNumber: \\n")\n+        BatchFileHandle.write("ThematicNumber: \\n")\n+        BatchFileHandle.write("RefSeqName: Sequence de Reference\\n")\n+        BatchFileHandle.write("AlignmentFileName: \\n")\n+        BatchFileHandle.write("SeqName: \\n")\n+        BatchFileHandle.write("//\\n")\n+        BatchFileHandle.close()\n+        \n+    def _writeExpBatchLineFile(self):\n+        BatchLineFileHandle = open(self._expBatchLineFile, "w")\n+        BatchLineFileHandle.write("IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\\n")\n+        BatchLineFileHandle.write("1;;;1;\\n")\n+        BatchLineFileHandle.write("2;;;1;\\n")\n+        BatchLineFileHandle.close()\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_PalsToAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_PalsToAlign.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,43 @@
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+import os
+from commons.core.parsing.PalsToAlign import PalsToAlign
+
+class Test_PalsToAlign(unittest.TestCase):
+    
+    def setUp(self):
+        self._palsFileName = "input.gff"
+        self._expAlignFileName = "file.align"
+        self._obsAlignFileName = "output.align"
+        
+    def tearDown(self):
+        os.remove(self._palsFileName)
+        os.remove(self._expAlignFileName)
+        os.remove(self._obsAlignFileName)
+
+    def testRun(self):
+        self._writePalsFile(self._palsFileName)
+        self._writeExpAlignFile(self._expAlignFileName)
+        
+        iPalsToAlign = PalsToAlign(self._palsFileName,self._obsAlignFileName)
+        iPalsToAlign.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expAlignFileName, self._obsAlignFileName))
+
+
+    def _writePalsFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk01\tpals\thit\t32290\t32583\t252\t+\t.\tTarget chunk02 28975 29268; maxe 0.035\n")
+        f.write("chunk01\tpals\thit\t28975\t29268\t252\t+\t.\tTarget chunk02 32290 32583; maxe 0.035\n") 
+        f.write("chunk01\tpals\thit\t65932\t66032\t68\t+\t.\tTarget chunk02 59293 59395; maxe 0.085\n")
+        f.close()
+        
+    def _writeExpAlignFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk01\t28975\t29268\tchunk02\t32290\t32583\t0.0\t252\t96.5\n") 
+        f.write("chunk01\t32290\t32583\tchunk02\t28975\t29268\t0.0\t252\t96.5\n")
+        f.write("chunk01\t65932\t66032\tchunk02\t59293\t59395\t0.0\t68\t91.5\n")
+        f.close()
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
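Editor's note: the PalsToAlign fixture above pairs each PALS GFF line (whose ninth column carries "Target <chunk> <start> <end>; maxe <e>") with an align row whose last column is 100 * (1 - maxe): 0.035 gives 96.5 and 0.085 gives 91.5, while the score column is carried over and the e-value is written as 0.0. A minimal sketch of that per-line mapping, assuming the real class also sorts the output rows; the helper name is mine, not part of PalsToAlign.

import re

def pals_gff_line_to_align(line):
    # Nine GFF columns; the ninth holds "Target <name> <start> <end>; maxe <e>".
    query, _, _, q_start, q_end, score, _, _, attributes = line.rstrip("\n").split("\t")
    match = re.match(r"Target (\S+) (\d+) (\d+); maxe ([0-9.]+)", attributes)
    target, t_start, t_end, maxe = match.groups()
    identity = round(100 * (1 - float(maxe)), 1)   # 0.035 -> 96.5, 0.085 -> 91.5
    return "\t".join([query, q_start, q_end, target, t_start, t_end,
                      "0.0", score, str(identity)])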
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_PathNum2Id.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_PathNum2Id.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,69 @@
+import unittest
+import os
+from commons.core.parsing.PathNum2Id import PathNum2Id
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_PathNum2Id(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName = "dummyInputPathFile.path"
+        self._outputFileName = "dummyOutputPathFile.path"
+        self._expectedFileName = "expectedpathFile.path"
+        self._pathNum2Id = PathNum2Id()
+
+    def tearDown(self):
+        os.remove( self._inputFileName )
+        os.remove( self._outputFileName )
+        os.remove( self._expectedFileName )
+
+    def test_RunWhithoutReturnAtEndOfFile(self):
+        self._createAndFillInputFileWhithoutReturnAtTheEnd()
+        self._createExpectedFile()
+        self._pathNum2Id.setInFileName( self._inputFileName )
+        self._pathNum2Id.setOutFileName( self._outputFileName )
+        self._pathNum2Id.run()
+        fileutils = FileUtils()
+        self.assertTrue(fileutils.are2FilesIdentical(self._outputFileName, self._expectedFileName))
+      
+    def test_RunWhithReturnAtEndOfFile(self):
+        self._createAndFillInputFileWhithReturnAtTheEnd()
+        self._createExpectedFile()
+        self._pathNum2Id.setInFileName( self._inputFileName )
+        self._pathNum2Id.setOutFileName( self._outputFileName )
+        self._pathNum2Id.run()
+        fileutils = FileUtils()
+        self.assertTrue(fileutils.are2FilesIdentical(self._outputFileName, self._expectedFileName))
+        
+    def _createExpectedFile(self):
+        f = open(self._expectedFileName, "w")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+        f.write("4\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+        f.write("5\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+        f.write("6\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        f.close()
+
+    def _createAndFillInputFileWhithoutReturnAtTheEnd(self):
+        f = open(self._inputFileName, "w")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0")
+        f.close()
+        
+    def _createAndFillInputFileWhithReturnAtTheEnd(self):
+        f = open(self._inputFileName, "w")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        f.close()
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
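Editor's note: in the PathNum2Id fixtures above, the input path numbers run 1,2,3 then restart at 1,2,3, and the expected file renumbers them 1 through 6 with every other column unchanged. One plausible reading, sketched below, is that the tool makes the first column globally unique by adding an offset whenever the numbering restarts; the real PathNum2Id class (setInFileName / setOutFileName / run) may implement this differently, so treat the helper as an illustration only.

def renumber_paths(lines):
    # Remap column 1 so path numbers stay unique across the whole file:
    # when the input numbering restarts, shift subsequent numbers by the previous value.
    offset, previous, output = 0, 0, []
    for line in lines:
        fields = line.rstrip("\n").split("\t")
        number = int(fields[0])
        if number <= previous:          # numbering restarted (1,2,3,1,... in the fixture)
            offset += previous
        previous = number
        fields[0] = str(number + offset)
        output.append("\t".join(fields))
    return output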
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_PslParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_PslParser.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,165 @@\n+from commons.core.parsing.PslParser import PslParser\n+import unittest, os\n+\n+\n+class Test_PslParser(unittest.TestCase):\n+\n+    def test_forward(self):\n+        fileName = "tmpFile.psl"\n+        handle   = open(fileName, "w")\n+        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\T\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n158\\t0\\t0\\t0\\t0\\t0\\t1\\t158\\t+\\ttest\\t158\\t0\\t158\\tchr1\\t1501\\t237\\t553\\t2\\t79,79,\\t0,79,\\t237,474,\\n")\n+        handle.close()\n+\n+        parser = PslParser(fileName, 0)\n+        self.assertEquals(parser.getNbMappings(), 1)\n+        for mapping in parser.getIterator():\n+            transcript = mapping.getTranscript()\n+            self.assertEquals(transcript.getName(), "test")\n+            self.assertEquals(transcript.getChromosome(), "chr1")\n+            self.assertEquals(transcript.getDirection(), 1)\n+            self.assertEquals(transcript.getStart(), 238)\n+            self.assertEquals(transcript.getEnd(),   553)\n+            self.assertEquals(transcript.getNbExons(), 2)\n+            for i, exon in enumerate(transcript.getExons()):\n+                if i == 0:\n+                    self.assertEquals(exon.getStart(), 238)\n+                    self.assertEquals(exon.getEnd(),   316)\n+                elif i == 1:\n+                    self.assertEquals(exon.getStart(), 475)\n+                    self.assertEquals(exon.getEnd(),   553)\n+        os.remove(fileName)\n+        \n+        \n+    def test_backward(self):\n+        fileName = "tmpFile.psl"\n+        handle   = open(fileName, "w")\n+        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n158\\t0\\t0\\t0\\t0\\t0\\t1\\t158\\t-\\ttest\\t158\\t0\\t158\\tchr1\\t1501\\t237\\t553\\t2\\t79,79,\\t0,79,\\t237,474,\\n")\n+\n+        handle.close()\n+\n+        parser = PslParser(fileName, 0)\n+        self.assertEquals(parser.getNbMappings(), 1)\n+        for mapping in parser.getIterator():\n+            transcript = mapping.getTranscript()\n+            self.assertEquals(transcript.getName(), "test")\n+            self.assertEquals(transcript.getChromosome(), "chr1")\n+            self.assertEquals(transcript.getDirection(), -1)\n+            self.assertEquals(transcript.getStart(), 238)\n+            self.assertEquals(transcript.getEnd(),   553)\n+            self.assertEquals(transcript.getNbExons(), 2)\n+            for i, exon in enumerate(transcript.getExons()):\n+                if i == 1:\n+                    self.assertEquals(exon.getStart(), 238)\n+                    self.assertEquals(exon.getEnd(),   316)\n+                elif i == 0:\n+                    self.assertEquals(exon.getStart(), 475)\n+                    self.assertEquals(exon.getEnd(),   553)\n+        
os.remove(fileName)\n+\n+\n+    def test_query_forward_target_forward(self):\n+        fileName = "tmpFile.psl"\n+        handle   = open(fileName, "w")\n+        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t0\\t0\\t0\\t0\\t0\\t1\\t60\\t++\\tseq1\\t255\\t9\\t250\\tref\\t2262\\t59\\t360\\t2'..b'assertEquals(transcript.getChromosome(), "ref")\n+            self.assertEquals(transcript.getDirection(), -1)\n+            self.assertEquals(transcript.getStart(), 60)\n+            self.assertEquals(transcript.getEnd(),   360)\n+            self.assertEquals(transcript.getNbExons(), 2)\n+            for i, exon in enumerate(transcript.getExons()):\n+                if i == 1:\n+                    self.assertEquals(exon.getStart(), 60)\n+                    self.assertEquals(exon.getEnd(),   180)\n+                elif i == 0:\n+                    self.assertEquals(exon.getStart(), 241)\n+                    self.assertEquals(exon.getEnd(),   360)\n+        os.remove(fileName)\n+\n+    def test_query_backward_target_backward(self):\n+        fileName = "tmpFile.psl"\n+        handle   = open(fileName, "w")\n+        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t1\\t0\\t0\\t0\\t0\\t1\\t60\\t--\\tseq1\\t255\\t8\\t250\\tref\\t2262\\t58\\t360\\t2\\t120,122,\\t5,125,\\t1902,2082,\\n")\n+        handle.close()\n+\n+        parser = PslParser(fileName, 0)\n+        self.assertEquals(parser.getNbMappings(), 1)\n+        for mapping in parser.getIterator():\n+            transcript = mapping.getTranscript()\n+            self.assertEquals(transcript.getName(), "seq1")\n+            self.assertEquals(transcript.getChromosome(), "ref")\n+            self.assertEquals(transcript.getDirection(), 1)\n+            self.assertEquals(transcript.getStart(), 59)\n+            self.assertEquals(transcript.getEnd(),   360)\n+            self.assertEquals(transcript.getNbExons(), 2)\n+            for i, exon in enumerate(transcript.getExons()):\n+                if i == 0:\n+                    self.assertEquals(exon.getStart(), 59)\n+                    self.assertEquals(exon.getEnd(),   180)\n+                elif i == 1:\n+                    self.assertEquals(exon.getStart(), 241)\n+                    self.assertEquals(exon.getEnd(),   360)\n+        os.remove(fileName)\n+\n+\n+    def test_query_forward_target_backward(self):\n+        fileName = "tmpFile.psl"\n+        handle   = open(fileName, "w")\n+        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT 
gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t1\\t0\\t0\\t0\\t0\\t1\\t60\\t+-\\tseq2\\t255\\t5\\t247\\tref\\t2262\\t58\\t360\\t2\\t120,122,\\t5,125,\\t1902,2082,\\n")\n+        handle.close()\n+\n+        parser = PslParser(fileName, 0)\n+        self.assertEquals(parser.getNbMappings(), 1)\n+        for mapping in parser.getIterator():\n+            transcript = mapping.getTranscript()\n+            self.assertEquals(transcript.getName(), "seq2")\n+            self.assertEquals(transcript.getChromosome(), "ref")\n+            self.assertEquals(transcript.getDirection(), -1)\n+            self.assertEquals(transcript.getStart(), 59)\n+            self.assertEquals(transcript.getEnd(),   360)\n+            self.assertEquals(transcript.getNbExons(), 2)\n+            for i, exon in enumerate(transcript.getExons()):\n+                if i == 1:\n+                    self.assertEquals(exon.getStart(), 59)\n+                    self.assertEquals(exon.getEnd(),   180)\n+                elif i == 0:\n+                    self.assertEquals(exon.getStart(), 241)\n+                    self.assertEquals(exon.getEnd(),   360)\n+        os.remove(fileName)\n+\n+\n+if __name__ == "__main__":\n+    unittest.main()\n+\n+\n'
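Editor's note: the coordinates asserted in the forward PslParser test follow directly from the PSL block columns. PSL target blocks are 0-based and half-open, and the test expects 1-based inclusive exons, so each (tStart, blockSize) pair becomes (tStart + 1, tStart + blockSize): 237/79 gives 238..316 and 474/79 gives 475..553. A small sketch of that conversion; the helper name is mine and not part of PslParser.

def psl_blocks_to_exons(block_sizes, t_starts):
    # PSL stores 0-based half-open target blocks; the tests expect 1-based inclusive exons.
    sizes = [int(s) for s in block_sizes.rstrip(",").split(",")]
    starts = [int(s) for s in t_starts.rstrip(",").split(",")]
    return [(start + 1, start + size) for size, start in zip(sizes, starts)]

# psl_blocks_to_exons("79,79,", "237,474,") == [(238, 316), (475, 553)]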
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_SsrParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_SsrParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,154 @@
+from commons.core.parsing.SsrParser import SsrParser
+import unittest
+
+
+class Test_SsrParser(unittest.TestCase):
+
+
+    def test_setAttributesFromString(self):
+        ssrLine = "MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734"
+        
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributesFromString(ssrLine)
+        
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+        
+        expBES_name = 'MRRE1H001B07RM1'
+        expBES_redundancy = '1'
+        expSSR_nbNucleotides = '2'
+        expSSR_Motif = 'ta'
+        expSSR_Motif_number = '19'
+        expSSR_start = '153'
+        expSSR_end = '190'
+        expBES_size = '734'
+        
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+        
+    def test_setAttributesFromString_empty_BESName(self):
+        ssrLine = "\t1\t2\tta\t19\t153\t190\t734"
+        
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributesFromString(ssrLine)
+        
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+        
+        expBES_name = ''
+        expBES_redundancy = ''
+        expSSR_nbNucleotides = ''
+        expSSR_Motif = ''
+        expSSR_Motif_number = ''
+        expSSR_start = ''
+        expSSR_end = ''
+        expBES_size = ''
+        
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+
+    def test_setAttributesFromString_less_than_8_fields(self):
+        ssrLine = "1\t2\tta\t19\t153\t190\t734"
+        
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributesFromString(ssrLine)
+        
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+        
+        expBES_name = ''
+        expBES_redundancy = ''
+        expSSR_nbNucleotides = ''
+        expSSR_Motif = ''
+        expSSR_Motif_number = ''
+        expSSR_start = ''
+        expSSR_end = ''
+        expBES_size = ''
+        
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+        
+    def test_setAttributes(self):
+        lResults = ['MRRE1H001B07RM1','1','2','ta','19','153','190','734']
+        lineNumber = 1
+        
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributes(lResults, lineNumber)
+        
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+        
+        expBES_name = 'MRRE1H001B07RM1'
+        expBES_redundancy = '1'
+        expSSR_nbNucleotides = '2'
+        expSSR_Motif = 'ta'
+        expSSR_Motif_number = '19'
+        expSSR_start = '153'
+        expSSR_end = '190'
+        expBES_size = '734'
+        
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+        
+    def test_eq_Equals(self):
+        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        SsrParser2 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        
+        self.assertTrue(SsrParser1 == SsrParser2)
+        
+    def test_eq_NotEquals(self):
+        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        SsrParser2 = SsrParser('MRRE1H001A12RM3', '1', '5', 'ttta', '6', '272', '295', '852')
+        
+        self.assertFalse(SsrParser1 == SsrParser2)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
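Editor's note: the three setAttributes* tests above pin down the SsrParser contract: a valid SSR line carries eight tab-separated fields (BES name, redundancy, number of nucleotides, motif, motif count, start, end, BES size), and a line with fewer than eight fields or an empty BES name leaves every attribute as an empty string. A stand-alone sketch of that field logic, assuming the real class simply exposes these values through its getters.

def split_ssr_line(line):
    # Eight tab-separated fields expected; anything else yields empty attributes.
    fields = line.rstrip("\n").split("\t")
    if len(fields) < 8 or fields[0] == "":
        return [""] * 8
    return fields[:8]

# split_ssr_line("MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734")[3] == "ta"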
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanFile.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,287 @@\n+from commons.core.parsing.VarscanFile import VarscanFile\n+from commons.core.parsing.VarscanHit import VarscanHit\n+import unittest\n+import os\n+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n+from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8\n+from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag\n+from commons.core.checker.CheckerException import CheckerException\n+\n+class Test_VarscanFile(unittest.TestCase):\n+\n+    def test_parse_fileWithHeader(self):\n+        varscanFileName = "file.varscan"\n+        self._writeVarscanFile(varscanFileName)\n+        \n+        varscanHit1 = VarscanHit()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'T\')\n+        \n+        varscanHit2 = VarscanHit()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        expVarscanHitsList = [varscanHit1, varscanHit2]\n+        \n+        iVarscanFile = VarscanFile(varscanFileName)\n+        iVarscanFile.parse()\n+        obsVarscanHitsList = iVarscanFile.getVarscanHitsList()\n+        os.remove(varscanFileName)\n+        \n+        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)   \n+\n+    def test_parse_FileWithoutHeader(self):\n+        varscanFileName = "file.varscan"\n+        self._writeVarscanFileWithoutHeader(varscanFileName)\n+        \n+        varscanHit1 = VarscanHit()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'T\')\n+        \n+        varscanHit2 = VarscanHit()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        expVarscanHitsList = [varscanHit1, varscanHit2]\n+        \n+        iVarscanFile = VarscanFile(varscanFileName)\n+        iVarscanFile.parse()\n+        obsVarscanHitsList = iVarscanFile.getVarscanHitsList()\n+        obsTypeOfVarscanFile = iVarscanFile.getTypeOfVarscanFile()\n+        expTypeOfVarscanFile = "Varscan_2_2"\n+        \n+        self.assertEquals(expVarscanHitsList, obsVarscanHitsList) \n+        self.assertEquals(expTypeOfVarscanFile, obsTypeOfVarscanFile) \n+        os.remove(varscanFileName)\n+        \n+    def test_parse_VarscanFileWithTag(self):\n+        inputFileName = "%s/commons/core/parsing/test/varscan.tab" % os.environ["REPET_PATH"]\n+        self._writeVarscanFileWithTag(inputFileName)\n+        launcher = VarscanFile(inputFileName)\n+        launcher.parse()\n+        obsListOfVarscanHits = launcher.getListOfVarscanHits() \n+        \n+        varscanHit1 = VarscanHit_WithTag()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setTag(\'EspeceA\')\n+        \n+        varscanHit2 = VarscanHit_WithTag()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setTag(\'EspeceA\')\n+        expVarscanHitsList = [varscanHit1, varscanHit2] \n+        \n+        obsTypeOfVarscanFile = 
launcher.getTypeOfVarscanFile()\n+        expTypeOfVarscanFile = "Varscan_2_2_WithTag"\n+        \n+        self.assertEquals(expVarscanHitsList, obsListOfVarscanHits) \n+        self.assertEquals(expTypeOfVarscanFile, obsTypeOfVarscanFile) \n+        os.remove(inputFileName)\n+        \n+    def test_parse_VarscanFile_v2_2_8(self):\n+        inputFileName = "%s/commons/core/parsing/test/varscan.tab" % os.environ["REPET_PATH"]\n+        self._writeVarscanFile_v2_2_8(inputFileName)\n+        launcher = VarscanFile(inputFileName)\n+        launcher.parse()\n+        obsListOfVarscanHits = launcher.getListOfVarscanHits('..b'expVarscanHit = VarscanHit_v2_2_8()\n+        expVarscanHit.setChrom(\'C11HBa0064J13_LR285\')\n+        expVarscanHit.setPosition(\'3227\')\n+        expVarscanHit.setRef(\'G\')\n+        expVarscanHit.setVar(\'A\')\n+        expVarscanHit.setCns(\'A\')\n+        self.assertEquals(expVarscanHit, obsVarscanHit)\n+        \n+    def test_createVarscanObjectFromLine_VarscanHit_v2_2_8_WithTag(self):\n+        line = "C11HBa0064J13_LR285\\t3227\\tG\\tA\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tA\\tEspeceA\\n"\n+        nbLine = 1\n+        launcher = VarscanFile()\n+        launcher.setTypeOfVarscanFile("Varscan_2_2_8_WithTag")\n+        obsVarscanHit = launcher.createVarscanObjectFromLine(line, nbLine)\n+        expVarscanHit = VarscanHit_v2_2_8_WithTag()\n+        expVarscanHit.setChrom(\'C11HBa0064J13_LR285\')\n+        expVarscanHit.setPosition(\'3227\')\n+        expVarscanHit.setRef(\'G\')\n+        expVarscanHit.setVar(\'A\')\n+        expVarscanHit.setCns(\'A\')\n+        expVarscanHit.setTag(\'EspeceA\')\n+        self.assertEquals(expVarscanHit, obsVarscanHit)\n+    \n+    def _writeVarscanFile(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n+        varscanFile.close()\n+\n+    def _writeVarscanFileWithoutHeader(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n+        varscanFile.close()\n+    \n+    def _writeVarscanFileWithTag(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\tEspeceA\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\tEspeceA\\n")\n+        varscanFile.close()\n+    \n+    def _writeVarscanFile_v2_2_8(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tCons\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n")\n+   
     varscanFile.write("C11HBa0064J13_LR285\\t3227\\tG\\tA\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tA\\n")\n+        varscanFile.write("C11HBa0064J13_LR285\\t3230\\tG\\tT\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tT\\n")\n+        varscanFile.close()\n+    \n+    def _writeOther(self, fileName):\n+        file = open(fileName, \'w\')\n+        file.write(\'##gff-version 3\\n\')\n+        file.write(\'chr16\\tBlatToGff\\tBES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\\n\')\n+        file.write(\'chr16\\tBlatToGff\\tBES\\t21736364\\t21737069\\t.\\t+\\t.\\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\\n\')\n+        file.write(\'chr11\\tBlatToGff\\tBES\\t3725876\\t3726473\\t.\\t+\\t.\\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\\n\')\n+        file.write(\'chr11\\tBlatToGff\\tBES\\t3794984\\t3795627\\t.\\t+\\t.\\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\\n\')\n+        file.write(\'chr18\\tBlatToGff\\tBES\\t12067347\\t12067719\\t.\\t+\\t.\\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\\n\')\n+        file.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanFileForGnpSNP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,305 @@\n+import unittest\n+import os\n+\n+from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP\n+from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP\n+\n+class Test_VarscanFileForGnpSNP(unittest.TestCase):\n+\n+    def test__init__(self):\n+        expFastqFileName = "SR.fastq"\n+        expRefFastaFileName = "ref.fasta"\n+        expTaxonName = "Arabidopsis thaliana"\n+        expVarscanFieldSeparator = "\\t"\n+        expVarscanHitsList = []\n+        \n+        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)\n+        \n+        obsFastaqFileName = iVarscanFileForGnpSNP.getFastqFileName()\n+        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()\n+        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()\n+        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()\n+        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()\n+        \n+        self.assertEquals(expFastqFileName, obsFastaqFileName)\n+        self.assertEquals(expRefFastaFileName, obsRefFastaFileName)\n+        self.assertEquals(expTaxonName, obsTaxonName)\n+        self.assertEquals(expVarscanFieldSeparator, obsVarscanFieldSeparator)\n+        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)\n+\n+    def test_parse(self):\n+        varscanFileName = "varscan.tab"\n+        self._writeVarscanFile(varscanFileName)\n+        \n+        varscanHit1 = VarscanHitForGnpSNP()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setReads1(\'1\')\n+        varscanHit1.setReads2(\'2\')\n+        varscanHit1.setVarFreq(\'66,67%\')\n+        varscanHit1.setStrands1(\'1\')\n+        varscanHit1.setStrands2(\'1\')\n+        varscanHit1.setQual1(\'37\')\n+        varscanHit1.setQual2(\'35\')\n+        varscanHit1.setPvalue(\'0.3999999999999999\')\n+        varscanHit1.setGnpSNPRef("C")\n+        varscanHit1.setGnpSNPVar("T")\n+        varscanHit1.setGnpSNPPosition(32)\n+        varscanHit1.setOccurrence(1)\n+        varscanHit1.setPolymType("SNP")\n+        varscanHit1.setPolymLength(1)\n+        \n+        varscanHit2 = VarscanHitForGnpSNP()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setReads1(\'1\')\n+        varscanHit2.setReads2(\'2\')\n+        varscanHit2.setVarFreq(\'66,67%\')\n+        varscanHit2.setStrands1(\'1\')\n+        varscanHit2.setStrands2(\'1\')\n+        varscanHit2.setQual1(\'40\')\n+        varscanHit2.setQual2(\'34\')\n+        varscanHit2.setPvalue(\'0.3999999999999999\')\n+        varscanHit2.setGnpSNPRef("A")\n+        varscanHit2.setGnpSNPVar("T")\n+        varscanHit2.setGnpSNPPosition(34)\n+        varscanHit2.setOccurrence(1)\n+        varscanHit2.setPolymType("SNP")\n+        varscanHit2.setPolymLength(1)\n+        expVarscanHitsList = [varscanHit1, varscanHit2]\n+        \n+        iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, \'\', \'\', \'\')\n+        iVarscanFileForGnpSNP.parse()\n+        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()\n+        os.remove(varscanFileName)\n+        \n+        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)\n+    \n+    def 
test_parse_with_same_position_and_chr_and_type(self):\n+        varscanFileName = "varscan.tab"\n+        self._writeVarscanFile_2(varscanFileName)\n+        \n+        varscanHit1 = VarscanHitForGnpSNP()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setReads1(\'1\')\n+        varscanHit1.setReads2(\'2\')\n+        varscanHit1.setVarFreq(\'66,67%\')\n+        varscanHit1.setStrands1(\'1\')\n+        varscanHit1.setStrands2(\'1\')\n+        varscanHit1.setQual1(\'37\')\n+        varscanHit1.setQual2(\'35\')\n+        var'..b'     refFastaFileName = "ref.fasta"\n+        taxonName = "Arabidopsis thaliana"\n+        \n+        varscanHit1 = VarscanHitForGnpSNP()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'34\')\n+        varscanHit1.setRef(\'A\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setReads1(\'1\')\n+        varscanHit1.setReads2(\'2\')\n+        varscanHit1.setVarFreq(\'66,67%\')\n+        varscanHit1.setStrands1(\'1\')\n+        varscanHit1.setStrands2(\'1\')\n+        varscanHit1.setQual1(\'40\')\n+        varscanHit1.setQual2(\'34\')\n+        varscanHit1.setPvalue(\'0.3999999999999999\')\n+        lVarscanHits1 = [varscanHit1]\n+        \n+        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)\n+        iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)\n+        \n+        varscanHit2 = VarscanHitForGnpSNP()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setReads1(\'1\')\n+        varscanHit2.setReads2(\'2\')\n+        varscanHit2.setVarFreq(\'66,67%\')\n+        varscanHit2.setStrands1(\'1\')\n+        varscanHit2.setStrands2(\'1\')\n+        varscanHit2.setQual1(\'40\')\n+        varscanHit2.setQual2(\'34\')\n+        varscanHit2.setPvalue(\'0.3999999999999999\')\n+        lVarscanHits2 = [varscanHit2]\n+\n+        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)\n+        iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)\n+\n+        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)\n+    \n+    def _writeVarscanFile(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n+        varscanFile.close()\n+    \n+    def _writeVarscanFile_2(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.close()\n+        \n+    def _writeVarscanFile_3(self, varscanFileName):\n+        varscanFile = 
open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\t+A\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.close()\n+\n+    def _writeVarscanFile_4(self, varscanFileName):\n+        varscanFile = open(varscanFileName, \'w\')\n+        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+        varscanFile.write("seqname\\t2\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("seqname\\t4\\tC\\tG\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("seqname\\t4\\tC\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("seqname\\t8\\tT\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("chrom\\t4\\tC\\tG\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.write("chrom\\t4\\tC\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+        varscanFile.close()\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanHit.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,165 @@
+import unittest
+from commons.core.parsing.VarscanHit import VarscanHit
+from commons.core.checker.CheckerException import CheckerException
+
+class Test_VarscanHit(unittest.TestCase):
+
+    def test_setAttributesFromString(self):
+        line = "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n"
+        
+        expChrom = "C02HBa0291P19_LR48"
+        expPosition = "32"
+        expRef = "C"
+        expVar = "T"
+        
+        varscanHit = VarscanHit()
+        varscanHit.setAttributesFromString(line)
+        
+        obsChrom = varscanHit.getChrom()
+        obsPosition = varscanHit.getPosition()
+        obsRef = varscanHit.getRef()
+        obsVar = varscanHit.getVar()
+        
+        self.assertEquals(expChrom, obsChrom)
+        self.assertEquals(expPosition, obsPosition)
+        self.assertEquals(expRef, obsRef)
+        self.assertEquals(expVar, obsVar)
+        
+    def test_setAttributesFromString_empty_chrom(self):
+        line = "\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n"
+        iVarscanHit = VarscanHit()
+        try :
+            iVarscanHit.setAttributesFromString(line)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+        
+        expMessage = "The field Chrom is empty in varscan file in line "
+        obsMessage = checkerExceptionInstance.msg
+           
+        self.assertEquals(expMessage, obsMessage)
+
+    def  test_setAttributesFromString_less_than_12_fields(self):
+        line = "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\n"
+        iVarscanHit = VarscanHit()
+        iVarscanHit.setAttributesFromString(line)
+        self.assertEquals("", iVarscanHit.getQualVar())
+        self.assertEquals("", iVarscanHit.getPValue())
+        
+    def test_setAttributes(self):
+        lResults = ["C02HBa0291P19_LR48", "32", "C", "T", "1", "2", "66,67%", "1", "1", "37", "35", "0.3999999999999999"]
+        lineNumber = 1
+        
+        expChrom = "C02HBa0291P19_LR48"
+        expPosition = "32"
+        expRef = "C"
+        expVar = "T"
+        
+        varscanHit = VarscanHit()
+        varscanHit.setAttributes(lResults, lineNumber)
+        
+        obsChrom = varscanHit.getChrom()
+        obsPosition = varscanHit.getPosition()
+        obsRef = varscanHit.getRef()
+        obsVar = varscanHit.getVar()
+        
+        self.assertEquals(expChrom, obsChrom)
+        self.assertEquals(expPosition, obsPosition)
+        self.assertEquals(expRef, obsRef)
+        self.assertEquals(expVar, obsVar)
+
+    def test_setAttributes_empty_chrom(self):
+        lResults = ["", "", "", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 1
+        
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+        
+        expMessage = "The field Chrom is empty in varscan file in line 1"
+        obsMessage = checkerExceptionInstance.msg
+           
+        self.assertEquals(expMessage, obsMessage)
+
+    def test_setAttributes_empty_position(self):
+        lResults = ["chrom", "", "", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 5
+        
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+        
+        expMessage = "The field Position is empty in varscan file in line 5"
+        obsMessage = checkerExceptionInstance.msg
+           
+        self.assertEquals(expMessage, obsMessage)
+
+    def test_setAttributes_empty_ref(self):
+        lResults = ["chrom", "position", "", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 5
+        
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+        
+        expMessage = "The field Ref is empty in varscan file in line 5"
+        obsMessage = checkerExceptionInstance.msg
+           
+        self.assertEquals(expMessage, obsMessage)
+
+    def test_setAttributes_empty_var(self):
+        lResults = ["chrom", "position", "ref", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 5
+        
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+        
+        expMessage = "The field Var is empty in varscan file in line 5"
+        obsMessage = checkerExceptionInstance.msg
+           
+        self.assertEquals(expMessage, obsMessage)
+
+    def test__eq__notEquals(self):
+        varscanHit1 = VarscanHit()
+        varscanHit1.setChrom('C02HBa0291P19_LR48')
+        varscanHit1.setPosition('32')
+        varscanHit1.setRef('C')
+        varscanHit1.setVar('T')
+        
+        varscanHit2 = VarscanHit()
+        varscanHit2.setChrom('C02HBa0291P19_LR48')
+        varscanHit2.setPosition('34')
+        varscanHit2.setRef('A')
+        varscanHit2.setVar('T')
+        
+        self.assertFalse(varscanHit1 == varscanHit2)
+
+    def test__eq__Equals(self):
+        varscanHit1 = VarscanHit()
+        varscanHit1.setChrom('C02HBa0291P19_LR48')
+        varscanHit1.setPosition('32')
+        varscanHit1.setRef('C')
+        varscanHit1.setVar('T')
+        
+        varscanHit2 = VarscanHit()
+        varscanHit2.setChrom('C02HBa0291P19_LR48')
+        varscanHit2.setPosition('32')
+        varscanHit2.setRef('C')
+        varscanHit2.setVar('T')
+        
+        self.assertTrue(varscanHit1 == varscanHit2)        
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
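Editor's note: the empty-field tests above all expect a CheckerException whose message names the missing column (and, when setAttributes is used, the line number), while a line with fewer than twelve fields simply leaves the optional quality and p-value attributes empty. A compact sketch of that validation pattern; FieldCheckError below is a stand-in for commons.core.checker.CheckerException, and the helper is not part of VarscanHit itself.

class FieldCheckError(Exception):
    """Stand-in for commons.core.checker.CheckerException."""

def check_mandatory_fields(fields, line_number):
    # The first four columns (Chrom, Position, Ref, Var) must be present and non-empty.
    for index, name in enumerate(("Chrom", "Position", "Ref", "Var")):
        if index >= len(fields) or fields[index] == "":
            raise FieldCheckError("The field %s is empty in varscan file in line %s"
                                  % (name, line_number))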
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanHitForGnpSNP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHitForGnpSNP.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,412 @@\n+import unittest\n+from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP\n+from commons.core.checker.CheckerException import CheckerException\n+\n+class Test_VarscanHitForGnpSNP(unittest.TestCase):\n+\n+    def test_setAttributes(self):\n+        lResults = ["C02HBa0291P19_LR48", "32", "C", "T", "1", "2", "66,67%", "1", "1", "37", "35", "0.3999999999999999"]\n+        lineNumber = 1\n+    \n+        expChrom = "C02HBa0291P19_LR48"\n+        expPosition = "32"\n+        expRef = "C"\n+        expVar = "T"\n+        expReads1 = "1"\n+        expReads2 = "2"\n+        expVarFreq = 66.67\n+        expStrands1 = "1"\n+        expStrands2 = "1"\n+        expQual1 = "37"\n+        expQual2 = "35"\n+        expPvalue = "0.3999999999999999"\n+        \n+        varscanHit = VarscanHitForGnpSNP()\n+        varscanHit.setAttributes(lResults, lineNumber)\n+        \n+        obsChrom = varscanHit.getChrom()\n+        obsPosition = varscanHit.getPosition()\n+        obsRef = varscanHit.getRef()\n+        obsVar = varscanHit.getVar()\n+        obsReads1 = varscanHit.getReads1()\n+        obsReads2 = varscanHit.getReads2()\n+        obsVarFreq = varscanHit.getVarFreq()\n+        obsStrands1 = varscanHit.getStrands1()\n+        obsStrands2 = varscanHit.getStrands2()\n+        obsQual1 = varscanHit.getQual1()\n+        obsQual2 = varscanHit.getQual2()\n+        obsPvalue = varscanHit.getPvalue()\n+        \n+        self.assertEquals(expChrom, obsChrom)\n+        self.assertEquals(expPosition, obsPosition)\n+        self.assertEquals(expRef, obsRef)\n+        self.assertEquals(expVar, obsVar)\n+        self.assertEquals(expReads1, obsReads1)\n+        self.assertEquals(expReads2, obsReads2)\n+        self.assertEquals(expVarFreq, obsVarFreq)\n+        self.assertEquals(expStrands1, obsStrands1)\n+        self.assertEquals(expStrands2, obsStrands2)\n+        self.assertEquals(expQual1, obsQual1)\n+        self.assertEquals(expQual2, obsQual2)\n+        self.assertEquals(expPvalue, obsPvalue)\n+\n+    def test_setAttributes_empty_chrom(self):\n+        lResults = ["", "", "", "", "", "", "10", "", "", "", "", ""]\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHitForGnpSNP()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Chrom is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_position(self):\n+        lResults = ["chrom", "", "", "", "", "", "10", "", "", "", "", ""]\n+        lineNumber = 5\n+        \n+        varscanHit = VarscanHitForGnpSNP()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Position is empty in varscan file in line 5"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_ref(self):\n+        lResults = ["chrom", "position", "", "", "", "", "10", "", "", "", "", ""]\n+        lineNumber = 5\n+        \n+        varscanHit = VarscanHitForGnpSNP()\n+        checkerExceptionInstance = None\n+        try:\n+     
       varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Ref is empty in varscan file in line 5"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_var(self):\n+        lResults = ["chrom", "position", "ref", "", "", "", "10", "", "", "", "", ""]\n+        lineNumber = 5\n+        \n+        varscanHit = VarscanHitForGnpS'..b'atAlleles2GnpSnp_for_Deletion(self):\n+        varscanHit1 = VarscanHitForGnpSNP()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'-ATT\')\n+        varscanHit1.setReads1(\'1\')\n+        varscanHit1.setReads2(\'2\')\n+        varscanHit1.setVarFreq(\'66,67%\')\n+        varscanHit1.setStrands1(\'1\')\n+        varscanHit1.setStrands2(\'1\')\n+        varscanHit1.setQual1(\'37\')\n+        varscanHit1.setQual2(\'35\')\n+        varscanHit1.setPvalue(\'0.3999999999999999\')\n+        \n+        expPolymType = "DELETION"\n+        expGnpSnpRef = "ATT"\n+        expGnpSnpVar = "---"\n+        expGnpSnpPosition = 33\n+        \n+        varscanHit1.formatAlleles2GnpSnp()\n+        \n+        obsPolymType = varscanHit1.getPolymType()\n+        obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n+        obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n+        obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n+        \n+        self.assertEquals(expPolymType,obsPolymType)\n+        self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n+        self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n+        self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n+        \n+    def test_setVarFreq(self):\n+        varscanHit1 = VarscanHitForGnpSNP()  \n+        exp = 66.67      \n+        varscanHit1.setVarFreq(\'66,67%\')\n+        obs = varscanHit1.getVarFreq()\n+        self.assertEquals(exp, obs)\n+            \n+    def test_formatAlleles2GnpSnp_for_Insertion(self):\n+        varscanHit1 = VarscanHitForGnpSNP()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setVar(\'+TG\')\n+        varscanHit1.setReads1(\'1\')\n+        varscanHit1.setReads2(\'2\')\n+        varscanHit1.setVarFreq(\'66,67%\')\n+        varscanHit1.setStrands1(\'1\')\n+        varscanHit1.setStrands2(\'1\')\n+        varscanHit1.setQual1(\'37\')\n+        varscanHit1.setQual2(\'35\')\n+        varscanHit1.setPvalue(\'0.3999999999999999\')\n+        \n+        expPolymType = "INSERTION"\n+        expGnpSnpRef = "--"\n+        expGnpSnpVar = "TG"\n+        expGnpSnpPosition = 32\n+        \n+        varscanHit1.formatAlleles2GnpSnp()\n+        \n+        obsPolymType = varscanHit1.getPolymType()\n+        obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n+        obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n+        obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n+        \n+        self.assertEquals(expPolymType,obsPolymType)\n+        self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n+        self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n+        self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n+        \n+    def test_formatAlleles2GnpSnp_for_SNP(self):\n+        varscanHit1 = VarscanHitForGnpSNP()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        
varscanHit1.setPosition(\'12\')\n+        varscanHit1.setRef(\'G\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setReads1(\'1\')\n+        varscanHit1.setReads2(\'2\')\n+        varscanHit1.setVarFreq(\'66,67%\')\n+        varscanHit1.setStrands1(\'1\')\n+        varscanHit1.setStrands2(\'1\')\n+        varscanHit1.setQual1(\'37\')\n+        varscanHit1.setQual2(\'35\')\n+        varscanHit1.setPvalue(\'0.3999999999999999\')\n+        \n+        expPolymType = "SNP"\n+        expGnpSnpRef = "G"\n+        expGnpSnpVar = "T"\n+        expGnpSnpPosition = 12\n+        \n+        varscanHit1.formatAlleles2GnpSnp()\n+        \n+        obsPolymType = varscanHit1.getPolymType()\n+        obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n+        obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n+        obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n+        \n+        self.assertEquals(expPolymType,obsPolymType)\n+        self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n+        self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n+        self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n+        \n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
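Editor's note: the three formatAlleles2GnpSnp tests above fix the conversion from Varscan allele notation to the GnpSNP convention: a '+XY' variant becomes an INSERTION with dashes on the reference side and an unchanged position, a '-XYZ' variant becomes a DELETION with dashes on the variant side and the position shifted by one, and anything else stays a plain SNP; setVarFreq also turns the comma-decimal '66,67%' into 66.67. A sketch of those rules, written as free functions here although they are methods on VarscanHitForGnpSNP in the real code.

def format_alleles_for_gnpsnp(ref, var, position):
    if var.startswith("+"):            # insertion: '+TG' -> ('--', 'TG'), same position
        inserted = var[1:]
        return "INSERTION", "-" * len(inserted), inserted, position
    if var.startswith("-"):            # deletion: '-ATT' -> ('ATT', '---'), position + 1
        deleted = var[1:]
        return "DELETION", deleted, "-" * len(deleted), position + 1
    return "SNP", ref, var, position   # plain substitution

def parse_var_freq(text):
    # '66,67%' -> 66.67 (comma used as the decimal separator in the fixtures)
    return float(text.rstrip("%").replace(",", "."))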
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanHit_WithTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit_WithTag.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,212 @@\n+import unittest\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n+\n+class Test_VarscanHit_WithTag(unittest.TestCase):\n+\n+    def test_setAttributesFromString(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n+        \n+        expChrom = "chr1"\n+        expPosition = "1804"\n+        expRef = "T"\n+        expVar = "C"\n+        expReadsRef = "0"\n+        expReadsVar = "1"\n+        expVarFreq = "100%"\n+        expStrandsRef = "0"\n+        expStrandsVar = "1"\n+        expQualRef = "0"\n+        expQualVar = "53"\n+        expPValue = "0.98"\n+        expTag = "EspeceA"\n+        \n+        varscanHit = VarscanHit_WithTag()\n+        varscanHit.setAttributesFromString(line)\n+        \n+        obsChrom = varscanHit.getChrom()\n+        obsPosition = varscanHit.getPosition()\n+        obsRef = varscanHit.getRef()\n+        obsVar = varscanHit.getVar()\n+        obsReadsRef = varscanHit.getReadsRef()\n+        obsReadsVar = varscanHit.getReadsVar()\n+        obsVarFreq = varscanHit.getVarFreq()\n+        obsStrandsRef = varscanHit.getStrandsRef()\n+        obsStrandsVar = varscanHit.getStrandsVar()\n+        obsQualRef = varscanHit.getQualRef()\n+        obsQualVar = varscanHit.getQualVar()\n+        obsPValue = varscanHit.getPValue()\n+        obsTag = varscanHit.getTag()\n+        \n+        self.assertEquals(expChrom, obsChrom)\n+        self.assertEquals(expPosition, obsPosition)\n+        self.assertEquals(expRef, obsRef)\n+        self.assertEquals(expVar, obsVar)\n+        self.assertEquals(expReadsRef, obsReadsRef)\n+        self.assertEquals(expReadsVar, obsReadsVar)\n+        self.assertEquals(expVarFreq, obsVarFreq)\n+        self.assertEquals(expStrandsRef, obsStrandsRef)\n+        self.assertEquals(expStrandsVar, obsStrandsVar)\n+        self.assertEquals(expQualRef, obsQualRef)\n+        self.assertEquals(expQualVar, obsQualVar)\n+        self.assertEquals(expPValue, obsPValue)\n+        self.assertEquals(expTag, obsTag)\n+        \n+    def test_setAttributesFromString_empty_chrom(self):\n+        line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n+        varscanHit = VarscanHit_WithTag()\n+        try :\n+            varscanHit.setAttributesFromString(line)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        expMessage = "The field Chrom is empty in varscan file in line "\n+        obsMessage = checkerExceptionInstance.msg\n+        self.assertEquals(expMessage, obsMessage)\n+        \n+    def test_setAttributes(self):\n+        lResults = [\'chr1\', \'1804\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        expChrom = "chr1"\n+        expPosition = "1804"\n+        expRef = "T"\n+        expVar = "C"\n+        expReadsRef = "0"\n+        expReadsVar = "1"\n+        expVarFreq = "100%"\n+        expStrandsRef = "0"\n+        expStrandsVar = "1"\n+        expQualRef = "0"\n+        expQualVar = "53"\n+        expPValue = "0.98"\n+        expTag = "EspeceA"\n+        \n+        varscanHit = VarscanHit_WithTag()\n+        varscanHit.setAttributes(lResults, lineNumber)\n+        \n+        obsChrom = varscanHit.getChrom()\n+        obsPosition = varscanHit.getPosition()\n+        obsRef = varscanHit.getRef()\n+        obsVar = varscanHit.getVar()\n+        
obsReadsRef = varscanHit.getReadsRef()\n+        obsReadsVar = varscanHit.getReadsVar()\n+        obsVarFreq = varscanHit.getVarFreq()\n+        obsStrandsRef = varscanHit.getStrandsRef()\n+        obsStrandsVar = varscanHit.getStrandsVar()\n+        obsQualRef = varscanHit.getQualRef()\n+        obsQualVar = varscanHit.getQualVar()\n+        obsPValue = varscanHit.getPValue()\n+        obsTag = varscanHit.getTag()\n+        \n+        self.assertEquals(expChrom, obsChrom)\n+        self.assertEquals(expPosition, obsPosition)\n+        self.assertEquals(expRef, obsRef)\n+        self.assertEquals'..b'q)\n+        self.assertEquals(expStrandsRef, obsStrandsRef)\n+        self.assertEquals(expStrandsVar, obsStrandsVar)\n+        self.assertEquals(expQualRef, obsQualRef)\n+        self.assertEquals(expQualVar, obsQualVar)\n+        self.assertEquals(expPValue, obsPValue)\n+        self.assertEquals(expTag, obsTag)\n+\n+    def test_setAttributes_empty_chrom(self):\n+        lResults = [\'\', \'1804\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Chrom is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_position(self):\n+        lResults = [\'chr1\', \'\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Position is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_ref(self):\n+        lResults = [\'chr1\', \'1000\', \'\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Ref is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_cns(self):\n+        lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Var is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        
self.assertEquals(expMessage, obsMessage)\n+\n+    def test__eq__notEquals(self):\n+        varscanHit1 = VarscanHit_WithTag()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'34\')\n+        varscanHit1.setRef(\'A\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setVar(\'EspeceA\')\n+        \n+        varscanHit2 = VarscanHit_WithTag()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setVar(\'EspeceB\')\n+        \n+        self.assertFalse(varscanHit1 == varscanHit2)\n+\n+    def test__eq__Equals(self):\n+        varscanHit1 = VarscanHit_WithTag()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'34\')\n+        varscanHit1.setRef(\'A\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setVar(\'EspeceA\')\n+        \n+        varscanHit2 = VarscanHit_WithTag()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setVar(\'EspeceA\')\n+    \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
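The empty-field tests in Test_VarscanHit_WithTag.py all rely on the same validation pattern: an empty mandatory column raises a CheckerException whose message names the field and the 1-based line number. A minimal stand-alone sketch of that pattern, using a stand-in class rather than the real commons.core.checker.CheckerException:

class CheckerException(Exception):
    # Stand-in for commons.core.checker.CheckerException, which exposes .msg
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

def check_mandatory(field_name, value, line_number):
    # Hypothetical check reproducing the message format asserted in the tests above.
    if value == "":
        raise CheckerException("The field %s is empty in varscan file in line %s"
                               % (field_name, line_number))

try:
    check_mandatory("Chrom", "", 1)
except CheckerException as e:
    print(e.msg)  # The field Chrom is empty in varscan file in line 1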
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanHit_v2_2_8.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit_v2_2_8.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,290 @@\n+import unittest\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8\n+from commons.core.parsing.VarscanHit import VarscanHit\n+\n+class Test_VarscanHit_v2_2_8(unittest.TestCase):\n+\n+    def test_setAttributesFromString(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+        \n+        expChrom = "chr1"\n+        expPosition = "1804"\n+        expRef = "T"\n+        expCns = "C"\n+        expReadsRef = "0"\n+        expReadsVar = "1"\n+        expVarFreq = "100%"\n+        expStrandsRef = "0"\n+        expStrandsVar = "1"\n+        expQualRef = "0"\n+        expQualVar = "53"\n+        expPValue = "0.98"\n+        expMapQualRef = "0"\n+        expMapQualVar = "1"\n+        expReadsRefPlus = "0"\n+        expReadsRefMinus = "0"\n+        expReadsVarPlus = "1"\n+        expReadsVarMinus = "0"\n+        expVar = "C"\n+        \n+        varscanHit = VarscanHit_v2_2_8()\n+        varscanHit.setAttributesFromString(line)\n+        \n+        obsChrom = varscanHit.getChrom()\n+        obsPosition = varscanHit.getPosition()\n+        obsRef = varscanHit.getRef()\n+        obsCns = varscanHit.getCns()\n+        obsReadsRef = varscanHit.getReadsRef()\n+        obsReadsVar = varscanHit.getReadsVar()\n+        obsVarFreq = varscanHit.getVarFreq()\n+        obsStrandsRef = varscanHit.getStrandsRef()\n+        obsStrandsVar = varscanHit.getStrandsVar()\n+        obsQualRef = varscanHit.getQualRef()\n+        obsQualVar = varscanHit.getQualVar()\n+        obsPValue = varscanHit.getPValue()\n+        obsMapQualRef = varscanHit.getMapQualRef()\n+        obsMapQualVar = varscanHit.getMapQualVar()\n+        obsReadsRefPlus = varscanHit.getReadsRefPlus()\n+        obsReadsRefMinus = varscanHit.getReadsRefMinus()\n+        obsReadsVarPlus = varscanHit.getReadsVarPlus()\n+        obsReadsVarMinus = varscanHit.getReadsVarMinus()\n+        obsVar = varscanHit.getVar()\n+        \n+        self.assertEquals(expChrom, obsChrom)\n+        self.assertEquals(expPosition, obsPosition)\n+        self.assertEquals(expRef, obsRef)\n+        self.assertEquals(expCns, obsCns)\n+        self.assertEquals(expReadsRef, obsReadsRef)\n+        self.assertEquals(expReadsVar, obsReadsVar)\n+        self.assertEquals(expVarFreq, obsVarFreq)\n+        self.assertEquals(expStrandsRef, obsStrandsRef)\n+        self.assertEquals(expStrandsVar, obsStrandsVar)\n+        self.assertEquals(expQualRef, obsQualRef)\n+        self.assertEquals(expQualVar, obsQualVar)\n+        self.assertEquals(expPValue, obsPValue)\n+        self.assertEquals(expMapQualRef, obsMapQualRef)\n+        self.assertEquals(expMapQualVar, obsMapQualVar)\n+        self.assertEquals(expReadsRefPlus, obsReadsRefPlus)\n+        self.assertEquals(expReadsRefMinus, obsReadsRefMinus)\n+        self.assertEquals(expReadsVarPlus, obsReadsVarPlus)\n+        self.assertEquals(expReadsVarMinus, obsReadsVarMinus)\n+        self.assertEquals(expVar, obsVar)\n+        \n+    def test_setAttributesFromString_empty_chrom(self):\n+        line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+        varscanHit = VarscanHit_v2_2_8()\n+        try :\n+            varscanHit.setAttributesFromString(line)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        expMessage = "The field Chrom is empty in varscan file in line "\n+       
 obsMessage = checkerExceptionInstance.msg\n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributesFromString_less_than_19_fields(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+        varscanHit = VarscanHit_v2_2_8()\n+        try :\n+            varscanHit.setAttributesFromString(line)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        expMessage = "This varscan line (l.) is not complete"\n+        obsMessage = checkerExceptionInstance.msg\n+        self.a'..b' expMessage = "The field Position is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_ref(self):\n+        lResults = [\'chr1\', \'1000\', \'\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_v2_2_8()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Ref is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_cns(self):\n+        lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_v2_2_8()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Cons is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_var(self):\n+        lResults = [\'chr1\', \'1000\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_v2_2_8()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field varAllele is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test__eq__notEquals(self):\n+        varscanHit1 = VarscanHit_v2_2_8()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'32\')\n+        varscanHit1.setRef(\'C\')\n+        varscanHit1.setCns(\'T\')\n+        varscanHit1.setVar(\'T\')\n+        \n+        varscanHit2 = VarscanHit_v2_2_8()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setCns(\'T\')\n+        varscanHit2.setVar(\'T\')\n+        \n+        self.assertFalse(varscanHit1 == varscanHit2)\n+\n+    def test__eq__Equals(self):\n+        
varscanHit1 = VarscanHit_v2_2_8()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'34\')\n+        varscanHit1.setRef(\'A\')\n+        varscanHit1.setCns(\'T\')\n+        varscanHit1.setVar(\'T\')\n+        \n+        varscanHit2 = VarscanHit_v2_2_8()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setCns(\'T\')\n+        varscanHit2.setVar(\'T\')\n+        \n+        self.assertTrue(varscanHit1 == varscanHit2)\n+        \n+    def test_convertVarscanHit_v2_2_8_To_VarscanHit(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+        iVarscanHit_v2_2_8_WithTag = VarscanHit_v2_2_8()\n+        iVarscanHit_v2_2_8_WithTag.setAttributesFromString(line)\n+        obsVarcanHit_WithTag = iVarscanHit_v2_2_8_WithTag.convertVarscanHit_v2_2_8_To_VarscanHit()\n+        \n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\n"\n+        expVarcanHit_WithTag = VarscanHit()\n+        expVarcanHit_WithTag.setAttributesFromString(line)\n+        \n+        self.assertEquals(expVarcanHit_WithTag, obsVarcanHit_WithTag)\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,300 @@\n+import unittest\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag\n+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n+\n+class Test_VarscanHit_v2_2_8_WithTag(unittest.TestCase):\n+\n+    def test_setAttributesFromString(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+        \n+        expChrom = "chr1"\n+        expPosition = "1804"\n+        expRef = "T"\n+        expCns = "C"\n+        expReadsRef = "0"\n+        expReadsVar = "1"\n+        expVarFreq = "100%"\n+        expStrandsRef = "0"\n+        expStrandsVar = "1"\n+        expQualRef = "0"\n+        expQualVar = "53"\n+        expPValue = "0.98"\n+        expMapQualRef = "0"\n+        expMapQualVar = "1"\n+        expReadsRefPlus = "0"\n+        expReadsRefMinus = "0"\n+        expReadsVarPlus = "1"\n+        expReadsVarMinus = "0"\n+        expVar = "C"\n+        expTag = "EspeceA"\n+        \n+        varscanHit = VarscanHit_v2_2_8_WithTag()\n+        varscanHit.setAttributesFromString(line)\n+        \n+        obsChrom = varscanHit.getChrom()\n+        obsPosition = varscanHit.getPosition()\n+        obsRef = varscanHit.getRef()\n+        obsCns = varscanHit.getCns()\n+        obsReadsRef = varscanHit.getReadsRef()\n+        obsReadsVar = varscanHit.getReadsVar()\n+        obsVarFreq = varscanHit.getVarFreq()\n+        obsStrandsRef = varscanHit.getStrandsRef()\n+        obsStrandsVar = varscanHit.getStrandsVar()\n+        obsQualRef = varscanHit.getQualRef()\n+        obsQualVar = varscanHit.getQualVar()\n+        obsPValue = varscanHit.getPValue()\n+        obsMapQualRef = varscanHit.getMapQualRef()\n+        obsMapQualVar = varscanHit.getMapQualVar()\n+        obsReadsRefPlus = varscanHit.getReadsRefPlus()\n+        obsReadsRefMinus = varscanHit.getReadsRefMinus()\n+        obsReadsVarPlus = varscanHit.getReadsVarPlus()\n+        obsReadsVarMinus = varscanHit.getReadsVarMinus()\n+        obsVar = varscanHit.getVar()\n+        obsTag = varscanHit.getTag()\n+        \n+        self.assertEquals(expChrom, obsChrom)\n+        self.assertEquals(expPosition, obsPosition)\n+        self.assertEquals(expRef, obsRef)\n+        self.assertEquals(expCns, obsCns)\n+        self.assertEquals(expReadsRef, obsReadsRef)\n+        self.assertEquals(expReadsVar, obsReadsVar)\n+        self.assertEquals(expVarFreq, obsVarFreq)\n+        self.assertEquals(expStrandsRef, obsStrandsRef)\n+        self.assertEquals(expStrandsVar, obsStrandsVar)\n+        self.assertEquals(expQualRef, obsQualRef)\n+        self.assertEquals(expQualVar, obsQualVar)\n+        self.assertEquals(expPValue, obsPValue)\n+        self.assertEquals(expMapQualRef, obsMapQualRef)\n+        self.assertEquals(expMapQualVar, obsMapQualVar)\n+        self.assertEquals(expReadsRefPlus, obsReadsRefPlus)\n+        self.assertEquals(expReadsRefMinus, obsReadsRefMinus)\n+        self.assertEquals(expReadsVarPlus, obsReadsVarPlus)\n+        self.assertEquals(expReadsVarMinus, obsReadsVarMinus)\n+        self.assertEquals(expVar, obsVar)\n+        self.assertEquals(expTag, obsTag)\n+        \n+    def test_setAttributesFromString_empty_chrom(self):\n+        line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+        varscanHit = VarscanHit_v2_2_8_WithTag()\n+        try :\n+            
varscanHit.setAttributesFromString(line)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        expMessage = "The field Chrom is empty in varscan file in line "\n+        obsMessage = checkerExceptionInstance.msg\n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributesFromString_less_than_20_fields(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+        varscanHit = VarscanHit_v2_2_8_WithTag()\n+        try :\n+            varscanHit.setAttributesFromString(line)\n+       '..b'\'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_v2_2_8_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Ref is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_cns(self):\n+        lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_v2_2_8_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field Cons is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test_setAttributes_empty_var(self):\n+        lResults = [\'chr1\', \'1000\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'\', \'EspeceA\']\n+        lineNumber = 1\n+        \n+        varscanHit = VarscanHit_v2_2_8_WithTag()\n+        checkerExceptionInstance = None\n+        try:\n+            varscanHit.setAttributes(lResults, lineNumber)\n+        except CheckerException, e:\n+            checkerExceptionInstance = e\n+        \n+        expMessage = "The field varAllele is empty in varscan file in line 1"\n+        obsMessage = checkerExceptionInstance.msg\n+           \n+        self.assertEquals(expMessage, obsMessage)\n+\n+    def test__eq__notEquals(self):\n+        varscanHit1 = VarscanHit_v2_2_8_WithTag()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'34\')\n+        varscanHit1.setRef(\'A\')\n+        varscanHit1.setCns(\'T\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setVar(\'EspeceA\')\n+        \n+        varscanHit2 = VarscanHit_v2_2_8_WithTag()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setCns(\'T\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setVar(\'EspeceB\')\n+        \n+        self.assertFalse(varscanHit1 == varscanHit2)\n+\n+    def test__eq__Equals(self):\n+        varscanHit1 = VarscanHit_v2_2_8_WithTag()\n+        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit1.setPosition(\'34\')\n+        
varscanHit1.setRef(\'A\')\n+        varscanHit1.setCns(\'T\')\n+        varscanHit1.setVar(\'T\')\n+        varscanHit1.setVar(\'EspeceA\')\n+        \n+        varscanHit2 = VarscanHit_v2_2_8_WithTag()\n+        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+        varscanHit2.setPosition(\'34\')\n+        varscanHit2.setRef(\'A\')\n+        varscanHit2.setCns(\'T\')\n+        varscanHit2.setVar(\'T\')\n+        varscanHit2.setVar(\'EspeceA\')\n+        \n+        self.assertTrue(varscanHit1 == varscanHit2)\n+        \n+    def test_convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag(self):\n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+        iVarscanHit_v2_2_8_WithTag = VarscanHit_v2_2_8_WithTag()\n+        iVarscanHit_v2_2_8_WithTag.setAttributesFromString(line)\n+        obsVarcanHit_WithTag = iVarscanHit_v2_2_8_WithTag.convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag()\n+        \n+        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n+        expVarcanHit_WithTag = VarscanHit_WithTag()\n+        expVarcanHit_WithTag.setAttributesFromString(line)\n+        \n+        self.assertEquals(expVarcanHit_WithTag, obsVarcanHit_WithTag)\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
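Taken together, the v2.2.8 tests above describe a 20-column tab-separated layout (the 12 classic Varscan columns, six mapping-quality and strand read counts, the variant allele, and a trailing tag), and test_convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag shows that converting back to the tagged base format keeps the first 12 columns plus the tag. A rough stand-alone sketch of that mapping, illustrative only and not the VarscanHit classes themselves:

V228_TAG_FIELDS = ["chrom", "position", "ref", "cns", "readsRef", "readsVar",
                   "varFreq", "strandsRef", "strandsVar", "qualRef", "qualVar",
                   "pValue", "mapQualRef", "mapQualVar", "readsRefPlus",
                   "readsRefMinus", "readsVarPlus", "readsVarMinus", "var", "tag"]

def parse_v228_with_tag(line):
    # Split one Varscan v2.2.8 line (with species tag) into named fields.
    values = line.rstrip("\n").split("\t")
    if len(values) < len(V228_TAG_FIELDS):
        raise ValueError("This varscan line is not complete")
    return dict(zip(V228_TAG_FIELDS, values))

def to_base_with_tag(hit):
    # Keep the first 12 columns plus the tag, as in the expected line of
    # test_convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag.
    return "\t".join([hit[f] for f in V228_TAG_FIELDS[:12]] + [hit["tag"]]) + "\n"

line = "chr1\t1804\tT\tC\t0\t1\t100%\t0\t1\t0\t53\t0.98\t0\t1\t0\t0\t1\t0\tC\tEspeceA\n"
print(to_base_with_tag(parse_v228_with_tag(line)))
# chr1  1804  T  C  0  1  100%  0  1  0  53  0.98  EspeceA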
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_VarscanToVCF.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanToVCF.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,61 @@
+import unittest
+from commons.core.parsing.VarscanToVCF import VarscanToVCF
+
+class Test_VarscanToVCF(unittest.TestCase):
+    
+    def setUp(self):
+        self._iVarscanToVCF = VarscanToVCF(doClean = True, verbosity = 2)
+    
+#    def test_convertVarscanLineToVCFRecord(self):
+##        Chrom    Position    Ref    Cons    Reads1    Reads2    VarFreq    Strands1    Strands2    Qual1    Qual2    Pvalue           
+##        chr1    10            C        Y        1        1        50%        1            1            68    69    0.49999999999999994
+##     MapQual1    MapQual2    Reads1Plus    Reads1Minus    Reads2Plus    Reads2Minus    VarAllele
+##             1           1            1            0            1            0                T
+#        varscanLine = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tT"
+#        obsRecord = self._iVarscanToVCF._convertVarscanLineToVCFRecord(varscanLine, 1)
+#        
+#        expInfo = {"AF": "50%",
+#                   "DP": 2,
+#                   "MQ": "1"}
+#
+#        expRecord = vcf.model._Record("chr1", 10, "1", "C", "T", 3.010299957, ".", expInfo, ".", None)
+##        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes, samples=None
+#        
+#        self.assertEqual(expRecord.ID, obsRecord.ID)
+#        self.assertEqual(expRecord.QUAL, obsRecord.QUAL)
+#        self.assertEqual(expRecord.FILTER, obsRecord.FILTER)
+#        self.assertEqual(expRecord.INFO, obsRecord.INFO)
+#        self.assertEqual(expRecord.FORMAT, obsRecord.FORMAT)
+#        self.assertEqual(expRecord, obsRecord)
+    
+    def test_convertVarscanLineToVCFLine(self):
+#        Chrom    Position    Ref    Cons    Reads1    Reads2    VarFreq    Strands1    Strands2    Qual1    Qual2    Pvalue           
+#        chr1    10            C        Y        1        1        50%        1            1            68    69    0.49999999999999994
+#     MapQual1    MapQual2    Reads1Plus    Reads1Minus    Reads2Plus    Reads2Minus    VarAllele
+#             1           1            1            0            1            0                T
+        varscanLine = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tT"
+        obsVCFLine = self._iVarscanToVCF._convertVarscanLineToVCFRecord(varscanLine, 1)
+        
+        expVCFLine = "chr1\t10\t.\tC\tT\t3.010299957\t.\tAF=0.5000;DP=2;RBQ=68;ABQ=69\n"
+        #        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
+        self.assertEqual(expVCFLine, obsVCFLine)
+        
+    def test_convertVarscanLineToVCFLine_false_VarAllele(self):
+        varscanLine = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tA"
+        obsVCFLine = self._iVarscanToVCF._convertVarscanLineToVCFRecord(varscanLine, 1)
+        
+        expVCFLine = "chr1\t10\t.\tC\tT\t3.010299957\t.\tAF=0.5000;DP=2;RBQ=68;ABQ=69\n"
+        #        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
+        self.assertEqual(expVCFLine, obsVCFLine)
+        
+    def test_convertVarscanLineToVCFRecord_empty_line(self):
+        obsMsg = ""
+        try:
+            self._iVarscanToVCF._convertVarscanLineToVCFRecord("", 10)
+        except Exception as e:
+            obsMsg = e.msg
+        
+        self.assertEqual("This varscan line (l.10) is not complete", obsMsg)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
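The two active tests above pin down the line-level conversion: QUAL is derived from the Varscan p-value as -10*log10(p), AF comes from VarFreq, DP from Reads1+Reads2, RBQ/ABQ from Qual1/Qual2, and, judging by the false_VarAllele case, ALT is resolved from the Cons IUPAC code rather than from the VarAllele column. A minimal sketch of that expected behaviour, not the actual VarscanToVCF implementation:

import math

def varscan_line_to_vcf(line):
    # Illustrative conversion of one Varscan native-output line into a VCF body line.
    fields = line.rstrip("\n").split("\t")
    if len(fields) < 19:
        raise Exception("This varscan line is not complete")
    chrom, pos, ref, cons = fields[0], fields[1], fields[2], fields[3]
    depth = int(fields[4]) + int(fields[5])           # Reads1 + Reads2
    allele_freq = float(fields[6].rstrip("%")) / 100  # "50%" -> 0.5
    qual_ref, qual_var, pvalue = fields[9], fields[10], float(fields[11])
    # Resolve ALT from the consensus IUPAC code, e.g. Ref=C with Cons=Y (C/T) -> T
    iupac = {"R": "AG", "Y": "CT", "S": "CG", "W": "AT", "K": "GT", "M": "AC"}
    alt = iupac[cons].replace(ref, "") if cons in iupac else cons
    qual = round(-10 * math.log10(pvalue), 9)
    info = "AF=%.4f;DP=%d;RBQ=%s;ABQ=%s" % (allele_freq, depth, qual_ref, qual_var)
    return "%s\t%s\t.\t%s\t%s\t%s\t.\t%s\n" % (chrom, pos, ref, alt, qual, info)

line = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tT"
print(varscan_line_to_vcf(line))
# chr1  10  .  C  T  3.010299957  .  AF=0.5000;DP=2;RBQ=68;ABQ=69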
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_WigParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_WigParser.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,28 @@
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.parsing.WigParser import WigParser
+import unittest
+
+class Test_WigParser(unittest.TestCase):
+    
+    def tearDown(self):
+        FileUtils.removeFilesByPattern("data/.chr*.index")
+
+    def test_GetRange1(self):
+        self.parser = WigParser("data/test.wig")
+        outputRange = [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0]
+        self.assertEqual(self.parser.getRange("chr1", 10, 20), outputRange)
+        outputRange = [0.0, 9.5, 9.6, 0.0]
+        self.assertEqual(self.parser.getRange("chrX", 4, 7), outputRange)
+
+    def test_GetRange2(self):
+        self.parser = WigParser("data/test1.wig")
+        outputRange = [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0]
+        self.assertEqual(self.parser.getRange("chr2", 10, 20), outputRange)
+
+    def test_GetRange3(self):
+        self.parser = WigParser("data/test2.wig")
+        outputRange = [1.4, 1.5]
+        self.assertEqual(self.parser.getRange("chr3", 14, 15), outputRange)
+
+if __name__ == '__main__':
+    unittest.main()
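The getRange expectations above are easier to read against the wig fixture added further down (data/Wig/chr1.wig uses the same track layout): the parser is expected to return one value per base over the inclusive range, with 0.0 for positions no fixedStep or variableStep record covers. A small stand-alone illustration of those semantics, not the project's WigParser:

def parse_wig(lines):
    # Expand fixedStep/variableStep records into a {(chrom, position): value} dict.
    values, chrom, start, step, mode = {}, None, 0, 1, None
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        if fields[0] in ("fixedStep", "variableStep"):
            mode = fields[0]
            params = dict(f.split("=") for f in fields[1:])
            chrom = params["chrom"]
            start = int(params.get("start", 0))
            step = int(params.get("step", 1))
        elif mode == "fixedStep":
            values[(chrom, start)] = float(fields[0])
            start += step
        elif mode == "variableStep":
            values[(chrom, int(fields[0]))] = float(fields[1])
    return values

def get_range(values, chrom, start, end):
    # One value per base over the inclusive range, 0.0 where nothing is defined.
    return [values.get((chrom, pos), 0.0) for pos in range(start, end + 1)]

wig = parse_wig(open("data/Wig/chr1.wig"))
print(get_range(wig, "chr1", 10, 20))
# [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0], as in test_GetRange1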
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/Test_pilerTAToGrouperMap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_pilerTAToGrouperMap.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,108 @@
+import unittest
+import os
+from commons.core.parsing.PilerTAToGrouperMap import PilerTAToGrouperMap
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_pilerTAToGrouperMap(unittest.TestCase):
+
+    def setUp(self):
+        self._inputGffFileName = "input.gff"
+        self._inputPYRFileName = "input_pyr.gff"
+        self._inputMOTIFFileName = "input_motif.gff"
+        
+        self._obsOutFileName = "output.info"
+        self._obsGrouperFileName = "input_motif.gff.grp"
+        self._obsGrpMapFileName = "input_motif.gff.grp.map"
+        
+        self._expOutFileName = "exp_output.info"
+        self._expGrouperFileName = "exp_motif.gff.grp"
+        self._expGrpMapFileName = "exp_motif.gff.grp.map"
+        
+    def tearDown(self):
+        os.remove(self._inputGffFileName)
+        os.remove(self._inputPYRFileName)
+        os.remove(self._inputMOTIFFileName)
+        
+        os.remove(self._obsOutFileName)
+        os.remove(self._obsGrouperFileName)
+        os.remove(self._obsGrpMapFileName)
+        
+        os.remove(self._expOutFileName)
+        os.remove(self._expGrouperFileName)
+        os.remove(self._expGrpMapFileName)
+
+    def testRun(self):
+        self._writePilerTAFilePYR(self._inputPYRFileName)
+        self._writePilerTAFileMOTIF(self._inputMOTIFFileName)
+        self._writePilerTAGff(self._inputGffFileName)
+        
+        self._writeExpOutputFile(self._expOutFileName)
+        self._writeExpGrouperFile(self._expGrouperFileName)
+        self._writeExpGrouperMapFile(self._expGrpMapFileName)
+        
+        iPilerTAToGrouperMap = PilerTAToGrouperMap(self._inputGffFileName, self._inputPYRFileName,self._inputMOTIFFileName, self._obsOutFileName)
+        iPilerTAToGrouperMap.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutFileName, self._obsOutFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGrouperFileName, self._obsGrouperFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGrpMapFileName, self._obsGrpMapFileName))
+
+
+    def _writePilerTAGff(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk21\tpiler\thit\t155146\t156020\t0\t+\t.\tTarget chunk21 150519 151392 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154790\t156023\t0\t+\t.\tTarget chunk21 150519 151751 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154078\t156023\t0\t+\t.\tTarget chunk21 150519 152463 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154256\t156023\t0\t+\t.\tTarget chunk21 150519 152285 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154434\t156023\t0\t+\t.\tTarget chunk21 150519 152107 ; Pile 510 ; Pyramid 0\n")
+        f.close()
+
+    def _writePilerTAFilePYR(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk21\tpiler\tpyramid\t150519\t156023\t0\t.\t.\tPyramidIndex 0\n")
+        f.write("chunk21\tpiler\tpyramid\t150519\t156023\t0\t.\t.\tPyramidIndex 1\n")
+        f.write("chunk21\tpiler\tpyramid\t165574\t174424\t0\t.\t.\tPyramidIndex 2\n")
+        f.write("chunk21\tpiler\tpyramid\t166301\t174424\t0\t.\t.\tPyramidIndex 3\n")
+        f.write("chunk21\tpiler\tpyramid\t168967\t174424\t0\t.\t.\tPyramidIndex 4\n")
+        f.write("chunk21\tpiler\tpyramid\t170215\t174424\t0\t.\t.\tPyramidIndex 5\n")
+        f.close()
+        
+    def _writePilerTAFileMOTIF(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t151215\t151392\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t151574\t151751\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t152286\t152463\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t152108\t152285\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.close()
+        
+    def _writeExpOutputFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("Pile 510\tPyramid 0\n")
+        f.write("\tPyramid 1\n")
+        f.write("\tPyramid 2\n")
+        f.write("\tPyramid 3\n")
+        f.write("\tPyramid 4\n")
+        f.write("\tPyramid 5\n")
+        f.close()
+    
+    def _writeExpGrouperFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("MbS1Gr0Cl510\tchunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 \tPile 510\tPyramid 0\n")
+        f.write("MbS2Gr0Cl510\tchunk21\tpiler\ttandemmotif\t151215\t151392\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.write("MbS3Gr0Cl510\tchunk21\tpiler\ttandemmotif\t151574\t151751\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.write("MbS4Gr0Cl510\tchunk21\tpiler\ttandemmotif\t152286\t152463\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.write("MbS5Gr0Cl510\tchunk21\tpiler\ttandemmotif\t152108\t152285\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.close()
+        
+    def _writeExpGrouperMapFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("MbS1Gr0Cl510\tchunk21\t155843\t156020\n") 
+        f.write("MbS2Gr0Cl510\tchunk21\t151215\t151392\n") 
+        f.write("MbS3Gr0Cl510\tchunk21\t151574\t151751\n") 
+        f.write("MbS4Gr0Cl510\tchunk21\t152286\t152463\n") 
+        f.write("MbS5Gr0Cl510\tchunk21\t152108\t152285\n") 
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
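The expected outputs written in testRun suggest the naming scheme used for grouper members: each tandemmotif line from input_motif.gff becomes MbS&lt;member&gt;Gr0Cl&lt;pile&gt;, and the .grp.map file keeps only the name, chromosome and coordinates. A small illustration of how such a map line can be composed, not the PilerTAToGrouperMap implementation:

def motif_to_map_line(member_index, pile_id, gff_line):
    # Build one grouper .map line from a PilerTA tandemmotif GFF line.
    fields = gff_line.rstrip("\n").split("\t")
    chrom, start, end = fields[0], fields[3], fields[4]
    name = "MbS%dGr0Cl%d" % (member_index, pile_id)
    return "%s\t%s\t%s\t%s\n" % (name, chrom, start, end)

motif = "chunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 ; Pyramid 0\n"
print(motif_to_map_line(1, 510, motif))
# MbS1Gr0Cl510  chunk21  155843  156020  (the first expected .map line above)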
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,832 @@\n+SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n+Batch_AU247387_SNP_30_10102;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;9\n+Batch_AU247387_SNP_30_IRELAND;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGT;1;12;15;Sequence;;;7\n+Batch_AU247387_SNP_30_POLAND;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGT;1;12;20;Sequence;;;9\n+Batch_AU247387_SNP_30_VIGOR;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGT;1;12;23;Sequence;;;9\n+Batch_AU247387_SNP_34_10102;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;7\n+Batch_AU247387_SNP_34_IRELAND;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGACGAT;CCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCT;1;12;15;Sequence;;;10\n+Batch_AU247387_SNP_34_POLAND;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCA;1;12;20;Sequence;;;7\n+Batch_AU247387_SNP_34_VIGOR;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCA;1;12;23;Sequence;;;7\n+Batch_AU247387_SNP_35_10102;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGAAGATC;CRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;9\n+Batch_AU247387_SNP_35_IRELAND;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGACGATT;CAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGT
GTGATCTCTTCTTTGTATACACAGGTGGTTGCTG;1;12;15;Sequence;;;7\n+Batch_AU247387_SNP_35_POLAND;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGAAGATC;CGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCAT;1;12;20;Sequence;;;9\n+Batch_AU247387_SNP_35_VIGOR;A;SNP;35;NNNTATAGCTCCTAACA'..b'CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCC;GTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;6\n+Batch_AU247387_SNP_601_CARILLON;A;SNP;601;-----------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCRAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTA;GANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;9;Sequence;;;10\n+Batch_AU247387_SNP_601_SPAIN;A;SNP;601;-----------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTA;GAGAAGTACGACGACAAGGTTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;10\n+Batch_AU247387_SNP_601_VIGOR;A;SNP;601;TCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTA;GAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;7\n+Batch_AU247387_SNP_613_SPAIN;A;SNP;613;-----------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGA;GACAAGGTTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;7\n+Batch_AU247387_SNP_613_VIGOR;A;SNP;613;CAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGA;GACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;6\n+Batch_AU247387_SNP_620_SPAIN;A;SNP;620;----------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAG;TTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;6\n+Batch_AU247387_SNP_620_VIGOR;A;SNP;620;TTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAG;TCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;9\n+Batch_AU247387_SNP_622_SPAIN;A;SNP;622;--------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAGGT;GATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;10\n+Batch_AU247387_SNP_622_VIGOR;A;SNP;622;GCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAG
CTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGAT;GATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;7\n+Batch_AU247387_SNP_634_SPAIN;A;SNP;634;--CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAGGTTGATGCTTTTGG;GAGAAG;1;12;21;Sequence;;;10\n+Batch_AU247387_SNP_634_VIGOR;A;SNP;634;ATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGG;GAGAAG;1;12;23;Sequence;;;9\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/Wig/chr1.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/Wig/chr1.wig Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,9 @@
+fixedStep  chrom=chr1  start=11  step=1
+1.1
+1.2
+fixedStep  chrom=chr1  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr1
+17  1.7
+19  1.9
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/realExpBatchLine.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpBatchLine.csv Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,41 @@
+IndividualNumber;Pos5;Pos3;BatchNumber;Sequence
+1;;;1;
+2;;;1;
+3;;;1;
+4;;;1;
+5;;;1;
+6;;;1;
+7;;;1;
+8;;;1;
+9;;;1;
+10;;;1;
+11;;;1;
+12;;;1;
+13;;;1;
+14;;;1;
+15;;;1;
+16;;;1;
+17;;;1;
+18;;;1;
+19;;;1;
+20;;;1;
+21;;;1;
+22;;;1;
+23;;;1;
+24;;;1;
+25;;;1;
+26;;;1;
+27;;;1;
+28;;;1;
+29;;;1;
+30;;;1;
+31;;;1;
+32;;;1;
+33;;;1;
+34;;;1;
+35;;;1;
+36;;;1;
+37;;;1;
+38;;;1;
+39;;;1;
+40;;;1;
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/realExpIndividual.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpIndividual.csv Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,41 @@
+IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id
+1;Treesnips_40-4-3;;;;;;;;;;Pinus pinaster;;;;;
+2;Treesnips_40-8-3;;;;;;;;;;Pinus pinaster;;;;;
+3;Treesnips_40-1-2;;;;;;;;;;Pinus pinaster;;;;;
+4;Treesnips_40-14-1;;;;;;;;;;Pinus pinaster;;;;;
+5;Treesnips_40-15-2;;;;;;;;;;Pinus pinaster;;;;;
+6;Treesnips_40-20-2;;;;;;;;;;Pinus pinaster;;;;;
+7;Treesnips_40-25-1;;;;;;;;;;Pinus pinaster;;;;;
+8;Treesnips_41-3-3;;;;;;;;;;Pinus pinaster;;;;;
+9;Treesnips_41-8-1;;;;;;;;;;Pinus pinaster;;;;;
+10;Treesnips_41-1-3;;;;;;;;;;Pinus pinaster;;;;;
+11;Treesnips_41-2-1;;;;;;;;;;Pinus pinaster;;;;;
+12;Treesnips_41-3-2;;;;;;;;;;Pinus pinaster;;;;;
+13;Treesnips_41-6-2;;;;;;;;;;Pinus pinaster;;;;;
+14;Treesnips_41-9-1;;;;;;;;;;Pinus pinaster;;;;;
+15;Treesnips_42-1-3;;;;;;;;;;Pinus pinaster;;;;;
+16;Treesnips_42-8-2;;;;;;;;;;Pinus pinaster;;;;;
+17;Treesnips_42-1-2;;;;;;;;;;Pinus pinaster;;;;;
+18;Treesnips_42-2-1;;;;;;;;;;Pinus pinaster;;;;;
+19;Treesnips_42-2-2;;;;;;;;;;Pinus pinaster;;;;;
+20;Treesnips_42-8-1;;;;;;;;;;Pinus pinaster;;;;;
+21;Treesnips_42-9-2;;;;;;;;;;Pinus pinaster;;;;;
+22;Treesnips_43-4-3;;;;;;;;;;Pinus pinaster;;;;;
+23;Treesnips_43-5-3;;;;;;;;;;Pinus pinaster;;;;;
+24;Treesnips_43-1-1;;;;;;;;;;Pinus pinaster;;;;;
+25;Treesnips_43-2-1;;;;;;;;;;Pinus pinaster;;;;;
+26;Treesnips_43-7-2;;;;;;;;;;Pinus pinaster;;;;;
+27;Treesnips_43-9-3;;;;;;;;;;Pinus pinaster;;;;;
+28;Treesnips_43-10-2;;;;;;;;;;Pinus pinaster;;;;;
+29;Treesnips_44-3-3;;;;;;;;;;Pinus pinaster;;;;;
+30;Treesnips_44-6-2;;;;;;;;;;Pinus pinaster;;;;;
+31;Treesnips_44-3-1;;;;;;;;;;Pinus pinaster;;;;;
+32;Treesnips_44-5-2;;;;;;;;;;Pinus pinaster;;;;;
+33;Treesnips_44-7-1;;;;;;;;;;Pinus pinaster;;;;;
+34;Treesnips_44-10-2;;;;;;;;;;Pinus pinaster;;;;;
+35;Treesnips_45-5-3;;;;;;;;;;Pinus pinaster;;;;;
+36;Treesnips_45-8-3;;;;;;;;;;Pinus pinaster;;;;;
+37;Treesnips_45-1-1;;;;;;;;;;Pinus pinaster;;;;;
+38;Treesnips_45-4-1;;;;;;;;;;Pinus pinaster;;;;;
+39;Treesnips_45-7-1;;;;;;;;;;Pinus pinaster;;;;;
+40;Treesnips_45-9-1;;;;;;;;;;Pinus pinaster;;;;;
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/realExpSequences.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpSequences.fsa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,2 @@
+>PpHDZ31_ref
+GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTCAGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTGCAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGACTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATTCTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATTTATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTTAGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCTTGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGCACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCTATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATACCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCTTGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCTAGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTATTCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCCTGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGCAGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCACTGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCACAGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAGGTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATGATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGATTTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTATCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAACTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAGTTATTTAAAAAAAATGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGGGAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTGTAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGATCAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTTATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGGCACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTTTGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTATATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATGTAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCTGTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTATACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAACAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGAACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGGGTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGCATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGCTGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGATCTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAACTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCATTCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATGTGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGAATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTTGGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTCGCAAAAGTA
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/realExpSubSNP.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpSubSNP.csv Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,799 @@\n+SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-4-3;A;SNP;136;NNNNNNNNNNNNNNNNNNGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;1;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-8-3;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;2;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-1-2;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;3;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-14-1;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;4;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-15-2;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;5;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-20-2;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;6;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-25-1;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;7;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_41-3-3;A;SNP;136;NNNNNNNNNNNNNNNNNNNN
NNNNNNNNTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;8;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_41-8-1;A;SNP;136;GCTAG'..b'NRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-5-2;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGNNNNNNNNNNN;1;1;32;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-7-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;33;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-10-2;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTNNNNNNNNNN;1;1;34;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-5-3;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAANNNNNNNNNNNNNN;1;1;35;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-8-3;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGNNNNNNNNNNNN;1;1;36;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-4-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;38;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-7-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCT
TTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTCGCAAAAGTA;1;1;39;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-9-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;40;Sequence;;;1\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/real_multifasta_input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/real_multifasta_input.fasta Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,2419 @@\n+>PpHDZ31_ref\n+GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATAT\n+TGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n+AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n+GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n+GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n+GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n+CGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTC\n+AGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTG\n+CAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGA\n+CTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATT\n+CTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATT\n+TATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTT\n+AGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCT\n+TGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGC\n+ACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCT\n+ATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATA\n+CCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCT\n+TGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCT\n+AGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTAT\n+TCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCC\n+TGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGC\n+AGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCAC\n+TGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCAC\n+AGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAG\n+GTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATG\n+ATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGAT\n+TTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTA\n+TCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAA\n+CTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAG\n+TTATTTAAAAAAAA-TGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGG\n+GAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTG\n+TAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGAT\n+CAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTT\n+ATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGG\n+CACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTT\n+TGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTA\n+TATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATG\n+TAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCT\n+GTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTAT\n+ACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAA\n+CAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGA\n+ACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGG\n+GTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGC\n+ATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGC\n+TGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGAT\n+CTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAA\n+CTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCAT\n+TCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATG\n+TGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGA\n+ATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTT\n+GGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCA\n+TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n+GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n+AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n+ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n+GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTA
CATTTGCA\n+AATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTG\n+GACAAAATTCTGGATGAAAATGGTCGCAAAAGTA\n+>Treesnips_40-4-3\n+NNNNNNNNNNNNNNNNNNGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATAT\n+TGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n+AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n+GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n+GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n+GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n+CGGT'..b'A\n+TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n+GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n+AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n+ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n+GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n+AATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTG\n+GACAAAATTCTGGATGAAAATGGTCGCAAAAGTA\n+>Treesnips_45-9-1\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n+AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n+GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n+GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n+GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n+CGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTC\n+AGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTG\n+CAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGA\n+CTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATT\n+CTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATT\n+TATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTT\n+AGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCT\n+TGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGC\n+ACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCT\n+ATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATA\n+CCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCT\n+TGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCT\n+AGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTAT\n+TCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCC\n+TGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGC\n+AGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCAC\n+TGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCAC\n+AGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAG\n+GTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATG\n+ATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGAT\n+TTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTA\n+TCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAA\n+CTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAG\n+TTATTTAAAAAAAA-TGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGG\n+GAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTG\n+TAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGAT\n+CAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTT\n+ATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGG\n+CACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTT\n+TGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTA\n+TATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATG\n+TAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCT\n+GTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTAT\n+ACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAA\n+CAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGA\n+ACTTGTTTTTGCACC
AATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGG\n+GTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGC\n+ATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGC\n+TGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGAT\n+CTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAA\n+CTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCAT\n+TCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATG\n+TGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGA\n+ATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTT\n+GGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCA\n+TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n+GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n+AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n+ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n+GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n+AATCAAGCAGGGTTGGACATGCTGGAAACGACANNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,108 @@
+Chrom Position Ref Cons Reads1 Reads2 VarFreq Strands1 Strands2 Qual1 Qual2 Pvalue MapQual1 MapQual2 Reads1Plus Reads1Minus Reads2Plus Reads2Minus VarAllele
+chr1 10759 C Y 2 1 33.33% 1 1 65 47 0.98 1 1 0 2 0 1 T
+chr1 12438 C S 1 1 50% 1 1 62 42 0.98 1 1 0 1 0 1 G
+chr1 17432 A M 2 1 33.33% 1 1 55 37 0.98 1 1 0 2 1 0 C
+chr1 20391 A W 2 1 33.33% 1 1 56 37 0.98 1 1 2 0 1 0 T
+chr1 21207 T Y 2 1 33.33% 1 1 55 41 0.98 1 1 2 0 1 0 C
+chr1 26057 T K 11 2 15.38% 2 1 60 37 0.98 1 1 7 4 2 0 G
+chr1 36838 T Y 2 1 33.33% 1 1 61 36 0.98 1 1 2 0 0 1 C
+chr1 37751 A M 3 1 25% 1 1 65 42 0.98 1 1 0 3 0 1 C
+chr1 43500 C S 3 1 25% 2 1 65 40 0.98 1 1 2 1 0 1 G
+chr1 50481 A W 2 1 33.33% 1 1 60 40 0.98 1 1 0 2 1 0 T
+chr1 106849 C S 3 1 25% 2 1 65 40 0.98 1 1 2 1 1 0 G
+chr1 108726 T W 2 1 33.33% 1 1 50 41 0.98 1 1 0 2 0 1 A
+chr1 114204 G S 4 2 33.33% 2 2 60 39 0.98 1 1 2 2 1 1 C
+chr1 115030 A M 3 1 25% 2 1 57 41 0.98 1 1 2 1 1 0 C
+chr1 116173 A R 7 2 22.22% 2 1 58 39 0.98 1 1 5 2 2 0 G
+chr1 118433 G S 6 1 14.29% 2 1 64 50 0.98 1 1 4 2 1 0 C
+chr1 119042 A R 5 1 16.67% 2 1 55 51 0.98 1 1 2 3 1 0 G
+chr1 139219 A R 1 1 50% 1 1 65 46 0.98 1 1 1 0 0 1 G
+chr1 140710 A R 9 2 18.18% 2 1 63 41 0.98 1 1 3 6 0 2 G
+chr1 144419 T Y 3 1 25% 2 1 63 39 0.98 1 1 1 2 1 0 C
+chr1 146099 G S 2 1 33.33% 2 1 64 38 0.98 1 1 1 1 0 1 C
+chr1 146435 T Y 11 7 38.89% 2 1 60 24 0.98 1 1 4 7 7 0 C
+chr1 147232 A W 2 1 33.33% 2 1 64 39 0.98 1 1 1 1 0 1 T
+chr1 158703 A M 2 1 33.33% 2 1 60 55 0.98 1 1 1 1 1 0 C
+chr1 166732 A W 4 2 33.33% 2 2 62 24 0.98 1 1 2 2 1 1 T
+chr1 179887 C M 3 1 25% 2 1 64 39 0.98 1 1 1 2 1 0 A
+chr1 185971 A R 3 1 25% 2 1 55 40 0.98 1 1 2 1 0 1 G
+chr1 211074 A M 3 1 25% 1 1 65 40 0.98 1 1 3 0 0 1 C
+chr1 219573 G S 1 1 50% 1 1 61 41 0.98 1 1 1 0 0 1 C
+chr1 229396 C S 2 1 33.33% 2 1 57 39 0.98 1 1 1 1 0 1 G
+chr1 236388 T K 8 2 20% 2 1 52 32 0.98 1 1 2 6 0 2 G
+chr1 245990 G S 5 1 16.67% 2 1 61 46 0.98 1 1 3 2 1 0 C
+chr1 249155 C S 2 1 33.33% 2 1 62 37 0.98 1 1 1 1 1 0 G
+chr1 261257 T K 3 1 25% 1 1 60 39 0.98 1 1 0 3 0 1 G
+chr1 274692 A R 2 1 33.33% 2 1 63 39 0.98 1 1 1 1 0 1 G
+chr1 283468 G S 3 1 25% 2 1 63 45 0.98 1 1 2 1 1 0 C
+chr1 284288 T W 3 1 25% 2 1 64 39 0.98 1 1 2 1 1 0 A
+chr1 286983 T Y 2 1 33.33% 1 1 64 37 0.98 1 1 0 2 1 0 C
+chr1 287378 C M 2 1 33.33% 2 1 65 37 0.98 1 1 1 1 0 1 A
+chr1 302928 A R 1 1 50% 1 1 64 38 0.98 1 1 0 1 1 0 G
+chr1 305952 C M 2 1 33.33% 1 1 65 38 0.98 1 1 0 2 0 1 A
+chr1 307932 T Y 4 1 20% 2 1 65 42 0.98 1 1 1 3 0 1 C
+chr1 317422 G S 3 1 25% 1 1 57 40 0.98 1 1 0 3 0 1 C
+chr1 321480 A W 2 1 33.33% 1 1 64 38 0.98 1 1 2 0 0 1 T
+chr1 322307 A M 1 1 50% 1 1 65 37 0.98 1 1 1 0 0 1 C
+chr1 328326 G S 5 1 16.67% 2 1 62 52 0.98 1 1 4 1 1 0 C
+chr1 333138 T K 3 2 40% 1 2 63 24 0.98 1 1 0 3 1 1 G
+chr1 333388 T Y 4 1 20% 2 1 64 43 0.98 1 1 2 2 1 0 C
+chr1 335592 T K 2 1 33.33% 1 1 58 39 0.98 1 1 2 0 1 0 G
+chr1 336572 C S 1 1 50% 1 1 58 38 0.98 1 1 0 1 0 1 G
+chr1 347396 T K 5 1 16.67% 2 1 62 52 0.98 1 1 2 3 1 0 G
+chr1 359080 T K 4 1 20% 2 1 61 42 0.98 1 1 1 3 0 1 G
+chr1 360223 A W 5 1 16.67% 2 1 54 52 0.98 1 1 2 3 0 1 T
+chr1 361047 T K 5 1 16.67% 2 1 62 50 0.98 1 1 2 3 0 1 G
+chr1 366048 A M 2 1 33.33% 2 1 65 39 0.98 1 1 1 1 0 1 C
+chr1 368105 A R 3 1 25% 2 1 55 40 0.98 1 1 2 1 0 1 G
+chr1 373782 T W 3 1 25% 2 1 64 41 0.98 1 1 2 1 1 0 A
+chr1 378159 G K 2 1 33.33% 2 1 63 41 0.98 1 1 1 1 0 1 T
+chr1 383945 C S 8 1 11.11% 2 1 58 57 0.98 1 1 2 6 1 0 G
+chr1 389461 A R 1 1 50% 1 1 65 36 0.98 1 1 1 0 1 0 G
+chr1 396860 G K 4 1 20% 1 1 65 43 0.98 1 1 0 4 0 1 T
+chr1 397170 A R 1 1 50% 1 1 65 36 0.98 1 1 1 0 1 0 G
+chr1 399939 A M 5 1 16.67% 2 1 63 46 0.98 1 1 1 4 0 1 C
+chr1 400733 G S 3 1 25% 1 1 65 41 0.98 1 1 3 0 0 1 C
+chr1 401165 A R 3 1 25% 2 1 64 41 0.98 1 1 1 2 0 1 G
+chr1 406774 T K 3 1 25% 2 1 58 39 0.98 1 1 2 1 0 1 G
+chr1 417293 C Y 2 1 33.33% 1 1 65 55 0.98 1 1 2 0 1 0 T
+chr1 417723 G S 2 1 33.33% 1 1 65 49 0.98 1 1 2 0 0 1 C
+chr1 420308 C M 1 1 50% 1 1 65 36 0.98 1 1 1 0 0 1 A
+chr1 435579 C Y 3 1 25% 1 1 64 40 0.98 1 1 0 3 0 1 T
+chr1 437183 G R 2 1 33.33% 1 1 65 39 0.98 1 1 0 2 0 1 A
+chr1 437194 A W 3 1 25% 1 1 64 41 0.98 1 1 0 3 0 1 T
+chr1 438866 G S 6 1 14.29% 2 1 62 52 0.98 1 1 5 1 0 1 C
+chr1 446237 T Y 3 1 25% 2 1 65 39 0.98 1 1 2 1 1 0 C
+chr1 446308 A R 2 1 33.33% 1 1 50 38 0.98 1 1 2 0 1 0 G
+chr1 452322 A M 1 1 50% 1 1 65 36 0.98 1 1 0 1 1 0 C
+chr1 462721 T K 1 1 50% 1 1 58 41 0.98 1 1 0 1 1 0 G
+chr1 477145 C S 2 1 33.33% 2 1 64 39 0.98 1 1 1 1 1 0 G
+chr1 493772 G S 3 1 25% 2 1 65 40 0.98 1 1 1 2 1 0 C
+chr1 498962 C M 2 1 33.33% 2 1 65 41 0.98 1 1 1 1 1 0 A
+chr1 510532 T Y 4 1 20% 2 1 64 53 0.98 1 1 2 2 1 0 C
+chr1 516369 T Y 1 1 50% 1 1 34 40 0.98 1 1 1 0 1 0 C
+chr1 523631 G K 3 1 25% 2 1 64 40 0.98 1 1 1 2 0 1 T
+chr1 524680 C Y 2 1 33.33% 1 1 65 41 0.98 1 1 0 2 0 1 T
+chr1 525898 T K 3 1 25% 2 1 62 49 0.98 1 1 2 1 1 0 G
+chr1 526118 A M 5 1 16.67% 2 1 61 50 0.98 1 1 2 3 0 1 C
+chr1 535762 C Y 3 1 25% 1 1 65 42 0.98 1 1 3 0 1 0 T
+chr1 543235 G K 1 1 50% 1 1 45 41 0.98 1 1 1 0 1 0 T
+chr1 550086 T Y 3 1 25% 1 1 50 41 0.98 1 1 3 0 1 0 C
+chr1 550508 A R 3 1 25% 2 1 55 40 0.98 1 1 1 2 0 1 G
+chr1 551143 G S 2 1 33.33% 1 1 65 39 0.98 1 1 0 2 1 0 C
+chr1 552924 A R 6 2 25% 2 2 62 38 0.98 1 1 4 2 1 1 G
+chr1 553541 A R 7 1 12.5% 2 1 65 52 0.98 1 1 4 3 0 1 G
+chr1 560806 T Y 5 1 16.67% 2 1 65 49 0.98 1 1 2 3 0 1 C
+chr1 562736 C S 5 1 16.67% 1 1 64 52 0.98 1 1 0 5 1 0 G
+chr1 563224 T Y 2 1 33.33% 1 1 51 39 0.98 1 1 2 0 1 0 C
+chr1 564217 T W 3 1 25% 2 1 62 40 0.98 1 1 1 2 0 1 A
+chr1 567288 C Y 2 1 33.33% 2 1 65 40 0.98 1 1 1 1 1 0 T
+chr1 569652 T Y 1 1 50% 1 1 48 42 0.98 1 1 1 0 0 1 C
+chr1 570280 G K 7 1 12.5% 2 1 60 53 0.98 1 1 1 6 0 1 T
+chr1 582185 T Y 1 1 50% 1 1 63 40 0.98 1 1 1 0 1 0 C
+chr1 582453 G S 1 1 50% 1 1 65 38 0.98 1 1 0 1 0 1 C
+chr1 583477 T K 2 1 33.33% 2 1 62 39 0.98 1 1 1 1 1 0 G
+chr1 584179 G K 3 1 25% 2 1 65 41 0.98 1 1 2 1 0 1 T
+chr1 589074 G S 2 1 33.33% 2 1 65 36 0.98 1 1 1 1 0 1 C
+chr1 596641 C S 2 1 33.33% 1 1 65 40 0.98 1 1 2 0 0 1 G
+chr1 599263 G K 2 1 33.33% 1 1 60 38 0.98 1 1 2 0 0 1 T
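
Note: the new sampleForTestVarscanToVCF.varscan fixture is plain tabular Varscan output, one header line followed by whitespace-separated records. The snippet below is a standalone sketch of reading such records, not the VarscanToVCF converter this fixture feeds; it assumes nothing beyond the header and columns visible above.

    # Standalone sketch: read the Varscan table shown above into dicts keyed by the header.
    def read_varscan(path):
        with open(path) as handle:
            header = handle.readline().split()
            for line in handle:
                fields = line.split()
                if fields:
                    yield dict(zip(header, fields))

    # The first record above becomes {"Chrom": "chr1", "Position": "10759",
    # "Ref": "C", "Cons": "Y", ..., "VarAllele": "T"}.
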
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/test.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/test.wig Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,12 @@
+fixedStep  chrom=chr1  start=11  step=1
+1.1
+1.2
+fixedStep  chrom=chr1  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr1
+17  1.7
+19  1.9
+variableStep chrom=chrX
+5  9.5
+6  9.6
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/test1.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/test1.wig Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,11 @@
+fixedStep  chrom=chr2  start=9  step=1
+0
+0
+1.1
+1.2
+fixedStep  chrom=chr2  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr2
+17  1.7
+19  1.9
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/test2.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/test2.wig Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,3 @@
+fixedStep  chrom=chr3  start=14  step=1
+1.4
+1.5
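
Note: the three wiggle fixtures (test.wig, test1.wig, test2.wig) cover both fixedStep and variableStep declarations. The sketch below shows how such records expand to (chrom, position, score) tuples; it is illustrative only and is not the WigParser these fixtures are meant to exercise.

    # Standalone sketch: expand fixedStep/variableStep records into (chrom, position, score).
    def iter_wig(path):
        chrom, start, step, mode = None, None, None, None
        with open(path) as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue
                if fields[0] == "fixedStep":
                    opts = dict(item.split("=") for item in fields[1:])
                    chrom = opts["chrom"]
                    start = int(opts["start"])
                    step = int(opts.get("step", 1))
                    mode = "fixed"
                elif fields[0] == "variableStep":
                    opts = dict(item.split("=") for item in fields[1:])
                    chrom, mode = opts["chrom"], "variable"
                elif mode == "fixed":
                    yield chrom, start, float(fields[0])
                    start += step
                elif mode == "variable":
                    yield chrom, int(fields[0]), float(fields[1])

    # For test.wig this yields ("chr1", 11, 1.1), ("chr1", 12, 1.2), ("chr1", 14, 1.4),
    # ("chr1", 15, 1.5), ("chr1", 17, 1.7), ("chr1", 19, 1.9), ("chrX", 5, 9.5), ("chrX", 6, 9.6).
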
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/testBedParser1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testBedParser1.bed Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,2 @@
+track name=reads description="Reads" useScore=0 visibility=full offset=0
+arm_X 1000 3000 test1.1 1000 + 1000 3000 0 2 100,1000, 0,1000,
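
Note: the single record in testBedParser1.bed is a twelve-column BED line. Read against the standard BED12 field names, it places feature test1.1 on arm_X:1000-3000 (+) with two blocks. The helper below is a standalone sketch, not the BedParser the fixture is named after.

    # Standalone sketch: map the twelve BED columns to their conventional names.
    BED12_FIELDS = ["chrom", "chromStart", "chromEnd", "name", "score", "strand",
                    "thickStart", "thickEnd", "itemRgb", "blockCount",
                    "blockSizes", "blockStarts"]

    def parse_bed_line(line):
        record = dict(zip(BED12_FIELDS, line.split()))
        record["blockSizes"] = [int(x) for x in record["blockSizes"].split(",") if x]
        record["blockStarts"] = [int(x) for x in record["blockStarts"].split(",") if x]
        return record

    # parse_bed_line("arm_X 1000 3000 test1.1 1000 + 1000 3000 0 2 100,1000, 0,1000,")
    # -> feature "test1.1" with two blocks of sizes 100 and 1000 at offsets 0 and 1000.
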
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/testCoordsParser.coords
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testCoordsParser.coords Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,19 @@
+/home/urgi/genome_alignment/data/banks/arabidopsis/lyrata/ara_lyra_sca1.fa /home/urgi/genome_alignment/data/banks/arabidopsis/thaliana/ara_thal_chr1.fa
+NUCMER
+
+    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  | [TAGS]
+=====================================================================================
+       1     6251  |   421251   415029  |     6251     6223  |    89.03  | scaffold_1 gi|240254421:1-30427671
+    9127    11947  |   414945   412123  |     2821     2823  |    90.45  | scaffold_1 gi|240254421:1-30427671
+   12201    12953  |   411933   411173  |      753      761  |    82.56  | scaffold_1 gi|240254421:1-30427671
+   13086    20401  |   411034   403760  |     7316     7275  |    88.56  | scaffold_1 gi|240254421:1-30427671
+   20482    20686  |   403573   403369  |      205      205  |    94.66  | scaffold_1 gi|240254421:1-30427671
+   32288    32623  |   402639   402280  |      336      360  |    76.52  | scaffold_1 gi|240254421:1-30427671
+   32936    33572  |   401974   401308  |      637      667  |    79.80  | scaffold_1 gi|240254421:1-30427671
+   33748    35013  |   401256   400080  |     1266     1177  |    82.77  | scaffold_1 gi|240254421:1-30427671
+   35456    44084  |   399895   391566  |     8629     8330  |    86.23  | scaffold_1 gi|240254421:1-30427671
+   44401    45265  |   391569   390737  |      865      833  |    90.40  | scaffold_1 gi|240254421:1-30427671
+   45374    46243  |   390633   389755  |      870      879  |    71.70  | scaffold_1 gi|240254421:1-30427671
+   46366    48958  |   389607   387128  |     2593     2480  |    82.32  | scaffold_1 gi|240254421:1-30427671
+   55079    55160  |   369603   369683  |       82       81  |    93.90  | scaffold_1 gi|240254421:1-30427671
+   55407    56537  |   369910   371016  |     1131     1107  |    81.69  | scaffold_1 gi|240254421:1-30427671
\ No newline at end of file
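
Note: testCoordsParser.coords is NUCMER show-coords output: the two input fasta paths, the aligner name, a bracketed column header, an "=====" separator, then data rows whose groups are divided by "|" characters. The reader below is a rough standalone sketch for exactly this layout, not the CoordsParser class the fixture is named after.

    # Standalone sketch: keep only the data rows after the "=====" separator and
    # drop the standalone "|" column dividers.
    def iter_coords(path):
        in_data = False
        with open(path) as handle:
            for line in handle:
                if line.startswith("="):
                    in_data = True
                    continue
                if not in_data:
                    continue
                fields = [f for f in line.split() if f != "|"]
                if len(fields) < 9:
                    continue
                s1, e1, s2, e2, len1, len2 = map(int, fields[:6])
                identity = float(fields[6])
                ref_tag, query_tag = fields[7], fields[8]
                yield s1, e1, s2, e2, len1, len2, identity, ref_tag, query_tag

    # First row above: reference 1..6251 (scaffold_1) aligns to query 421251..415029
    # (gi|240254421:1-30427671) at 89.03% identity.
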
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/testCoordsParser_showcoord.coords
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testCoordsParser_showcoord.coords Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+/home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A1_scaffolds.fa /home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A2_scaffolds.fa
+NUCMER
+
+[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [LEN R] [LEN Q] [COV R] [COV Q] [FRM] [TAGS]
+296 2292 1 2001 1997 2001 98.30 175930 60273 1.14 3.32 1 1 mivi_sl_A1_scaffold00001 mivi_sl_A2_scaffold00003
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+/home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A1_scaffolds.fa /home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A2_scaffolds.fa
+PROMER
+
+[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [% SIM] [% STP] [LEN R] [LEN Q] [COV R] [COV Q] [FRM] [TAGS]
+1229    291    939    1    939    939    94.25    97.12    3.04    175930    60273    0.53    1.56    -3    -1    mivi_sl_A1_scaffold00001    mivi_sl_A2_scaffold00003
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/parsing/test/data/testGffParser1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testGffParser1.gff3 Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,7 @@
+arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1
+arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
+arm_X test test_transcript 10000 20000 1 - . ID=id2-1;Name=test2;field=value2
+arm_X test test_exon 10000 10100 1 - . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
+arm_X test test_exon 10500 20000 1 - . ID=id2-1-exon2;Name=test2-exon2;Parent=id2-1
+arm_X test test_transcript 1000 2000 1 + . ID=test1.1-1;Name=test1.1
+arm_X test test_exon 1000 2000 1 + . ID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1
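
Note: testGffParser1.gff3 holds two transcripts whose exons are linked back through Parent attributes. The sketch below splits such a line into its nine columns and its attribute pairs; it splits on any whitespace so it copes with the spacing shown above as well as literal tab separators, and it is not the GffParser under test.

    # Standalone sketch: break a GFF3 line into its nine columns and attribute dict.
    def parse_gff3_line(line):
        columns = line.rstrip("\n").split(None, 8)
        seqid, source, feature_type, start, end, score, strand, frame, attributes = columns
        attr = dict(pair.split("=", 1) for pair in attributes.split(";") if pair)
        return {"seqid": seqid, "source": source, "type": feature_type,
                "start": int(start), "end": int(end), "strand": strand,
                "attributes": attr}

    # The first line above gives attributes {"ID": "id1-1", "Name": "test1", "field": "value1"};
    # the exon rows point back to their transcript through the "Parent" attribute.
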
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/Bioseq.pyc
Binary file commons/core/seq/Bioseq.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/__init__.pyc
Binary file commons/core/seq/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/TestClusterConsensusCollection.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/TestClusterConsensusCollection.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,67 @@
+import unittest
+import os
+from commons.core.seq.ClusterConsensusCollection import ClusterConsensusCollection
+from commons.core.seq.Bioseq import Bioseq
+from commons.core.seq.BioseqDB import BioseqDB
+
+class TestClusterConsensusCollection(unittest.TestCase):
+
+    def setUp(self):
+        self._clusterSequencesFileName = "clusterSequences.fa"
+        self._ClusterConsensusCollection = ClusterConsensusCollection(self._clusterSequencesFileName)
+        self._createClusterConsensusFile()
+
+    def tearDown(self):
+        os.remove(self._clusterSequencesFileName)
+        
+    def test_fillCollection(self):
+        expClusterConsensusCollection = ClusterConsensusCollection(self._clusterSequencesFileName)
+        expClusterConsensusCollection._clusterFileName = self._clusterSequencesFileName
+        bioseq1 = Bioseq("seq1", "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT")
+        bioseq2 = Bioseq("seq2", "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT")
+        bioseq3 = Bioseq("seq3", "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT")
+        iBioseqDB1 = BioseqDB()
+        iBioseqDB2 = BioseqDB()
+        iBioseqDB1.setData([bioseq1, bioseq2])
+        iBioseqDB2.setData([bioseq3])
+        expClusterConsensusCollection._lClusterConsensus = [iBioseqDB1, iBioseqDB2]
+        self._ClusterConsensusCollection.fillCollection()
+        self.assertEqual(expClusterConsensusCollection, self._ClusterConsensusCollection)
+        
+    def test_getNumClusterForAConsensus_for_seq2(self):
+        self._ClusterConsensusCollection.fillCollection()
+        expClusterNumber = 1
+        obsClusterNumber = self._ClusterConsensusCollection.getNumClusterForAConsensus ("seq2")
+        self.assertEqual(expClusterNumber, obsClusterNumber)
+        
+    def test_getNumClusterForAConsensus_for_seq3(self):
+        self._ClusterConsensusCollection.fillCollection()
+        expClusterNumber = 2
+        obsClusterNumber = self._ClusterConsensusCollection.getNumClusterForAConsensus ("seq3")
+        self.assertEqual(expClusterNumber, obsClusterNumber)
+        
+    def test_getNumConsensusInCluster_1(self):
+        self._ClusterConsensusCollection.fillCollection()
+        expConsensusNumber = 2
+        obsConsensusNumber = self._ClusterConsensusCollection.getNumConsensusInCluster (1)
+        self.assertEqual(expConsensusNumber, obsConsensusNumber)
+        
+    def test_getNumConsensusInCluster_2(self):
+        self._ClusterConsensusCollection.fillCollection()
+        expConsensusNumber = 1
+        obsConsensusNumber = self._ClusterConsensusCollection.getNumConsensusInCluster (2)
+        self.assertEqual(expConsensusNumber, obsConsensusNumber)
+    
+    def _createClusterConsensusFile(self):
+        fCluster = open(self._clusterSequencesFileName, "w")
+        fCluster.write(">BlastclustCluster1Mb1 seq1\n")
+        fCluster.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n")
+        fCluster.write(">BlastclustCluster1Mb2 seq2\n")
+        fCluster.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n")
+        fCluster.write(">BlasterGrouperCluster3Mb1 seq3\n")
+        fCluster.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n")
+        fCluster.close()
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
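
Note: the test above exercises the whole ClusterConsensusCollection surface used here: fillCollection() groups the fasta entries by the cluster number embedded in headers such as "BlastclustCluster1Mb1 seq1", and the two getters map consensus names to cluster numbers and back. A short usage sketch built only from those assertions (the fasta name is the one the test writes; nothing else is assumed):

    from commons.core.seq.ClusterConsensusCollection import ClusterConsensusCollection

    collection = ClusterConsensusCollection("clusterSequences.fa")
    collection.fillCollection()
    print(collection.getNumClusterForAConsensus("seq2"))   # 1 in the test above
    print(collection.getNumConsensusInCluster(1))          # 2: seq1 and seq2 share cluster 1
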
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/TestSuite_seq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/TestSuite_seq.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_AlignedBioseqDB
+import Test_Bioseq
+import Test_BioseqDB
+import Test_BioseqUtils
+import Test_FastaUtils
+
+
+def main():
+    
+        TestSuite_seq = unittest.TestSuite()
+        
+        TestSuite_seq.addTest( unittest.makeSuite( Test_AlignedBioseqDB.Test_AlignedBioseqDB, "test" ) )
+        TestSuite_seq.addTest( unittest.makeSuite( Test_Bioseq.Test_Bioseq, "test" ) )
+        TestSuite_seq.addTest( unittest.makeSuite( Test_BioseqDB.Test_BioseqDB, "test" ) )
+        TestSuite_seq.addTest( unittest.makeSuite( Test_BioseqUtils.Test_BioseqUtils, "test" ) )
+        TestSuite_seq.addTest( unittest.makeSuite( Test_FastaUtils.Test_FastaUtils, "test" ) )
+        
+        runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+        runner.run( TestSuite_seq )
+
+      
+if __name__ == "__main__":
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/Test_AlignedBioseqDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_AlignedBioseqDB.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,773 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import sys\n+import os\n+import time\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Range import Range\n+from commons.core.stat.Stat import Stat\n+\n+\n+class Test_AlignedBioseqDB( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._i = AlignedBioseqDB()\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        \n+        \n+    def tearDown( self ):\n+        self._i = None\n+        self._uniqId = ""\n+        \n+        \n+    def test_getLength(self):\n+        iAlignedBioseqDB = AlignedBioseqDB()\n+\n+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iAlignedBioseqDB.setData([iBioseq1])\n+        \n+        expLenght = 29\n+        obsLength = iAlignedBioseqDB.getLength() \n+\n+        self.assertEquals(expLenght, obsLength)\n+        \n+        \n+    def test_getSeqLengthWithoutGaps( self ):\n+        iAlignedBioseqDB = AlignedBioseqDB()\n+        iAlignedBioseqDB.add( Bioseq( "seq3",\n+                                      "AGCG-GACGATGCAGCAT--GCGAATGA--CGAT" ) )\n+        expLenght = 29\n+        obsLength = iAlignedBioseqDB.getSeqLengthWithoutGaps( "seq3" )\n+        \n+        self.assertEquals(expLenght, obsLength)\n+        \n+        \n+    def test_getListOccPerSite(self):\n+        iBioseq1 = Bioseq( "seq1", "AGAAA")\n+        iBioseq2 = Bioseq( "seq2", "TCAAG")\n+        iBioseq3 = Bioseq( "seq3", "GGTAC")\n+        iBioseq4 = Bioseq( "seq4", "CCTTA")\n+        \n+        iAlignedBioseqDB = AlignedBioseqDB()\n+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])\n+\n+        expList = [\n+                \n+                {"A":1, "T":1, 
"G":1, "C":1},\n+\n+                {"G":2, "C":2},\n+                   \n+                {"A":2, "T":2 },\n+                \n+                {"A":3, "T":1 },   \n+                \n+                {"A":2, "G":1, "C":1}\n+            ]\n+                \n+        obsList = iAlignedBioseqDB.getListOccPerSite()\n+       \n+        self.assertEquals(expList, obsList)\n+        \n+        \n+    def test_getListOccPerSite_with_none_sequence(self):\n+        iBioseq1 = Bioseq( "seq1", "AGAAA")\n+        iBioseq2 = Bioseq( "seq2", "TCAAG")\n+        iBi'..b'\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------\n+#>BlastclustCluster2Mb2_chunk7 (dbseq-nr 1) [99136,100579]\n+#GTAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA\n+#ATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATA\n+#ATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATC\n+#ATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA\n+#ATCATAATAATCATAATAATCATAATAATCATAATAATAATAATAATCATAATCATAATC\n+#ATAATAAGCGATAAAAAAATTAAAAAATAAAAATTAAAACCCACTGCAATCACGTTGGAC\n+#GGCGAGTCACAGACGTCAGAATAGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCA\n+#AGAAGGTTTTTATTGAACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATC\n+#ATAAT---AATCATAATAATCATAATAATCATAATAATCATAATAAT-------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#-----------------------------------------------CATA-ATAATCAT\n+#AATAAT--CATAATAATCATA-ATAATCATAATAATCATAATAATCATAATAATCATAAT\n+#AATCATAATAATCATAATAATCATAA----TAATCATAATAATCATAATAATCATAATAA\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------TCATAA-TAATCATAATAATCGTAA---TAATCATAA----TAATCATAATAAT\n+#CATAATAATCATAA-TAAT----CAT-----AATAATCAT-----AATAATCATAATAAT\n+#CATAATAATCATAATAATCATAATAATCATAATAATCATAAT-AA-TCAT--AA--TAAT\n+#-----CATAATAATCATAATAA--TCA----TAATAATC---AT---AATAATCATAATA\n+#-AT---CATAATAATCATAATAATC-----------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#-----------------------------------ATAATAATCATAAT-AATCA-----\n+#TAATAA------TCATAAT----AATCATAAT-AATCATAATAA-TCA-TAATAATCATA\n+#ATAATCATAATAATCATAATAATAATAATAATCATAATCATAATCATAATAAGCATAAAA\n+#AAAT--------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------
------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#TAAAAAATAAAAATTAAAACCCACTGCAA---TCACGTTGGACGGCGAGTCACAGACGTC\n+#A-GAAT-AGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCAAGAAGGTTTTTATTG\n+#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATCATAAT---AATCATA\n+#ATAA---TCA-TAATAATCAT-AATAATCATAATAATCATAA-----TAA-TCATA-ATA\n+#ATCATAATAATCATAATAA--TCATAATA-ATCA-TAATAATCATAATAATCATAATCAT\n+#CATAATAATCATAATAAT--CATAA-T-------AATC--ATAATAATCATAATAATCAT\n+#AATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAAT\n+#CATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAAT\n+#AATCATAATAAT\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_AlignedBioseqDB ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
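
Note: only the head and tail of Test_AlignedBioseqDB.py are shown in this view, but the visible assertions already document getLength() and getSeqLengthWithoutGaps(). Replayed outside unittest, with the expected values copied from the test and nothing else assumed:

    from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
    from commons.core.seq.Bioseq import Bioseq

    db = AlignedBioseqDB()
    db.setData([Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")])
    print(db.getLength())                      # 29, as in test_getLength

    db = AlignedBioseqDB()
    db.add(Bioseq("seq3", "AGCG-GACGATGCAGCAT--GCGAATGA--CGAT"))
    print(db.getSeqLengthWithoutGaps("seq3"))  # 29: gap characters are not counted
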
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/Test_Bioseq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_Bioseq.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1051 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import sys\n+from commons.core.seq.Bioseq import Bioseq \n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Map import Map\n+from commons.core.checker.RepetException import RepetException\n+\n+\n+class Test_Bioseq( unittest.TestCase ):\n+    \n+    def setUp(self):\n+        self._bs = Bioseq()\n+\n+\n+    def test_isEmpty_True(self):\n+        self._bs.setHeader( "" )\n+        self._bs.setSequence( "" )\n+        exp = True\n+        obs = self._bs.isEmpty()\n+        self.assertEquals( exp, obs )\n+\n+        \n+    def test_isEmpty_False(self):\n+        self._bs.setHeader( "seq1" )\n+        self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        exp = False\n+        obs = self._bs.isEmpty()\n+        self.assertEquals( exp, obs )\n+        \n+        \n+    def test___eq__(self):\n+        self._bs.setHeader( "seq1" )\n+        self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        self.assertEquals( self._bs, obs )\n+        \n+        \n+    def test___ne__Header(self):\n+        self._bs.setHeader( "seq2" )\n+        self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        self.assertNotEquals( self._bs, obs )\n+        \n+        \n+    def test___ne__Sequence(self):\n+        self._bs.setHeader( "seq1" )\n+        self._bs.setSequence( "GGACGATGCAGCATGCGAATGACGAT" )\n+        obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        self.assertNotEquals( self._bs, obs )\n+        \n+        \n+    def test_reverse(self):\n+        self._bs.setHeader( "seq1" )\n+        self._bs.setSequence( "TGCGGA" )\n+        exp = "AGGCGT"\n+        self._bs.reverse()\n+        obs = self._bs.sequence\n+        self.assertEqual( obs, 
exp )\n+        \n+        \n+    def test_complement(self):\n+        self._bs.setHeader( "seq1" )\n+        self._bs.setSequence( "TGCGGA" )\n+        exp = "ACGCCT"\n+        self._bs.complement()\n+        obs = self._bs.sequence\n+        self.assertEqual( obs, exp )\n+        \n+        \n+    def test_complement_with_unknown_symbol(self):\n+        self._bs.setHeader( "seq1" )\n+        self._bs.setSequence( "TGCGGAFMRWTYSKVHDBN" )\n+        exp = "ACGCCTNKYWARSMBDHVN"\n+        self._bs.complement()\n+        obs = self._bs.sequence\n+        '..b'       bioseq = Bioseq()\n+        bioseq.sequence = "ATGCNRATGCN\\rATGCAAT\\rTATA\\r"\n+        bioseq.checkEOF()\n+        obsSequence = bioseq.sequence\n+        expSequence = "ATGCNRATGCNATGCAATTATA"\n+        \n+        self.assertEquals(expSequence, obsSequence)\n+        \n+        \n+    def test_getLMapWhithoutGap(self):\n+        iBioseq = Bioseq()\n+        iBioseq.header = "header"\n+        iBioseq.sequence = "ATGC-RA-GCT"\n+        obsLMap = iBioseq.getLMapWhithoutGap()\n+        expLMap = [Map( "header_subSeq1", "header", 1, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 11 )]\n+        \n+        self.assertEquals(expLMap, obsLMap)\n+        \n+        \n+    def test_getLMapWhithoutGap_seqStartsWithGap(self):\n+        iBioseq = Bioseq()\n+        iBioseq.header = "header"\n+        iBioseq.sequence = "-TGC-RA-GCT"\n+        obsLMap = iBioseq.getLMapWhithoutGap()\n+        expLMap = [Map( "header_subSeq1", "header", 2, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 11 )]\n+        \n+        self.assertEquals(expLMap, obsLMap)\n+        \n+        \n+    def test_getLMapWhithoutGap_seqEndsWithGap(self):\n+        iBioseq = Bioseq()\n+        iBioseq.header = "header"\n+        iBioseq.sequence = "ATGC-RA-GC-"\n+        obsLMap = iBioseq.getLMapWhithoutGap()\n+        expLMap = [Map( "header_subSeq1", "header", 1, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 10 )]\n+        \n+        self.assertEquals(expLMap, obsLMap)\n+        \n+    def test_getGCpercentage_onlyATGC( self ):\n+        iBs = Bioseq( "seq", "TGCAGCT" )\n+        exp = 100 * 4 / 7.0\n+        obs = iBs.getGCpercentage()\n+        self.assertEqual( exp, obs )\n+        \n+    def test_getGCpercentageInSequenceWithoutCountNInLength( self ):\n+        iBs = Bioseq( "seq", "TGCAGCTNNNNN" )\n+        exp = 100 * 4 / 7.0\n+        obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()\n+        self.assertEqual( exp, obs )    \n+        \n+    def test_get5PrimeFlank(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 7\n+        obsFlank = bs.get5PrimeFlank(position, 3)\n+        expFlank = "TTT"\n+        self.assertEquals(expFlank, obsFlank)\n+        \n+    def test_get5PrimeFlank_flank_length_truncated(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 7\n+        obsFlank = bs.get5PrimeFlank(position, 15)\n+        expFlank = "AACTTT"\n+        self.assertEquals(expFlank, obsFlank)\n+        \n+    def test_get5PrimeFlank_flank_of_first_base(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 1\n+        obsFlank = bs.get5PrimeFlank(position, 15)\n+        expFlank = ""\n+        self.assertEquals(expFlank, obsFlank)                \n+   \n+    def test_get3PrimeFlank(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 7\n+        obsFlank = 
bs.get3PrimeFlank(position, 3)\n+        expFlank = "CAG"\n+        self.assertEquals(expFlank, obsFlank)\n+        \n+    def test_get3PrimeFlank_flank_length_truncated(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 7\n+        obsFlank = bs.get3PrimeFlank(position, 15)\n+        expFlank = "CAGAA"\n+        self.assertEquals(expFlank, obsFlank)\n+        \n+    def test_get3PrimeFlank_flank_of_last_base(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 12\n+        obsFlank = bs.get3PrimeFlank(position, 15)\n+        expFlank = ""\n+        self.assertEquals(expFlank, obsFlank)\n+        \n+    def test_get3PrimeFlank_polymLength_different_of_1(self):\n+        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+        position = 7\n+        obsFlank = bs.get3PrimeFlank(position, 3, 2)\n+        expFlank = "AGA"\n+        self.assertEquals(expFlank, obsFlank) \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Bioseq ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
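
Note: likewise for Test_Bioseq.py, the visible cases cover the in-place reverse() and complement() mutators and the GC-content helper. The same calls outside the test harness, with the expected values from the diff as comments:

    from commons.core.seq.Bioseq import Bioseq

    bs = Bioseq("seq1", "TGCGGA")
    bs.reverse()            # bs.sequence -> "AGGCGT"

    bs = Bioseq("seq1", "TGCGGA")
    bs.complement()         # bs.sequence -> "ACGCCT"

    print(Bioseq("seq", "TGCAGCT").getGCpercentage())   # 100 * 4 / 7.0
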
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/Test_BioseqDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_BioseqDB.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,974 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Map import Map\n+\n+\n+class Test_BioseqDB( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        \n+        \n+    def tearDown( self ):\n+        if os._exists("dummyBioseqDB.fa"):\n+            os.remove("dummyBioseqDB.fa")\n+            \n+            \n+    def test__eq__(self):\n+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+        expBioseqDB = BioseqDB()\n+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+        \n+        iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+        obsBioseqDB = BioseqDB()\n+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+        \n+        self.assertEquals( expBioseqDB, obsBioseqDB )\n+        \n+        \n+    def test__eq__instances_with_different_header(self):\n+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+        expBioseqDB = BioseqDB()\n+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+        \n+        iBioseq3 = Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iBioseq4 = Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCA" )\n+        obsBioseqDB = BioseqDB()\n+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+        \n+        self.assertNotEquals( expBioseqDB, obsBioseqDB )\n+        \n+        \n+    def test__eq__instances_with_different_sequences(self):\n+        iBioseq1 = Bioseq( "seq1", 
"AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+        expBioseqDB = BioseqDB()\n+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+        \n+        iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+        iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" )\n+        obsBioseqDB = BioseqDB()\n+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+        \n+        self.assertNotEquals( expBioseqDB, obsBioseqDB )\n+        \n+        \n+    def test__eq__instance'..b'9, iBioseq10, iBioseq11] )\n+       \n+        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)\n+        self.assertEquals(expBioseqDB, obsBioseqDB)\n+        \n+        \n+    def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):\n+        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n+        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")\n+        iBioseq3 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n+        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")\n+        obsBioseqDB = BioseqDB()\n+        obsBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )\n+        \n+        iBioseq5 = Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n+        iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n+        inBioseqDB = BioseqDB()\n+        inBioseqDB.setData( [ iBioseq5, iBioseq6 ])\n+\n+        iBioseq7 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n+        iBioseq8 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")\n+        iBioseq9 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n+        iBioseq10 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")\n+        \n+        expBioseqDB = BioseqDB()\n+        expBioseqDB.setData( [ iBioseq7, iBioseq8, iBioseq9, iBioseq10] )\n+       \n+        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)\n+        self.assertEquals(expBioseqDB, obsBioseqDB)\n+        \n+        \n+    def test_upCase (self):\n+        iBioseq1 = Bioseq("consensus4","atgacGatgca")\n+        iBioseq2 = Bioseq("consensus1","atgcgaT")\n+        obsBioseqDB = BioseqDB()\n+        obsBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+        iBioseq3 = Bioseq("consensus4","ATGACGATGCA")\n+        iBioseq4 = Bioseq("consensus1","ATGCGAT")\n+        expBioseqDB = BioseqDB()\n+        expBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+        obsBioseqDB.upCase()\n+        self.assertEquals(expBioseqDB, obsBioseqDB)\n+        \n+        \n+    def test_getMap(self):\n+        iBioseq1 = Bioseq("header1","ATGC-RA-GCT")\n+        iBioseq2 = Bioseq("header2","-TGC-RA-GCT")\n+        iBioseq3 = Bioseq("header3","ATGC-RA-GC-")\n+\n+        iAlignedBioseqDB = BioseqDB()\n+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])\n+        \n+        obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()\n+        \n+        expLMap1 = [Map( "header1_subSeq1", "header1", 1, 4 ), Map( "header1_subSeq2", "header1", 6, 7 ), Map( "header1_subSeq3", "header1", 9, 11 )]\n+        expLMap2 = [Map( "header2_subSeq1", "header2", 2, 4 ), Map( "header2_subSeq2", "header2", 6, 7 ), Map( 
"header2_subSeq3", "header2", 9, 11 )]\n+        expLMap3 = [Map( "header3_subSeq1", "header3", 1, 4 ), Map( "header3_subSeq2", "header3", 6, 7 ), Map( "header3_subSeq3", "header3", 9, 10 )]    \n+        \n+        expDict = {\n+                   "header1": expLMap1,\n+                   "header2": expLMap2,\n+                   "header3": expLMap3\n+                   } \n+        \n+        self.assertEquals(expDict, obsDict)\n+\n+    def test_getSeqLengthByListOfName(self):\n+        iBioseq1 = Bioseq("header1","ATGC-RA-GCT")\n+        iBioseq2 = Bioseq("header2","-TGC-RAR")\n+        iBioseq3 = Bioseq("header3","ATGC")\n+\n+        iBioseqDB = BioseqDB()\n+        iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])\n+        \n+        expList =  [11, 4]\n+        obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])\n+        \n+        self.assertEquals( expList, obsList )        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_BioseqDB ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/Test_BioseqUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_BioseqUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,498 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.seq.BioseqUtils import BioseqUtils\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_BioseqUtils( unittest.TestCase ):\n+    \n+    def test_translateSequence_one_nt( self ):\n+        bioseq = Bioseq()\n+        bioseq.sequence = "G"\n+        BioseqUtils.translateSequence(bioseq, 1)\n+        expSequence = ""\n+        obsSequence = bioseq.sequence\n+        self.assertEqual(expSequence, obsSequence)\n+        \n+        \n+    def test_translateSequence_frame1( self ):\n+        bioseq = Bioseq()\n+        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n+        BioseqUtils.translateSequence(bioseq, 1)\n+        expSequence = "XGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+        obsSequence = bioseq.sequence\n+        self.assertEqual(expSequence, obsSequence)\n+        \n+        \n+    def test_translateSequence_frame2( self ):\n+        bioseq = Bioseq()\n+        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n+        BioseqUtils.translateSequence(bioseq, 2)\n+        expSequence = "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"\n+        obsSequence = bioseq.sequence\n+        self.assertEqual(expSequence, obsSequence)\n+        \n+        \n+    def test_translateSequence_frame3( self ):\n+        bioseq = Bioseq()\n+        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n+        BioseqUtils.translateSequence(bioseq, 3)\n+        expSequence = "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"\n+        obsSequence = bioseq.sequence\n+        self.assertEqual(expSequence, obsSequence)\n+        \n+        \n+    def test_setFrameInfoOnHeader(self):\n+        bioseq = 
Bioseq()\n+        bioseq.header = "header1 description1 description2"\n+        BioseqUtils.setFrameInfoOnHeader(bioseq,1)\n+        expHeader = "header1_1 description1 description2"\n+        obsHeader = bioseq.header\n+        self.assertEquals(expHeader,obsHeader)\n+        \n+        \n+    def test_setFrameInfoOnHeader_header_without_space(self):\n+        bioseq = Bioseq()\n+        bioseq.header = "header"\n+        BioseqUtils.setFrameInfoOnHeader(bioseq,1)\n+        expHeader = "header_1"\n+        obsHeader = bioseq.header\n+  '..b'   bioseq2.header = "header2"\n+        bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n+        \n+        lBioseq = [bioseq1, bioseq2]\n+        \n+        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header1 description")\n+        expLength = 31\n+        \n+        self.assertEquals( expLength, obsLength)\n+        \n+        \n+    def test_getSeqLengthWithSeqName_second_item ( self ):\n+        bioseq1 = Bioseq()\n+        bioseq1.header = "header1 description"\n+        bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+        \n+        bioseq2 = Bioseq()\n+        bioseq2.header = "header2"\n+        bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n+        \n+        lBioseq = [bioseq1, bioseq2]\n+        \n+        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n+        expLength = 44\n+        \n+        self.assertEquals( expLength, obsLength)\n+        \n+        \n+    def test_getSeqLengthWithSeqName_empty_list ( self ):\n+        lBioseq = []\n+        \n+        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n+        expLength = 0\n+        \n+        self.assertEquals( expLength, obsLength)\n+        \n+        \n+    def test_getSeqLengthWithSeqName_empty_sequence ( self ):\n+        bioseq1 = Bioseq()\n+        bioseq1.header = "header1 description"\n+        bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+        \n+        bioseq2 = Bioseq()\n+        bioseq2.header = "header2"\n+        bioseq2.sequence = ""\n+        \n+        lBioseq = [bioseq1, bioseq2]\n+        \n+        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n+        expLength = 0\n+        \n+        self.assertEquals( expLength, obsLength)\n+        \n+        \n+    def test_getSeqLengthWithSeqName_sequence_unknown ( self ):\n+        bioseq1 = Bioseq()\n+        bioseq1.header = "header1 description"\n+        bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+        \n+        bioseq2 = Bioseq()\n+        bioseq2.header = "header2"\n+        bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n+        \n+        lBioseq = [bioseq1, bioseq2]\n+        \n+        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header3")\n+        expLength = 0\n+        \n+        self.assertEquals( expLength, obsLength)\n+        \n+        \n+    def test_getLengthPerSeqFromFile( self ):\n+        inFile = "dummyInFile"\n+        inFileHandler = open( inFile, "w" )\n+        inFileHandler.write( ">seq1\\nAGCGATGCAGCTA\\n" )\n+        inFileHandler.write( ">seq2\\nGCGATGCGCATCGACGCGA\\n" )\n+        inFileHandler.close()\n+        \n+        dExp = { "seq1": 13, "seq2": 19 }\n+        \n+        dObs = BioseqUtils.getLengthPerSeqFromFile( inFile )\n+        \n+        self.assertEqual( dExp, dObs )\n+        \n+        os.remove( inFile )\n+        \n+        \n+    def test_getBioseqListSortedByDecreasingLength( self 
):\n+        lBioseqs = [ Bioseq( "TE2", "ACC" ),\n+                    Bioseq( "TE3", "TA" ),\n+                    Bioseq( "TE1", "AGCG" ) ]\n+        lExp = [ Bioseq( "TE1", "AGCG" ),\n+                Bioseq( "TE2", "ACC" ),\n+                Bioseq( "TE3", "TA" ) ]\n+        lObs = BioseqUtils.getBioseqListSortedByDecreasingLength( lBioseqs )\n+        self.assertEquals( lExp, lObs )\n+        \n+        \n+    def test_getBioseqListSortedByDecreasingLengthWithoutGaps( self ):\n+        lBioseqs = [ Bioseq( "TE2", "-ACC-" ),\n+                    Bioseq( "TE3", "TA---" ),\n+                    Bioseq( "TE1", "-AGCG" ) ]\n+        lExp = [ Bioseq( "TE1", "-AGCG" ),\n+                Bioseq( "TE2", "-ACC-" ),\n+                Bioseq( "TE3", "TA---" ) ]\n+        lObs = BioseqUtils.getBioseqListSortedByDecreasingLengthWithoutGaps( lBioseqs )\n+        self.assertEquals( lExp, lObs )\n+        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_BioseqUtils ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
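The blob above is the new unit-test module for BioseqUtils. Among other things it checks BioseqUtils.getLengthPerSeqFromFile, which maps each FASTA header (first word) to the length of its sequence. A minimal sketch of that behaviour, for illustration only — the helper name is hypothetical and this is not the repository's implementation:

    def get_length_per_seq(fasta_path):
        # map sequence name (first word of the header line) to its length in bases
        lengths = {}
        name = None
        with open(fasta_path) as handle:
            for line in handle:
                line = line.strip()
                if line.startswith(">"):
                    name = line[1:].split()[0]
                    lengths[name] = 0
                elif name is not None:
                    lengths[name] += len(line)
        return lengths

    # With the two-sequence dummy file written by test_getLengthPerSeqFromFile above,
    # this returns {"seq1": 13, "seq2": 19}, i.e. the dExp dictionary of the test.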
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/Test_FastaUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_FastaUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1694 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.core.seq.test.Utils_for_T_FastaUtils import Utils_for_T_FastaUtils\n+from commons.core.utils.FileUtils import FileUtils\n+import glob\n+import os\n+import shutil\n+import unittest\n+\n+\n+class Test_FastaUtils( unittest.TestCase ):\n+    \n+        \n+    def test_dbSize_for_empty_file(self):\n+        fileName = "dummyFastaFile.fa"\n+        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)\n+        \n+        obsNb = FastaUtils.dbSize( fileName )\n+        \n+        expNb = 0\n+        os.remove(fileName)\n+        self.assertEquals(expNb, obsNb)\n+        \n+        \n+    def test_dbSize_one_sequence(self):\n+        fileName = "dummyFastaFile.fa"\n+        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)\n+        \n+        obsNb = FastaUtils.dbSize( fileName )\n+        \n+        expNb = 1\n+        os.remove(fileName)\n+        self.assertEquals(expNb, obsNb)\n+        \n+        \n+    def test_dbSize_four_sequences(self):\n+        fileName = "dummyFastaFile.fa"\n+        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)\n+        \n+        obsNb = FastaUtils.dbSize( fileName )\n+        \n+        expNb = 4\n+        os.remove(fileName)\n+        self.assertEquals(expNb, obsNb)\n+        \n+        \n+    def test_dbChunks(self):\n+        inFileName = "dummyBigSeqFastaFile.fa"\n+        expChunksFileName = \'exp\' + inFileName +\'_chunks.fa\'\n+        expChunksMapFileName = \'exp\' + inFileName +\'_chunks.map\'\n+        expCutFileName = \'exp\' + inFileName +\'_cut\'\n+        expNStretchFileName = \'exp\' + inFileName +\'.Nstretch.map\'\n+        Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)\n+        Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)\n+        
Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)\n+        Utils_for_T_FastaUtils._createFastaFile_of_cut(expCutFileName)\n+        Utils_for_T_FastaUtils._createFastaFile_of_Nstretch(expNStretchFileName)\n+        \n+        FastaUtils.dbChunks(inFileName, \'60\', \'10\', \'11\', \'\', False, 0)\n+        \n+        obsChunksFileName = inFileName +\'_chunks.fa\'\n+        obsChunksMapFileName = inFileName +\'_chunks.map\'\n+        obsCutFileName = inFileName +\'_cut\'\n+        obsNStretchFileName = inFileName +\'.Nstretch.map\'\n+        \n+        self.assertTrue(FileUtils.are2'..b'")\n+            f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\\n")\n+            f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\\n")\n+            f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATANN\\n")\n+            \n+        FastaUtils.writeNstreches(fileName, 0)\n+        obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0]\n+        \n+        expFileName = "expNstretches.map"\n+        with open(expFileName, "w") as f:\n+            pass\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))\n+        \n+        os.remove(obsFileName)\n+        os.remove(expFileName)\n+        os.remove(fileName)\n+        \n+    def test_getNstreches_2_GFF(self):\n+        fileName = "dummy.fa"\n+        with open(fileName, "w") as f:\n+            f.write(">seq2\\n")\n+            f.write("NNNNxxNNnNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n+            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n+            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n+            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n+            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n+            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n+            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\\n")\n+            f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\\n")\n+            f.write("\\n")\n+            f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\\n")\n+            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\\n")\n+            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\\n")\n+            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\\n")\n+            f.write("\\n")\n+            f.write(">seq1\\n")\n+            f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\\n")\n+            f.write("TTGCGGATCATxxxxxxxxxxxxxxxTTTGTTTGAACAACCGACAATGCGACCAATTTCA\\n")\n+            f.write("GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\\n")\n+            f.write("TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\\n")\n+            f.write("AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\\n")\n+            f.write("CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\\n")\n+            f.write("\\n")\n+            f.write("GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\\n")\n+            f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\\n")\n+            f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\\n")\n+            
f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAxx\\n")\n+            f.write("\\n")\n+            f.write("")\n+            \n+        FastaUtils.writeNstreches(fileName, 2, outFormat = "gff")\n+        obsFileName = "%s_Nstretches.gff3" % os.path.splitext(os.path.split(fileName)[1])[0]\n+        \n+        expFileName = "expNstretches.gff3"\n+        with open(expFileName, "w") as f:\n+            f.write("##gff-version 3\\n")\n+            f.write("seq1\\tFastaUtils\\tN_stretch\\t72\\t86\\t.\\t.\\t.\\tName=N_stretch_72-86\\n")\n+            f.write("seq1\\tFastaUtils\\tN_stretch\\t261\\t262\\t.\\t.\\t.\\tName=N_stretch_261-262\\n")\n+            f.write("seq1\\tFastaUtils\\tN_stretch\\t510\\t542\\t.\\t.\\t.\\tName=N_stretch_510-542\\n")\n+            f.write("seq1\\tFastaUtils\\tN_stretch\\t592\\t593\\t.\\t.\\t.\\tName=N_stretch_592-593\\n")\n+            f.write("seq2\\tFastaUtils\\tN_stretch\\t1\\t10\\t.\\t.\\t.\\tName=N_stretch_1-10\\n")\n+            \n+        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))\n+        \n+        os.remove(obsFileName)\n+        os.remove(expFileName)\n+        os.remove(fileName)\n+            \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/seq/test/Utils_for_T_FastaUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Utils_for_T_FastaUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,857 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+class Utils_for_T_FastaUtils( object ):\n+    \n+    def _createFastaFile_for_empty_file(fileName):\n+        f = open(fileName, \'w\')\n+        f.write("")\n+        f.close()\n+        \n+    _createFastaFile_for_empty_file = staticmethod ( _createFastaFile_for_empty_file )\n+    \n+    \n+    def _createFastaFile_one_sequence(fileName):\n+        f = open(fileName, \'w\')\n+        f.write(">seq 1\\n")\n+        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+        f.write("ATATTCG\\n")\n+        f.close()\n+        \n+    _createFastaFile_one_sequence = staticmethod ( _createFastaFile_one_sequence )\n+    \n+    \n+    def createFastaFile_twoSequences( fileName ):\n+        f = open( fileName, "w" )\n+        f.write( ">seq 1\\n" )\n+        f.write( "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n" )\n+        f.write( "ATATTCG\\n" )\n+        f.write( ">seq 2\\n" )\n+        f.write( "ATATTCTTTCATCGATCGATCGGCGGCTATATGCTAGTGACGAAGCTAGTGTGAGTAGTA\\n" )\n+        f.write( "ATATTCG\\n" )\n+        f.close()\n+        \n+    createFastaFile_twoSequences = staticmethod ( createFastaFile_twoSequences )\n+    \n+    \n+    def createFastaFile_seq_1( fileName ):\n+        f = open( fileName, "w" )\n+        f.write( ">seq 1\\n" )\n+        f.write( "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n" )\n+        f.write( "ATATTCG\\n" )\n+        f.close()\n+        \n+    createFastaFile_seq_1 = staticmethod( createFastaFile_seq_1 )\n+    \n+    \n+    def createFastaFile_seq_2( fileName ):\n+        f = open( fileName, "w" )\n+        f.write( ">seq 2\\n" )\n+        f.write( "ATATTCTTTCATCGATCGATCGGCGGCTATATGCTAGTGACGAAGCTAGTGTGAGTAGTA\\n" )\n+        f.write( "ATATTCG\\n" )\n+        f.close()\n+        \n+    
createFastaFile_seq_2 = staticmethod( createFastaFile_seq_2 )\n+    \n+    \n+    def _createFastaFile_sequence_without_header(fileName):\n+        f = open(fileName, \'w\')\n+        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+        f.write("ATATTCG\\n")\n+        f.close()\n+        \n+    _createFastaFile_sequence_without_header = staticmethod ( _createFastaFile_sequence_without_header )\n+    \n+        \n+    def _createFastaFile_four_sequences'..b'CCTCGATGAAATGGTCGCG\\n")\n+        f.write("CGCGTACGATAATGCGGGCCTGGCTCACGGATGCGCGCCTTTCCCTATCGTCAGTCACGC\\n")\n+        f.write("AAATGTAGGCTTCCATCTGGAACGCTGCTTGATGGCCTAAGAATGGGCCGTCACGGAACA\\n")\n+        f.write("GCTCACCGCCTGCAGACACGAACGGCCGTGGCGGTCATGGAAGGATCTGAACGTGTCGCC\\n")\n+        f.write("CCATACGATTGACGAAGAGATGTAAGCTCCCTTGGTA\\n")\n+        f.close()\n+    \n+    _createFastaFile_three_sequences_with_ORFs = staticmethod ( _createFastaFile_three_sequences_with_ORFs )\n+    \n+    \n+    def _createFastaFile_three_sequences_with_ORFs_expected(fileName):  \n+        f = open(fileName, \'w\') \n+        f.write("ORF|1|662\\tMivi_sl_Blaster_Grouper_1_Map_3\\t307\\t969\\n")\n+        f.write("ORF|-3|254\\tMivi_sl_Blaster_Grouper_1_Map_3\\t793\\t539\\n")\n+        f.write("ORF|2|197\\tMivi_sl_Blaster_Grouper_1_Map_3\\t356\\t553\\n")\n+        f.write("ORF|3|176\\tMivi_sl_Blaster_Grouper_1_Map_3\\t288\\t464\\n")\n+        f.write("ORF|-1|176\\tMivi_sl_Blaster_Grouper_1_Map_3\\t786\\t610\\n")\n+        f.write("ORF|3|143\\tMivi_sl_Blaster_Grouper_1_Map_3\\t672\\t815\\n")\n+        f.write("ORF|1|131\\tMivi_sl_Blaster_Grouper_1_Map_3\\t175\\t306\\n")\n+        f.write("ORF|-2|131\\tMivi_sl_Blaster_Grouper_1_Map_3\\t797\\t666\\n")\n+        f.write("ORF|2|128\\tMivi_sl_Blaster_Grouper_1_Map_3\\t167\\t295\\n")\n+        f.write("ORF|-2|119\\tMivi_sl_Blaster_Grouper_1_Map_3\\t242\\t123\\n")\n+        f.write("ORF|1|464\\tMivi_sl_Blaster_Grouper_2_Map_3\\t304\\t768\\n")\n+        f.write("ORF|3|305\\tMivi_sl_Blaster_Grouper_2_Map_3\\t669\\t974\\n")\n+        f.write("ORF|-3|251\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1094\\t843\\n")\n+        f.write("ORF|-2|245\\tMivi_sl_Blaster_Grouper_2_Map_3\\t531\\t286\\n")\n+        f.write("ORF|-3|224\\tMivi_sl_Blaster_Grouper_2_Map_3\\t791\\t567\\n")\n+        f.write("ORF|-2|215\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1098\\t883\\n")\n+        f.write("ORF|2|197\\tMivi_sl_Blaster_Grouper_2_Map_3\\t353\\t550\\n")\n+        f.write("ORF|3|173\\tMivi_sl_Blaster_Grouper_2_Map_3\\t288\\t461\\n")\n+        f.write("ORF|-1|173\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1087\\t914\\n")\n+        f.write("ORF|-1|143\\tMivi_sl_Blaster_Grouper_2_Map_3\\t310\\t167\\n")\n+        f.write("ORF|3|626\\tMivi_sl_Blaster_Grouper_3_Map_3\\t141\\t767\\n")\n+        f.write("ORF|2|434\\tMivi_sl_Blaster_Grouper_3_Map_3\\t164\\t598\\n")\n+        f.write("ORF|3|365\\tMivi_sl_Blaster_Grouper_3_Map_3\\t768\\t1133\\n")\n+        f.write("ORF|-3|359\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1514\\t1155\\n")\n+        f.write("ORF|-1|320\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1879\\t1559\\n")\n+        f.write("ORF|3|272\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1299\\t1571\\n")\n+        f.write("ORF|-2|248\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1503\\t1255\\n")\n+        f.write("ORF|1|236\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1576\\t1812\\n")\n+        f.write("ORF|-1|227\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1423\\t1196\\n")\n+        
f.write("ORF|-3|227\\tMivi_sl_Blaster_Grouper_3_Map_3\\t368\\t141\\n")\n+        f.close()\n+\n+    _createFastaFile_three_sequences_with_ORFs_expected = staticmethod ( _createFastaFile_three_sequences_with_ORFs_expected )\n+    \n+    \n+    def _createLinkFile_four_sequences_with_new_headers(fileName):\n+        f = open(fileName, \'w\')\n+        f.write("seq 1\\tReconCluster1Mb155 chunk183 {Fragment} 1..5506\\t1\\t127\\n")\n+        f.write("seq 2\\tMbQ3Gr2Cl0 chunk440 {Fragment} 2678..3645\\t1\\t307\\n")\n+        f.write("seq 3\\tMbS2Gr2Cl0 chunk622 {Fragment} 104..1078\\t1\\t427\\n")\n+        f.write("seq 4\\tPilerCluster3.574Mb796 chunk0117 {Fragment} 51582..50819\\t1\\t307\\n")\n+        \n+    _createLinkFile_four_sequences_with_new_headers = staticmethod ( _createLinkFile_four_sequences_with_new_headers )\n+    \n+    \n+    def _createLinkFile_four_sequences_same_headers(fileName):\n+        f = open(fileName, \'w\')\n+        f.write("seq 1\\tseq 1\\t1\\t127\\n")\n+        f.write("seq 2\\tseq 2\\t1\\t307\\n")\n+        f.write("seq 3\\tseq 3\\t1\\t427\\n")\n+        f.write("seq 4\\tseq 4\\t1\\t307\\n")\n+        \n+    _createLinkFile_four_sequences_same_headers = staticmethod ( _createLinkFile_four_sequences_same_headers )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/DbFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/DbFactory.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,38 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.

+from commons.core.sql.DbMySql import DbMySql
+
+class DbFactory (object):
+    
+    def createInstance(configFileName = "", verbosity = 1):
+        return DbMySql(cfgFileName = configFileName, verbosity = verbosity)
+    
+    createInstance = staticmethod(createInstance)
\ No newline at end of file
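DbFactory is a thin static factory over DbMySql (added just below). A minimal usage sketch, assuming the commons package is importable and that a valid MySQL configuration file exists; the file name here is hypothetical:

    from commons.core.sql.DbFactory import DbFactory

    # returns a DbMySql instance configured from the given file
    iDb = DbFactory.createInstance(configFileName="repet.cfg", verbosity=1)

The caller keeps the returned handle and is responsible for releasing the connection once its queries are done.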
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/DbMySql.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/DbMySql.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,851 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+#        Exception hierarchy:\n+#\n+#        StandardError\n+#        |__Warning\n+#        |__Error\n+#           |__InterfaceError\n+#           |__DatabaseError\n+#              |__DataError\n+#              |__OperationalError\n+#              |__IntegrityError\n+#              |__InternalError\n+#              |__ProgrammingError\n+#              |__NotSupportedError\n+\n+import os\n+import sys\n+import time\n+import ConfigParser\n+import MySQLdb\n+from MySQLdb import InterfaceError\n+from MySQLdb import OperationalError\n+from MySQLdb import InternalError\n+from MySQLdb import DatabaseError\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.checker.RepetException import RepetException\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+\n+LOG_DEPTH = "repet.commons"\n+\n+TABLE_SCHEMA_DESCRIPTOR = {"map":       [("name", "varchar(255)"), ("chr", "varchar(255)"), ("start", "int"), ("end", "int")],\n+                           "set":       [("path", "int unsigned"), ("name", "varchar(255)"), ("chr", "varchar(255)"), ("start", "int"), ("end", "int")],\n+                           "match":     [("query_name", "varchar(255)"), ("query_start", "int"), ("query_end", "int"), ("query_length", "int unsigned"), ("query_length_perc", "float"),\n+                                         ("match_length_perc", "float"), ("subject_name", "varchar(255)"), ("subject_start", "int unsigned"), ("subject_end", "int unsigned"),\n+                                         ("subject_length", "int unsigned"), ("subject_length_perc", "float"), ("E_value", "double"), ("score", "int unsigned"), ("identity", "float"),\n+                                         ("path", "int unsigned")],\n+                           "path":      
[("path", "int unsigned"), ("query_name", "varchar(255)"), ("query_start", "int"), ("query_end", "int"), ("subject_name", "varchar(255)"),\n+                                         ("subject_start", "int unsigned"), ("subject_end", "int unsigned"), ("E_value", "double"), ("score", "int unsigned"), ("identity", "float")],\n+                           "align":     [("query_name", "varchar(255)"), ("query_start", "int"), ("query_end", "int"), ("subject_name", "varchar(255)"), ("subject_start", "int unsigned"),\n+         '..b'  # @param setTableName string new set table name\n+    #\n+    def convertMapTableIntoSetTable( self, mapTableName, setTableName ):\n+        sqlCmd = "CREATE TABLE %s (path int(10) unsigned auto_increment primary key) select name, chr, start, end from %s;" % (setTableName, mapTableName)\n+        self.execute(sqlCmd)\n+        self.createIndex(setTableName, "set")\n+    \n+    \n+    ## Convert an Align table into a Path table\n+    #\n+    # @param inAlignTable string name of the input Align table\n+    # @param outPathTable string name of the output Path table\n+    #\n+    def convertAlignTableIntoPathTable( self, inAlignTable, outPathTable ):\n+        self.createTable( outPathTable, "path", "", True )\n+        sqlCmd = "SELECT * FROM %s" % ( inAlignTable )\n+        self.execute( sqlCmd )\n+        lResults = self.fetchall()\n+        rowIndex = 0\n+        for res in lResults:\n+            rowIndex += 1\n+            sqlCmd = "INSERT INTO %s" % ( outPathTable )\n+            sqlCmd += " (path,query_name,query_start,query_end,subject_name,subject_start,subject_end,E_value,score,identity)"\n+            sqlCmd += " VALUES ( \'%i\'" % ( rowIndex )\n+            for i in res:\n+                sqlCmd += \', "%s"\' % ( i )\n+            sqlCmd += " )"\n+            self.execute( sqlCmd )\n+        self.updateInfoTable( outPathTable, "" )\n+        \n+        \n+    ## Give a list of instances according to the SQL command\n+    #\n+    # @param SQLCmd string is a SQL command\n+    # @param methodGetInstance2Adapt a getter method name. With this method you choose the type of intances contained in lObjs. 
See example in Test_DbMySql.py.\n+    # @return lObjs list of instances\n+    #\n+    def getObjectListWithSQLCmd( self, SQLCmd,  methodGetInstance2Adapt):\n+        self.execute( SQLCmd )\n+        res = self.fetchall()\n+        lObjs = []\n+        for t in res:\n+            iObj = methodGetInstance2Adapt()\n+            iObj.setFromTuple( t )\n+            lObjs.append( iObj )\n+        return lObjs\n+    \n+    \n+    ## Give a list of integer according to the SQL command\n+    #\n+    # @param sqlCmd string is a SQL command\n+    # @return lInteger integer list\n+    #\n+    def getIntegerListWithSQLCmd( self, sqlCmd ):\n+        self.execute(sqlCmd)\n+        res = self.fetchall()\n+        lInteger = []\n+        for t in res:\n+            if t[0] != None:\n+                lInteger.append(int(t[0]))\n+        return lInteger\n+    \n+    \n+    ## Give a int according to the SQL command\n+    #\n+    # @param sqlCmd string is a SQL command\n+    # @return nb integer \n+    #\n+    def getIntegerWithSQLCmd( self, sqlCmd ):\n+        self.execute(sqlCmd)\n+        res = self.fetchall()\n+        nb = res[0][0]\n+        if nb == None:\n+            nb = 0\n+        return nb\n+    \n+    \n+    ## Give a list of str according to the SQL command\n+    #\n+    # @param sqlCmd string is a SQL command\n+    # @return lString str list\n+    #\n+    def getStringListWithSQLCmd( self, sqlCmd ):\n+        self.execute(sqlCmd)\n+        res = self.fetchall()\n+        lString = []\n+        for i in res:\n+            lString.append(i[0])\n+        return lString\n+    \n+#TODO: use API to add indexes\n+    ## Remove doublons in a given table\n+    #\n+    # @param table string name of a MySQL table\n+    #\n+    def removeDoublons( self, table ):\n+        tmpTable = "%s_%s" % ( table, time.strftime("%Y%m%d%H%M%S") )\n+        sqlCmd = "CREATE TABLE %s SELECT DISTINCT * FROM %s" % ( tmpTable, table )\n+        self.execute( sqlCmd )\n+        self.dropTable( table )\n+        self.renameTable(tmpTable, table)\n+        \n+        \n+    ## Get a list of table names from a pattern\n+    #\n+    # @note for instance pattern = \'MyProject_%\'\n+    #\n+    def getTableListFromPattern( self, pattern ):\n+        if pattern == "*" or pattern == "%":\n+            sqlCmd = "SHOW TABLES"\n+        else:\n+            sqlCmd = "SHOW TABLES like \'%s\'" % ( pattern )\n+        lTables = self.getStringListWithSQLCmd( sqlCmd )\n+        return lTables\n'
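TABLE_SCHEMA_DESCRIPTOR, visible at the top of the blob, maps a logical table type ("map", "set", "match", "path", "align", ...) to its (column, SQL type) pairs. A sketch of how one such entry can be expanded into a CREATE TABLE statement; the helper is hypothetical, the real logic living in DbMySql.createTable, which is truncated above:

    # the "map" entry of TABLE_SCHEMA_DESCRIPTOR, copied from the blob above
    MAP_SCHEMA = [("name", "varchar(255)"), ("chr", "varchar(255)"), ("start", "int"), ("end", "int")]

    def build_create_statement(table_name, schema):
        # join the (column, type) pairs of a schema descriptor entry into SQL
        columns = ", ".join("%s %s" % (col, sql_type) for col, sql_type in schema)
        return "CREATE TABLE %s (%s)" % (table_name, columns)

    print(build_create_statement("MyProject_map", MAP_SCHEMA))
    # CREATE TABLE MyProject_map (name varchar(255), chr varchar(255), start int, end int)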
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/DbSQLite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/DbSQLite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,173 @@
+import sqlite3
+import os
+import sys
+
+#TODO: update...compare with DbMySql.py
+class DbSQLite(object):
+    
+    ## Constructor
+    #
+    # @param host string db file path
+    #
+    # @note the db file path must be provided; if it is left blank the
+    #   constructor writes an error message to stderr and exits
+    #   (no fallback to environment variables is implemented here)
+    #
+    def __init__(self, host = ""):
+        if host != "":
+            self.host = host
+        else:
+            msg = "ERROR: no host specified"
+            sys.stderr.write( "%s\n" % msg )
+            sys.exit(1)
+        # TODO: move open() and the cursor creation out of __init__() and call them directly from client code
+        self.open()
+        self.cursor = self.db.cursor()
+    
+    ## Connect to the DbSQLite database
+    #
+    # @param verbose integer (default = 0)
+    #
+    def open( self, verbose = 0, nb = 0 ):
+        try:
+            #sqlite.connect(":memory:", check_same_thread = False)
+            self.db = sqlite3.connect(self.host, check_same_thread= False, isolation_level=None, detect_types=sqlite3.PARSE_DECLTYPES)
+        except sqlite3.Error, e:
+            if verbose > 0:
+                print "ERROR %s" % e
+                sys.stdout.flush()
+            return False
+        return True
+    
+    ## Execute a SQL query
+    #
+    # @param qry string SQL query to execute
+    # @param params parameters of SQL query 
+    #
+    def execute( self, qry, params=None ):
+        try : 
+            if params == None:
+                self.cursor.execute( qry )
+            else:
+                self.cursor.execute( qry, params )
+        except Exception, e:
+            #TODO: this fallback retry still needs to be tested
+            try : 
+                if params == None:
+                    self.cursor.execute( qry )
+                else:
+                    self.cursor.execute( qry, params )
+            except Exception, e:
+                    print "Erreur : %s" % e
+                    
+    ## Retrieve the results of a SQL query
+    #  
+    def fetchall(self):
+        return self.cursor.fetchall()
+    
+    ## Record a new table in the 'info_tables' table
+    #
+    # @param tableName string table name
+    # @param info string information on the table origin
+    #
+    def updateInfoTable( self, tableName, info ):
+        if not self.doesTableExist( "info_tables" ):
+            sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+            self.execute( sqlCmd )
+        sqlCmd = 'INSERT INTO info_tables VALUES ("%s","%s")' % (tableName, info)
+        self.execute( sqlCmd )
+   
+    def createTable(self, tableName, dataType, overwrite=False, verbose=0):
+        if verbose > 0:
+            print "creating table '%s' from file '%s' of type '%s'..." % (tableName, dataType)
+            sys.stdout.flush()
+        if overwrite:
+            self.dropTable(tableName)   
+        if dataType.lower() in ["job", "jobs"]:
+            self.createJobTable(tableName)
+        else:
+            print "ERROR: unknown type %s" % (dataType)
+            self.close()
+            sys.exit(1)
+        if verbose > 0:
+            print "done!"; sys.stdout.flush()
+    
+    ## Create a job table
+    #
+    # @param tablename new table name
+    #
+    def createJobTable( self, tablename ):
+        sqlCmd = "CREATE TABLE %s" % ( tablename )
+        sqlCmd += " ( jobid INT UNSIGNED"
+        sqlCmd += ", jobname VARCHAR(255)"
+        sqlCmd += ", groupid VARCHAR(255)"
+        sqlCmd += ", command TEXT"
+        sqlCmd += ", launcher VARCHAR(1024)"
+        sqlCmd += ", queue VARCHAR(255)"
+        sqlCmd += ", status VARCHAR(255)"
+        sqlCmd += ", time timestamp"
+        sqlCmd += ", node VARCHAR(255) )"
+        self.execute( sqlCmd )
+        
+        self.updateInfoTable( tablename, "job table" )
+        sqlCmd = "CREATE INDEX igroupid ON " + tablename + " ( groupid )"
+        self.execute( sqlCmd )
+    
+    ## Test if a table exists
+    #
+    # @param table string table name
+    # @return boolean True if the table exists, False otherwise
+    #       
+    def doesTableExist( self, table ):
+        qry = "PRAGMA table_info(%s)" % (table)
+        self.execute( qry )
+        results = self.cursor.fetchall()
+        if results:
+            return True
+        return False
+    
+    def isEmpty( self, tableName ):
+        return self.getSize( tableName ) == 0
+    
+    ## Give the number of rows in the table
+    #
+    # @param tableName string table name
+    #
+    def getSize( self, tableName ):
+        qry = "SELECT count(*) FROM %s;" % ( tableName )
+        self.execute( qry )
+        res = self.fetchall()
+        return int( res[0][0] )
+    
+    ## Remove a table if it exists
+    #
+    # @param table string table name
+    # @param verbose integer (default = 0)
+    #
+    def dropTable( self, table, verbose = 0 ):
+        if self.doesTableExist( table ):
+            sqlCmd = "DROP TABLE %s" % ( table )
+            self.execute( sqlCmd )
+            sqlCmd = 'DELETE FROM info_tables WHERE name = "%s"' % ( table )
+            self.execute( sqlCmd )
+            
+    ## Get a list with the fields
+    #                    
+    def getFieldList( self, table ):
+        lFields = []
+        sqlCmd = "PRAGMA table_info(%s)" % ( table )
+        self.execute( sqlCmd )
+        lResults = self.fetchall()
+        for res in lResults:
+            lFields.append( res[1] )
+        return lFields
+    
+    ## Delete this SQLite database (remove the db file)
+    #
+    def delete(self):
+        os.remove(self.host)
+        
+    ## Close the connection
+    #   
+    def close( self ):
+        self.db.close()
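A minimal usage sketch of DbSQLite, assuming a Python 2 interpreter (the module uses Python 2 syntax such as "except sqlite3.Error, e") and the commons package on the PYTHONPATH; the file and job names are arbitrary:

    from commons.core.sql.DbSQLite import DbSQLite

    iDb = DbSQLite("jobs.db")                       # opens (or creates) the SQLite file
    iDb.createTable("jobs", "job", overwrite=True)  # "job" triggers createJobTable()
    iDb.execute("INSERT INTO jobs (jobid, jobname, groupid, status) VALUES (?, ?, ?, ?)",
                (1, "chunk_01", "group_1", "waiting"))
    print(iDb.getSize("jobs"))                      # 1
    print(iDb.getFieldList("jobs"))                 # jobid, jobname, groupid, command, ...
    iDb.dropTable("jobs")
    iDb.close()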
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/ITableMapAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableMapAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,113 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for TableMapAdaptator 
+#
+class ITableMapAdaptator(object):
+  
+    ## Insert a map instance
+    #
+    # @param obj map or set
+    # @param delayed boolean must the insert be delayed 
+    #
+    # @warning old name was insAMap
+    #
+    def insert(self, obj, delayed=False):
+        pass
+
+        
+    ## Insert a list of Map or Set or Match instances
+    #
+    # @param l a list of object instances
+    # @param delayed boolean
+    #
+    # @warning old name was insMapList
+    #
+    def insertList(self, l, delayed = False):
+        pass
+    
+    ## Give a list of the distinct seqName/chr present in the table
+    #
+    # @return lDistinctContigNames string list
+    #
+    # @warning old name was getContig_name
+    #
+    def getSeqNameList(self):
+        pass
+    
+    
+    ## Give a list of Map instances having a given seq name
+    #
+    # @param seqName string seq name
+    # @return lMap list of instances
+    #
+    # @warning old name was get_MapList_from_contig
+    #
+    def getMapListFromSeqName(self, seqName):
+        pass
+    
+    
+    ## Return a list of Set instances from a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lSets list of Set instances
+    #
+    # @warning old name was getSetList_from_contig 
+    #
+    def getSetListFromSeqName( self, seqName ):
+        pass
+
+    
+    ## Give a map instances list overlapping a given region
+    #
+    # @param seqName string seq name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lMap list of map instances
+    #
+    # @warning old name was getMapList_from_qcoord
+    #
+    def getMapListOverlappingCoord(self, seqName, start, end):
+        pass
+    
+    
+    ## Return a list of Set instances overlapping a given region
+    #   
+    # @param seqName string sequence name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    # @warning old name was getSetList_from_qcoord
+    #
+    def getSetListOverlappingCoord( self, seqName, start, end ):
+        pass
+    
\ No newline at end of file
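Client code is meant to program against this interface rather than against the concrete TableMapAdaptator. A small sketch that uses only the window query declared above; the helper itself is hypothetical:

    def count_features_in_window(map_adaptator, seq_name, start, end):
        # any ITableMapAdaptator implementation can answer an overlap query
        lMaps = map_adaptator.getMapListOverlappingCoord(seq_name, start, end)
        return len(lMaps)

    # e.g. count_features_in_window(iTMA, "chr1", 1, 100000) with iTMA bound to a 'map' table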
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/ITableMatchAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableMatchAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,68 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for TableMatchAdaptator
+#
+class ITableMatchAdaptator(object):
+        
+    ## Give a list of Match instances given a query name
+    #
+    # @param query string sequence name
+    # @return lMatches list of Match instances
+    #
+    def getMatchListFromQuery( self, query ):
+        pass
+    
+    ## Give a list of Match instances having the same identifier
+    #
+    # @param id integer identifier number
+    # @return lMatch a list of Match instances
+    #
+    def getMatchListFromId( self, id ):
+        pass
+    
+    ## Insert a Match instance
+    #
+    # @param iMatch a Match instance
+    # @param delayed boolean
+    #
+    def insert(self, iMatch, delayed = False):
+        pass  
+        
+    ## Insert a list of Map or Set or Match instances
+    #
+    # @param l a list of object instances
+    # @param delayed boolean
+    #
+    # @warning old name was insMapList
+    #
+    def insertList(self, l, delayed = False):
+        pass
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/ITablePathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITablePathAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,429 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+## Interface for TablePathAdaptator\n+#\n+class ITablePathAdaptator (object):\n+\n+    ## Give the data contained in the table as a list of Path instances\n+    #\n+    # @return lPaths list of path instances\n+    #\n+    def getListOfAllPaths( self ):\n+        pass\n+    \n+    ## Give a list of Path instances having the same identifier\n+    #\n+    # @param id integer identifier number\n+    # @return lPath a list of Path instances\n+    #\n+    # @warning old name was getPathList_from_num\n+    #\n+    def getPathListFromId( self, id ):\n+        pass\n+\n+    ## Give a list of Path instances according to the given list of identifier numbers\n+    #\n+    # @param lId integer list \n+    # @return lPath a list of Path instances\n+    #\n+    # @warning old name was getPathList_from_numlist\n+    #\n+    def getPathListFromIdList( self, lId ):\n+        pass\n+        \n+    ## Give a list of Path instances having the same given query name\n+    #\n+    # @param query string name of the query \n+    # @return lPath a list of Path instances\n+    #\n+    # @warning old name was getPathList_from_query\n+    #\n+    def getPathListFromQuery( self, query ):\n+        pass\n+    \n+    ## Give a list with all the distinct identifiers corresponding to the query\n+    #\n+    # @param query string name of the query \n+    # @return lId a list of integer\n+    #\n+    # @warning old name was getPathList_from_query\n+    #\n+    def getIdListFromQuery( self, query ):\n+        pass\n+    \n+    ## Give a list with all the distinct identifiers corresponding to the subject\n+    #\n+    # @param subject string name of the subject \n+    # @return lId a list of integer\n+    #\n+    # @warning old name was getPathList_from_subject\n+    #\n+    def getIdListFromSubject( self, subject ):\n+        pass\n+    \n+    ## Insert a path instance\n+    #\n+    # @param 
obj a path instance\n+    # @param delayed boolean indicating if the insert must be delayed\n+    #\n+    # @note data are inserted such that the query is always on the direct strand\n+    #\n+    # @warning old name was insAPath\n+    #\n+    def insert(self, obj, delayed = False):\n+        pass\n+    \n+    ## Insert a list of Path instances\n+    #\n+    # @param l a list of Path instances\n+    # @param delayed boolean\n+    #\n+    # @warning old name was insPathList\n+    #\n+    def insertList(self, l, delayed = False):\n+        pass\n+    \n+    ## '..b'th_from_subject\n+    #  \n+    def getCumulLengthFromSubject( self, subjectName ):\n+        pass\n+    \n+    ## Give a list of the length of all chains of paths for a given subject name\n+    #\n+    # @param subjectName string  name of the subject\n+    # @return lChainLengths list of lengths per chain of paths\n+    # @warning doesn\'t take into account the overlaps !!\n+    # @warning old name was getListChainLength_from_subject\n+    #\n+    def getChainLengthListFromSubject( self, subjectName ):\n+        pass\n+\n+    ## Give a list of identity of all chains of paths for a given subject name\n+    #\n+    # @param subjectName string name of the subject\n+    # @return lChainIdentities list of identities per chain of paths\n+    # @warning doesn\'t take into account the overlaps !!\n+    # @warning old name was getListChainIdentity_from_subject\n+    # \n+    def getChainIdentityListFromSubject( self, subjectName ):\n+        pass\n+    \n+    ## Give a list of Path lists sorted by weighted identity.\n+    #\n+    # @param qry query name\n+    # @return lChains list of chains\n+    #\n+    def getListOfChainsSortedByAscIdentityFromQuery( self, qry ):\n+        pass\n+    \n+    ## Give a list of the length of all paths for a given subject name\n+    #\n+    # @param subjectName string name of the subject\n+    # @return lPathLengths list of lengths per path\n+    # @warning doesn\'t take into account the overlaps !!\n+    # @warning old name was getListPathLength_from_subject\n+    #\n+    def getPathLengthListFromSubject( self, subjectName ):\n+        pass\n+    \n+    ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains\n+    #    \n+    # @return lPathNums a list of paths Id\n+    #\n+    # @warning old name was getPathNumListSortedByDecreasingChainLengthFromSubject\n+    #\n+    def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ):\n+        pass\n+    \n+    ## Give a list of Set instance list from the path contained on a query name\n+    #\n+    # @param query string query name\n+    # @return lSet list of set instance \n+    #\n+    # @warning old name was getSetList_from_contig\n+    #\n+    def getSetListFromQuery(self, query):\n+        pass\n+    \n+    ## Delete path corresponding to a given identifier number\n+    #\n+    # @param id integer identifier number\n+    #\n+    # @warning old name was delPath_from_num\n+    #\n+    def deleteFromId(self,id):\n+        pass\n+    \n+    ## Delete path corresponding to a given list of identifier number\n+    #\n+    # @param lId list of identifier number\n+    #\n+    # @warning old name was delPath_from_numlist\n+    #\n+    def deleteFromIdList(self,lId):\n+        pass\n+\n+    ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2\n+    #\n+    # @param id1 integer path number\n+    # @param id2 integer path number\n+    # @return 
newId integer id used to join\n+    #\n+    # @warning old name was joinPath\n+    #\n+    def joinTwoPaths(self,id1,id2):\n+        pass\n+    \n+    ## Get a new id number\n+    #\n+    # @return newId integer new id\n+    #\n+    def getNewId(self):\n+        pass\n+    \n+    ## Test if table is empty\n+    #    \n+    def isEmpty( self ):\n+        pass\n+    \n+    ## Create a \'pathRange\' table from a \'path\' table. \n+    # The output table summarizes the information per identifier. \n+    # The min and max value are taken. \n+    # The identity is averaged over the fragments. \n+    # It may overwrite an existing table.\n+    #\n+    # @param outTable string name of the output table\n+    # @return outTable string Table which summarizes the information per identifier\n+    #\n+    def path2PathRange( self, outTable="" ):\n+        pass\n+    \n+    ## Return the number of times a given instance is present in the table\n+    # The identifier is not considered,\n+    # only coordinates, score, E-value and identity.\n+    #\n+    # @return nbOcc integer\n+    #\n+    def getNbOccurrences( self, iPath ):\n+        pass\n'
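The blob above declares, among many others, getCumulLengthFromSubject(), which by its documentation returns the cumulative matched length for a given subject. A small sketch built only on that declaration; the ranking helper is hypothetical:

    def rank_subjects_by_coverage(path_adaptator, lSubjectNames):
        # order subjects by their cumulative matched length, longest first
        return sorted(lSubjectNames, key=path_adaptator.getCumulLengthFromSubject, reverse=True)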
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/ITableSeqAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableSeqAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,63 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for TableSeqAdaptator
+#
+class ITableSeqAdaptator(object):
+
+    ## Retrieve all the distinct accession names in a list.
+    #
+    # @return lAccessions list of accessions
+    #
+    # @warning old name was getListAccession
+    #
+    def getAccessionsList( self ):
+        pass
+    
+    ## Save sequences in a fasta file from a list of accession names.
+    # 
+    # @param lAccessions list of accessions
+    # @param outFileName string Fasta file
+    #
+    # @warning old name saveListAccessionInFastaFile
+    #
+    def saveAccessionsListInFastaFile( self, lAccessions, outFileName ):
+        pass
+    
+    ## insert bioseq instance
+    #
+    # @param seq bioseq 
+    # @param delayed boolean must the insert be delayed 
+    # 
+    # @warning old name was insASeq
+    #
+    def insert(self, seq, delayed = False):
+        pass
\ No newline at end of file
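A sketch of the typical export path through this interface, using only the two accessors declared above; the wrapper function is hypothetical and the concrete implementation is TableSeqAdaptator:

    def dump_all_sequences(seq_adaptator, out_file_name):
        # fetch every accession stored in the seq table and write them to a FASTA file
        lAccessions = seq_adaptator.getAccessionsList()
        seq_adaptator.saveAccessionsListInFastaFile(lAccessions, out_file_name)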
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/ITableSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableSetAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,146 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+## Interface for TableSetAdaptator
+#
+class ITableSetAdaptator (object):
+    
+    ## Insert a set instance
+    #
+    # @param obj a set instance
+    # @param delayed boolean indicating if the insert must be delayed
+    #
+    # @warning old name was insASet
+    #
+    def insert(self, obj, delayed = False):
+        pass
+
+    ## Insert a list of Set instances
+    #
+    # @param l a list of object instances
+    # @param delayed boolean
+    #
+    # @warning old name was insSetList
+    #
+    def insertList(self, l, delayed = False):
+        pass
+    
+    ## Give a list of identifier numbers contained in the table
+    #
+    # @return l integer list
+    #
+    # @warning old name was getSet_num
+    #
+    def getIdList(self):
+        pass
+    
+    ## Give a list of Set instances having a given seq name
+    #
+    # @param seqName string seq name
+    # @return lSets list of instances
+    #
+    # @warning old name was get_SetList_from_contig
+    #
+    def getSetListFromSeqName(self, seqName):
+        pass
+        
+    ## Give a list of Set instances with a given identifier number
+    #
+    # @param id integer identifier number
+    # @return lSet list of set instances
+    #
+    # @warning old name was getSetList_from_num
+    #
+    def getSetListFromId(self, id):
+        pass
+    
+    ## Give a list of Set instances from a list of identifier numbers
+    #
+    # @param lId list of integer identifier numbers
+    # @return lSet list of set instances
+    #
+    # @warning old name was getSetList_from_numlist
+    #   
+    def getSetListFromIdList(self,lId):
+        pass
+    
+    ## Return a list of Set instances overlapping a given sequence
+    #   
+    # @param seqName string sequence name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    # @warning old name was getSetList_from_qcoord
+    #
+    def getSetListOverlappingCoord( self, seqName, start, end ):
+        pass
+    
+    ## Delete set corresponding to a given identifier number
+    #
+    # @param id integer identifier number
+    #
+    # @warning old name was delSet_from_num 
+    #  
+    def deleteFromId(self, id):
+        pass
+    
+    ## Delete the sets corresponding to a given list of identifier numbers
+    #
+    # @param lId list of integer identifier numbers
+    #  
+    # @warning old name was delSet_from_listnum 
+    #
+    def deleteFromIdList(self, lId):
+        pass
+    
+    ## Join two sets by changing the identifier of the id1 and id2 sets to the lesser of id1 and id2
+    #
+    # @param id1 integer set identifier number
+    # @param id2 integer set identifier number
+    #
+    # @warning old name was joinSet
+    #    
+    def joinTwoSets(self, id1, id2):
+        pass
+    
+    ## Get a new id number
+    #
+    # @return new_id integer max_id + 1 
+    #
+    def getNewId(self):
+        pass
+    
+    ## Give the data contained in the table as a list of Set instances
+    #
+    # @return lSets list of set instances
+    #
+    def getListOfAllSets( self ):
+        pass
\ No newline at end of file
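A small sketch of the overlap query described above (the table name, sequence name and coordinates are placeholders; Set objects expose id, name, getMin() and getMax() as used by the bin adaptators later in this changeset):

    from commons.core.sql.DbMySql import DbMySql
    from commons.core.sql.TableSetAdaptator import TableSetAdaptator

    iTSetA = TableSetAdaptator(DbMySql(), "my_set_table")            # placeholder table name
    # every Set on 'chr1' whose interval overlaps positions 10000..20000
    lSets = iTSetA.getSetListOverlappingCoord("chr1", 10000, 20000)
    for iSet in lSets:
        print iSet.id, iSet.name, iSet.getMin(), iSet.getMax()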
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/Job.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/Job.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,74 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+## Job information needed to launch a command on a cluster.
+#
+class Job(object):
+    
+    ## Constructor
+    #
+    #   @param jobid the job identifier
+    #   @param jobname the job name
+    #   @param groupid the group identifier to record related job series 
+    #   @param queue queue name of the job manager
+    #   @param command command launched
+    #   @param node cluster node name where the execution takes place
+    #   @param launcherFile file name launched as job
+    #   @param lResources resources (memory, time, ...); must conform to SGE/Torque syntax
+    #   @param parallelEnvironment name of the SGE parallel environment, passed to qsub via '-pe' (empty string means none)
+    #
+    def __init__(self, jobid=0, jobname="", groupid="", queue="", command="", launcherFile="",\
+                  node="", lResources=["mem_free=1G"], parallelEnvironment="" ):
+        if str(jobid).isdigit():
+            self.jobid = int(jobid)
+            self.jobname = jobname
+        else:
+            self.jobname = jobid
+            self.jobid = 0
+        self.groupid = groupid
+        self.setQueue(queue)
+        self.command = command
+        self.launcher = launcherFile
+        self.node = node
+        self.lResources = lResources
+        self.parallelEnvironment = parallelEnvironment
+        
+    def setQueue(self, queue):
+        self.queue = ""
+        if queue != "none":
+            self.queue = queue
+    
+    def __eq__(self, o):
+        if self.jobid == o.jobid and self.jobname == o.jobname\
+         and self.groupid == o.groupid and self.queue == o.queue and self.command == o.command \
+         and self.launcher == o.launcher and self.node == o.node and self.lResources == o.lResources \
+         and self.parallelEnvironment == o.parallelEnvironment:
+            return True
+        return False
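For reference, a minimal construction sketch following the signature above (all values are placeholders):

    from commons.core.sql.Job import Job

    job = Job(jobname="Blaster_chunk1",
              groupid="Blaster_group1",
              queue="all.q",
              command="blaster ...",
              launcherFile="launcher_chunk1.py",
              lResources=["mem_free=2G"])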
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/JobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/JobAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,271 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import time\n+import sys\n+import tempfile\n+import subprocess\n+from commons.core.sql.Job import Job\n+\n+## Methods for Job persistence \n+#\n+class JobAdaptator(object):\n+    \n+    def __init__(self, lJob = [], table = "" ):\n+        self._lJobID = lJob\n+        self._table = table\n+        self._acronym = ""\n+    ## Record a job\n+    #\n+    # @param job Job instance with the job informations\n+    #\n+    def recordJob(self, job):\n+        self._lJobID.append(job)\n+    \n+    ## Remove a job from the job table\n+    #\n+    #  @param job: job instance to remove\n+    #\n+    def removeJob(self, job):\n+        pass         \n+            \n+    ## Set the jobid of a job with the id of SGE\n+    #\n+    # @param job job instance\n+    # @param jobid integer\n+    #\n+    def updateJobIdInDB(self, job, jobid):\n+        pass\n+        \n+    ## Get a job status\n+    #\n+    # @param job: a Job instance with the job informations\n+    #\n+    def getJobStatus(self, job):\n+        pass\n+    \n+    \n+    ## Change a job status\n+    #\n+    # @param job: a Job instance with the job informations\n+    # @param status: the new status (waiting,finished,error)\n+    #\n+    def changeJobStatus(self, job, status):\n+        pass\n+        \n+    ## Get the number of jobs belonging to the desired groupid with the desired status.\n+    #\n+    # @param groupid string a group identifier to record related job series \n+    # @param status string job status (waiting, running, finished, error)\n+    # @return int\n+    #\n+    def getCountStatus(self, groupid, status):\n+        pass\n+        \n+    ## Clean all job from a job group\n+    #\n+    # @param groupid: a group identifier to record related job series\n+    #\n+    def cleanJobGroup(self, groupid):\n+        pass            \n+            \n+    ## Check if there is unfinished job 
from a job group.\n+    #\n+    # @param groupid string a group identifier to record related job series \n+    #        \n+    def hasUnfinishedJob(self, groupid):\n+        pass\n+\n+    def _getJobIDListFromQstat(self):\n+        lJobIDFromQstat = []\n+        tmp = tempfile.NamedTemporaryFile(delete=False)\n+        cmd ="qstat | grep %s" % self._acronym\n+        process = subprocess.Popen(cmd, shell=True,stdout=tmp)\n+        process.communicate()\n+        tmp.close()\n+        if process.returncode == 0:\n+            fileName = tmp.name\n+            jo'..b'ault = 0)\n+    #               \n+    def submitJob(self, job, verbose=0, maxNbWaitingJobs=10000, checkInterval=30):\n+        cmd = self._getQsubCommand(job)\n+        tmp = tempfile.NamedTemporaryFile(delete=False)\n+        process = subprocess.Popen(cmd, shell=True,stdout=tmp)\n+        process.communicate()\n+        tmp.close()\n+        if process.returncode == 0:\n+            fileName = tmp.name\n+            jobidFileHandler = open(fileName, "r")\n+            jobid = self._getJobidFromJobManager(jobidFileHandler)\n+            if verbose > 0:\n+                print "job \'%i %s\' submitted" % (jobid, job.jobname)\n+                sys.stdout.flush()\n+            job.jobid = jobid\n+            #newJob= Job(job.jobid, job.jobname, job.groupid, job.queue, job.command, job.launcher, job.node, job.lResources, job.parallelEnvironment)\n+            self._acronym = job.jobname.split("_")[0][:10]\n+            self.recordJob(job.jobid)\n+            jobidFileHandler.close()\n+            os.remove(fileName)\n+        return process.returncode\n+\n+\n+    ## Get the list of nodes where jobs of one group were executed\n+    #\n+    # @param groupid string a group identifier of job series \n+    # @return lNodes list of nodes names without redundancy\n+    #\n+    def getNodesListByGroupId(self, groupId):\n+        pass\n+    \n+    def checkJobTable(self):\n+        pass\n+    \n+    def close(self):\n+        pass\n+    \n+    def _getJobidAndNbJob(self, jobid) :\n+        tab = jobid.split(".")\n+        jobid = tab[0]\n+        tab = tab[1].split(":")\n+        nbJob = tab[0]\n+        return jobid, nbJob\n+    \n+class JobAdaptatorSGE(JobAdaptator):\n+\n+   ## Check if a job is still handled by SGE\n+    #\n+    # @param jobid string job identifier\n+    # @param jobname string job name\n+    #  \n+    def isJobStillHandledBySge(self, jobid, jobname):\n+        isJobInQstat = False\n+        tmp = tempfile.NamedTemporaryFile(delete=False)\n+        cmd = "qstat"\n+        process = subprocess.Popen(cmd, shell=True,stdout=tmp)\n+        process.communicate()\n+        tmp.close()\n+        qstatFile = tmp.name\n+        if process.returncode  != 0:\n+            msg = "ERROR while launching \'qstat\'"\n+            sys.stderr.write( "%s\\n" % msg )\n+            sys.exit(1)\n+        qstatFileHandler = open(qstatFile, "r")\n+        lLines = qstatFileHandler.readlines()\n+        for line in lLines:\n+            tokens = line.split()\n+            if len(tokens) > 3 and tokens[0] == str(jobid) and tokens[2] == jobname[0:len(tokens[2])]:\n+                isJobInQstat = True\n+                break\n+        qstatFileHandler.close()\n+        os.remove(qstatFile)\n+        return isJobInQstat\n+    \n+    def _getQsubCommand(self, job):    \n+        cmd = "echo \'%s\' | " % job.launcher\n+        prg = "qsub"\n+        cmd += prg\n+        cmd += " -V"\n+        cmd += " -N %s" % job.jobname\n+        if job.queue != 
"":\n+            cmd += " -q %s" % job.queue\n+        cmd += " -cwd"\n+        if job.lResources != []:\n+            cmd += " -l \\""\n+            cmd += " ".join(job.lResources)\n+            cmd += "\\""\n+        if job.parallelEnvironment != "":\n+            cmd += " -pe " + job.parallelEnvironment\n+        return cmd\n+    \n+    def _getJobidFromJobManager(self, jobidFileHandler):\n+        return int(jobidFileHandler.readline().split(" ")[2])\n+    \n+\n+class JobAdaptatorTorque(JobAdaptator):  \n+        \n+    def _getQsubCommand(self, job):    \n+        cmd = "echo \'%s\' | " % job.launcher\n+        prg = "qsub"\n+        cmd += prg\n+        cmd += " -V"\n+        cmd += " -d %s" % os.getcwd()\n+        cmd += " -N %s" % job.jobname\n+        if job.queue != "":\n+            cmd += " -q %s" % job.queue\n+        if job.lResources != []:\n+            cmd += " -l \\""\n+            cmd += " ".join(job.lResources).replace("mem_free","mem")\n+            cmd += "\\""\n+        return cmd\n+\n+    def _getJobidFromJobManager(self, jobidFileHandler):\n+        return int(jobidFileHandler.readline().split(".")[0])\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/OldRepetDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/OldRepetDB.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,27 @@
+import pyRepet.sql.RepetDBMySQL
+
+
+class RepetDB ( pyRepet.sql.RepetDBMySQL.RepetDB ):
+    
+    #TODO: try 
+    def execute( self, qry, params=None ):
+        if params == None:
+            self.cursor.execute( qry )
+        else:
+            self.cursor.execute( qry, params )
+            
+            
+    ## Record a new table in the 'info_tables' table
+    #
+    # @param tablename table name
+    # @param info information on the origin of the table
+    # 
+    def updateInfoTable( self, tablename, info ):
+        self.execute( """SHOW TABLES""" )
+        results = self.fetchall()
+        if ("info_tables",) not in results:
+            sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+            self.execute( sqlCmd )
+        qryParams = "INSERT INTO info_tables VALUES (%s, %s)"
+        params = ( tablename, info )
+        self.execute( qryParams,params )
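A usage sketch for updateInfoTable (db stands for a connected RepetDB instance; the table name and provenance string are placeholders):

    db.updateInfoTable("blaster_path", "output of blaster on chunk_1.fa")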
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/RepetJob.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/RepetJob.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,252 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import time\n+import sys\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+\n+#TODO: to remove... 
=> replace all RepetJob() by TableJobAdaptator()...\n+## Methods for Job persistence \n+#\n+class RepetJob( DbMySql ):\n+        \n+        \n+    ## Record a job\n+    #\n+    # @param job Job instance with the job informations\n+    #\n+    def recordJob( self, job ):\n+        self.removeJob( job )\n+        sqlCmd = "INSERT INTO %s" % ( job.tablename )\n+        sqlCmd += " VALUES ("\n+        sqlCmd += " \\"%s\\"," % ( job.jobid )\n+        sqlCmd += " \\"%s\\"," % ( job.jobname )\n+        sqlCmd += " \\"%s\\"," % ( job.groupid )\n+        sqlCmd += " \\"%s\\"," % ( job.command.replace("\\"","\\\'") )\n+        sqlCmd += " \\"%s\\"," % ( job.launcher )\n+        sqlCmd += " \\"%s\\"," % ( job.queue )\n+        sqlCmd += " \\"waiting\\","\n+        sqlCmd += " \\"%s\\"," % ( time.strftime( "%Y-%m-%d %H:%M:%S" ) )\n+        sqlCmd += " \\"?\\" );"\n+        self.execute( sqlCmd )\n+        \n+        \n+    ## Remove a job from the job table\n+    #\n+    #  @param job: job instance to remove\n+    #\n+    def removeJob( self, job ):\n+        qry = "DELETE FROM %s" % ( job.tablename )\n+        qry += " WHERE groupid=\'%s\'" % ( job.groupid )\n+        qry += " AND jobname=\'%s\'" % ( job.jobname )\n+        qry += " AND queue=\'%s\';" % ( job.queue )\n+        self.execute( qry )\n+            \n+            \n+    ## Set the jobid of a job with the id of SGE\n+    #\n+    # @param job job instance\n+    # @param jobid integer\n+    #\n+    def setJobIdFromSge( self, job, jobid ):\n+        qry = "UPDATE %s" % ( job.tablename )\n+        qry += " SET jobid=\'%i\'" % ( int(jobid) )\n+        qry += " WHERE jobname=\'%s\'" % ( job.jobname )\n+        qry += " AND groupid=\'%s\'" % ( job.groupid )\n+        qry += " AND queue=\'%s\';" % ( job.queue )\n+        self.execute( qry )\n+        \n+        \n+    ## Get a job status\n+    #\n+    # @param job: a Job instance with the job informations\n+    #\n+    def getJobStatus( self, job ):\n+        if job.jobid != 0 and job.jobname == "":\n+            job.jobname = job.jobid\n+            job.jobid = 0\n+        qry = "SELECT status FROM %s" % ( job.tablename )\n+        qry += " WHERE groupid=\'%s\'" % ( job.groupid )\n+        qry += " AND jobname=\'%s\'" % ( job.jobname )\n+        qry += " '..b' table name to record the jobs\n+    # @param groupid string a group identifier to record related job series \n+    #        \n+    def hasUnfinishedJob( self, tablename, groupid ):\n+        if not self.doesTableExist( tablename ):\n+            return False\n+        qry = "SELECT * FROM %s" % ( tablename )\n+        qry += " WHERE groupid=\'%s\'" % ( groupid )\n+        qry += " and status!=\'finished\';" \n+        self.execute( qry )\n+        res = self.fetchall()\n+        if len(res) == 0:\n+            return False\n+        return True\n+    \n+         \n+    ## Check if a job is still handled by SGE\n+    #\n+    # @param jobid string job identifier\n+    # @param jobname string job name\n+    #  \n+    def isJobStillHandledBySge( self, jobid, jobname ):\n+        isJobInQstat = False\n+        qstatFile = "qstat_stdout"\n+        cmd = "qstat > %s" % ( qstatFile )\n+        returnStatus = os.system( cmd )\n+        if returnStatus != 0:\n+            msg = "ERROR while launching \'qstat\'"\n+            sys.stderr.write( "%s\\n" % msg )\n+            sys.exit(1)\n+        qstatFileHandler = open( qstatFile, "r" )\n+        lLines = qstatFileHandler.readlines()\n+        for line in lLines:\n+            tokens = line.split()\n+  
          if len(tokens) > 3 and tokens[0] == str(jobid) and tokens[2] == jobname[0:len(tokens[2])]:\n+                isJobInQstat = True\n+                break\n+        qstatFileHandler.close()\n+        os.remove( qstatFile )\n+        return isJobInQstat\n+    \n+    \n+    ## Wait job finished status from a job group.\n+    #  Job are re-launched if error (max. 3 times)\n+    #\n+    # @param tableName string table name to record the jobs\n+    # @param groupid string a group identifier to record related job series\n+    # @param checkInterval integer time laps in seconds between two checks (default = 5)\n+    # @param maxRelaunch integer max nb of times a job in error is relaunch before exiting (default = 3)\n+    # @param exitIfTooManyErrors boolean exit if a job is still in error above maxRelaunch (default = True)\n+    # @param timeOutPerJob integer max nb of seconds after which one tests if a job is still in SGE or not (default = 60*60=1h)\n+    #\n+    def waitJobGroup(self, tableName, groupid, checkInterval=5, maxRelaunch=3, exitIfTooManyErrors=True, timeOutPerJob=60*60):\n+        iTJA = TableJobAdaptatorFactory.createInstance(self, tableName)\n+        iTJA.waitJobGroup(groupid, checkInterval, maxRelaunch, exitIfTooManyErrors, timeOutPerJob)\n+                        \n+    ## Submit a job to a queue and record it in job table.\n+    #\n+    # @param job a job instance\n+    # @param maxNbWaitingJobs integer max nb of waiting jobs before submitting a new one (default = 10000)\n+    # @param checkInterval integer time laps in seconds between two checks (default = 30)\n+    # @param verbose integer (default = 0)\n+    #               \n+    def submitJob( self, job, verbose=0, maxNbWaitingJobs=10000, checkInterval=30 ):\n+        iTJA = TableJobAdaptatorFactory.createInstance(self, job.tablename)\n+        return iTJA.submitJob(job, verbose, maxNbWaitingJobs, checkInterval)\n+                        \n+        \n+    ## Get the list of nodes where jobs of one group were executed\n+    #\n+    # @param tablename string table name where jobs are recored   \n+    # @param groupid string a group identifier of job series \n+    # @return lNodes list of nodes names\n+    #\n+    def getNodesListByGroupId( self, tableName, groupId ):\n+        qry = "SELECT node FROM %s" % tableName\n+        qry += " WHERE groupid=\'%s\'" % groupId\n+        self.execute( qry )\n+        res = self.fetchall()\n+        lNodes = []\n+        for resTuple in res:\n+            lNodes.append(resTuple[0])\n+        return lNodes\n+    \n+    def getDbName(self):\n+        return "DbMySql"\n+    \n+    def _getJobidAndNbJob(self, jobid) :\n+        tab = []\n+        tab = jobid.split(".")\n+        jobid = tab[0]\n+        tab = tab[1].split(":")\n+        nbJob = tab[0]\n+        return jobid, nbJob\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,128 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Abstract class, Ancestor of Table*Adaptator
+#
+class TableAdaptator( object ):
+    
+    ## Constructor
+    #
+    # @param iDb DbMySql instance
+    # @param table str table name
+    #
+    def __init__( self, iDb = None, table = "" ):
+        self._iDb = iDb
+        self._table = table
+        
+    ## Set connector to database
+    #
+    # @param iDb database instance
+    #
+    def setDbConnector( self, iDb ):
+        self._iDb = iDb
+        
+    ## Set table
+    #
+    # @param table string table name
+    #
+    def setTable( self, table ):
+        self._table = table
+    
+    ## Return the table name
+    #
+    def getTable( self ):
+        return self._table
+        
+    ## Return the number of rows in the table
+    #
+    def getSize( self ):
+        return self._iDb.getSize( self._table )
+    
+    ## Test if table is empty
+    #    
+    def isEmpty( self ):
+        return self._iDb.isEmpty( self._table )
+    
+    ## Insert a Map, Set, Match, Path or Seq instance
+    #
+    # @param obj a Map or Set or Match or Path or Seq instance
+    # @param delayed boolean
+    #
+    def insert(self, obj, delayed = False):
+        if obj.isEmpty():
+            return
+        self._escapeAntislash(obj)
+        sql_cmd = self._genSqlCmdForInsert(obj, delayed)
+        self._iDb.execute(sql_cmd)
+    
+    ## Insert a list of Map or Set or Match or Path instances
+    #
+    # @param l a list of object instances
+    # @param delayed boolean
+    #
+    def insertList(self, l, delayed = False):
+        for i in l:
+            self.insert(i, delayed)
+            
+    ## Give the data contained in the table as a list of coord object instances
+    #
+    # @return lObject list of coord object instances
+    #
+    def getListOfAllCoordObject( self ):
+        sqlCmd = "SELECT * FROM %s" % ( self._table )
+        lObjs = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lObjs
+    
+    ## Generate the SQL command used by the insert() method
+    #  
+    # @param obj Map, Set or Match instance
+    # @param delayed boolean
+    # @return sqlCmd string generated sql command
+    #
+    def _genSqlCmdForInsert(self, obj, delayed):
+        sqlCmd = 'INSERT '
+        if delayed :
+            sqlCmd += ' DELAYED '
+        type2Insert, attr2Insert = self._getTypeAndAttr2Insert(obj)
+        sqlCmd +=  'INTO %s VALUES (' % (self._table) 
+        sqlCmd +=  ",".join(type2Insert)
+        sqlCmd += ")" 
+        sqlCmd = sqlCmd % attr2Insert
+        return sqlCmd
+   
+    def _getTypeAndAttr2Insert(self, obj):
+        pass
+    
+    def _getInstanceToAdapt(self):
+        pass
+    
+    def _escapeAntislash(self, obj):
+        pass
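The insert machinery above relies on each concrete adaptator providing the SQL placeholders and matching values; the following illustrative subclass (not the shipped TableMapAdaptator) shows the contract _genSqlCmdForInsert expects, with column names borrowed from the Map objects mentioned in the comments:

    from commons.core.sql.TableAdaptator import TableAdaptator

    class DemoMapAdaptator(TableAdaptator):
        def _getTypeAndAttr2Insert(self, iMap):
            type2Insert = ('"%s"', '"%s"', '%d', '%d')                     # one placeholder per column
            attr2Insert = (iMap.name, iMap.seqname, iMap.start, iMap.end)  # matching values
            return type2Insert, attr2Insert

    # With delayed=False, _genSqlCmdForInsert would then yield, for instance:
    #   INSERT INTO demo_map VALUES ("reads_1","chr1",100,200)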
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableBinPathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableBinPathAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,257 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.coord.Range import getIdx\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.PathUtils import PathUtils\n+\n+## Bin Adaptator for a path table.\n+#\n+class TableBinPathAdaptator(TablePathAdaptator):\n+\n+    \n+    ## Constructor\n+    #\n+    # @param db db instance\n+    # @param tableName string table name (default = "")\n+    #\n+    def __init__(self, db, tableName = ""):\n+        TablePathAdaptator.__init__(self, db, tableName)\n+        self._table_idx = "%s_idx" % (self._table)\n+            \n+    ## Insert a path instance\n+    #\n+    # @param path a path instance\n+    # @param delayed boolean indicating if the insert must be delayed (default = false) \n+    #        \n+    def insert( self, path, delayed = False ):\n+        TablePathAdaptator.insert(self, path, delayed)\n+        self._escapeAntislash(path)\n+        idx = path.range_query.findIdx()\n+        max = path.range_query.getMax()\n+        min = path.range_query.getMin()\n+        strand = path.range_query.isOnDirectStrand()\n+        if delayed:\n+            sql_cmd = \'INSERT DELAYED INTO %s VALUES (%d,%d,"%s",%d,%d,%d)\'\\\n+                 % (self._table_idx,\\\n+                   path.id,\\\n+                   idx,\\\n+                   path.range_query.seqname,\\\n+                   min,\\\n+                   max,\\\n+                   strand)\n+        else:\n+            sql_cmd = \'INSERT INTO %s VALUES (%d,%d,"%s",%d,%d,%d)\'\\\n+                 % (self._table_idx,\\\n+                   path.id,\\\n+                   idx,\\\n+                   path.range_query.seqname,\\\n+                   min,\\\n+                   max,\\\n+                   strand)\n+            \n+        self._iDb.execute(sql_cmd)\n+    \n+    ## Return a path instances list included in a given region using the bin scheme\n+   
 #\n+    # @param contig string contig name\n+    # @param start integer start coordinate\n+    # @param end integer end coordinate\n+    # @return lOutPath a path instances list\n+    #\n+    def getPathListIncludedInQueryCoord(self, contig, start, end):\n+        min_coord = min(start, end)\n+        max_coord = max(start, end)\n+        lpath = self.getChainListOverlappingQueryCoord(contig, start, end)\n+        lOutPath = []\n+        for i in lpath:\n+            if i.range_query.getMin() > min_coord and \\\n+               i.range_query.getMax() < max_'..b'               \n+        sql_cmd += ") and min<=%d and max>=%d;" % (max_coord, min_coord)\n+\n+        \n+        self._iDb.execute(sql_cmd)\n+        res = self._iDb.fetchall()\n+        lnum = []\n+        for i in res:\n+            lnum.append( int(i[0]) )\n+        lpath = self.getPathListFromIdList(lnum)\n+        return lpath\n+\n+    ## Delete path corresponding to a given identifier number\n+    #\n+    # @param num integer identifier number\n+    #\n+    def deleteFromId(self, num):\n+        TablePathAdaptator.deleteFromId(self, num)\n+        sqlCmd=\'delete from %s where path=%d;\' % (self._table_idx, num)\n+        self._iDb.execute(sqlCmd)\n+    \n+    ## Delete path corresponding to a given list of identifier number\n+    #\n+    # @param lNum list list of integer identifier number\n+    #\n+    def deleteFromIdList(self, lNum):\n+        if lNum == []:\n+            return\n+        TablePathAdaptator.deleteFromIdList(self, lNum)\n+        sqlCmd = \'delete from %s where path=%d\' % (self._table_idx, lNum[0])\n+        for i in lNum[1:]:\n+            sqlCmd += " or path=%d" % (i)\n+        sqlCmd += ";"\n+        self._iDb.execute(sqlCmd)\n+             \n+    ##  Join two path by changing id number of id1 and id2 path to the least of id1 and id2\n+    #\n+    # @param id1 integer id path number\n+    # @param id2 integer id path number\n+    # @return newId integer minimum of id1 id2\n+    # @note this method modify the ID even if this one not existing in the path table  \n+    #     \n+    def joinTwoPaths(self, id1, id2):\n+        TablePathAdaptator.joinTwoPaths(self, id1, id2)\n+        if id1 < id2:\n+            newId = id1\n+            oldId = id2\n+        else:\n+            newId = id2\n+            oldId = id1\n+        sqlCmd = \'UPDATE %s SET path=%d WHERE path=%d\' % (self._table_idx, newId, oldId)\n+        self._iDb.execute(sqlCmd)\n+        return newId\n+    \n+    ## Get a new id number\n+    #\n+    # @return newId integer max Id in path table + 1\n+    #\n+    def getNewId(self):\n+        sqlCmd = \'select max(path) from %s;\' % (self._table_idx)\n+        self._iDb.execute(sqlCmd)\n+        maxId = self._iDb.fetchall()[0][0]\n+        if maxId == None:\n+            maxId = 0\n+        newId = int(maxId) + 1\n+        return newId\n+    \n+    ## Give a list of Set instances included in a given region\n+    #\n+    # @param query string query name\n+    # @param start integer start coordinate\n+    # @param end integer end coordinate\n+    # @return lSet list of Set instances\n+    #\n+    def getSetListIncludedInQueryCoord(self, query, start, end):\n+        lPath=self.getPathListIncludedInQueryCoord(query, start, end)\n+        lSet = PathUtils.getSetListFromQueries(lPath) \n+        return lSet\n+    \n+    ## Give a list of Set instances overlapping a given region\n+    #\n+    # @param query string query name\n+    # @param start integer start coordinate\n+    # @param end integer 
end coordinate\n+    # @return lSet list of Set instances\n+    #\n+    def getSetListOverlappingQueryCoord(self, query, start, end):\n+        lPath = self.getPathListOverlappingQueryCoord(query, start, end)\n+        lSet = PathUtils.getSetListFromQueries(lPath)\n+        return lSet\n+    \n+    ## Give a list of identifiers contained in the table\n+    #\n+    # @return lId integer list\n+    #\n+    def getIdList(self):\n+        sqlCmd = "SELECT DISTINCT path from %s;" % (self._table_idx)\n+        lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )\n+        return lId\n+        \n+    ## Give a list of the distinct query names present in the table\n+    #\n+    # @return lDistinctQueryNames string list\n+    #\n+    def getQueryList(self):\n+        lDistinctQueryNames = self._getDistinctTypeNamesList("query")\n+        return lDistinctQueryNames\n+    \n+    def _getDistinctTypeNamesList( self, type ):\n+        sqlCmd = "SELECT DISTINCT contig FROM %s" % ( self._table_idx )\n+        lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)\n+        return lDistinctTypeNames\n\\ No newline at end of file\n'
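A short sketch of the difference between the two query flavours above (the database handle, table name, sequence name and coordinates are placeholders):

    from commons.core.sql.TableBinPathAdaptator import TableBinPathAdaptator

    iTBPA = TableBinPathAdaptator(iDb, "my_path_table")   # iDb: a DbMySql handle (placeholder)
    # 'included' keeps only paths whose query range lies strictly inside [start, end];
    # the overlapping variant also returns chains that merely cross the boundaries.
    lIncluded = iTBPA.getPathListIncludedInQueryCoord("chr1", 1, 50000)
    lChains = iTBPA.getChainListOverlappingQueryCoord("chr1", 1, 50000)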
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableBinSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableBinSetAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,265 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+from commons.core.coord.SetUtils import SetUtils\n+\n+## Adaptator for Set tables with bin indexes\n+#\n+class TableBinSetAdaptator(TableSetAdaptator):\n+   \n+    ## constructor\n+    #\n+    # @param iDb DbMySql instance instance of DbMySql\n+    # @param tableName string table name (default = "")\n+    #\n+    def __init__(self, iDb, tableName = ""):\n+        TableSetAdaptator.__init__(self, iDb, tableName)\n+        self._table_idx = "%s_idx" % (self._table)\n+        \n+    ## Insert a set instance in a set bin table\n+    # \n+    # @param iSet set instance an instance of set object\n+    # @param delayed boolean an insert delayed or not\n+    #\n+    def insASetInSetAndBinTable(self, iSet, delayed = False):\n+        self.insert(iSet, delayed)\n+        iSet.seqname = iSet.seqname.replace("\\\\", "\\\\\\\\")\n+        iSet.name = iSet.name.replace("\\\\", "\\\\\\\\")\n+        bin = iSet.getBin()\n+        max = iSet.getMax()\n+        min = iSet.getMin()\n+        strand = iSet.isOnDirectStrand()\n+        sql_prefix = \'\'\n+        if delayed:\n+            sql_prefix = \'INSERT DELAYED INTO \'\n+        else:\n+            sql_prefix = \'INSERT INTO \'\n+        sql_cmd = sql_prefix + \'%s VALUES (%d,%f,"%s",%d,%d,%d)\'\\\n+                 %(self._table_idx,\\\n+                   iSet.id,\\\n+                   bin,\\\n+                   iSet.seqname,\\\n+                   min,\\\n+                   max,\\\n+                   strand)\n+        self._iDb.execute(sql_cmd)\n+\n+    ## Delete set corresponding to a given identifier number in set and bin set table\n+    # @param id integer identifier number\n+    # @note old name was delSet_from_num\n+    #\n+    def deleteFromIdFromSetAndBinTable(self, id):\n+        self.deleteFromId(id)\n+        sql_cmd = \'delete from %s 
where path=%d\' % (self._table_idx, id)\n+        self._iDb.execute(sql_cmd)\n+\n+    ## Delete path corresponding to a given list of identifier number\n+    #\n+    # @param lId integer list list of identifier number\n+    # @note old name was delSet_from_listnum\n+    #\n+    def deleteFromListIdFromSetAndBinTable(self, lId):\n+        if lId != []:\n+            self.deleteFromIdList(lId)\n+            sql_cmd = \'delete from %s where path=%d\' % (self._table_idx, lId[0])\n+            for i in lId[1:]:\n+                sql_cmd += " or path=%d" % (i)\n+            self.'..b"has been changed : I added the two first lines\n+    #\n+    def getSetListStrictlyIncludedInQueryCoord(self, contig, start, end):\n+        min_coord = min(start,end)\n+        max_coord = max(start,end)\n+        lSet = self.getSetListFromQueryCoord(contig, start, end)       \n+        lSetStrictlyIncluded = []\n+        for iSet in lSet:\n+            if iSet.getMin() > min_coord and \\\n+               iSet.getMax() < max_coord:\n+                lSetStrictlyIncluded.append(iSet)\n+                            \n+        return lSetStrictlyIncluded\n+    \n+    ## Get a list of the identifier Id contained in the table bin\n+    #\n+    # @return lId list of int list of identifier\n+    # @note old name was getSet_num\n+    #\n+    def getIdList(self):\n+        sql_cmd = 'select distinct path from %s;' % (self._table_idx)\n+        self._iDb.execute(sql_cmd)\n+        res = self._iDb.fetchall()\n+        lId = []\n+        for t in res:\n+            lId.append(int(t[0]))\n+        return lId\n+    \n+    ## Get a list of the query sequence name contained in the table bin\n+    #\n+    # @return lSeqName list of string list of query sequence name\n+    # @note old name was getContig_name\n+    #\n+    def getSeqNameList(self):\n+        sql_cmd = 'select distinct contig from %s;' % (self._table_idx)\n+        self._iDb.execute(sql_cmd)\n+        res = self._iDb.fetchall()\n+        lSeqName = []\n+        for t in res:\n+            lSeqName.append(t[0])\n+        return lSeqName\n+    \n+    ## Insert a Set list with the same new identifier in the table bin and set\n+    #\n+    # @note old name was insAddSetList\n+    #\n+    def insertListInSetAndBinTable(self, lSets, delayed = False):\n+        id = self.getNewId()\n+        SetUtils.changeIdInList( lSets, id )\n+        for iSet in lSets:\n+            self.insASetInSetAndBinTable(iSet, delayed)\n+    \n+    ## Insert a set list instances In table Bin and Set and merge all overlapping sets\n+    #\n+    # @param lSets reference seq name\n+    # @note old name was insMergeSetList\n+    #    \n+    def insertListInSetAndBinTableAndMergeAllSets(self, lSets):\n+        min, max = SetUtils.getListBoundaries(lSets)\n+        oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)\n+        oldQueryhash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)\n+        qhash = SetUtils.getDictOfListsWithIdAsKey(lSets)\n+        for lNewSetById in qhash.values():\n+            found = False\n+            for currentId, oldLsetById in oldQueryhash.items():\n+                if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, oldLsetById):\n+                    oldLsetById.extend(lNewSetById)\n+                    oldLsetById = SetUtils.mergeSetsInList(oldLsetById)\n+                    self.deleteFromIdFromSetAndBinTable(currentId)\n+                    found = True\n+            if not found:\n+                self.insertListInSetAndBinTable(lNewSetById)\n+     
       else:\n+                id = self.getNewId()\n+                SetUtils.changeIdInList(oldLsetById, id)\n+                self.insertListInSetAndBinTable(oldLsetById)\n+                \n+    ## Insert a set list instances In table Bin and Set after removing all overlaps between database and lSets\n+    #\n+    # @param lSets reference seq name\n+    # @note old name was insDiffSetList\n+    #    \n+    def insertListInSetAndBinTableAndRemoveOverlaps(self, lSets):\n+        min, max = SetUtils.getListBoundaries(lSets)\n+        oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)\n+        oldQueryHash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)\n+        newQueryHash = SetUtils.getDictOfListsWithIdAsKey(lSets)\n+        for lNewSetById in newQueryHash.values():\n+            for lOldSetById in oldQueryHash.values():\n+                if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, lOldSetById):\n+                    lNewSetById = SetUtils.getListOfSetWithoutOverlappingBetweenTwoListOfSet(lOldSetById, lNewSetById)\n+            self.insertListInSetAndBinTable(lNewSetById)\n"
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableJobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableJobAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,405 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import time\n+import datetime\n+import sys\n+from commons.core.sql.Job import Job \n+from commons.core.sql.TableAdaptator import TableAdaptator\n+\n+## Methods for Job persistence \n+#\n+class TableJobAdaptator(TableAdaptator):\n+        \n+    ## Record a job\n+    #\n+    # @param job Job instance with the job informations\n+    #\n+    def recordJob(self, job):\n+        self.removeJob(job)\n+        sqlCmd = "INSERT INTO %s" % self._table\n+        sqlCmd += " VALUES ("\n+        sqlCmd += " \\"%s\\"," % job.jobid\n+        sqlCmd += " \\"%s\\"," % job.jobname\n+        sqlCmd += " \\"%s\\"," % job.groupid\n+        sqlCmd += " \\"%s\\"," % job.launcher\n+        sqlCmd += " \\"%s\\"," % job.queue\n+        sqlCmd += " \\"%s\\"," % job.lResources\n+        sqlCmd += " \\"waiting\\","\n+        sqlCmd += " \\"%s\\"," % time.strftime("%Y-%m-%d %H:%M:%S")\n+        sqlCmd += " \\"?\\" );"\n+        self._iDb.execute(sqlCmd)\n+        \n+       \n+    ## Remove a job from the job table\n+    #\n+    #  @param job: job instance to remove\n+    #\n+    def removeJob(self, job):\n+        qry = "DELETE FROM %s" % self._table\n+        qry += " WHERE groupid=\'%s\'" % job.groupid\n+        qry += " AND jobname=\'%s\'" % job.jobname\n+        qry += " AND launcher=\'%s\';" % job.launcher\n+        self._iDb.execute(qry)\n+            \n+            \n+    ## Set the jobid of a job with the id of SGE\n+    #\n+    # @param job job instance\n+    # @param jobid integer\n+    #\n+    def updateJobIdInDB(self, job, jobid):\n+        #TODO: check if only one job will be updated\n+        qry = "UPDATE %s" % self._table\n+        qry += " SET jobid=\'%i\'" % int(jobid)\n+        qry += " WHERE jobname=\'%s\'" % job.jobname\n+        qry += " AND groupid=\'%s\'" % job.groupid\n+        qry += " AND launcher=\'%s\';" % job.launcher\n+        
self._iDb.execute(qry)\n+        \n+        \n+    ## Get a job status\n+    #\n+    # @param job: a Job instance with the job informations\n+    #\n+    def getJobStatus(self, job):\n+        if job.jobid != 0 and job.jobname == "":\n+            job.jobname = job.jobid\n+            job.jobid = 0\n+        qry = "SELECT status FROM %s" % self._table\n+        qry += " WHERE groupid=\'%s\'" % job.groupid\n+        qry += " AND jobname=\'%s\'" % job.jobname\n+        qry += " AND launcher=\'%s\';" % job.launcher\n+        self._iDb.execute(qry)\n+        res = self._iDb.fetchall()\n+        if len(re'..b'outside the interval: go to next interval (time out) \n+            if delta.seconds >= (nbTimeOuts+1) * timeOutPerJob:\n+                nbTimeOuts += 1\n+                # Job with \'running\' status should be in qstat. Because status in DB is set at \'running\' by the job launched.\n+                if not self.isJobStillHandledBySge(jobid, jobname):\n+                    # But if not, let time for the status update (in DB), if the job finished between the query execution and now.\n+                    time.sleep( 5 )\n+                # If no update at \'finished\', exit\n+                #TODO: check status in DB\n+                if not self.isJobStillHandledBySge(jobid, jobname):\n+                    msg = "ERROR: job \'%s\', supposedly still running, is not handled by SGE anymore" % ( jobid )\n+                    msg += "\\nit was launched the %s (> %.2f hours ago)" % ( dateTimeOldestJob, timeOutPerJob/3600.0 )\n+                    msg += "\\nthis problem can be due to:"\n+                    msg += "\\n* memory shortage, in that case, decrease the size of your jobs;"\n+                    msg += "\\n* timeout, in that case, decrease the size of your jobs;"\n+                    msg += "\\n* node failure or database error, in that case, launch the program again or ask your system administrator."\n+                    sys.stderr.write("%s\\n" % msg)\n+                    sys.stderr.flush()\n+                    self.cleanJobGroup(groupid)\n+                    sys.exit(1)\n+        return nbTimeOuts\n+                        \n+    ## Check if a job is still handled by SGE\n+    #\n+    # @param jobid string job identifier\n+    # @param jobname string job name\n+    #  \n+    def isJobStillHandledBySge(self, jobid, jobname):\n+        isJobInQstat = False\n+        qstatFile = "qstat_stdout"\n+        cmd = "qstat > %s" % qstatFile\n+        returnStatus = os.system(cmd)\n+        if returnStatus != 0:\n+            msg = "ERROR while launching \'qstat\'"\n+            sys.stderr.write( "%s\\n" % msg )\n+            sys.exit(1)\n+        qstatFileHandler = open(qstatFile, "r")\n+        lLines = qstatFileHandler.readlines()\n+        for line in lLines:\n+            tokens = line.split()\n+            if len(tokens) > 3 and tokens[0] == str(jobid) and tokens[2] == jobname[0:len(tokens[2])]:\n+                isJobInQstat = True\n+                break\n+        qstatFileHandler.close()\n+        os.remove(qstatFile)\n+        return isJobInQstat\n+    \n+    def _getQsubCommand(self, job):    \n+        cmd = "echo \'%s\' | " % job.launcher\n+        prg = "qsub"\n+        cmd += prg\n+        cmd += " -V"\n+        cmd += " -N %s" % job.jobname\n+        if job.queue != "":\n+            cmd += " -q %s" % job.queue\n+        cmd += " -cwd"\n+        if job.lResources != []:\n+            cmd += " -l \\""\n+            cmd += " ".join(job.lResources)\n+            cmd += 
"\\""\n+        if job.parallelEnvironment != "":\n+            cmd += " -pe " + job.parallelEnvironment\n+        cmd += " > jobid.stdout"\n+        return cmd\n+    \n+    def _getJobidFromJobManager(self, jobidFileHandler):\n+        return int(jobidFileHandler.readline().split(" ")[2])\n+    \n+\n+class TableJobAdaptatorTorque(TableJobAdaptator):  \n+                        \n+    def _checkIfJobsTableAndJobsManagerInfoAreConsistent(self, nbTimeOuts, timeOutPerJob, groupid):\n+        return nbTimeOuts\n+        \n+    def _getQsubCommand(self, job):    \n+        cmd = "echo \'%s\' | " % job.launcher\n+        prg = "qsub"\n+        cmd += prg\n+        cmd += " -V"\n+        cmd += " -d %s" % os.getcwd()\n+        cmd += " -N %s" % job.jobname\n+        if job.queue != "":\n+            cmd += " -q %s" % job.queue\n+        if job.lResources != []:\n+            cmd += " -l \\""\n+            cmd += " ".join(job.lResources).replace("mem_free","mem")\n+            cmd += "\\""\n+        cmd += " > jobid.stdout"\n+        return cmd\n+\n+    def _getJobidFromJobManager(self, jobidFileHandler):\n+        return int(jobidFileHandler.readline().split(".")[0])\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableJobAdaptatorFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableJobAdaptatorFactory.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,66 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import sys
+from commons.core.sql.TableJobAdaptator import TableJobAdaptatorSGE
+from commons.core.sql.TableJobAdaptator import TableJobAdaptatorTorque
+from commons.core.sql.JobAdaptator import JobAdaptatorSGE
+from commons.core.sql.JobAdaptator import JobAdaptatorTorque
+
+class TableJobAdaptatorFactory(object):
+
+    def createInstance(iDb, jobTableName):
+        if os.environ["REPET_JOB_MANAGER"].lower() == "sge":
+            iTJA = TableJobAdaptatorSGE(iDb, jobTableName)
+        elif os.environ["REPET_JOB_MANAGER"].lower() == "torque":
+            iTJA = TableJobAdaptatorTorque(iDb, jobTableName)
+        else:
+            print "ERROR: unknown jobs manager : $REPET_JOB_MANAGER = %s." % os.environ["REPET_JOB_MANAGER"]
+            sys.exit(1)
+            
+        return iTJA
+
+    createInstance = staticmethod(createInstance)
+       
+    def createJobInstance():
+        if os.environ["REPET_JOB_MANAGER"].lower() == "sge":
+            iJA = JobAdaptatorSGE()
+        elif os.environ["REPET_JOB_MANAGER"].lower() == "torque":
+            iJA = JobAdaptatorTorque()
+        else:
+            print "ERROR: unknown jobs manager : $REPET_JOB_MANAGER = %s." % os.environ["REPET_JOB_MANAGER"]
+            sys.exit(1)
+            
+        return iJA   
+    
+
+    createJobInstance = staticmethod(createJobInstance)
+    
\ No newline at end of file
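A minimal usage sketch of the factory (the table name "jobs" is illustrative, and DbFactory.createInstance() is assumed to read its connection settings from the REPET_* environment variables, as exercised by Test_DbFactory further down):

import os
from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory

os.environ.setdefault("REPET_JOB_MANAGER", "SGE")            # must be "SGE" or "Torque"
iDb = DbFactory.createInstance()                             # MySQL connection from REPET_* variables
iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")  # TableJobAdaptatorSGE or ...Torque
iJA = TableJobAdaptatorFactory.createJobInstance()           # JobAdaptatorSGE or JobAdaptatorTorque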
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableMapAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableMapAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,193 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import sys
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITableMapAdaptator import ITableMapAdaptator
+from commons.core.coord.Map import Map
+from commons.core.coord.MapUtils import MapUtils
+
+
+## Adaptator for Map table
+#
+class TableMapAdaptator( TableAdaptator, ITableMapAdaptator ):
+            
+    ## Give a list of Map instances having a given seq name
+    #
+    # @param seqName string seq name
+    # @return lMap list of instances
+    #
+    def getListFromSeqName( self, seqName ):
+        sqlCmd = "SELECT * FROM %s" % (self._table)
+        colum2Get, type2Get, attr2Get = self._getTypeColumAttr2Get(seqName)
+        sqlCmd += " WHERE " + colum2Get
+        sqlCmd += " = "
+        sqlCmd = sqlCmd + type2Get
+        sqlCmd = sqlCmd % "'" + attr2Get + "'"
+        return self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        
+    ## Give a list of Map instances overlapping a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return list of Map instances
+    #
+    def getListOverlappingCoord(self, query, start, end):
+        sqlCmd = 'select * from %s where chr="%s" and ((start between least(%d,%d) and greatest(%d,%d) or end between least(%d,%d) and greatest(%d,%d)) or (least(start,end)<=least(%d,%d) and greatest(start,end)>=greatest(%d,%d)))  ;' % (self._table, query, start, end, start, end, start, end, start, end, start, end, start, end)
+        return self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+    
+    ## Give a list of Map instances having a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lMap list of instances
+    #
+    def getMapListFromSeqName(self, seqName):
+        lMap = self.getListFromSeqName( seqName )
+        return lMap
+    
+#TODO: Check getListFromSeqName method: uses name instead of seqname
+#    ## Give a list of Map instances having a given sequence name from list
+#    #
+#    # @param lSeqName string sequence name list
+#    # @return lMap list of instances
+#    #
+#    def getMapListFromSeqNameList(self, lSeqName):
+#        lMap = []
+#        [lMap.extend(self.getListFromSeqName(seqName)) for seqName in lSeqName]
+#        return lMap
+    
+    ## Give a list of Map instances having a given chromosome
+    #
+    # @param chr string chromosome
+    # @return lMap list of instances
+    #
+    def getMapListFromChr(self, chr):
+        sqlCmd = "SELECT * FROM %s WHERE chr='%s'" % (self._table, chr)
+        lMap = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMap
+
+    ## Give a list of the distinct seqName/chr present in the table
+    #
+    # @return lDistinctContigNames string list
+    #
+    def getSeqNameList(self):
+        sqlCmd = "SELECT DISTINCT chr FROM %s" % ( self._table )
+        lDistinctContigNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
+        return lDistinctContigNames
+    
+    ## Return a list of Set instances from a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lSets list of Set instances
+    # 
+    def getSetListFromSeqName( self, seqName ):
+        lMaps = self.getListFromSeqName( seqName )
+        lSets = MapUtils.mapList2SetList( lMaps )
+        return lSets
+    
+    ## Give a list of Map instances overlapping a given region
+    #
+    # @param seqName string seq name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lMap list of map instances
+    #
+    def getMapListOverlappingCoord(self, seqName, start, end):
+        lMap = self.getListOverlappingCoord(seqName, start, end)
+        return lMap
+    
+    ## Return a list of Set instances overlapping a given sequence
+    #   
+    # @param seqName string sequence name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    def getSetListOverlappingCoord( self, seqName, start, end ):
+        lMaps = self.getListOverlappingCoord( seqName, start, end )
+        lSets = MapUtils.mapList2SetList( lMaps )
+        return lSets
+    
+    ## Give a dictionary whose keys are Map names and whose values are the corresponding Map instances
+    #
+    # @return dName2Maps dict whose keys are Map names and whose values are the corresponding Map instances
+    #
+    def getDictPerName( self ):
+        dName2Maps = {}
+        lMaps = self.getListOfAllMaps()
+        for iMap in lMaps:
+            if dName2Maps.has_key( iMap.name ):
+                if iMap == dName2Maps[ iMap.name ]:
+                    continue
+                else:
+                    msg = "ERROR: in table '%s' two different Map instances have the same name '%s'" % ( self._table, iMap.name )
+                    sys.stderr.write( "%s\n" % ( msg ) )
+                    sys.exit(1)
+            dName2Maps[ iMap.name ] = iMap
+        return dName2Maps
+    
+    ## Return a list of Map instances with all the data contained in the table
+    #
+    # @return lMaps list of Map instances
+    #
+    def getListOfAllMaps( self ):
+        sqlCmd = "SELECT * FROM %s" % ( self._table )
+        lMaps = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMaps
+    
+    ## Give the end coordinate of the Map instances of a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return end integer end coordinate
+    #
+    def getEndFromSeqName(self, seqName):
+        sqlCmd = "SELECT end FROM %s WHERE chr = '%s'" % (self._table, seqName)
+        end = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return end
+    
+    def _getInstanceToAdapt(self):
+        iMap = Map()
+        return iMap
+
+    def _getTypeColumAttr2Get(self, name):
+        colum2Get = 'name'
+        type2Get = '%s'
+        attr2Get = name
+        return colum2Get, type2Get, attr2Get
+    
+    def _getTypeAndAttr2Insert(self, map):
+        type2Insert = ("'%s'","'%s'","'%d'","'%d'")
+        attr2Insert = (map.name, map.seqname, map.start, map.end)
+        return type2Insert, attr2Insert
+
+    def _escapeAntislash(self, obj):
+        obj.name = obj.name.replace("\\", "\\\\")
+        obj.seqname = obj.seqname.replace("\\", "\\\\")
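A minimal usage sketch of TableMapAdaptator (the table name and coordinates are illustrative; the base TableAdaptator constructor is assumed to take a database handle and a table name):

from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableMapAdaptator import TableMapAdaptator

iDb = DbFactory.createInstance()
tma = TableMapAdaptator(iDb, "dummy_map")
lMaps = tma.getMapListFromChr("chr1")                         # every Map on chromosome chr1
lOverlaps = tma.getMapListOverlappingCoord("chr1", 100, 200)  # Maps overlapping chr1:100-200
lSets = tma.getSetListFromSeqName("seq1")                     # Maps whose name is 'seq1', converted to Sets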
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableMatchAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableMatchAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,100 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITableMatchAdaptator import ITableMatchAdaptator
+from commons.core.coord.Match import Match
+
+## Adaptator for Match table
+#
+class TableMatchAdaptator( TableAdaptator, ITableMatchAdaptator ):
+        
+    ## Give a list of Match instances given a query name
+    #
+    # @param query string query name
+    # @return lMatches list of Match instances
+    #
+    def getMatchListFromQuery( self, query ):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s';" % ( self._table, query )
+        return self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+    
+    ## Give a list of Match instances having the same identifier
+    #
+    # @param id integer identifier number
+    # @return lMatch a list of Match instances
+    #
+    def getMatchListFromId( self, id ):
+        sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id )
+        lMatch = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMatch
+    
+    ## Give a list of Match instances according to the given list of identifier numbers
+    #
+    # @param lId integer list 
+    # @return lMatch a list of Match instances
+    # 
+    def getMatchListFromIdList( self, lId ):
+        lMatch=[]
+        if lId == []:
+            return lMatch
+        sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])
+        for i in lId[1:]:
+            sqlCmd += " or path=%d" % (i)
+        sqlCmd += ";"
+        lMatch = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMatch
+    
+    ## Give the data contained in the table as a list of Match instances
+    #
+    # @return lMatches list of Match instances
+    #
+    def getListOfAllMatches( self ):
+        sqlCmd = "SELECT * FROM %s" % ( self._table )
+        lMatches = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMatches    
+    
+    def _getInstanceToAdapt(self):
+        iMatch = Match()
+        return iMatch
+    
+    def _getTypeAndAttr2Insert(self, match):
+        type2Insert = ("'%s'","'%d'","'%d'","'%d'","'%f'","'%f'","'%s'","'%d'","'%d'","'%d'","'%f'","'%g'","'%d'","'%f'","'%d'")
+        attr2Insert = ( match.range_query.seqname, match.range_query.start, \
+                        match.range_query.end, match.query_length, match.query_length_perc, \
+                        match.match_length_perc, match.range_subject.seqname, match.range_subject.start,\
+                        match.range_subject.end, match.subject_length, match.subject_length_perc, \
+                        match.e_value, match.score, match.identity, \
+                        match.id)
+        return type2Insert, attr2Insert
+    
+    def _escapeAntislash(self, obj):
+        obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\")
+        obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\")
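A minimal usage sketch of TableMatchAdaptator (table name and identifiers are illustrative; same constructor assumption as above):

from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableMatchAdaptator import TableMatchAdaptator

iDb = DbFactory.createInstance()
tmat = TableMatchAdaptator(iDb, "dummy_match")
lAll = tmat.getListOfAllMatches()
lForQuery = tmat.getMatchListFromQuery("qry")        # rows whose query_name is 'qry'
lForIds = tmat.getMatchListFromIdList([1, 2, 5])     # rows whose path identifier is 1, 2 or 5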
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TablePathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TablePathAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,673 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.coord.Path import Path\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.sql.TableAdaptator import TableAdaptator\n+from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator\n+\n+\n+## Adaptator for a Path table\n+#\n+class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ):\n+\n+    ## Give a list of Path instances having the same identifier\n+    #\n+    # @param id integer identifier number\n+    # @return lPath a list of Path instances\n+    #\n+    def getPathListFromId( self, id ):\n+        sqlCmd = "SELECT * FROM %s WHERE path=\'%d\';" % ( self._table, id )\n+        lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+        return lPath\n+    \n+    ## Give a list of Path instances according to the given list of identifier numbers\n+    #\n+    # @param lId integer list \n+    # @return lPath a list of Path instances\n+    #\n+    def getPathListFromIdList( self, lId ):\n+        lPath=[]\n+        if lId == []:\n+            return lPath\n+        sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])\n+        for i in lId[1:]:\n+            sqlCmd += " or path=%d" % (i)\n+        sqlCmd += ";"\n+        lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+        return lPath\n+    \n+    ## Give a list of Path instances having the same given query name\n+    #\n+    # @param query string name of the query \n+    # @return lPath a list of Path instances\n+    #\n+    def getPathListFromQuery( self, query ):\n+        lPath = self._getPathListFromTypeName("query", query)\n+        return lPath\n+    \n+    ## Give a list of Path instances having the same given subject name\n+    #\n+    # @param subject string name of the subject \n+    # @return lPath a list of Path instances\n+    #\n+    def getPathListFromSubject( 
self, subject ):\n+        lPath = self._getPathListFromTypeName("subject", subject)\n+        return lPath\n+    \n+    ## Give a list of the distinct subject names present in the table\n+    #\n+    # @return lDistinctSubjectNames string list\n+    #\n+    def getSubjectList(self):\n+        lDistinctSubjectNames = self._getDistinctTypeNamesList("subject")\n+        return lDistinctSubjectNames\n+    \n+    ## Give a list of the distinct query names present in the table\n+    #\n+    # @return lDistinctQueryNames string list\n+    #\n+    def ge'..b'TypeNamesList( self, type ):\n+        sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table )\n+        lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)\n+        return lDistinctTypeNames\n+    \n+    def _getPathsNbFromTypeName( self, type, typeName ):\n+        sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name=\'%s\'" % ( self._table, type, typeName )\n+        pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )\n+        return pathNb\n+    \n+    def _getIdListFromTypeName( self, type, typeName ):\n+        sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name=\'%s\'" % ( self._table, type, typeName )\n+        lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )\n+        return lId\n+    \n+    def _getIdNbFromTypeName( self, type, typeName ):\n+        sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name=\'%s\'" % ( self._table, type, typeName )\n+        idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )\n+        return idNb\n+    \n+    def _getTypeAndAttr2Insert(self, path):\n+        type2Insert = ("\'%d\'", "\'%s\'", "\'%d\'", "\'%d\'", "\'%s\'", "\'%d\'", "\'%d\'", "\'%g\'", "\'%d\'", "\'%f\'")\n+        if path.range_query.isOnDirectStrand():\n+            queryStart = path.range_query.start\n+            queryEnd = path.range_query.end\n+            subjectStart = path.range_subject.start\n+            subjectEnd = path.range_subject.end\n+        else:\n+            queryStart = path.range_query.end\n+            queryEnd = path.range_query.start\n+            subjectStart = path.range_subject.end\n+            subjectEnd = path.range_subject.start\n+        attr2Insert = ( path.id,\\\n+                     path.range_query.seqname,\\\n+                     queryStart,\\\n+                     queryEnd,\\\n+                     path.range_subject.seqname,\\\n+                     subjectStart,\\\n+                     subjectEnd,\\\n+                     path.e_value,\\\n+                     path.score,\\\n+                     path.identity\\\n+                     )\n+        return type2Insert, attr2Insert\n+    \n+    def _getInstanceToAdapt(self):\n+        iPath = Path()\n+        return iPath\n+    \n+    def _escapeAntislash(self, obj):\n+        obj.range_query.seqname = obj.range_query.seqname.replace("\\\\", "\\\\\\\\")\n+        obj.range_subject.seqname = obj.range_subject.seqname.replace("\\\\", "\\\\\\\\")\n+    \n+    def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable):\n+        sqlCmd = ""\n+        if queryName == "":\n+            sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table)\n+        else:\n+            sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity 
FROM %s WHERE query_name=\'%s\'" % (tmpTable, self._table, queryName)\n+        return sqlCmd\n+        \n+    ## return a filtered list with only one unique occurrence of path of a given list\n+    #\n+    # @param lPath a list of Path instances\n+    # @return lUniquePath a list of Path instances\n+    #\n+    def getListOfUniqueOccPath(self, lPath):\n+        if len(lPath) < 2 :\n+            return lPath\n+        \n+        sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd()))\n+        lUniquePath = []    \n+        for i in xrange(1, len(sortedListPath)):\n+            previousPath =  sortedListPath [i-1]\n+            currentPath =  sortedListPath [i]\n+            if previousPath != currentPath:\n+                lUniquePath.append(previousPath)\n+        \n+        if previousPath != currentPath:\n+            lUniquePath.append(currentPath)  \n+                  \n+        return lUniquePath       \n\\ No newline at end of file\n'
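A minimal usage sketch of TablePathAdaptator (table name and query name are illustrative; same constructor assumption as above):

from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TablePathAdaptator import TablePathAdaptator

iDb = DbFactory.createInstance()
tpa = TablePathAdaptator(iDb, "dummy_path")
lPath = tpa.getPathListFromQuery("qry")          # every Path whose query name is 'qry'
lUnique = tpa.getListOfUniqueOccPath(lPath)      # keep a single occurrence per query/subject coordinate pair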
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableSeqAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableSeqAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,185 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import sys
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITableSeqAdaptator import ITableSeqAdaptator
+from commons.core.coord.SetUtils import SetUtils
+from commons.core.seq.Bioseq import Bioseq
+
+
+## Adaptator for a Seq table
+#
+class TableSeqAdaptator( TableAdaptator, ITableSeqAdaptator ):
+    
+    ## Retrieve all the distinct accession names in a list.
+    #
+    # @return lAccessions list of accessions
+    #
+    def getAccessionsList( self ):
+        sqlCmd = "SELECT DISTINCT accession FROM %s;" % ( self._table )
+        lAccessions = self._getStringListWithSQLCmd(sqlCmd)
+        return lAccessions
+    
+    ## Save sequences in a fasta file from a list of accession names.
+    # 
+    # @param lAccessions list of accessions
+    # @param outFileName string Fasta file
+    #
+    def saveAccessionsListInFastaFile( self, lAccessions, outFileName ):
+        outFile = open( outFileName, "w" )
+        for ac in lAccessions:
+            bs = self.getBioseqFromHeader( ac )
+            bs.write(outFile)
+        outFile.close()
+    
+    ## Get a bioseq instance given its header
+    #
+    # @param header string name of the sequence ('accession' field in the 'seq' table) 
+    # @return bioseq instance
+    #
+    def getBioseqFromHeader( self, header ):
+        sqlCmd = "SELECT * FROM %s WHERE accession='%s';" % ( self._table, header )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        return Bioseq( res[0][0], res[0][1] )
+        
+    ## Retrieve the length of a sequence given its name.
+    #
+    # @param accession name of the sequence
+    # @return seqLength integer length of the sequence
+    # 
+    def getSeqLengthFromAccession( self, accession ):
+        sqlCmd = 'SELECT length FROM %s WHERE accession="%s"' % ( self._table, accession )
+        seqLength = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return seqLength
+    
+    ## Retrieve the length of a sequence given its description.
+    #
+    # @param description string description of the sequence
+    # @return seqLength integer length of the sequence
+    # 
+    def getSeqLengthFromDescription( self, description ):
+        sqlCmd = 'SELECT length FROM %s WHERE description="%s"' % ( self._table, description )
+        seqLength = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return seqLength
+        
+    ## Retrieve all the accessions and their lengths as a list of tuples
+    #
+    # @return lAccessionLengthTuples list of tuples
+    # 
+    def getAccessionAndLengthList(self):
+        sqlCmd = 'SELECT accession, length FROM %s' % self._table
+        self._iDb.execute(sqlCmd)
+        res = self._iDb.fetchall()
+        lAccessionLengthTuples = []
+        for i in res:
+            lAccessionLengthTuples.append(i)
+        return lAccessionLengthTuples
+    
+    ## Get a subsequence of a given accession between two coordinates
+    #
+    # @param accession string sequence name
+    # @param start integer 
+    # @param end integer
+    # @return bioseq.sequence string
+    #
+    def getSubSequence( self, accession, start, end ):
+        bs = Bioseq()
+        if start <= 0 or end <= 0:
+            print "ERROR with coordinates start=%i or end=%i" % ( start, end )
+            sys.exit(1)
+            
+        if accession not in self.getAccessionsList():
+            print "ERROR: accession '%s' absent from table '%s'" % ( accession, self._table )
+            sys.exit(1)
+            
+        lengthAccession = self.getSeqLengthFromAccession( accession )
+        if start > lengthAccession or end > lengthAccession:
+            print "ERROR: coordinates start=%i end=%i out of sequence '%s' range (%i bp)" % ( start, end, accession, lengthAccession )
+            sys.exit(1)
+            
+        sqlCmd = "SELECT SUBSTRING(sequence,%i,%i) FROM %s WHERE accession='%s'" % ( min(start,end), abs(end-start)+ 1, self._table, accession )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        bs.setSequence( res[0][0] )
+        if start > end:
+            bs.reverseComplement()
+        return bs.sequence
+    
+    ## get bioseq from given set list
+    #
+    # @param lSets list of Set instances
+    # @return bioseq instance
+    #
+    def getBioseqFromSetList( self, lSets ):
+        header = "%s::%i %s " % ( lSets[0].name, lSets[0].id, lSets[0].seqname )
+        sequence = ""
+        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
+        if not lSets[0].isOnDirectStrand():
+            lSortedSets.reverse()
+        for iSet in lSortedSets:
+            header += "%i..%i," % ( iSet.getStart(), iSet.getEnd() )
+            sequence += self.getSubSequence( iSet.seqname, iSet.getStart(), iSet.getEnd() )
+        return Bioseq( header[:-1], sequence )
+    
+    ## Return True if the given accession is present in the table
+    #
+    def isAccessionInTable( self, name ):
+        sqlCmd = "SELECT accession FROM %s WHERE accession='%s'" % ( self._table, name )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        return bool(res)
+    
+    ## Save all the sequences of the table in a fasta file.
+    #
+    # @param outFileName string Fasta file
+    # 
+    def exportInFastaFile(self, outFileName ):
+        lAccessions = self.getAccessionsList()
+        self.saveAccessionsListInFastaFile( lAccessions, outFileName )
+        
+    def _getStringListWithSQLCmd( self, sqlCmd ):
+        self._iDb.execute(sqlCmd)
+        res = self._iDb.fetchall()
+        lString = []
+        for i in res:
+            lString.append(i[0])
+        return lString
+   
+    def _getTypeAndAttr2Insert(self, bs):
+        type2Insert =  ( "'%s'", "'%s'", "'%s'", "'%i'" ) 
+        attr2Insert =  (bs.header.split()[0], bs.sequence, bs.header, bs.getLength())
+        return type2Insert, attr2Insert
+    
+    def _escapeAntislash(self, obj):
+        pass
+
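A minimal usage sketch of TableSeqAdaptator (accession and file names are illustrative; same constructor assumption as above). When start > end, getSubSequence() returns the fragment reverse-complemented, as implemented above:

from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator

iDb = DbFactory.createInstance()
tsa = TableSeqAdaptator(iDb, "dummy_seq")
forward = tsa.getSubSequence("chr1", 1, 100)     # plus-strand fragment chr1:1..100
reverse = tsa.getSubSequence("chr1", 100, 1)     # same fragment, reverse-complemented
tsa.exportInFastaFile("all_sequences.fa")        # write every accession of the table to a fasta file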
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/TableSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableSetAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,215 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.sql.ITableSetAdaptator import ITableSetAdaptator\n+from commons.core.sql.TableAdaptator import TableAdaptator\n+from commons.core.coord.Set import Set\n+\n+\n+## Adaptator for a Set table\n+#\n+class TableSetAdaptator( TableAdaptator, ITableSetAdaptator ):\n+            \n+    ## Give a list of Set instances having a given seq name\n+    #\n+    # @param seqName string seq name\n+    # @return lSet list of instances\n+    #\n+    def getListFromSeqName( self, seqName ):\n+        sqlCmd = "SELECT * FROM %s" % (self._table)\n+        colum2Get, type2Get, attr2Get = self._getTypeColumAttr2Get(seqName)\n+        sqlCmd += " WHERE " + colum2Get\n+        sqlCmd += " = "\n+        sqlCmd = sqlCmd + type2Get\n+        sqlCmd = sqlCmd % "\'" + attr2Get + "\'"\n+        lSet = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+        return lSet\n+        \n+    ## Give a list of set instances overlapping a given region\n+    #\n+    # @param query string query name\n+    # @param start integer start coordinate\n+    # @param end integer end coordinate\n+    # @return lSet list of set instances\n+    #\n+    def getListOverlappingCoord(self, query, start, end):\n+        sqlCmd = \'select * from %s where chr="%s" and ((start between least(%d,%d) and greatest(%d,%d) or end between least(%d,%d) and greatest(%d,%d)) or (least(start,end)<=least(%d,%d) and greatest(start,end)>=greatest(%d,%d)))  ;\' % (self._table, query, start, end, start, end, start, end, start, end, start, end, start, end)\n+        lSet = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+        return lSet\n+\n+    #TODO: to test !!!\n+    ## Give a list of Set instances overlapping a given region\n+    #\n+    # @note whole chains are returned, even if only a fragment overlap with the given region\n+    # @param query string query 
name\n+    # @param start integer start coordinate\n+    # @param end integer end coordinate\n+    # @return lSets list of Path instances\n+    #\n+    def getChainListOverlappingCoord(self, query, start, end):\n+        sqlCmd = "select distinct path from %s where chr=\'%s\' and ((start between least(%d,%d) and greatest(%d,%d) or end between least(%d,%d) and greatest(%d,%d)) or (least(start,end)<=least(%d,%d) and greatest(start,end)>=greatest(%d,%d)));" % (self._table, query,start,end,start,end,start,end,start,end,start,end,start,e'..b'lCmd)\n+        return lDistinctContigNames\n+    \n+    ## Give a list of Set instances having a given seq name\n+    #\n+    # @param seqName string seq name\n+    # @return lSet list of instances\n+    #\n+    def getSetListFromSeqName( self, seqName):\n+        lSets = self.getListFromSeqName(seqName)\n+        return lSets\n+    \n+    ## Give a set instances list with a given identifier number\n+    #\n+    # @param id integer identifier number\n+    # @return lSet list of set instances\n+    #\n+    def getSetListFromId(self, id):\n+        SQLCmd = "select * from %s where path=%d;" % (self._table, id)\n+        return self._iDb.getObjectListWithSQLCmd( SQLCmd, self._getInstanceToAdapt )\n+   \n+    ## Give a set instances list with a list of identifier numbers\n+    #\n+    # @param lId integers list identifiers list numbers\n+    # @return lSet list of set instances\n+    #   \n+    def getSetListFromIdList(self,lId):\n+        lSet = []\n+        if lId == []:\n+            return lSet\n+        SQLCmd = "select * from %s where path=%d" % (self._table, lId[0])\n+        for i in lId[1:]:\n+            SQLCmd += " or path=%d" % (i)\n+        SQLCmd += ";"\n+        return self._iDb.getObjectListWithSQLCmd( SQLCmd, self._getInstanceToAdapt )\n+    \n+    ## Return a list of Set instances overlapping a given sequence\n+    #   \n+    # @param seqName string sequence name\n+    # @param start integer start coordinate\n+    # @param end integer end coordinate\n+    # @return lSet list of Set instances\n+    #\n+    def getSetListOverlappingCoord( self, seqName, start, end ):\n+        lSet = self.getListOverlappingCoord( seqName, start, end )\n+        return lSet\n+    \n+    ## Delete set corresponding to a given identifier number\n+    #\n+    # @param id integer identifier number\n+    #  \n+    def deleteFromId(self, id):\n+        sqlCmd = "delete from %s where path=%d;" % (self._table, id)\n+        self._iDb.execute(sqlCmd)\n+        \n+    ## Delete set corresponding to a given list of identifier number\n+    #\n+    # @param lId integers list list of identifier number\n+    #  \n+    def deleteFromIdList(self, lId):\n+        if lId == []:\n+            return\n+        sqlCmd = "delete from %s where path=%d" % ( self._table, lId[0] )\n+        for i in lId[1:]:\n+            sqlCmd += " or path=%d"%(i)\n+        sqlCmd += ";"\n+        self._iDb.execute(sqlCmd)\n+        \n+    ## Join two set by changing id number of id1 and id2 set to the least of id1 and id2\n+    #\n+    # @param id1 integer id path number\n+    # @param id2 integer id path number\n+    #    \n+    def joinTwoSets(self, id1, id2):\n+        if id1 < id2:\n+            newId = id1\n+            oldId = id2\n+        else:\n+            newId = id2\n+            oldId = id1\n+        sqlCmd = "UPDATE %s SET path=%d WHERE path=%d" % (self._table, newId, oldId)\n+        self._iDb.execute(sqlCmd)\n+    \n+    ## Get a new id number\n+    #\n+    # @return new_id integer 
max_id + 1 \n+    #\n+    def getNewId(self):\n+        sqlCmd = "select max(path) from %s;" % (self._table)\n+        maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd)\n+        newId = int(maxId) + 1\n+        return newId\n+    \n+    ## Give the data contained in the table as a list of Sets instances\n+    #\n+    # @return lSets list of set instances\n+    #\n+    def getListOfAllSets( self ):\n+        return self.getListOfAllCoordObject()\n+   \n+    def _getInstanceToAdapt(self):\n+            iSet = Set()\n+            return iSet\n+    \n+    def _getTypeColumAttr2Get(self, contig):\n+        colum2Get = \'chr\'\n+        type2Get = \'%s\'\n+        attr2Get = contig\n+        return colum2Get, type2Get, attr2Get\n+    \n+    def _getTypeAndAttr2Insert(self, set):\n+        type2Insert = ("\'%d\'","\'%s\'","\'%s\'","\'%d\'","\'%d\'")\n+        attr2Insert = (set.id, set.name, set.seqname, set.start, set.end)\n+        return type2Insert, attr2Insert\n+\n+    def _escapeAntislash(self, obj):\n+        obj.name = obj.name.replace("\\\\", "\\\\\\\\")\n+        obj.seqname = obj.seqname.replace("\\\\", "\\\\\\\\")\n'
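A minimal usage sketch of TableSetAdaptator (identifiers and table name are illustrative; same constructor assumption as above):

from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableSetAdaptator import TableSetAdaptator

iDb = DbFactory.createInstance()
tset = TableSetAdaptator(iDb, "dummy_set")
newId = tset.getNewId()            # max(path) + 1
tset.joinTwoSets(3, 7)             # rows with path=7 are re-labelled path=3 (the smaller id wins)
lSets = tset.getSetListFromId(3)   # all Set rows of chain 3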
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/TestSuite_sql.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/TestSuite_sql.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_DbMySql
+import Test_TableBinPathAdaptator
+import Test_TableMapAdaptator
+import Test_TableMatchAdaptator
+import Test_TablePathAdaptator
+import Test_TableSeqAdaptator
+import Test_TableSetAdaptator
+import Test_F_RepetJob
+import Test_RepetJob
+import Test_TableBinSetAdaptator
+
+def main():
+
+        TestSuite_sql = unittest.TestSuite()
+        
+        TestSuite_sql.addTest( unittest.makeSuite( Test_DbMySql.Test_DbMySql, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableBinPathAdaptator.Test_TableBinPathAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableMapAdaptator.Test_TableMapAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableMatchAdaptator.Test_TableMatchAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableSetAdaptator.Test_TableSetAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableSeqAdaptator.Test_TableSeqAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableMatchAdaptator.Test_TableMatchAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TablePathAdaptator.Test_TablePathAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_F_RepetJob.Test_F_RepetJob, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_RepetJob.Test_RepetJob, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableBinSetAdaptator.Test_TableBinSetAdaptator, "test" ) )
+        
+        runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+        runner.run( TestSuite_sql )
+        
+        
+if __name__ == "__main__":
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_DbFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_DbFactory.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,63 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import unittest
+from commons.core.sql.DbFactory import DbFactory
+
+class Test_DbFactory( unittest.TestCase ):
+
+    def test_createInstance (self):
+        dbInstance = DbFactory.createInstance()
+        expValue = None
+        obsValue = dbInstance
+        self.assertNotEquals(expValue, obsValue)
+        
+    def test_createInstance_with_config (self):
+        configFileName = "dummyConfigFileName.cfg"
+        configF = open(configFileName,"w")
+        configF.write("[repet_env]\n")
+        configF.write( "repet_host: %s\n" % ( os.environ["REPET_HOST"] ) )
+        configF.write( "repet_user: %s\n" % ( os.environ["REPET_USER"] ) )
+        configF.write( "repet_pw: %s\n" % ( os.environ["REPET_PW"] ) )
+        configF.write( "repet_db: %s\n" % ( os.environ["REPET_DB"] ) )
+        configF.write( "repet_port: %s\n" % ( os.environ["REPET_PORT"] ) )
+        configF.close()
+        
+        dbInstance = DbFactory.createInstance(configFileName)
+        expValue = None
+        obsValue = dbInstance
+        self.assertNotEquals(expValue, obsValue)
+        os.remove(configFileName)
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_DbFactory ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_DbMySql.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_DbMySql.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1554 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import unittest\n+import time\n+import os\n+from MySQLdb import ProgrammingError\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.DbMySql import TABLE_SCHEMA_DESCRIPTOR\n+from commons.core.sql.DbMySql import TABLE_TYPE_SYNONYMS\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Path import Path\n+\n+class Test_DbMySql( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._iDb = DbMySql( )\n+        self._uniqId = "%s" % time.strftime("%Y%m%d%H%M%S")\n+\n+    def tearDown( self ):\n+        if self._iDb.db.open:\n+            self._iDb.close()\n+        self._iDb = None\n+        \n+    def test_execute_syntax_error(self):\n+        expErrorMsg = "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'CHAUD TABLES\' at line 1"\n+        obsErrorMsg = ""\n+        sqlCmd = "CHAUD TABLES"\n+        try:\n+            self._iDb.execute(sqlCmd)\n+        except ProgrammingError as excep:\n+            obsErrorMsg = excep.args[1]\n+        \n+        self.assertEquals(expErrorMsg, obsErrorMsg)\n+\n+    def test_execute_with_1_retry(self):\n+        tableName = "dummyTable%s" % self._uniqId\n+        sqlCmd = "CREATE TABLE %s (dummyColumn varchar(255))" % tableName\n+        self._iDb.close()\n+        self._iDb.execute(sqlCmd)\n+        self.assertTrue(self._iDb.doesTableExist(tableName))\n+        self._iDb.dropTable(tableName)\n+\n+    def test_setAttributesFromConfigFile(self):\n+        expHost = "dummyHost"\n+        expUser = "dummyUser"\n+        expPw = "dummyPw"\n+        expDb = "dummyDb"\n+        expPort = 1000\n+        \n+        configFileName = "dummyConfigFileName.cfg"\n+        f = open( configFileName, "w" )\n+        f.write("[repet_env]\\n")\n+        f.write("repet_host: " + expHost + "\\n")\n+      
  f.write("repet_user: " + expUser + "\\n")\n+        f.write("repet_pw: " + expPw + "\\n")\n+        f.write("repet_db: " + expDb + "\\n")\n+        f.write("repet_port: " + str(expPort) + "\\n")\n+        f.close()\n+        \n+        self._iDb.setAttributesFromConfigFile(configFileName)\n+        \n+        obsHost = self._iDb.host\n+        obsUser = self._iDb.user\n+        obsPw = self._iDb.passwd\n+        obsDb = self._iDb.dbname\n+        obsPort = self._iDb.port\n+        \n+        os.remove(configFileName)\n+        \n+        self.asse'..b'l_r4.3: 3.73%; TermRepeats: non-termLTR: 1701; SSRCoverage=0.14<0.75)\\n")\n+      \n+        self._iDb.createTable(tableName, "classif", fileName)\n+        self.assertTrue(self._iDb.doesTableExist(tableName))\n+        \n+        expColumnNb = 8\n+        sqlCmd = "DESC %s;" % tableName\n+        self._iDb.execute(sqlCmd)\n+        res = self._iDb.fetchall()\n+        obsColumnNb = len(res)\n+        self.assertEquals(expColumnNb, obsColumnNb)\n+        \n+        expSize = 3\n+        obsSize = self._iDb.getSize(tableName)\n+        self.assertEquals(expSize, obsSize)\n+        \n+        expLIndex = ["iseq_name", "istatus", "iclass", "iorder", "icomp"]\n+        sqlCmd = "SHOW INDEX FROM %s" % tableName\n+        self._iDb.execute(sqlCmd)\n+        res = self._iDb.cursor.fetchall()\n+        obsLIndex = []\n+        for tuple in res:\n+            obsLIndex.append(tuple[2])\n+        self.assertEquals(expLIndex, obsLIndex)\n+  \n+        self._iDb.dropTable(tableName)\n+        os.remove(fileName)\n+        \n+    def test_createClassifIndex(self):\n+        tableName = "dummyclassifTable%s" % self._uniqId\n+        sqlCmd = "CREATE TABLE %s (seq_name varchar(255), length int unsigned, strand char, status varchar(255), class_classif varchar(255), order_classif varchar(255), completeness varchar(255), evidences text);" % tableName\n+        self._iDb.execute(sqlCmd)\n+        expLIndex = ["iseq_name", "istatus", "iclass", "iorder", "icomp"]\n+        \n+        self._iDb.createIndex(tableName, "classif")\n+        \n+        sqlCmd = "SHOW INDEX FROM %s" % tableName\n+        self._iDb.execute(sqlCmd)\n+        res = self._iDb.cursor.fetchall()\n+        \n+        obsLIndex = []\n+        for tuple in res:\n+            obsLIndex.append(tuple[2])\n+        self.assertEquals(expLIndex, obsLIndex)\n+        self._iDb.dropTable(tableName)\n+\n+    def test_createBinPathTable(self):\n+        pathFileName = "dummy.path"\n+        with open(pathFileName, "w") as pathF:\n+            pathF.write("1\\tqry\\t1\\t100\\tsbj\\t1\\t100\\t1e-123\\t136\\t98.4\\n")\n+            pathF.write("2\\tqry\\t500\\t401\\tsbj\\t1\\t100\\t1e-152\\t161\\t98.7\\n")\n+        \n+        expPathTuple1 = (1, 1000000, "qry", 1, 100, 1)\n+        expPathTuple2 = (2, 1000000, "qry", 401, 500, 1)  # change coordinates\n+        expTPathTuples = (expPathTuple1, expPathTuple2)\n+        \n+        pathTableName = "dummy_path"\n+        idxTableName = "dummy_path_idx"\n+        self._iDb.createTable(pathTableName, "path", pathFileName)\n+        self._iDb.createBinPathTable(pathTableName, True)\n+        \n+        sqlCmd = "SELECT * FROM %s" % idxTableName\n+        self._iDb.execute(sqlCmd)\n+        obsTPathTuples = self._iDb.fetchall()\n+        \n+        self._iDb.dropTable(pathTableName)\n+        self._iDb.dropTable(idxTableName)\n+        os.remove(pathFileName)\n+        \n+        self.assertEquals(expTPathTuples, obsTPathTuples)\n+\n+    def 
test_createBinSetTable(self):\n+        setFileName = "dummy.set"\n+        with open(setFileName, "w") as setF:\n+            setF.write("1\\tseq1\\tchr1\\t1900\\t3900\\n")\n+            setF.write("2\\tseq2\\tchr1\\t2\\t9\\n")\n+            setF.write("3\\tseq3\\tchr1\\t8\\t13\\n")\n+            \n+        expTuple = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L))\n+        \n+        setTableName = "dummy_set"\n+        idxTableName = "dummy_set_idx"\n+        self._iDb.createTable(setTableName, "set", setFileName)\n+        self._iDb.createBinSetTable(setTableName, True)\n+        \n+        sqlCmd = "SELECT * FROM %s" % idxTableName\n+        self._iDb.execute(sqlCmd)\n+        obsTuple = self._iDb.fetchall()\n+        \n+        self._iDb.dropTable(setTableName)\n+        self._iDb.dropTable(idxTableName)\n+        os.remove(setFileName)\n+        \n+        self.assertEquals(expTuple, obsTuple)\n+\n+    def _getInstanceToAdapt(self):\n+        iPath = Path()\n+        return iPath\n+            \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_DbSQLite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_DbSQLite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,162 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import time
+from commons.core.sql.DbSQLite import DbSQLite
+
+class Test_DbSQLite(unittest.TestCase):
+
+    def setUp( self ):
+        self._iDb = DbSQLite("test.db")
+        self._uniqId = "%s" % time.strftime("%Y%m%d%H%M%S")
+        
+    def tearDown( self ):
+        if self._iDb.open():
+            self._iDb.close()
+        self._iDb.delete()
+        self._iDb = None
+        
+    def test_open_True(self):
+        self._iDb.close()
+        self.assertTrue( self._iDb.open(1) )
+
+    def test_open_False(self):
+        self._iDb.close()
+        self._iDb.host = "/toto/toto.db"
+        self.assertFalse( self._iDb.open(1) )
+        self._iDb.host = "test.db"
+
+    def test_updateInfoTable(self):
+        tableName = "dummyTable" + self._uniqId
+        info = "Table_for_test"
+        
+        self._iDb.updateInfoTable(tableName, info)
+        
+        sqlCmd = 'SELECT file FROM info_tables WHERE name = "%s"' % ( tableName )
+        self._iDb.execute( sqlCmd )
+        results = self._iDb.fetchall()
+        obsResult = False
+        if (info,) in results:
+            obsResult = True
+            sqlCmd = 'DELETE FROM info_tables WHERE name = "%s"' % ( tableName )
+            self._iDb.execute( sqlCmd )
+            
+        self.assertTrue( obsResult )
+        
+    def test_doesTableExist_True(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        self.assertTrue( self._iDb.doesTableExist(tableName) )
+
+    def test_dropTable(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % tableName
+        self._iDb.execute( sqlCmd )
+        sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+        self._iDb.execute( sqlCmd )
+        sqlCmd = 'INSERT INTO info_tables VALUES ("%s","")' % tableName
+        self._iDb.execute( sqlCmd )
+        
+        self._iDb.dropTable(tableName)
+        self.assertFalse( self._iDb.doesTableExist(tableName) )
+        
+    def test_doesTableExist_False(self):
+        tableName = "dummyTable" + self._uniqId
+        self.assertFalse( self._iDb.doesTableExist(tableName) )
+        
+    def test_createJobTable_is_table_created(self):
+        self._iDb.createTable("dummyJobTable", "jobs")
+        isTableCreated = self._iDb.doesTableExist("dummyJobTable")
+        self.assertTrue(isTableCreated)
+    
+    def test_createJobTable_field_list(self):
+        self._iDb.createTable("dummyJobTable", "jobs")
+        obsLField = self._iDb.getFieldList("dummyJobTable")
+        expLField = ["jobid", "jobname", "groupid", "command", "launcher", "queue", "status", "time", "node"]
+        self.assertEquals(expLField, obsLField)
+        
+    def test_createTable(self):
+        tableName = "dummyJobTable" + self._uniqId
+        self._iDb.createTable(tableName, "job")
+        obsLField = self._iDb.getFieldList(tableName)
+        expLField = ["jobid", "jobname", "groupid", "command", "launcher", "queue", "status", "time", "node"]
+        self.assertEquals(expLField, obsLField)
+        
+    def test_createTable_with_overwrite_Job(self):
+        tableName = "dummyJobTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % tableName
+        self._iDb.execute( sqlCmd )
+        sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+        self._iDb.execute( sqlCmd )
+        sqlCmd = 'INSERT INTO info_tables VALUES ("%s","")' % tableName
+        self._iDb.execute( sqlCmd )
+        
+        self._iDb.createTable(tableName, "job", True)
+        obsLField = self._iDb.getFieldList(tableName)
+        expLField = ["jobid", "jobname", "groupid", "command", "launcher", "queue", "status", "time", "node"]
+        self.assertEquals(expLField, obsLField)
+        
+    def test_getSize_empty_table(self):
+        tableName = "dummyJobTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        expSize = 0
+        obsSize = self._iDb.getSize(tableName)
+        self.assertEquals( expSize, obsSize )
+        
+    def test_getSize_one_row(self):
+        tableName = "dummyJobTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        sqlCmd = "INSERT INTO %s (dummyColumn) VALUES ('toto')" % tableName
+        self._iDb.execute( sqlCmd )
+        expSize = 1
+        obsSize = self._iDb.getSize(tableName)
+        self.assertEquals( expSize, obsSize )
+        
+    def test_isEmpty_True(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        self.assertTrue(self._iDb.isEmpty(tableName))
+        
+    def test_isEmpty_False(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % (tableName)
+        self._iDb.execute(sqlCmd)
+        sqlCmd = "INSERT INTO %s (dummyColumn) VALUES ('toto')" % tableName
+        self._iDb.execute(sqlCmd)
+        self.assertFalse(self._iDb.isEmpty(tableName))
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
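
The Test_DbSQLite.py tests above follow a create/inspect/drop fixture: setUp opens a throwaway database file, each test builds its own dummy table, and tearDown closes and deletes the file. Purely as a point of reference, here is a minimal, self-contained sketch of the same pattern written against Python's standard sqlite3 module rather than the project's DbSQLite wrapper; the table and file names are illustrative.

import os
import sqlite3
import unittest

class TestSqliteFixtureSketch(unittest.TestCase):

    def setUp(self):
        # Throwaway database file, recreated for every test.
        self._dbFile = "sketch_test.db"
        self._conn = sqlite3.connect(self._dbFile)

    def tearDown(self):
        # Close and delete the file so tests stay independent of each other.
        self._conn.close()
        if os.path.exists(self._dbFile):
            os.remove(self._dbFile)

    def test_create_insert_count(self):
        cur = self._conn.cursor()
        cur.execute("CREATE TABLE dummyTable (dummyColumn TEXT)")
        cur.execute("INSERT INTO dummyTable (dummyColumn) VALUES ('toto')")
        cur.execute("SELECT COUNT(*) FROM dummyTable")
        self.assertEqual(1, cur.fetchone()[0])

if __name__ == "__main__":
    unittest.main()

Deleting the file in tearDown gives each test a clean database, the same isolation the DbSQLite tests aim for when they suffix table names with a timestamp.
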
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_F_JobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_F_JobAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,91 @@
+from commons.core.launcher.WriteScript import WriteScript
+from commons.core.sql.Job import Job
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+import sys
+import stat
+import os
+import time
+import unittest
+import glob
+
+class Test_F_TableJobAdaptator(unittest.TestCase):
+
+    def setUp(self):
+        self._jobTableName = "dummyJobTable"
+        self._iJA = TableJobAdaptatorFactory.createJobInstance()
+
+    def tearDown(self):
+        pass
+    
+    def test_submitJob(self):
+        job1 = self._createJobInstance("job1")
+        self._createLauncherFile(job1, self._iJA)
+        job2 = self._createJobInstance("job2")
+        self._createLauncherFile(job2, self._iJA)
+        job3 = self._createJobInstance("job3")
+        self._createLauncherFile(job3, self._iJA)
+        
+        self._iJA.submitJob( job1, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+        self._iJA.submitJob( job2, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+        self._iJA.submitJob( job3, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+
+        time.sleep(120)
+        
+        expErrorFilePrefix1 = job1.jobname + ".e" 
+        expOutputFilePrefix1 = job1.jobname + ".o"
+        expErrorFilePrefix2 = job2.jobname + ".e" 
+        expOutputFilePrefix2 = job2.jobname + ".o"
+        expErrorFilePrefix3 = job3.jobname + ".e" 
+        expOutputFilePrefix3 = job3.jobname + ".o"
+        
+        lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")
+        lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")
+        lErrorFiles2 = glob.glob(expErrorFilePrefix2 + "*")
+        lOutputFiles2 = glob.glob(expOutputFilePrefix2 + "*")
+        lErrorFiles3 = glob.glob(expErrorFilePrefix3 + "*")
+        lOutputFiles3 = glob.glob(expOutputFilePrefix3 + "*")
+        
+        isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) 
+        isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)
+        isLErrorFileNotEmpty2 = (len(lErrorFiles2) != 0) 
+        isLOutputFileNotEmpty2 = (len(lOutputFiles2) != 0)
+        isLErrorFileNotEmpty3 = (len(lErrorFiles3) != 0) 
+        isLOutputFileNotEmpty3 = (len(lOutputFiles3) != 0)
+        
+        os.system("rm launcherFileTest*.py *.e* *.o*")
+        self.assertTrue(isLErrorFileNotEmpty1 and isLOutputFileNotEmpty1)
+        self.assertTrue(isLErrorFileNotEmpty2 and isLOutputFileNotEmpty2)
+        self.assertTrue(isLErrorFileNotEmpty3 and isLOutputFileNotEmpty3)
+    
+    def test_submit_and_waitJobGroup(self):
+        iJob = self._createJobInstance("test")
+        self._createLauncherFile(iJob, self._iJA)
+        
+        self._iJA.submitJob( iJob, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+        self._iJA.waitJobGroup(iJob.groupid, 0, 2)
+        
+        expErrorFilePrefix1 = iJob.jobname + ".e" 
+        expOutputFilePrefix1 = iJob.jobname + ".o"
+        
+        lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")
+        lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")
+        
+        isLErrorFileExist = (len(lErrorFiles1) != 0) 
+        isLOutputFileExist = (len(lOutputFiles1) != 0)
+        os.system("rm launcherFileTest*.py *.e* *.o*")
+        self.assertTrue(isLErrorFileExist and isLOutputFileExist)
+
+    def _createJobInstance(self, name):
+        lResources = []
+        if os.environ.get("HOSTNAME") == "compute-2-46.local":
+            lResources.append("test=TRUE")
+        return Job(0, name, "test", "", "log = os.system(\"date;sleep 5;date\")", "%s/launcherFileTest_%s.py" % (os.getcwd(), name), lResources=lResources)
+
+    def _createLauncherFile(self, iJob, iJA):
+        iWriteScript = WriteScript(iJob, iJA, os.getcwd(), os.getcwd(), False, True)
+        iWriteScript.run(iJob.command, "", iJob.launcher)
+        os.chmod(iJob.launcher, stat.S_IRWXU+stat.S_IRWXG+stat.S_IRWXO)
+        
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_F_TableJobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_F_TableJobAdaptator.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,185 @@
+from commons.core.launcher.WriteScript import WriteScript
+from commons.core.sql.Job import Job
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+import sys
+import stat
+import os
+import time
+import unittest
+import glob
+
+class Test_F_TableJobAdaptator(unittest.TestCase):
+
+    def setUp(self):
+        self._jobTableName = "dummyJobTable"
+        self._db = DbFactory.createInstance()
+        self._iTJA = TableJobAdaptatorFactory.createInstance(self._db, self._jobTableName)
+
+    def tearDown(self):
+        self._db.dropTable(self._jobTableName)
+        self._db.close()
+    
+    def test_submitJob_with_multiple_jobs(self):
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        job1 = _createJobInstance("job1")
+        _createLauncherFile(job1, self._iTJA)
+        job2 = _createJobInstance("job2")
+        _createLauncherFile(job2, self._iTJA)
+        job3 = _createJobInstance("job3")
+        _createLauncherFile(job3, self._iTJA)
+        
+        self._iTJA.submitJob( job1, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+        self._iTJA.submitJob( job2, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+        self._iTJA.submitJob( job3, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+
+        time.sleep(120)
+        
+        expJobStatus = "finished"
+        obsJobStatus1 = self._iTJA.getJobStatus(job1)
+        obsJobStatus2 = self._iTJA.getJobStatus(job2)
+        obsJobStatus3 = self._iTJA.getJobStatus(job3)
+        
+        self.assertEquals(expJobStatus, obsJobStatus1)
+        self.assertEquals(expJobStatus, obsJobStatus2)
+        self.assertEquals(expJobStatus, obsJobStatus3)
+        
+        expErrorFilePrefix1 = job1.jobname + ".e" 
+        expOutputFilePrefix1 = job1.jobname + ".o"
+        expErrorFilePrefix2 = job2.jobname + ".e" 
+        expOutputFilePrefix2 = job2.jobname + ".o"
+        expErrorFilePrefix3 = job3.jobname + ".e" 
+        expOutputFilePrefix3 = job3.jobname + ".o"
+        
+        lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")
+        lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")
+        lErrorFiles2 = glob.glob(expErrorFilePrefix2 + "*")
+        lOutputFiles2 = glob.glob(expOutputFilePrefix2 + "*")
+        lErrorFiles3 = glob.glob(expErrorFilePrefix3 + "*")
+        lOutputFiles3 = glob.glob(expOutputFilePrefix3 + "*")
+        
+        isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) 
+        isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)
+        isLErrorFileNotEmpty2 = (len(lErrorFiles2) != 0) 
+        isLOutputFileNotEmpty2 = (len(lOutputFiles2) != 0)
+        isLErrorFileNotEmpty3 = (len(lErrorFiles3) != 0) 
+        isLOutputFileNotEmpty3 = (len(lOutputFiles3) != 0)
+        
+        os.system("rm launcherFileTest*.py *.e* *.o*")
+        self.assertTrue(isLErrorFileNotEmpty1 and isLOutputFileNotEmpty1)
+        self.assertTrue(isLErrorFileNotEmpty2 and isLOutputFileNotEmpty2)
+        self.assertTrue(isLErrorFileNotEmpty3 and isLOutputFileNotEmpty3)
+
+    def test_submitJob_job_already_submitted(self):
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        iJob = _createJobInstance("job")
+        self._iTJA.recordJob(iJob)
+        
+        isSysExitRaised = False
+        try:
+            self._iTJA.submitJob(iJob)
+        except SystemExit:
+            isSysExitRaised = True
+        self.assertTrue(isSysExitRaised)
+    
+    def test_waitJobGroup_with_error_job_maxRelaunch_two(self):
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        iJob = _createJobInstance("job")
+        _createLauncherFile(iJob, self._iTJA)
+        
+        self._iTJA.recordJob(iJob)
+        self._iTJA.changeJobStatus(iJob, "error")
+        
+        self._iTJA.waitJobGroup(iJob.groupid, 0, 2)
+        
+        time.sleep(120)
+        
+        expJobStatus = "finished"
+        obsJobStatus1 = self._iTJA.getJobStatus(iJob)
+        
+        self.assertEquals(expJobStatus, obsJobStatus1)
+        
+        expErrorFilePrefix1 = iJob.jobname + ".e" 
+        expOutputFilePrefix1 = iJob.jobname + ".o"
+        
+        lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")
+        lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")
+        
+        isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) 
+        isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)
+        
+        self._iTJA.removeJob(iJob) 
+        os.system("rm launcherFileTest*.py *.e* *.o*")
+        self.assertTrue(isLErrorFileNotEmpty1 and isLOutputFileNotEmpty1)
+
+class Test_F_TableJobAdaptator_SGE(unittest.TestCase):
+
+    def setUp(self):
+        if os.environ["REPET_JOB_MANAGER"].lower() != "sge":
+            print "ERROR: jobs manager is not SGE: REPET_JOB_MANAGER = %s." % os.environ["REPET_JOB_MANAGER"]
+            sys.exit(0)
+        self._jobTableName = "dummyJobTable"
+        self._db = DbFactory.createInstance()
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        self._iTJA = TableJobAdaptatorFactory.createInstance(self._db, self._jobTableName)
+        self._iJob = _createJobInstance("job")
+        _createLauncherFile(self._iJob, self._iTJA)
+
+    def tearDown(self):
+        self._db.dropTable(self._jobTableName)
+        self._db.close()
+
+    def test_waitJobGroup_with_several_nbTimeOut_waiting(self):
+        self._iTJA.recordJob(self._iJob)
+        self._iTJA.changeJobStatus(self._iJob, "running")
+        
+        expMsg = "ERROR: job '%s', supposedly still running, is not handled by SGE anymore\n" % self._iJob.jobid
+        
+        obsError = "obsError.txt"
+        obsErrorHandler = open(obsError, "w")
+        stderrRef = sys.stderr
+        sys.stderr = obsErrorHandler
+        
+        isSysExitRaised = False
+        try:
+            self._iTJA.waitJobGroup(self._iJob.groupid, timeOutPerJob = 3)
+        except SystemExit:
+            isSysExitRaised = True
+           
+        obsErrorHandler.close()
+        
+        obsErrorHandler = open(obsError, "r")
+        obsMsg = obsErrorHandler.readline()
+        obsErrorHandler.close()
+       
+        sys.stderr = stderrRef
+        os.remove(obsError)
+        os.system("rm launcherFileTest*.py")
+        self.assertTrue(isSysExitRaised)
+        self.assertEquals(expMsg, obsMsg)
+         
+    def test_isJobStillHandledBySge_True(self):
+        self._iTJA.submitJob(self._iJob)
+        isJobHandledBySge = self._iTJA.isJobStillHandledBySge(self._iJob.jobid, self._iJob.jobname)
+        os.system("rm launcherFileTest*.py")
+        self.assertTrue(isJobHandledBySge)
+
+    def test_isJobStillHandledBySge_False(self):
+        self._iTJA.recordJob(self._iJob)
+        isJobHandledBySge = self._iTJA.isJobStillHandledBySge(self._iJob.jobid, self._iJob.jobname)
+        os.system("rm launcherFileTest*.py")
+        self.assertFalse(isJobHandledBySge)
+
+def _createJobInstance(name):
+    lResources = []
+    if os.environ.get("HOSTNAME") == "compute-2-46.local":
+        lResources.append("test=TRUE")
+    return Job(0, name, "test", "", "log = os.system(\"date;sleep 5;date\")", "%s/launcherFileTest_%s.py" % (os.getcwd(), name), lResources=lResources)
+
+def _createLauncherFile(iJob, iTJA):
+    iWriteScript = WriteScript(iJob, iTJA, os.getcwd(), os.getcwd())
+    iWriteScript.run(iJob.command, "", iJob.launcher)
+    os.chmod(iJob.launcher, stat.S_IRWXU+stat.S_IRWXG+stat.S_IRWXO)
+        
+if __name__ == "__main__":
+    unittest.main()
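
test_waitJobGroup_with_several_nbTimeOut_waiting above captures the message written to sys.stderr by swapping in a file handle and restoring the original stream afterwards. A compact sketch of that capture pattern as a reusable helper follows; the helper name and scratch file are illustrative and not part of the project.

import os
import sys

def captureStderr(func):
    # Redirect sys.stderr to a scratch file while func runs, restore the
    # original stream, and return the captured text.
    scratch = "captured_stderr.txt"
    stderrRef = sys.stderr
    with open(scratch, "w") as handle:
        sys.stderr = handle
        try:
            func()
        finally:
            sys.stderr = stderrRef
    with open(scratch) as handle:
        captured = handle.read()
    os.remove(scratch)
    return captured

Restoring sys.stderr in a finally block guarantees the stream is put back even if the wrapped call raises, which is exactly the situation the SGE test provokes with SystemExit.
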
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_Job.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_Job.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,30 @@
+import unittest
+from commons.core.sql.Job import Job
+
+class Test_Job(unittest.TestCase):
+
+    def test__eq__(self):
+        self._job = Job(jobid=0, jobname="test", groupid="test", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=1G" )
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=1G" )
+        self.assertEqual( self._job, o ) # same data
+        o =  Job(jobid=1, jobname="test", groupid="test", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different jobid        
+        o =  Job(jobid=0, jobname="test1", groupid="test", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different jobname
+        o =  Job(jobid=0, jobname="test", groupid="test1", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different groupid
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test1",command="test", launcherFile="test", node="test", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different queue        
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test",command="test1", launcherFile="test", node="test", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different command
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test",command="test", launcherFile="test1", node="test", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different launcherFile
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test",command="test", launcherFile="test", node="test1", lResources="mem_free=1G" )
+        self.assertNotEqual( self._job, o ) # different node
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=2G" )
+        self.assertNotEqual( self._job, o ) # different lResources
+        o =  Job(jobid=0, jobname="test", groupid="test", queue="test",command="test", launcherFile="test", node="test", lResources="mem_free=1G", parallelEnvironment="multithread 6" )
+        self.assertNotEqual( self._job, o ) # different parallelEnvironment
+                
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
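
Test_Job.py verifies that Job equality is sensitive to every constructor argument. A minimal sketch of a value object with the same field-by-field equality semantics is shown below; the class is hypothetical and simplified, not the project's Job implementation.

class JobSketch(object):

    def __init__(self, jobid, jobname, groupid, queue, command, launcherFile, node, lResources, parallelEnvironment=""):
        # Field names mirror the keyword arguments used in the test above.
        self.jobid = jobid
        self.jobname = jobname
        self.groupid = groupid
        self.queue = queue
        self.command = command
        self.launcherFile = launcherFile
        self.node = node
        self.lResources = lResources
        self.parallelEnvironment = parallelEnvironment

    def __eq__(self, other):
        # Equal only if the other object is a JobSketch and every attribute matches.
        return isinstance(other, JobSketch) and self.__dict__ == other.__dict__

    def __ne__(self, other):
        # Explicit __ne__ is needed under Python 2, where it is not derived from __eq__.
        return not self.__eq__(other)

Comparing self.__dict__ keeps the equality definition in sync with the constructor, so a field added later, such as parallelEnvironment, automatically takes part in the comparison, which is the behaviour the last assertion in the test expects.
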
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableBinPathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableBinPathAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,1244 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import unittest\n+import os\n+import time\n+from commons.core.sql.TableBinPathAdaptator import TableBinPathAdaptator\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbFactory import DbFactory\n+\n+class Test_TableBinPathAdaptator( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S") , os.getpid())\n+        self._db = DbFactory.createInstance()\n+        self._table = "dummyPathTable_%s" % self._uniqId\n+        self._table_idx = "dummyPathTable_%s_idx" % self._uniqId\n+        \n+    def tearDown( self ):\n+        self._db.dropTable(self._table)\n+        self._db.dropTable(self._table_idx)\n+        self._db.close()\n+ \n+    #TODO: strand ?!? 
How does it work ?\n+    def test_insert_QryRevSbjDir( self ):\n+        tuple = ("1", "chr1", "10", "25", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p1 = Path()\n+        p1.setFromTuple(tuple)\n+\n+        tuple = ("1", "chr1", "250", "100", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p2 = Path()\n+        p2.setFromTuple(tuple)\n+        \n+        tuple = ("2", "chr1", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+        p3 = Path()\n+        p3.setFromTuple(tuple)\n+        \n+        tuple = ("4", "chr5", "140", "251", "TE5", "140", "251", "2e-14", "14", "73.1")\n+        p4 = Path()\n+        p4.setFromTuple(tuple)\n+        \n+        self._db.createTable( self._table, "path" )\n+        self._db.createBinPathTable(self._table, True)\n+        self._tpA = TableBinPathAdaptator( self._db, self._table )\n+        self._tpA.insert(p1)\n+        self._tpA.insert(p2)\n+        self._tpA.insert(p3)\n+        self._tpA.insert(p4)\n+        \n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._db.execute( sqlCmd )\n+        obsPathTuple = self._db.cursor.fetchall()\n+        expPathTuple = ((1, "chr1", 10, 25, "TE1", 11, 17, 1e-18, 20, 87.4),\n+                        (1, "chr1", 100, 250, "TE1", 17, 11, 1e-18, 20, 87.4),\n+                        (2, "chr1", 15, 30, "TE2", 10, 13, 5e-24, 34, 93.1),\n+                        (4, "chr5", 140, 251, "TE5", 140, 251, 2e-14, 14, 73.1),)\n+        self.assertEquals(expPathTuple, obsPathTuple)\n+\n+        sqlCmd = "SELECT * FROM %s_idx" % ( self._table )\n+        self._db.execute( sqlCmd )\n+        obsPathTuple = self._db.cursor'..b'uple(tuple)\n+        \n+        tuple = ("3", "chr1", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+        p3 = Path()\n+        p3.setFromTuple(tuple)\n+        \n+        self._db.createTable( self._table, "path" )\n+        self._db.createBinPathTable(self._table, True)\n+        self._tpA = TableBinPathAdaptator( self._db, self._table )\n+        self._tpA.insert(p1)\n+        self._tpA.insert(p2)\n+        self._tpA.insert(p3)\n+        \n+        expLSet = []\n+        obsLSet = self._tpA.getSetListOverlappingQueryCoord(\'chr1\', 5000, 6000)\n+        \n+        self.assertEquals(expLSet, obsLSet)\n+        \n+    def test_getSetListOverlappingQueryCoord_one_included_and_two_chain(self):\n+        tuple = ("1", "chr1", "10", "25", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p1 = Path()\n+        p1.setFromTuple(tuple)\n+\n+        tuple = ("2", "chr1", "100", "250", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p2 = Path()\n+        p2.setFromTuple(tuple)\n+\n+        tuple = ("2", "chr1", "1000", "2500", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p3 = Path()\n+        p3.setFromTuple(tuple)\n+\n+        tuple = ("3", "chr1", "50", "150", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p4 = Path()\n+        p4.setFromTuple(tuple)\n+        \n+        tuple = ("4", "chr1", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+        p5 = Path()\n+        p5.setFromTuple(tuple)\n+        \n+        self._db.createTable( self._table, "path" )\n+        self._db.createBinPathTable(self._table, True)\n+        self._tpA = TableBinPathAdaptator( self._db, self._table )\n+        self._tpA.insert(p1)\n+        self._tpA.insert(p2)\n+        self._tpA.insert(p3)\n+        self._tpA.insert(p4)\n+        self._tpA.insert(p5)\n+        \n+        s2 = Set()\n+        s2.setFromTuple(("2","TE1","chr1","100","250"))\n+        s4 = 
Set()\n+        s4.setFromTuple(("3","TE1","chr1","50","150"))\n+        expLSet = [s2, s4]\n+        obsLSet = self._tpA.getSetListOverlappingQueryCoord(\'chr1\', 95, 300)\n+        \n+        self.assertEquals(expLSet, obsLSet)\n+        \n+    def test_getIdList( self ):\n+        p1 = Path()\n+        p1.setFromString( "1\\tchr1\\t1\\t10\\tTE1\\t11\\t17\\t1e-20\\t30\\t90.2\\n" )\n+        p2 = Path()\n+        p2.setFromString( "2\\tchr1\\t2\\t9\\tTE2\\t10\\t13\\t1e-20\\t30\\t90.2\\n" )\n+        p3 = Path()\n+        p3.setFromString( "2\\tchr1\\t12\\t19\\tTE2\\t15\\t22\\t1e-10\\t40\\t94.2\\n" )\n+        p4 = Path()\n+        p4.setFromString( "3\\tchr2\\t8\\t13\\tTE1\\t11\\t17\\t1e-20\\t30\\t90.2\\n" )\n+        \n+        self._db.createTable( self._table, "path" )\n+        self._db.createBinPathTable(self._table, True)\n+        self._tpA = TableBinPathAdaptator( self._db, self._table )\n+        \n+        lPath = [ p1, p2, p3, p4]\n+        self._tpA.insertList(lPath)\n+        \n+        expList = [ 1, 2, 3 ]\n+        obsList = self._tpA.getIdList()\n+        \n+        self.assertEqual( expList, obsList )\n+        \n+    def test_getQueryList(self):\n+        tuple = ("1", "chr1", "10", "25", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p1 = Path()\n+        p1.setFromTuple(tuple)\n+\n+        tuple = ("2", "chr1", "100", "250", "TE1", "11", "17", "1e-18", "20", "87.4")\n+        p2 = Path()\n+        p2.setFromTuple(tuple)\n+        \n+        tuple = ("3", "chr2", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+        p3 = Path()\n+        p3.setFromTuple(tuple)\n+        \n+        self._db.createTable( self._table, "path" )\n+        self._db.createBinPathTable(self._table, True)\n+        self._tpA = TableBinPathAdaptator( self._db, self._table )\n+        self._tpA.insert(p1)\n+        self._tpA.insert(p2)\n+        self._tpA.insert(p3)\n+        \n+        expList = [ "chr1", "chr2" ]\n+        obsList = self._tpA.getQueryList()\n+        self.assertEqual( expList, obsList )\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableBinPathAdaptator ) )\n+if __name__ == \'__main__\':\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableBinSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableBinSetAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,290 @@\n+import unittest\n+import os\n+import time\n+from commons.core.sql.TableBinSetAdaptator import TableBinSetAdaptator\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbFactory import DbFactory\n+\n+class Test_TableBinSetAdaptator(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S") , os.getpid())\n+        self._iDb = DbFactory.createInstance()\n+        radicalTableName = "dummySetTable"\n+        self._tableName = "%s_%s" % (radicalTableName, self._uniqId)\n+        self._tableName_bin = "%s_idx" % self._tableName\n+        self._setFileName = "dummySetFile_%s" % self._uniqId\n+        setF = open( self._setFileName, "w" )\n+        setF.write("1\\tseq1\\tchr1\\t1900\\t3900\\n")\n+        setF.write("2\\tseq2\\tchr1\\t2\\t9\\n")\n+        setF.write("3\\tseq3\\tchr1\\t8\\t13\\n")\n+        setF.close()\n+        self._iDb.createTable(self._tableName, "set", self._setFileName)\n+        self._iTableBinSetAdaptator = TableBinSetAdaptator(self._iDb, self._tableName)\n+       \n+    def tearDown(self):\n+        self._iDb.dropTable( self._tableName )\n+        self._iDb.dropTable( self._tableName_bin )\n+        self._iDb.close()\n+        if os.path.exists(self._setFileName):\n+            os.remove(self._setFileName)\n+        \n+    def test_insASetInSetAndBinTable(self):\n+        iSet = Set(1, "set1", "seq1", 2, 1)\n+        self._iDb.createBinSetTable(self._tableName, True)\n+        self._iTableBinSetAdaptator.insASetInSetAndBinTable(iSet)\n+        expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (1L, 1000.0, \'seq1\', 1L, 2L, 0L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (1L, \'set1\', \'seq1\', 2L, 1L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInSetTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+        \n+    def test_insASetInSetAndBinTable_delayedCase(self):\n+        iSet = Set(1, "set1", "seq1", 2, 1)\n+        self._iDb.createBinSetTable(self._tableName, True)\n+        self._iTableBinSetAdaptator.insASetInSetAndBinTable(iSet, True)\n+        expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (1L, 1000.0, \'seq1\', 1L, 2L, 0L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (1L, \'set1\', \'seq1\', 2L, 1L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInSetTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+        \n+    def test_deleteFromIdFromSetAndBinTable(self):\n+        
self._iDb.createBinSetTable(self._tableName, True)\n+        self._iTableBinSetAdaptator.deleteFromIdFromSetAndBinTable(2)\n+        expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (3L, \'seq3\', \'chr1\', 8L, 13L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ '..b'        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (5L, \'seq5\', \'chr1\', 1L, 13L), (4L, \'seq4\', \'chr1\', 100L, 390L) )\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInSetTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+\n+    def test_insertListInSetAndBinTableAndRemoveOverlaps(self):\n+        iSet1 = Set(1, "seq4", "chr1", 100, 390)\n+        iSet2 = Set(2, "seq5", "chr1", 1, 13)\n+        lSet = [iSet1, iSet2]\n+        self._iDb.createBinSetTable(self._tableName, True)\n+        self._iTableBinSetAdaptator.insertListInSetAndBinTableAndRemoveOverlaps(lSet)\n+        expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (4L, 1000.0, \'chr1\', 100L, 390L, 1L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (4L, \'seq4\', \'chr1\', 100L, 390L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInSetTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+\n+    def test_insertListInSetAndBinTableAndRemoveOverlaps_Without_Overlaps(self):\n+        iSet1 = Set(1, "seq4", "chr1", 100, 390)\n+        iSet2 = Set(2, "seq5", "chr1", 50, 65)\n+        lSet = [iSet1, iSet2]\n+        self._iDb.createBinSetTable(self._tableName, True)\n+        self._iTableBinSetAdaptator.insertListInSetAndBinTableAndRemoveOverlaps(lSet)\n+        expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (4L, 1000.0, \'chr1\', 100L, 390L, 1L), (5L, 1000.0, \'chr1\', 50L, 65L, 1L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (4L, \'seq4\', \'chr1\', 100L, 390L), (5L, \'seq5\', \'chr1\', 50L, 65L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInSetTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInSetTable, 
obsTupleInSetTable)\n+\n+    def test_insertListInSetAndBinTableAndRemoveOverlaps_With_Only_Overlaps(self):\n+        iSet1 = Set(1, "seq4", "chr1", 1, 5)\n+        iSet2 = Set(2, "seq5", "chr1", 8, 13)\n+        lSet = [iSet1, iSet2]\n+        self._iDb.createBinSetTable(self._tableName, True)\n+        self._iTableBinSetAdaptator.insertListInSetAndBinTableAndRemoveOverlaps(lSet)\n+        expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInBinTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+        expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+        self._iDb.execute( sqlCmd )\n+        obsTupleInSetTable = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+                          \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableJobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableJobAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,640 @@\n+import unittest\n+import sys\n+import os\n+import time\n+#import stat\n+#import threading\n+from commons.core.sql.DbMySql import DbMySql\n+#from commons.core.sql.DbSQLite import DbSQLite\n+from commons.core.sql.Job import Job\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+\n+#class Test_TableJobAdaptator_SQLite( unittest.TestCase ):\n+#        \n+#    def setUp(self):\n+#        self._jobTableName = "dummyJobTable"\n+#        self._dbName = "test.db"\n+#        self._db = DbSQLite(self._dbName)\n+#        self._iTJA = TableJobAdaptator(self._db, self._jobTableName)\n+#        if not self._db.doesTableExist(self._jobTableName):\n+#            self._db.createJobTable(self._jobTableName)\n+#        self._iJob = self._createJobInstance()\n+#        \n+#    def tearDown(self):\n+#        self._iTJA = None\n+#        self._db.close()\n+##        self._db.delete()\n+#        \n+##    def test_recordJob(self):\n+##        self._iTJA.recordJob(self._iJob)\n+##        qryParams = "SELECT jobid, groupid, command, launcher, queue, status, node FROM " + self._jobTableName + " WHERE jobid = ?" \n+##        params = (self._iJob.jobid,)\n+##        self._db.execute(qryParams, params)\n+##        tObs = self._db.fetchall()[0]\n+##        tExp =(self._iJob.jobid, self._iJob.groupid, self._iJob.command, self._iJob.launcher, self._iJob.queue, "waiting", "?")\n+##        self.assertEquals(tExp,tObs)\n+##    \n+##    def test_removeJob(self):\n+##        self._iTJA.recordJob(self._iJob)\n+##        self._iTJA.removeJob(self._iJob)\n+##        self.assertTrue(self._db.isEmpty(self._jobTableName))\n+##        \n+##    def test_getJobStatus(self):\n+##        self._iTJA.recordJob(self._iJob)\n+##        expStatus = "waiting"\n+##        obsStatus = self._iTJA.getJobStatus(self._iJob)\n+##        self.assertEquals(expStatus, obsStatus)\n+##        \n+##    def test_getJobStatus_no_job(self):\n+##        expStatus = "unknown"\n+##        obsStatus = self._iTJA.getJobStatus(self._iJob)\n+##        self.assertEquals(expStatus, obsStatus)\n+##\n+##    def test_getJobStatus_no_name(self):\n+##        iJob = Job( self._jobTableName, 20, "", "groupid", "queue", "command", "launcherFile", "node", "lResources" ) \n+##        expStatus = "unknown"\n+##        obsStatus = self._iTJA.getJobStatus(iJob)\n+##        self.assertEquals(expStatus, obsStatus)\n+##        \n+##    def test_getJobStatus_two_jobs(self):\n+##        # Warning : this case will not append, because recordJob() begin by removeJob()\n+##        sqlCmd = "INSERT INTO %s" % self._iJob.tablename\n+##        sqlCmd += " VALUES ("\n+##        sqlCmd += " \\"%s\\"," % self._iJob.jobid\n+##        sqlCmd += " \\"%s\\"," % self._iJob.jobname\n+##        sqlCmd += " \\"%s\\"," % self._iJob.groupid\n+##        sqlCmd += " \\"%s\\"," % self._iJob.command.replace("\\"","\\\'")\n+##        sqlCmd += " \\"%s\\"," % self._iJob.launcher\n+##        sqlCmd += " \\"%s\\"," % self._iJob.queue\n+##        sqlCmd += " \\"waiting\\","\n+##        sqlCmd += " \\"%s\\"," % time.strftime( "%Y-%m-%d %H:%M:%S" )\n+##        sqlCmd += " \\"?\\" );"\n+##        self._db.execute(sqlCmd)\n+##        self._db.execute(sqlCmd)\n+##        \n+##        expError = "expError.txt"\n+##        expErrorHandler = open(expError, "w")\n+##        expErrorHandler.write("ERROR while getting job status: non-unique jobs\\n")\n+##        expErrorHandler.close()\n+##        obsError = 
"obsError.txt"\n+##        obsErrorHandler = open(obsError, "w")\n+##        stderrRef = sys.stderr\n+##        sys.stderr = obsErrorHandler\n+##        \n+##        isSysExitRaised = False\n+##        try:\n+##            self._iTJA.getJobStatus(self._iJob)\n+##        except SystemExit:\n+##            isSysExitRaised = True\n+##           \n+##        obsErrorHandler.close()\n+##        \n+##        self.assertTrue(isSysExitRaised)\n+##        self.assertTrue(FileUtils.are2FilesIdentical(expError, obsError))\n+##        sys.stderr = stderrRef\n+##        os.remove(obs'..b':\n+        obs = False\n+        self._iTJA.recordJob(self._iJob)\n+        self._iTJA.changeJobStatus(self._iJob, "error")\n+        try:\n+            self._iTJA.waitJobGroup(self._iJob.groupid, 0, 0)\n+        except SystemExit:\n+            obs = True\n+        self.assertTrue(obs)\n+        \n+    #TODO: how to test ?!?\n+#    def test_waitJobGroup_with_error_relaunch(self):\n+#        iJob = Job(0, "job1", "groupid", "queue.q", "command", "launcherFile", "node", ["mem_free=10M", "test=TRUE"])\n+#        obs = False\n+#        self._iTJA.recordJob(iJob)\n+#        self._iTJA.changeJobStatus(iJob, "error")\n+#        try:\n+#            self._iTJA.waitJobGroup(iJob.groupid)\n+#        except SystemExit:\n+#            obs = True\n+#        self.assertTrue(obs)\n+    \n+    def test_updateJobIdInDB(self):\n+        self._iTJA.recordJob(self._iJob)\n+        self._iTJA.updateJobIdInDB(self._iJob, 1000)\n+        qryParams = "SELECT jobid FROM " + self._jobTableName + " WHERE jobname = %s AND queue = %s AND groupid = %s" \n+        params = (self._iJob.jobname, self._iJob.queue, self._iJob.groupid)\n+        self._db.execute(qryParams, params)\n+        tObs = self._db.fetchall()[0]\n+        tExp =(1000,)\n+        self.assertEquals(tExp,tObs)\n+\n+    def test_getNodesListByGroupId(self):\n+        iJob1 = Job(0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources")\n+        iJob2 = Job(1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources")\n+        iJob3 = Job(2, "job3", "groupid", "queue", "command", "launcherFile", "node2", "lResources")\n+        iJob4 = Job(3, "job4", "groupid2", "queue", "command", "launcherFile", "node3", "lResources")\n+        self._insertJob(iJob1)\n+        self._insertJob(iJob2)\n+        self._insertJob(iJob3)\n+        self._insertJob(iJob4)\n+        expNodeList = ["node1", "node2"]\n+        obsNodeList = self._iTJA.getNodesListByGroupId("groupid")\n+        self.assertEquals(expNodeList, obsNodeList)\n+\n+    def test_getNodesListByGroupId_empty_list(self):\n+        iJob1 = Job(0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources")\n+        iJob2 = Job(1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources")\n+        iJob3 = Job(2, "job3", "groupid32", "queue", "command", "launcherFile", "node3", "lResources")\n+        self._insertJob(iJob1)\n+        self._insertJob(iJob2)\n+        self._insertJob(iJob3)\n+        expNodeList = []\n+        obsNodeList = self._iTJA.getNodesListByGroupId("groupid3")\n+        self.assertEquals(expNodeList, obsNodeList)\n+        \n+# TODO test TableJobAdaptator._createJobInstance  TableJobAdaptator._createLauncherFile\n+    def _insertJob(self, iJob):\n+        self._iTJA = TableJobAdaptatorFactory.createInstance(self._db, self._jobTableName)        \n+        self._iTJA.removeJob(iJob)\n+        sqlCmd = "INSERT INTO %s" % 
self._jobTableName\n+        sqlCmd += " VALUES ("\n+        sqlCmd += " \\"%s\\"," % iJob.jobid\n+        sqlCmd += " \\"%s\\"," % iJob.jobname\n+        sqlCmd += " \\"%s\\"," % iJob.groupid\n+        sqlCmd += " \\"%s\\"," % iJob.launcher\n+        sqlCmd += " \\"%s\\"," % iJob.queue\n+        sqlCmd += " \\"%s\\"," % iJob.lResources\n+        sqlCmd += " \\"waiting\\","\n+        sqlCmd += " \\"%s\\"," % time.strftime("%Y-%m-%d %H:%M:%S")\n+        sqlCmd += " \\"%s\\" );" % iJob.node\n+        self._db.execute(sqlCmd)\n+\n+    def _createJobInstance(self):\n+        return Job(0, "job1", "groupid", "", "command", "launcherFile", "node", ["mem_free=10M"])\n+\n+#class RecordJobThread(threading.Thread):\n+#\n+#    def __init__(self, iTableJobAdaptator, iJob):\n+#        threading.Thread.__init__(self)\n+#        self._iTableJobAdaptator = iTableJobAdaptator\n+#        self._iJob = iJob\n+#        \n+#    def run(self):\n+#        self._iTableJobAdaptator.recordJob(self._iJob)\n+#        #self._iTableJobAdaptator.submitJob(self._iJob)\n+                                             \n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableJobAdaptatorFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableJobAdaptatorFactory.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,27 @@
+import os
+import unittest
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+from commons.core.sql.DbFactory import DbFactory
+
+class Test_TableJobAdaptatorFactory(unittest.TestCase):
+
+    def test_createInstance_SGE(self):
+        REPET_JOB_MANAGER_Initial_Value = os.environ["REPET_JOB_MANAGER"]
+        os.environ["REPET_JOB_MANAGER"] = "SGE"
+        instance = TableJobAdaptatorFactory.createInstance(DbFactory.createInstance(), "dummyJobTable")
+        obsClassName = instance.__class__.__name__
+        expClassName = "TableJobAdaptatorSGE"
+        os.environ["REPET_JOB_MANAGER"] = REPET_JOB_MANAGER_Initial_Value
+        self.assertEquals(expClassName, obsClassName)
+
+    def test_createInstance_Torque(self):
+        REPET_JOB_MANAGER_Initial_Value = os.environ["REPET_JOB_MANAGER"]
+        os.environ["REPET_JOB_MANAGER"] = "Torque"
+        instance = TableJobAdaptatorFactory.createInstance(DbFactory.createInstance(), "dummyJobTable")
+        obsClassName = instance.__class__.__name__
+        expClassName = "TableJobAdaptatorTorque"
+        os.environ["REPET_JOB_MANAGER"] = REPET_JOB_MANAGER_Initial_Value
+        self.assertEquals(expClassName, obsClassName)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
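
Test_TableJobAdaptatorFactory.py checks that the adaptator class is chosen from the REPET_JOB_MANAGER environment variable. A minimal sketch of such an environment-keyed factory follows; the class names are illustrative and the real TableJobAdaptatorFactory may behave differently.

import os

class SgeAdaptatorSketch(object):
    pass

class TorqueAdaptatorSketch(object):
    pass

class AdaptatorFactorySketch(object):
    # Map the lower-cased environment value to the class to instantiate.
    _registry = {"sge": SgeAdaptatorSketch, "torque": TorqueAdaptatorSketch}

    @classmethod
    def createInstance(cls):
        manager = os.environ.get("REPET_JOB_MANAGER", "").lower()
        try:
            return cls._registry[manager]()
        except KeyError:
            raise ValueError("unsupported job manager: %r" % manager)

Keeping the lookup in a dictionary makes supporting another scheduler a one-line change and keeps the error path explicit when the variable is unset or misspelled.
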
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableMapAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableMapAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,250 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import time\n+import os\n+from commons.core.sql.TableMapAdaptator import TableMapAdaptator\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Map import Map\n+from commons.core.coord.Set import Set\n+\n+\n+class Test_TableMapAdaptator( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+        configF = open(self._configFileName, "w" )\n+        configF.write( "[repet_env]\\n" )\n+        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+        configF.close()\n+        self._iDb = DbMySql( cfgFileName=self._configFileName )\n+        self._table = "dummyMapTable_%s" % ( self._uniqId )\n+        self._tMapA = TableMapAdaptator( self._iDb, self._table )\n+        \n+        \n+    def tearDown( self ):\n+        self._uniqId = None\n+        self._iDb.dropTable( self._table )\n+        self._iDb.close()\n+        self._table = None\n+        self._tMapA = None\n+        os.remove( self._configFileName )\n+        self._configFileName = ""\n+        \n+##################################################################################\n+################## Tests for methods in ITableMapAdaptator #######################\n+##################################################################################    \n+\n+    def test_getEndFromSeqName(self):\n+        self._iDb.createTable( self._table, "map", "" )\n+        map1 = Map()\n+        
map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+        map2 = Map()\n+        map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+        for m in [ map1, map2]:\n+            self._tMapA.insert(m)\n+        expEnd = 20\n+        obsEnd = self._tMapA.getEndFromSeqName("desc2")\n+        self.assertEqual(expEnd, obsEnd)     \n+        \n+\n+    def test_getMapListFromSeqName( self ):\n+        self._iDb.createTable( self._table, "map", "" )\n+        map1 = Map()\n+        map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+        map2 = Map()\n+      '..b'        map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+        map2 = Map()\n+        map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+        map3 = Map()\n+        map3.setFromString( "name2\\tdesc2\\t1\\t50\\n" )\n+        for m in [ map1, map2, map3 ]: self._tMapA.insert( m )\n+        explMap = [Set( 1,"name2", "desc2", 1, 20), Set( 2,"name2", "desc2", 1, 50)]\n+        obslMap = self._tMapA.getSetListFromSeqName("name2")\n+        self.assertEqual( explMap, obslMap )\n+        \n+    def test_getMapListOverlappingCoord( self ):\n+        self._iDb.createTable( self._table, "map", "" )\n+        map1 = Map()\n+        map1.setFromString( "name1\\tdesc1\\t70\\t120\\n" )\n+        map2 = Map()\n+        map2.setFromString( "name2\\tdesc1\\t1\\t20\\n" )\n+        map3 = Map()\n+        map3.setFromString( "name3\\tdesc1\\t1\\t50\\n" ) \n+        for m in [ map1, map2, map3 ]: self._tMapA.insert( m )\n+        explMap = [Map("name2", "desc1", 1, 20), Map("name3", "desc1", 1, 50)]\n+        obslMap = self._tMapA.getMapListOverlappingCoord("desc1", 1, 60)\n+        self.assertEqual( explMap, obslMap )\n+        \n+    def test_getSetListOverlappingCoord( self ):\n+        self._iDb.createTable( self._table, "map", "" )\n+        map1 = Map()\n+        map1.setFromString( "name1\\tdesc1\\t70\\t120\\n" )\n+        map2 = Map()\n+        map2.setFromString( "name2\\tdesc1\\t1\\t20\\n" )\n+        map3 = Map()\n+        map3.setFromString( "name3\\tdesc1\\t1\\t50\\n" ) \n+        for m in [ map1, map2, map3 ]: self._tMapA.insert( m )\n+        explSet = [Set(1, "name2", "desc1", 1, 20), Set(2, "name3", "desc1", 1, 50)]\n+        obslSet = self._tMapA.getSetListOverlappingCoord("desc1", 1, 60)\n+        self.assertEqual( explSet, obslSet )\n+        \n+##################################################################################\n+########################### Tests for other methods ##############################\n+##################################################################################\n+        \n+    def test_getListOfAllMaps( self ):\n+        self._iDb.createTable( self._table, "map", "" )\n+        map1 = Map()\n+        map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+        map2 = Map()\n+        map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+        for m in [ map1, map2 ]: self._tMapA.insert( m )\n+        lExp = [ map1, map2 ]\n+        lObs = self._tMapA.getListOfAllMaps()\n+        self.assertEqual( lObs, lExp )\n+        \n+    def test_getDictPerNameFromMapFile( self ):\n+        self._iDb.createTable( self._table, "map", "" )\n+        iMap1 = Map( "chunk1", "chromosome1", 1, 100 )\n+        iMap2 = Map( "chunk2", "chromosome1", 91, 190 )\n+        iMap3 = Map( "chunk3", "chromosome2", 1, 100 )\n+        iMap4 = Map( "chunk1", "chromosome1", 1, 100 )  # redundant with iMap1\n+        for iMap in [ iMap1, iMap2, iMap3, iMap4 ]:\n+            self._tMapA.insert( iMap )\n+        dExp = { 
"chunk1": iMap1, "chunk2": iMap2, "chunk3": iMap3 }\n+        dObs = self._tMapA.getDictPerName()\n+        self.assertEquals( dExp, dObs )\n+        \n+#TODO: Check getListFromSeqName method: uses name instead of seqname\n+#    def test_getMapListFromSeqNameList( self ):\n+#        self._iDb.createTable( self._table, "map", "" )\n+#        map1 = Map()\n+#        map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+#        map2 = Map()\n+#        map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+#        map3 = Map()\n+#        map3.setFromString( "name3\\tdesc2\\t1\\t10\\n" )\n+#        map4 = Map()\n+#        map4.setFromString( "name4\\tdesc3\\t10\\t200\\n" )\n+#        for m in [map1, map2, map3, map4]: self._tMapA.insert( m )\n+#        \n+#        lMapToRetrieve = ["name1", "desc2"]\n+#        lExp = [map1, map2, map3]\n+#        lObs = self._tMapA.getMapListFromSeqNameList(lMapToRetrieve)\n+#        self.assertEqual( lObs, lExp )\n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableMapAdaptator ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableMatchAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableMatchAdaptator.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,264 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import time\n+import os\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Match import Match\n+from commons.core.sql.TableMatchAdaptator import TableMatchAdaptator\n+\n+\n+class Test_TableMatchAdaptator( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S") , os.getpid())\n+        self._configFileName = "dummyConfigFile_%s" % self._uniqId\n+        self._iDb = DbMySql()\n+        self._table = "dummyMatchTable_%s" % self._uniqId\n+        self._tMatchA = TableMatchAdaptator( self._iDb, self._table )\n+        \n+    def tearDown( self ):\n+        self._uniqId = None\n+        self._iDb.dropTable( self._table )\n+        self._iDb.close()\n+        self._table = None\n+        self._tMatchA = None\n+        \n+##################################################################################\n+################## Tests for methods in ITableMatchAdaptator #####################\n+##################################################################################  \n+    def test_insert(self):\n+        match = Match()  \n+\n+        tuple = ("QName1", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+       \n+        match.setFromTuple(tuple)\n+                              \n+        self._iDb.createTable( self._table, "match", "" )        \n+        self._tMatchA.insert( match, False )\n+        \n+        expTMatchTuple = ((\'QName1\', 1L, 5L, 5L, 0.1, 0.2, \'SName1\', 5L, 25L, 20L, 0.15, 1e-20, 15L, 87.2, 1L),)\n+        \n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._iDb.execute( sqlCmd )\n+        obsTmatchTuple = self._iDb.cursor.fetchall()\n+        \n+        self.assertEquals( expTMatchTuple, obsTmatchTuple )\n+        \n+\n+    def test_insert_empty_match(self):\n+        match = Match()  \n+\n+       
 tuple = ("", -1, -1, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+       \n+        match.setFromTuple(tuple)\n+                              \n+        self._iDb.createTable( self._table, "match", "" )        \n+        self._tMatchA.insert( match, False )\n+        \n+        expTMatchTuple = ()\n+        \n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._iDb.execute( sqlCmd )\n+        obsTmatchTuple = self._iDb.cursor.fetchall()\n+        \n+        self.assertEquals( expTMatchTuple, obsTmatchTuple'..b' = Match()\n+        match1.setFromTuple( tuple1 )\n+        match2 = Match()\n+        match2.setFromTuple( tuple2 )\n+        match3 = Match()\n+        match3.setFromTuple( tuple3 )\n+        match4 = Match()\n+        match4.setFromTuple( tuple4 )\n+        lMatch = [ match1, match2, match3, match4 ]\n+        expListMatch = [ match1 ]\n+        self._tMatchA.insertList(lMatch)\n+        \n+        obsListMatch = self._tMatchA.getMatchListFromId(1)\n+        \n+        self.assertEquals(expListMatch, obsListMatch)\n+        \n+        \n+    def test_getMatchListFromIdList_empty_id_list( self ):\n+        self._iDb.createTable( self._table, "match", "" )\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        tuple2 = ("QName", 1, 6, 6, 0.2, 0.1, "SName", 6, 26, 10, 0.18, 1e-30, 18, 85.2, 2)\n+        tuple3 = ("QName", 1, 7, 8, 0.1, 0.2, "SName", 5, 20, 15, 0.20, 1e-25, 20, 89.0, 3)\n+        tuple4 = ("QName", 1, 8, 8, 0.1, 0.1, "SName", 5, 15, 10, 0.17, 1e-23, 14, 89.5, 4)\n+        match1 = Match()\n+        match1.setFromTuple( tuple1 )\n+        match2 = Match()\n+        match2.setFromTuple( tuple2 )\n+        match3 = Match()\n+        match3.setFromTuple( tuple3 )\n+        match4 = Match()\n+        match4.setFromTuple( tuple4 )\n+        lMatch = [ match1, match2, match3, match4 ]\n+        self._tMatchA.insertList(lMatch)\n+        \n+        expList = []\n+        obsList = self._tMatchA.getMatchListFromIdList([])\n+        self.assertEquals(expList, obsList)\n+        \n+        \n+    def test_getMatchListFromIdList( self ):\n+        self._iDb.createTable( self._table, "match", "" )\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        tuple2 = ("QName", 1, 6, 6, 0.2, 0.1, "SName", 6, 26, 10, 0.18, 1e-30, 18, 85.2, 2)\n+        tuple3 = ("QName", 1, 7, 8, 0.1, 0.2, "SName", 5, 20, 15, 0.20, 1e-25, 20, 89.0, 3)\n+        tuple4 = ("QName", 1, 8, 8, 0.1, 0.1, "SName", 5, 15, 10, 0.17, 1e-23, 14, 89.5, 4)\n+        match1 = Match()\n+        match1.setFromTuple( tuple1 )\n+        match2 = Match()\n+        match2.setFromTuple( tuple2 )\n+        match3 = Match()\n+        match3.setFromTuple( tuple3 )\n+        match4 = Match()\n+        match4.setFromTuple( tuple4 )\n+        lMatch = [ match1, match2, match3, match4 ]\n+        self._tMatchA.insertList(lMatch)\n+        \n+        lObs = self._tMatchA.getMatchListFromIdList((1, 2, 3))\n+        \n+        lExp = [match1, match2, match3]\n+        self.assertEquals(lExp, lObs)\n+        \n+    def test_getListOfAllMatches( self ):\n+        self._iDb.createTable( self._table, "match", "" )\n+        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+        tuple2 = ("QName", 1, 6, 6, 0.2, 0.1, "SName", 6, 26, 10, 0.18, 1e-30, 18, 85.2, 2)\n+        tuple3 = ("QName", 1, 7, 8, 0.1, 0.2, "SName", 5, 20, 15, 0.20, 1e-25, 20, 89.0, 3)\n+        tuple4 
= ("QName", 1, 8, 8, 0.1, 0.1, "SName", 5, 15, 10, 0.17, 1e-23, 14, 89.5, 4)\n+        match1 = Match()\n+        match1.setFromTuple( tuple1 )\n+        match2 = Match()\n+        match2.setFromTuple( tuple2 )\n+        match3 = Match()\n+        match3.setFromTuple( tuple3 )\n+        match4 = Match()\n+        match4.setFromTuple( tuple4 )\n+        lMatch = [ match1, match2, match3, match4 ]\n+        expList = [ match1, match2, match3, match4 ]\n+        self._tMatchA.insertList(lMatch)\n+\n+        obsList = self._tMatchA.getListOfAllMatches()\n+        self.assertEqual( expList, obsList )\n+        \n+        \n+    def test_getListOfAllMatches_empty_table( self ):\n+        self._iDb.createTable( self._table, "match", "" )\n+        expList = []\n+        obsList = self._tMatchA.getListOfAllMatches()\n+        self.assertEqual( expList, obsList )\n+        \n+            \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableMatchAdaptator ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TablePathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TablePathAdaptator.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,1376 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Range import Range\n+from commons.core.coord.PathUtils import PathUtils\n+from copy import deepcopy\n+\n+class Test_TablePathAdaptator( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+        configF = open(self._configFileName, "w" )\n+        configF.write( "[repet_env]\\n" )\n+        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+        configF.close()\n+        self._db = DbMySql( cfgFileName = self._configFileName )\n+        self._table = "dummyPathTable_%s" % ( self._uniqId )\n+        self._tpA = TablePathAdaptator( self._db, self._table )\n+        \n+        \n+    def tearDown( self ):\n+        self._uniqId = None\n+        self._db.dropTable( self._table )\n+        self._db.close()\n+        self._table = None\n+        self._tMatchA = None\n+        os.remove( self._configFileName )\n+        self._configFileName = ""  \n+        \n+        \n+##################################################################################\n+################## Tests for methods in ITableMapAdaptator #######################\n+##################################################################################     
  \n+     \n+    def test_getPathListFromId( self ):\n+        pathFileName = "dummyPathFile_%s" % ( self._uniqId )\n+        pathF = open( pathFileName, "w" )\n+        pathF.write( "1\\tchr1\\t1\\t6\\tTE2\\t11\\t16\\t1e-20\\t30\\t90.2\\n" )\n+        pathF.write( "2\\tchr1\\t1001\\t1006\\tTE2\\t11\\t16\\t1e-20\\t30\\t90.2\\n" )\n+        pathF.write( "2\\tchr1\\t1201\\t1226\\tTE2\\t10\\t26\\t1e-40\\t70\\t87.2\\n" )\n+        pathF.close()\n+        p1 = Path()\n+        p1.setFromString( "2\\tchr1\\t1001\\t1006\\tTE2\\t11\\t16\\t1e-20\\t30\\t90.2\\n" )\n+        p2 = Pat'..b'()\n+        self.assertEqual( expList, obsList )\n+        self._db.dropTable( obsTable )\n+        \n+        \n+    def test_path2PathRangeFromQuery_QryDirSbjRev( self ):\n+        self._db.createTable( self._table, "path" )\n+        p1 = Path()\n+        p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        p2a = Path()\n+        p2a.setFromTuple( ( "2", "chr2", "1", "100", "TE2", "109", "10", "1e-20", "163", "92.1" ) )\n+        p2b = Path()\n+        p2b.setFromTuple( ( "2", "chr2", "201", "250", "TE2", "200", "151", "1e-10", "75", "88.7" ) )\n+        for p in [ p1, p2a, p2b ]: self._tpA.insert( p )\n+        p2 = Path()\n+        p2.setFromTuple( ( "2", "chr2", "1", "250", "TE2", "200", "10", "1e-20", "238", "90.96" ) )   # \'merge\' p2a and p2b\n+        expList = [ p2 ]\n+        obsTable = self._tpA._path2PathRangeFromQuery( "chr2" )\n+        self._tpA._table = obsTable\n+        obsList = self._tpA.getListOfAllPaths()\n+        self.assertEqual( obsList, expList )\n+        self._db.dropTable( obsTable )\n+        \n+        \n+    def test_getNbOccurrences( self ):\n+        self._db.createTable( self._table, "path" )\n+        p1 = Path()\n+        p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        \n+        exp = 0\n+        obs = self._tpA.getNbOccurrences( p1 )\n+        self.assertEquals( exp, obs )\n+        \n+        self._tpA.insert( p1 )\n+        exp = 1\n+        obs = self._tpA.getNbOccurrences( p1 )\n+        self.assertEquals( exp, obs )\n+        \n+        self._tpA.insert( p1 )\n+        exp = 2\n+        obs = self._tpA.getNbOccurrences( p1 )\n+        self.assertEquals( exp, obs )\n+        \n+    def test_getListOfUniqueOccPath(self):\n+        \n+        p1 = Path()\n+        p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        p2 = Path()\n+        p2.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        p3 = Path()\n+        p3.setFromTuple( ( "1", "chr1", "2", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        p4 = Path()\n+        p4.setFromTuple( ( "2", "chr2", "2", "11", "TE4", "10", "18", "1e-30", "40", "95.0" ) )\n+        lPath = [p1,p2,p3,p4]\n+                \n+        expListPath = deepcopy([p1,p3,p4])     \n+        obsListUniquePath = self._tpA.getListOfUniqueOccPath(lPath)\n+        self.assertEquals( expListPath, obsListUniquePath )\n+\n+    def test_getListOfUniqueOccPath_empty_list(self):\n+        expListPath = []     \n+        obsListUniquePath = self._tpA.getListOfUniqueOccPath([])\n+        self.assertEquals( expListPath, obsListUniquePath )\n+        \n+    def test_getListOfUniqueOccPath_one_item(self):\n+        p1 = Path()\n+        p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        expListPath = deepcopy([p1])      \n+    
    obsListUniquePath = self._tpA.getListOfUniqueOccPath([p1])\n+        self.assertEquals( expListPath, obsListUniquePath )\n+\n+    def test_getListOfUniqueOccPath_unsorted_list(self):\n+        \n+        p1 = Path()\n+        p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        p3 = Path()\n+        p3.setFromTuple( ( "1", "chr1", "3", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        p4 = Path()\n+        p4.setFromTuple( ( "2", "chr2", "2", "11", "TE4", "10", "18", "1e-30", "40", "95.0" ) )\n+        p2 = Path()\n+        p2.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+        \n+        lPath = [p1,p3,p4,p2]\n+                \n+        expListPath = deepcopy([p1,p3,p4])     \n+        obsListUniquePath = self._tpA.getListOfUniqueOccPath(lPath)\n+        self.assertEquals( expListPath, obsListUniquePath )\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TablePathAdaptator ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableSeqAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableSeqAdaptator.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,321 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.coord.Set import Set\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_TableSeqAdaptator( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        self.fileUtils = FileUtils()\n+        self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+        configF = open(self._configFileName, "w" )\n+        configF.write( "[repet_env]\\n" )\n+        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+        configF.close()\n+        self._db = DbMySql( cfgFileName=self._configFileName )\n+        self._table = "dummySeqTable_%s" % ( self._uniqId )\n+        self._tsA = TableSeqAdaptator( self._db, self._table )\n+        \n+        \n+    def tearDown( self ):\n+        self._db.dropTable( self._table )\n+        self._db.close()\n+        os.remove( self._configFileName )\n+        self._configFileName = ""\n+        \n+        \n+##################################################################################\n+################## Tests for methods in ITableSeqAdaptator #######################\n+##################################################################################\n+        \n+    def test_insert( self ):\n+        bs = Bioseq( "seq1", "AGCGATGACGATGCGAGT" )\n+        self._db.createTable( 
self._table, "fasta" )\n+        self._tsA.insert( bs )\n+        \n+        expBioseqTuple = (("seq1", "AGCGATGACGATGCGAGT", "seq1", 18L), )\n+        \n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._db.execute( sqlCmd )\n+        obsBioseqTuple = self._db.cursor.fetchall()\n+        \n+        self.assertEqual( expBioseqTuple, obsBioseqTuple )\n+        \n+        \n+    def test_insertList( self ):\n+        bs1 = Bioseq( "seq1 desc", "AGCGATGACGATGCGAGT" )\n+        bs2 = Bioseq( "seq2", "AGCGATGACGATGCGAGT")\n+    '..b'")\n+        inF.write(">seq2\\n")\n+        inF.write("GCGATGCAGATGACGGCGGATGC\\n")\n+        inF.close()\n+        self._db.createTable( self._table, "fasta", inFileName )\n+        lSeq1 = ("seq1", 18)\n+        lSeq2 = ("seq2", 23)\n+        lExp = [lSeq1,lSeq2]\n+        lObs = self._tsA.getAccessionAndLengthList()\n+        self.assertEqual( lObs, lExp )\n+        os.remove( inFileName )\n+        \n+        \n+    def test_getSeqLengthFromAccessionWithSingleQuote( self ):\n+        inFileName = "dummyFaFile_%s" % ( self._uniqId )\n+        inF = open( inFileName, "w" )\n+        inF.write(">seq1\'\\n")\n+        inF.write("AGCGATGACGATGCGAGT\\n")\n+        inF.write(">seq2\\n")\n+        inF.write("GCGATGCAGATGACGGCGGATGC\\n")\n+        inF.close()\n+        self._db.createTable( self._table, "fasta", inFileName )\n+        exp = 18\n+        obs = self._tsA.getSeqLengthFromAccession( "seq1\'" )\n+        self.assertEqual( obs, exp )\n+        os.remove( inFileName )\n+        \n+        \n+    def test_getSubSequence_directStrand( self ):\n+        self._db.createTable( self._table, "seq" )\n+        chr = Bioseq()\n+        chr.setHeader( "chr2" )\n+        chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+        self._tsA.insert( chr )\n+        exp = "TTTGGG"\n+        obs = self._tsA.getSubSequence( "chr2", 13, 18 )\n+        self.assertEqual( exp, obs )\n+        \n+        \n+    def test_getSubSequence_reverseStrand( self ):\n+        self._db.createTable( self._table, "seq" )\n+        chr = Bioseq()\n+        chr.setHeader( "chr2" )\n+        chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+        self._tsA.insert( chr )\n+        exp = "CCCAAA"\n+        obs = self._tsA.getSubSequence( "chr2", 18, 13 )\n+        self.assertEqual( exp, obs )\n+        \n+        \n+    def test_getBioseqFromSetList_directStrand( self ):\n+        self._db.createTable( self._table, "seq" )\n+        chr = Bioseq()\n+        chr.setHeader( "chr2" )\n+        chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+        self._tsA.insert( chr )\n+        lSets = []\n+        lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 1, 10 ) )\n+        lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 16, 25 ) )\n+        exp = Bioseq( "Dm-B-G600-Map3_classI-LTR-incomp::3 chr2 1..10,16..25", "AAAAAAAAAAGGGGGGGGGG" )\n+        obs = self._tsA.getBioseqFromSetList( lSets )\n+        self.assertEqual( exp, obs )\n+        \n+        \n+    def test_getBioseqFromSetList_reverseStrand( self ):\n+        self._db.createTable( self._table, "seq" )\n+        chr = Bioseq()\n+        chr.setHeader( "chr2" )\n+        chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+        self._tsA.insert( chr )\n+        lSets = []\n+        lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 10, 1 ) )\n+        lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 25, 16 ) )\n+        exp = Bioseq( 
"Dm-B-G600-Map3_classI-LTR-incomp::3 chr2 25..16,10..1", "CCCCCCCCCCTTTTTTTTTT" )\n+        obs = self._tsA.getBioseqFromSetList( lSets )\n+        self.assertEqual( exp, obs )\n+        \n+        \n+    def test_isAccessionInTable_true( self ):\n+        self._db.createTable( self._table, "seq" )\n+        chr = Bioseq()\n+        chr.setHeader( "chr2" )\n+        chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+        self._tsA.insert( chr )\n+        \n+        obs = self._tsA.isAccessionInTable( "chr2" )\n+        self.assertTrue( obs )\n+        \n+        \n+    def test_isAccessionInTable_false( self ):\n+        self._db.createTable( self._table, "seq" )\n+        chr = Bioseq()\n+        chr.setHeader( "chr2" )\n+        chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+        self._tsA.insert( chr )\n+        \n+        obs = self._tsA.isAccessionInTable( "chr1" )\n+        self.assertFalse( obs )\n+        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableSeqAdaptator ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Test_TableSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableSetAdaptator.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,330 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import time\n+import os\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Set import Set\n+\n+\n+class Test_TableSetAdaptator( unittest.TestCase ):\n+\n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+        configF = open(self._configFileName, "w" )\n+        configF.write( "[repet_env]\\n" )\n+        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+        configF.close()\n+        self._iDb = DbMySql( cfgFileName=self._configFileName )\n+        self._table = "dummySetTable_%s" % ( self._uniqId )\n+        self._tSetA = TableSetAdaptator( self._iDb, self._table )\n+                \n+    def tearDown( self ):\n+        self._uniqId = None\n+        self._iDb.dropTable( self._table )\n+        self._iDb.close()\n+        self._table = None\n+        self._tSetA = None\n+        os.remove( self._configFileName )\n+        self._configFileName = ""\n+\n+    def test_insert(self):\n+        set2Insert = Set()\n+        set2Insert.id = 1\n+        set2Insert.name = "name1"\n+        set2Insert.seqname = "name2"\n+        set2Insert.start = 1L\n+        set2Insert.end = 50L\n+        self._iDb.createTable( self._table, "set", "" )\n+        self._tSetA.insert( set2Insert, False )\n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._iDb.execute( sqlCmd )\n+        expTsetTuple = ((1, 
"name1", "name2", 1L, 50L),)\n+        obsTsetTuples = self._iDb.cursor.fetchall()\n+        self.assertEquals(expTsetTuple, obsTsetTuples )\n+    \n+    def test_insertList ( self ):\n+        self._iDb.createTable( self._table, "set", "" )\n+        set1 = Set()\n+        set1.setFromString( "1\\tname1\\tdesc1\\t1\\t120\\n" )\n+        set2 = Set()\n+        set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+        lset = [ set1, set2 ]\n+        self._tSetA.insertList( lset )\n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        '..b'3 ]: self._tSetA.insert( m )\n+        lId2del = []\n+        self._tSetA.deleteFromIdList(lId2del)\n+        expTSetTuples = ((1L, \'name1\', \'desc1\', 1L, 120L), (2L, \'name2\', \'desc2\', 1L, 20L), (3L, \'name2\', \'desc3\', 1L, 50L))\n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._iDb.execute( sqlCmd )\n+        obsTsetTuples = self._iDb.cursor.fetchall()\n+        \n+        self.assertEqual( expTSetTuples, obsTsetTuples )\n+     \n+    def test_joinTwoSets(self):\n+        self._iDb.createTable( self._table, "set", "" )\n+        idSet1 = 5\n+        set1 = Set()\n+        set1.setFromString( "5\\tname1\\tdesc1\\t1\\t120\\n" ) \n+        idSet2 = 2\n+        set2 = Set()\n+        set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+        lset = [ set1, set2 ]\n+        self._tSetA.insertList( lset )\n+        self._tSetA.joinTwoSets(idSet1, idSet2)\n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._iDb.execute( sqlCmd )\n+        \n+        expTSetTuples = ((2L, "name1", "desc1", 1L, 120L ), (2L, "name2", "desc2", 1L, 20L ))\n+        obsTSetTuples = self._iDb.cursor.fetchall()\n+        \n+        self.assertEqual( expTSetTuples, obsTSetTuples)\n+        self._iDb.dropTable(self._table)\n+     \n+    def test_joinTwoSetsWhereId1InfId2(self):\n+        self._iDb.createTable( self._table, "set", "" )\n+        idSet1 = 2\n+        set1 = Set()\n+        set1.setFromString( "5\\tname1\\tdesc1\\t1\\t120\\n" ) \n+        \n+        idSet2 = 5\n+        set2 = Set()\n+        set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+        \n+        lset = [ set1, set2 ]\n+        self._tSetA.insertList( lset )\n+\n+        self._tSetA.joinTwoSets(idSet1, idSet2)\n+        \n+        sqlCmd = "SELECT * FROM %s" % ( self._table )\n+        self._iDb.execute( sqlCmd )\n+        \n+        expTSetTuples = ((2L, "name1", "desc1", 1L, 120L ), (2L, "name2", "desc2", 1L, 20L ))\n+        obsTSetTuples = self._iDb.cursor.fetchall()\n+        \n+        self.assertEqual( expTSetTuples, obsTSetTuples)\n+        self._iDb.dropTable(self._table)\n+     \n+    def test_getNewId(self):\n+        self._iDb.createTable( self._table, "set", "" )\n+        set1 = Set()\n+        set1.setFromString( "1\\tname1\\tdesc1\\t1\\t120\\n" ) \n+        set2 = Set()\n+        set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+        set3 = Set()\n+        set3.setFromString( "5\\tname1\\tdesc1\\t1\\t120\\n" ) \n+        set4 = Set()\n+        set4.setFromString( "8\\tname2\\tdesc2\\t1\\t20\\n" )\n+        lset = [ set1, set2, set3, set4 ]\n+        self._tSetA.insertList( lset )\n+        expId = 9\n+        obsId = self._tSetA.getNewId()\n+        self.assertEqual( expId, obsId)\n+        self._iDb.dropTable(self._table)\n+     \n+    def test_getNewId_set_null(self):\n+        self._iDb.createTable( self._table, "set", "" )\n+        set1 = Set()\n+        lset = [ set1 ]\n+        self._tSetA.insertList( 
lset )\n+        expId = 1\n+        obsId = self._tSetA.getNewId()\n+        self.assertEqual( expId, obsId)\n+        self._iDb.dropTable(self._table)  \n+        \n+    def test_getListOfAllSets( self ):\n+        self._iDb.createTable( self._table, "set" )\n+        s1 = Set()\n+        s1.setFromString( "1\\tchr1\\tTE3\\t1\\t10\\n" )\n+        s2a = Set()\n+        s2a.setFromString( "2\\tchr1\\tTE2\\t2\\t9\\n" )\n+        s2b = Set()\n+        s2b.setFromString( "2\\tchr1\\tTE2\\t12\\t19\\n" )\n+        lSets = [ s1, s2a, s2b ]\n+        self._tSetA.insertList( lSets )\n+        expLSets = [ s1, s2a, s2b ]\n+        obsLSets = self._tSetA.getListOfAllSets()\n+        self.assertEqual( expLSets, obsLSets )\n+        \n+    def test_getListOfAllSets_empty_table( self ):\n+        self._iDb.createTable( self._table, "set" )\n+        expList = []\n+        obsList = self._tSetA.getListOfAllSets()\n+        self.assertEqual( expList, obsList )     \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableSetAdaptator ) )       \n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Tst_F_RepetJob.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Tst_F_RepetJob.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,236 @@\n+import os\n+import time\n+import sys\n+import stat\n+import unittest\n+import glob\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.RepetJob import RepetJob\n+from commons.core.sql.Job import Job\n+\n+class Test_F_RepetJob(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._jobTableName = "dummyJobTable"\n+        self._db = DbMySql()\n+        self._iRepetJob = RepetJob()\n+        self._configFileName = "dummyConfigFile"\n+        configF = open(self._configFileName, "w" )\n+        configF.write( "[repet_env]\\n" )\n+        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+        configF.close()\n+\n+    def tearDown(self):\n+        self._iRepetJob = None\n+        self._db.dropTable( self._jobTableName )\n+        self._db.close()\n+        os.remove(self._configFileName)\n+    \n+    def test_submitJob_with_multiple_jobs(self):\n+        job1 = self._createJobInstance("job1")\n+        self._createLauncherFile(job1)\n+\n+        job2 = self._createJobInstance("job2")\n+        self._createLauncherFile(job2)\n+\n+        job3 = self._createJobInstance("job3")\n+        self._createLauncherFile(job3)\n+        \n+        self._iRepetJob.submitJob( job1, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )\n+        self._iRepetJob.submitJob( job2, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )\n+        self._iRepetJob.submitJob( job3, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )\n+\n+        time.sleep(70)\n+        \n+        expJobStatus = "finished"\n+        obsJobStatus1 = self._iRepetJob.getJobStatus(job1)\n+        obsJobStatus2 = self._iRepetJob.getJobStatus(job2)\n+        obsJobStatus3 = self._iRepetJob.getJobStatus(job3)\n+        \n+        self.assertEquals(expJobStatus, obsJobStatus1)\n+        self.assertEquals(expJobStatus, obsJobStatus2)\n+        self.assertEquals(expJobStatus, obsJobStatus3)\n+        \n+        jobName1 = job1.jobname\n+        jobName2 = job2.jobname\n+        jobName3 = job3.jobname\n+        \n+        expErrorFilePrefix1 = jobName1+ ".e" \n+        expOutputFilePrefix1 = jobName1 + ".o"\n+        expErrorFilePrefix2 = jobName2 + ".e" \n+        expOutputFilePrefix2 = jobName2 + ".o"\n+        expErrorFilePrefix3 = jobName3 + ".e" \n+        expOutputFilePrefix3 = jobName3 + ".o"\n+        \n+        lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")\n+        lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")\n+        lErrorFiles2 = glob.glob(expErrorFilePrefix2 + "*")\n+        lOutputFiles2 = glob.glob(expOutputFilePrefix2 + "*")\n+        lErrorFiles3 = glob.glob(expErrorFilePrefix3 + "*")\n+        lOutputFiles3 = glob.glob(expOutputFilePrefix3 + "*")\n+        \n+        isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) \n+        isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)\n+        isLErrorFileNotEmpty2 = (len(lErrorFiles2) != 0) \n+        isLOutputFileNotEmpty2 = (len(lOutputFiles2) != 0)\n+        isLErrorFileNotEmpty3 = (len(lErrorFiles3) != 0) \n+        isLOutputFileNotEmpty3 = (len(lOutputFiles3) != 0)\n+        \n+        os.system("rm launcherFileTest*.py *.e* *.o*")\n+        self.assertTrue(isLErrorFileNotEmpty1 and 
isLOutputFileNotEmpty1)\n+        self.assertTrue(isLErrorFileNotEmpty2 and isLOutputFileNotEmpty2)\n+        self.assertTrue(isLErrorFileNotEmpty3 and isLOutputFileNotEmpty3)\n+\n+    def test_submitJob_job_already_submitted(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance("job")\n+        self._iRepetJob.recordJob(iJob)\n+        \n+        isSysExitRaised = False\n+        try:\n+            self._iRepetJob.submitJob(iJob)\n+        except SystemExit:\n+            isSysExitRaised = True\n+        self.'..b'ordJob(iJob)\n+        self._iRepetJob.changeJobStatus(iJob, "error", "method")\n+        \n+        self._iRepetJob.waitJobGroup(self._jobTableName ,iJob.groupid, 0, 2)\n+        \n+        time.sleep(10)\n+        \n+        expJobStatus = "finished"\n+        obsJobStatus1 = self._iRepetJob.getJobStatus(iJob)\n+        \n+        self.assertEquals(expJobStatus, obsJobStatus1)\n+        \n+        jobName = iJob.jobname\n+        \n+        expErrorFilePrefix1 = jobName + ".e" \n+        expOutputFilePrefix1 = jobName + ".o"\n+        \n+        lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")\n+        lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")\n+        \n+        isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) \n+        isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)\n+        \n+        self._iRepetJob.removeJob(iJob) \n+        os.system("rm launcherFileTest*.py *.e* *.o*")\n+        self.assertTrue(isLErrorFileNotEmpty1 and isLOutputFileNotEmpty1)\n+        \n+\n+    def test_isJobStillHandledBySge_True(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance("job")\n+        self._createLauncherFile(iJob)\n+        self._iRepetJob.submitJob(iJob)\n+        \n+        isJobHandledBySge = self._iRepetJob.isJobStillHandledBySge(iJob.jobid, iJob.jobname)\n+        os.system("rm launcherFileTest*.py")\n+        \n+        self.assertTrue(isJobHandledBySge)\n+\n+    def test_isJobStillHandledBySge_False(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance("job")\n+        self._createLauncherFile(iJob)\n+        self._iRepetJob.recordJob(iJob)\n+        \n+        isJobHandledBySge = self._iRepetJob.isJobStillHandledBySge(iJob.jobid, iJob.jobname)\n+        os.system("rm launcherFileTest*.py")\n+        \n+        self.assertFalse(isJobHandledBySge)\n+        \n+    def _createJobInstance(self, name):\n+        return Job(self._jobTableName, 0, name, "test", "", "date;sleep 5;date", "./launcherFileTest_"+ name +".py")\n+    \n+    def _createLauncherFile(self, iJob):\n+        jobFileHandler = open( iJob.launcher , "w" )\n+\n+        launcher = "#!/usr/bin/python\\n"\n+        launcher += "import os\\n"\n+        launcher += "import sys\\n"\n+        \n+        launcher += "print \\"system:\\", os.uname()\\n"\n+        launcher += "sys.stdout.flush()\\n"\n+        newStatus = "running"\n+        prg = "%s/bin/srptChangeJobStatus.py" % (os.environ["REPET_PATH"])\n+        cmd = prg\n+        cmd += " -t %s" % ( iJob.tablename )\n+        cmd += " -n %s" % ( iJob.jobname )\n+        cmd += " -g %s" % ( iJob.groupid )\n+        if iJob.queue != "":\n+            cmd += " -q %s" % ( iJob.queue )\n+        cmd += " -s %s" % ( newStatus )\n+        cmd += " -c %s"  %( self._configFileName )\n+        cmd += " -v 1"\n+        launcher +="os.system( \\"" + cmd + "\\" )\\n"\n+        \n+     
   launcher += "print \\"LAUNCH: "+ iJob.command + "\\"\\n"\n+        launcher += "sys.stdout.flush()\\n"\n+        launcher += "exitStatus = os.system (\\"" + iJob.command + "\\")\\n"\n+        launcher += "if exitStatus != 0:\\n"\n+        launcher += "\\tprint \\"ERROR: "+  iJob.command + " returned exit status \'%i\'\\" % ( exitStatus )\\n"\n+        \n+        newStatus = "finished"\n+        prg = os.environ["REPET_PATH"] + "/bin/srptChangeJobStatus.py"\n+        cmd = prg\n+        cmd += " -t %s" % ( iJob.tablename )\n+        cmd += " -n %s" % ( iJob.jobname )\n+        cmd += " -g %s" % ( iJob.groupid )\n+        if iJob.queue != "":\n+            cmd += " -q %s" % ( iJob.queue )\n+        cmd += " -s %s" % ( newStatus )\n+        cmd += " -c %s"  %( self._configFileName )\n+        cmd += " -v 1"\n+        launcher +="os.system( \\"" + cmd + "\\" )\\n"\n+        launcher += "sys.exit(0)\\n"\n+        jobFileHandler.write(launcher)\n+        jobFileHandler.close()\n+        os.chmod( iJob.launcher, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC )\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/sql/test/Tst_RepetJob.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Tst_RepetJob.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,395 @@\n+import unittest\n+import sys\n+import os\n+import time\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.Job import Job\n+from commons.core.sql.RepetJob import RepetJob\n+from commons.core.utils.FileUtils import FileUtils\n+\n+#TODO: to remove... => replace all RepetJob() by TableJobAdaptator()...\n+class Test_RepetJob( unittest.TestCase ):\n+    \n+    def setUp(self):\n+        self._jobTableName = "dummyJobTable"\n+        self._db = DbMySql()\n+        self._iRepetJob = RepetJob()\n+    \n+    def tearDown(self):\n+        self._iRepetJob = None\n+        self._db.close()\n+        \n+    def _createJobInstance(self):\n+        return Job( self._jobTableName, 0, "job1", "groupid", "queue", "command", "launcherFile", "node", "lResources" )\n+    \n+    def test_createJobTable_is_table_created(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+    \n+        isTableCreated = self._db.doesTableExist(self._jobTableName)\n+        self.assertTrue(isTableCreated)\n+    \n+        self._db.dropTable(self._jobTableName)\n+    \n+    def test_createJobTable_field_list(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+\n+        obsLFiled = self._db.getFieldList(self._jobTableName)\n+        expLField = ["jobid", "jobname", "groupid", "command", "launcher", "queue", "status", "time", "node"]\n+    \n+        self.assertEquals(expLField, obsLFiled)\n+    \n+        self._db.dropTable(self._jobTableName)\n+    \n+    def test_recordJob(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance()\n+        self._iRepetJob.recordJob(iJob)\n+    \n+        qryParams = "SELECT jobid, groupid, command, launcher, queue, status, node FROM " + self._jobTableName + " WHERE jobid = %s" \n+        params = (iJob.jobid)\n+        \n+        self._db.execute(qryParams, params)\n+        \n+        tObs = self._db.fetchall()[0]\n+        tExp =(iJob.jobid, iJob.groupid, iJob.command, iJob.launcher, iJob.queue, "waiting", "?")\n+        \n+        self.assertEquals(tExp,tObs)\n+\n+        self._db.dropTable(self._jobTableName)\n+    \n+    def test_removeJob(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance()\n+        self._iRepetJob.recordJob(iJob)\n+\n+        self._iRepetJob.removeJob(iJob)\n+        \n+        isTableEmpty = self._db.isEmpty(self._jobTableName)\n+        \n+        self.assertTrue(isTableEmpty)\n+        \n+        self._db.dropTable(self._jobTableName)\n+        \n+    def test_getJobStatus(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance()\n+        self._iRepetJob.recordJob(iJob)\n+\n+        expStatus = "waiting"\n+        obsStatus = self._iRepetJob.getJobStatus(iJob)\n+        \n+        self.assertEquals(expStatus, obsStatus)\n+        self._db.dropTable(self._jobTableName)\n+    \n+    def test_getJobStatus_unknown(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance()        \n+\n+        expStatus = "unknown"\n+        obsStatus = self._iRepetJob.getJobStatus(iJob)\n+        \n+        self.assertEquals(expStatus, obsStatus)\n+        self._db.dropTable(self._jobTableName)\n+    \n+    def test_getJobStatus_no_name(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = Job( self._jobTableName, 20, "", "groupid", 
"queue", "command", "launcherFile", "node", "lResources" ) \n+        \n+        expStatus = "unknown"\n+        obsStatus = self._iRepetJob.getJobStatus(iJob)\n+        \n+        self.assertEquals(expStatus, obsStatus)\n+        self._db.dropTable(self._jobTableName)\n+        \n+    def test_getJobStatus_non_unique_job(self):\n+        # Warning : this case will not append, because recordJob() begin by removeJob()\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance()\n+        sqlCmd = "INSERT I'..b'RepetJob.removeJob(iJob)\n+        \n+    def test_setJobIdFromSge(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob = self._createJobInstance()\n+        self._iRepetJob.recordJob(iJob)\n+        self._iRepetJob.setJobIdFromSge(iJob, 1000)\n+        \n+        qryParams = "SELECT jobid FROM " + self._jobTableName + " WHERE jobname = %s AND queue = %s AND groupid = %s" \n+        params = (iJob.jobname, iJob.queue, iJob.groupid)\n+        \n+        self._db.execute(qryParams, params)\n+        \n+        tObs = self._db.fetchall()[0]\n+        tExp =(1000,)\n+        \n+        self.assertEquals(tExp,tObs)\n+        \n+        self._db.dropTable(self._jobTableName)\n+        \n+    def test_submitJob_8_fields_for_job_table(self):\n+        iJob = self._createJobInstance()\n+        self._db.dropTable(self._jobTableName)\n+        sqlCmd = "CREATE TABLE " + self._jobTableName \n+        sqlCmd += " ( jobid INT UNSIGNED"\n+        sqlCmd += ", groupid VARCHAR(255)"\n+        sqlCmd += ", command TEXT"\n+        sqlCmd += ", launcher VARCHAR(1024)"\n+        sqlCmd += ", queue VARCHAR(255)"\n+        sqlCmd += ", status VARCHAR(255)"\n+        sqlCmd += ", time DATETIME"\n+        sqlCmd += ", node VARCHAR(255) )"\n+        self._db.execute(sqlCmd)\n+        \n+        self._iRepetJob.submitJob(iJob)\n+        \n+        expFieldsNb = 9\n+        obsFieldsNb = len(self._iRepetJob.getFieldList(self._jobTableName))\n+        \n+        self.assertEquals(expFieldsNb, obsFieldsNb)\n+        \n+        self._db.dropTable(self._jobTableName)\n+        \n+    def test_getNodesListByGroupId(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob1 = Job( self._jobTableName, 0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources" )\n+        iJob2 = Job( self._jobTableName, 1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources" )\n+        iJob3 = Job( self._jobTableName, 2, "job3", "groupid2", "queue", "command", "launcherFile", "node3", "lResources" )\n+        \n+        self._insertJob(iJob1)\n+        self._insertJob(iJob2)\n+        self._insertJob(iJob3)\n+        \n+        expNodeList = ["node1", "node2"]\n+        obsNodeList = self._iRepetJob.getNodesListByGroupId(self._jobTableName, "groupid")\n+        self.assertEquals(expNodeList, obsNodeList)\n+        \n+        self._db.dropTable(self._jobTableName)\n+        \n+    def test_getNodesListByGroupId_empty_list(self):\n+        self._iRepetJob.createTable(self._jobTableName, "jobs")\n+        iJob1 = Job( self._jobTableName, 0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources" )\n+        iJob2 = Job( self._jobTableName, 1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources" )\n+        iJob3 = Job( self._jobTableName, 2, "job3", "groupid32", "queue", "command", "launcherFile", "node3", "lResources" )\n+        \n+    
    self._insertJob(iJob1)\n+        self._insertJob(iJob2)\n+        self._insertJob(iJob3)\n+        \n+        expNodeList = []\n+        obsNodeList = self._iRepetJob.getNodesListByGroupId(self._jobTableName, "groupid3")\n+        self.assertEquals(expNodeList, obsNodeList)\n+        \n+        self._db.dropTable(self._jobTableName)\n+        \n+    def _insertJob(self, iJob):\n+        self._iRepetJob.removeJob( iJob )\n+        sqlCmd = "INSERT INTO %s" % ( iJob.tablename )\n+        sqlCmd += " VALUES ("\n+        sqlCmd += " \\"%s\\"," % ( iJob.jobid )\n+        sqlCmd += " \\"%s\\"," % ( iJob.jobname )\n+        sqlCmd += " \\"%s\\"," % ( iJob.groupid )\n+        sqlCmd += " \\"%s\\"," % ( iJob.command.replace("\\"","\\\'") )\n+        sqlCmd += " \\"%s\\"," % ( iJob.launcher )\n+        sqlCmd += " \\"%s\\"," % ( iJob.queue )\n+        sqlCmd += " \\"waiting\\","\n+        sqlCmd += " \\"%s\\"," % ( time.strftime( "%Y-%m-%d %H:%M:%S" ) )\n+        sqlCmd += " \\"%s\\" );" % ( iJob.node )\n+        self._iRepetJob.execute( sqlCmd )\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n'
b
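Several of the test classes above (Test_TablePathAdaptator, Test_TableSeqAdaptator, Test_TableSetAdaptator and Tst_F_RepetJob) write a throwaway repet_env configuration file from environment variables in their setUp() methods before connecting with DbMySql. A minimal sketch of the environment they expect is given below; the variable names are taken from the setUp() code, while the values are placeholders for a site-specific, writable MySQL instance:

    # Placeholder values only; point these at a MySQL server the tests may write to.
    import os
    for key, value in {"REPET_HOST": "localhost",
                       "REPET_USER": "repet",
                       "REPET_PW": "secret",
                       "REPET_DB": "repet_test",
                       "REPET_PORT": "3306"}.items():
        os.environ.setdefault(key, value)

    # Each module can then be run on its own, e.g.:
    #   python commons/core/sql/test/Test_TableSetAdaptator.py

Tst_F_RepetJob additionally reads REPET_PATH and checks job state through isJobStillHandledBySge, so it also assumes a working SGE installation.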
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/stat/Stat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/stat/Stat.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,209 @@
+import math
+
+class Stat(object):
+
+    def __init__(self, lValues = []):
+        self.reset()
+        if lValues != []:
+            self.fill(lValues)
+    
+    def __eq__(self, o):
+        self._lValues.sort()
+        o._lValues.sort()
+        return self._lValues == o._lValues and round(self._sum, 6) == round(o._sum, 6) \
+            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) and self._n == o._n \
+            and round(self._min, 6) == round(o._min, 6) and round(self._max, 6) == round(o._max, 6)
+            
+    def getValuesList(self):
+        return self._lValues
+    
+    def getSum(self):
+        return self._sum
+    
+    def getSumOfSquares(self):
+        return self._sumOfSquares
+    
+    def getValuesNumber(self):
+        return self._n
+    
+    def getMin(self):
+        return self._min
+    
+    def getMax(self):
+        return self._max
+
+    ## Reset all attributes
+    #
+    def reset(self):
+        self._lValues = []
+        self._sum = 0.0
+        self._sumOfSquares = 0.0
+        self._n = 0
+        self._max = 0.0
+        self._min = 0.0
+
+    ## Add a value to Stat instance list and update attributes
+    #
+    # @param v float value to add
+    #    
+    def add(self, v):
+        self._lValues.append( float(v) )
+        self._sum += float(v)
+        self._sumOfSquares += float(v) * float(v)
+        self._n = self._n + 1
+        if v > self._max:
+            self._max = float(v)
+        if self._n == 1:
+            self._min = float(v)
+        elif v < self._min:
+            self._min = float(v)
+         
+    ## Add a list of values to Stat instance list and update attributes
+    #
+    # @param lValues list of float values to add
+    #    
+    def fill(self, lValues):
+        for v in lValues:
+            self.add(v)
+    
+    ## Get the arithmetic mean of the Stat instance list
+    #
+    # @return float
+    #
+    def mean(self):
+        if self._n == 0:
+            return 0
+        else:
+            return self._sum / float(self._n)
+    
+    ## Get the variance of the sample
+    # @note we consider a sample, not a population, so the calculation uses n-1
+    #
+    # @return float
+    #
+    def var(self):
+        if self._n < 2 or self.mean() == 0.0:
+            return 0
+        else:
+            variance = self._sumOfSquares/float(self._n - 1) - self._n/float(self._n - 1) * self.mean()*self.mean()
+            if round(variance, 10) == 0:
+                variance = 0
+            return variance
+
+    ## Get the standard deviation of the sample
+    #
+    # @return float
+    #
+    def sd(self):
+        return math.sqrt( self.var() )
+
+    ## Get the coefficient of variation of the sample
+    #
+    # @return float
+    #
+    def cv(self):
+        if self._n < 2 or self.mean() == 0.0:
+            return 0
+        else:
+            return self.sd() / self.mean()
+
+    ## Get the median of the sample
+    #
+    # @return number or "NA" (Not available)
+    #
+    def median( self ):
+        if len(self._lValues) == 0:
+            return "NA"
+        if len(self._lValues) == 1:
+            return self._lValues[0]
+        self._lValues.sort()
+        m = int( math.ceil( len(self._lValues) / 2.0 ) )
+        if len(self._lValues) % 2:
+            return self._lValues[m-1]
+        else:
+            return ( self._lValues[m-1] + self._lValues[m] ) / 2.0
+        
+    ## Get the kurtosis (a measure of whether the data are peaked or flat relative to a normal distribution; 'coefficient d'aplatissement' in French).
+    #  k = 0 -> completely flat
+    #  k = 3 -> same as normal distribution
+    #  k >> 3 -> peak
+    #
+    # @return float 
+    #
+    def kurtosis(self):
+        numerator = 0
+        for i in self._lValues:
+            numerator += math.pow( i - self.mean(), 4 )
+        return numerator / float(self._n - 1) * self.sd() 
+
+    ## Prepare a string with descriptive statistics of the values
+    #
+    # @return string 
+    #
+    def string(self):
+        msg = ""
+        msg += "n=%d" % ( self._n )
+        msg += " mean=%5.3f" % ( self.mean() )
+        msg += " var=%5.3f" % ( self.var() )
+        msg += " sd=%5.3f" % ( self.sd() )
+        msg += " min=%5.3f" % ( self.getMin() )
+        median = self.median()
+        if median == "NA":
+            msg += " med=%s" % (median)
+        else:
+            msg += " med=%5.3f" % (median)
+        msg += " max=%5.3f" % ( self.getMax() )
+        return msg
+    
+    ## Print descriptive statistics
+    #
+    def view(self):
+        print self.string()
+
+    ## Return sorted list of values, ascending (default) or descending
+    #
+    # @return list
+    #
+    def sort( self, isReverse = False ):
+        self._lValues.sort(reverse = isReverse)
+        return self._lValues
+    
+    ## Give the quantile corresponding to the chosen proportion (between 0 and 1, e.g. 0.25 for the first quartile)
+    #
+    # @return number 
+    #
+    def quantile( self, percentage ):
+        if self._n == 0:
+            return 0
+        elif percentage == 1:
+            return self.getMax()
+        else:
+            return self.sort()[int(self._n * percentage)]
+        
+    ## Prepare a string with quantile values
+    #
+    # @return string
+    #    
+    def stringQuantiles( self ):
+        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
+               (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))
+
+    ## Print quantiles string
+    #
+    def viewQuantiles( self ):
+        print self.stringQuantiles()
+        
+    ## Compute the N50, i.e. the smallest value in the minimal set of largest values whose cumulated sum reaches at least half of the total sum
+    # @return number
+    def N50(self ):
+        lSorted = self.sort(True)
+        midlValues = self.getSum() / 2
+        cumul = 0
+        index = 0
+        while cumul < midlValues:
+            cumul =  cumul + lSorted[index]
+            index += 1
+        if (index == 0):
+            return lSorted[index]
+        else :
+            return lSorted[index - 1]
\ No newline at end of file
b
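As a quick illustration of how the Stat class added above is used, the following sketch (Python 2, like the rest of the codebase; not part of the changeset) reproduces the values asserted in Test_F_Stat.py and Test_Stat.py below:

    from commons.core.stat.Stat import Stat

    iStat = Stat([0, -1, -5, 112, 10.2, 0.5, 4, -0.5])
    print iStat.string()           # n=8 mean=15.025 var=1554.934 sd=39.433 min=-5.000 med=0.250 max=112.000
    print iStat.stringQuantiles()  # n=8 min=-5.000 Q1=-0.500 median=0.500 Q3=10.200 max=112.000

    # N50: smallest value in the minimal set of largest values covering half of the total sum
    print Stat([10, 10, 2, 16, 3, 4, 5]).N50()   # 10.0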
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/stat/test/Test_F_Stat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/stat/test/Test_F_Stat.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,22 @@
+import unittest
+from commons.core.stat.Stat import Stat
+
+
+class Test_F_Stat(unittest.TestCase):
+
+
+    def test_output(self):
+        lValues = [0, -1, -5, 112, 10.2, 0.5, 4, -0.5]
+        iStat = Stat(lValues)
+        expString = "n=8 mean=15.025 var=1554.934 sd=39.433 min=-5.000 med=0.250 max=112.000"
+        self.assertEquals(expString, iStat.string())
+        
+    def test_outputQuantile(self):
+        lValues = [0, -1, -5, 112, 10.2, 0.5, 4, -0.5]
+        iStat = Stat(lValues)
+        expString = "n=8 min=-5.000 Q1=-0.500 median=0.500 Q3=10.200 max=112.000"
+        self.assertEquals(expString, iStat.stringQuantiles())
+        
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
b
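For reference, a quick hand check (a sketch, not part of the changeset) of the sample-variance formula used by Stat.var() against the value asserted in test_output above:

    # Worked check of Stat.var() for the test_output input list.
    x = [0, -1, -5, 112, 10.2, 0.5, 4, -0.5]
    n = len(x)                                   # 8
    mean = sum(x) / float(n)                     # 120.2 / 8 = 15.025
    sumSq = sum(v * v for v in x)                # 12690.54
    var = sumSq / (n - 1.0) - n / (n - 1.0) * mean * mean
    print round(var, 3)                          # 1554.934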
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/stat/test/Test_Stat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/stat/test/Test_Stat.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,356 @@\n+from commons.core.stat.Stat import Stat\n+import unittest\n+\n+class Test_Stat(unittest.TestCase):\n+    \n+    def test__eq__true(self):\n+        iStat1 = Stat([1, 2, 3, 46])\n+        iStat2 = Stat([1, 2, 3, 46])\n+        self.assertTrue(iStat1 == iStat2)\n+\n+    def test__eq__false(self):\n+        iStat1 = Stat([1, 2, 3, 4])\n+        iStat2 = Stat([1, 2, 3, 46])\n+        self.assertFalse(iStat1 == iStat2)\n+\n+    def test__eq__disordered_list(self):\n+        iStat1 = Stat([3, 2, 1, 46])\n+        iStat2 = Stat([1, 2, 3, 46])\n+        self.assertTrue(iStat1 == iStat2)\n+\n+    def test_reset(self):\n+        lValues = [1, 2, 5, 9, 12, 46]\n+        iStat = Stat(lValues)\n+        iStat.reset()\n+        expValuesList = []\n+        expSum = 0\n+        expSum2 = 0\n+        expN = 0\n+        expMin = 0\n+        expMax = 0\n+        obsValuesList = iStat.getValuesList()\n+        obsSum = iStat.getSum()\n+        obsSum2 = iStat.getSumOfSquares()\n+        obsN = iStat.getValuesNumber()\n+        obsMin = iStat.getMin()\n+        obsMax = iStat.getMax()\n+        self.assertEquals(expValuesList, obsValuesList)\n+        self.assertEquals(expSum, obsSum)\n+        self.assertEquals(expSum2, obsSum2)\n+        self.assertEquals(expN, obsN)\n+        self.assertEquals(expMin, obsMin)\n+        self.assertEquals(expMax, obsMax)\n+\n+    def test_add_EmptyList(self):\n+        lValues = []\n+        iStat = Stat(lValues)\n+        iStat.add(5)\n+        expValuesList = [5]\n+        expSum = 5\n+        expSum2 = 25\n+        expN = 1\n+        expMin = 5\n+        expMax = 5\n+        obsValuesList = iStat.getValuesList()\n+        obsSum = iStat.getSum()\n+        obsSum2 = iStat.getSumOfSquares()\n+        obsN = iStat.getValuesNumber()\n+        obsMin = iStat.getMin()\n+        obsMax = iStat.getMax()\n+        self.assertEquals(expValuesList, obsValuesList)\n+        self.assertEquals(expSum, obsSum)\n+        self.assertEquals(expSum2, obsSum2)\n+        self.assertEquals(expN, obsN)\n+        self.assertEquals(expMin, obsMin)\n+        self.assertEquals(expMax, obsMax)\n+       \n+    def test_add_Max(self):\n+        lValues = [0,1,1]\n+        iStat = Stat(lValues)\n+        iStat.add(2)\n+        expValuesList = [0,1,1,2]\n+        expSum = 4\n+        expSum2 = 6\n+        expN = 4\n+        expMin = 0\n+        expMax = 2\n+        obsValuesList = iStat.getValuesList()\n+        obsSum = iStat.getSum()\n+        obsSum2 = iStat.getSumOfSquares()\n+        obsN = iStat.getValuesNumber()\n+        obsMin = iStat.getMin()\n+        obsMax = iStat.getMax()\n+        self.assertEquals(expValuesList, obsValuesList)\n+        self.assertEquals(expSum, obsSum)\n+        self.assertEquals(expSum2, obsSum2)\n+        self.assertEquals(expN, obsN)\n+        self.assertEquals(expMin, obsMin)\n+        self.assertEquals(expMax, obsMax)\n+       \n+    def test_add_Min(self):\n+        lValues = [2,1,1]\n+        iStat = Stat(lValues)\n+        iStat.add(0)\n+        expValuesList = [2,1,1,0]\n+        expSum = 4\n+        expSum2 = 6\n+        expN = 4\n+        expMin = 0\n+        expMax = 2\n+        obsValuesList = iStat.getValuesList()\n+        obsSum = iStat.getSum()\n+        obsSum2 = iStat.getSumOfSquares()\n+        obsN = iStat.getValuesNumber()\n+        obsMin = iStat.getMin()\n+        obsMax = iStat.getMax()\n+        self.assertEquals(expValuesList, obsValuesList)\n+        self.assertEquals(expSum, obsSum)\n+        self.assertEquals(expSum2, 
obsSum2)\n+        self.assertEquals(expN, obsN)\n+        self.assertEquals(expMin, obsMin)\n+        self.assertEquals(expMax, obsMax)\n+       \n+    def test_fill_emptyList(self):\n+        lValues = [2,1,1]\n+        iStat = Stat(lValues)\n+        iStat.fill([])\n+        expValuesList = [2,1,1]\n+        expSum = 4\n+        expSum2 = 6\n+        expN = 3\n+        expMin = 1\n+        expMax = 2\n+        obsValuesList = iStat.getValuesList()\n+        obsSum = iStat.getSum()\n+        obsSum2 = iStat.getSumOfSquares()\n+        obsN = iStat.getValuesNumber()'..b'\n+        lValues = [1, 2, 3, 4, 1, 2, 54, 6, 7]\n+        iStat = Stat(lValues)\n+        expMedian = 3\n+        obsMedian = iStat.median()\n+        self.assertEquals(expMedian, obsMedian)\n+        \n+    def test_median_odd(self):\n+        lValues = [1, 2, 3, 4, 2, 54, 6, 7]\n+        iStat = Stat(lValues)\n+        expMedian = 3.5\n+        obsMedian = iStat.median()\n+        self.assertEquals(expMedian, obsMedian)\n+        \n+    def test_kurtosis_flat(self):\n+        lValues = [1, 1, 1]\n+        iStat = Stat(lValues)\n+        expKurtosis = 0\n+        obsKurtosis = iStat.kurtosis()\n+        self.assertEquals(expKurtosis, obsKurtosis)\n+        \n+    def test_kurtosis_peak(self):\n+        lValues = [1, 100, -5]\n+        iStat = Stat(lValues)\n+        expKurtosis = round(712872278.6609683, 2)\n+        obsKurtosis = round(iStat.kurtosis(), 2)\n+        self.assertEquals(expKurtosis, obsKurtosis)\n+ \n+    def test_kurtosis_normal(self):\n+        lValues = [-1, 0, 1.64, 1.64, 0, -1]\n+        iStat = Stat(lValues)\n+        expKurtosis = 3.0\n+        obsKurtosis = round(iStat.kurtosis(), 1)\n+        self.assertEquals(expKurtosis, obsKurtosis)\n+        \n+    def test_sort(self):\n+        lValues = [-1, 0, 1.64, 1.64, 0, -1]\n+        iStat = Stat(lValues)\n+        expSort = [-1, -1, 0, 0, 1.64, 1.64]\n+        obsSort = iStat.sort()\n+        self.assertEquals(expSort, obsSort)\n+        \n+    def test_sort_reverse(self):\n+        lValues = [-1, 0, 1.64, 1.64, 0, -1]\n+        iStat = Stat(lValues)\n+        expSort = [1.64, 1.64, 0, 0, -1, -1]\n+        obsSort = iStat.sort(True)\n+        self.assertEquals(expSort, obsSort)\n+        \n+    def test_sort_emptyList(self):\n+        lValues = []\n+        iStat = Stat(lValues)\n+        expSort = []\n+        obsSort = iStat.sort()\n+        self.assertEquals(expSort, obsSort)\n+        \n+    def test_quantile_emptyList(self):\n+        lValues = []\n+        iStat = Stat(lValues)\n+        expQuantile = 0\n+        obsQuantile = iStat.quantile(0.25)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_quantile_0perc(self):\n+        lValues = [0, 2.64, 1.64, -1, 5]\n+        iStat = Stat(lValues)\n+        expQuantile = -1\n+        obsQuantile = iStat.quantile(0)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_quantile_25perc(self):\n+        lValues = [0, 2.64, 1.64, -1, 5]\n+        iStat = Stat(lValues)\n+        expQuantile = 0\n+        obsQuantile = iStat.quantile(0.25)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_quantile_41perc(self):\n+        lValues = [0, 2.64, 1.64, -1, 5]\n+        iStat = Stat(lValues)\n+        expQuantile = 1.64\n+        obsQuantile = iStat.quantile(0.41)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_quantile_75perc(self):\n+        lValues = [0, 2.64, 1.64, -1, 5]\n+        iStat = 
Stat(lValues)\n+        expQuantile = 2.64\n+        obsQuantile = iStat.quantile(0.75)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_quantile_81perc(self):\n+        lValues = [0, 2.64, 1.64, -1, 5]\n+        iStat = Stat(lValues)\n+        expQuantile = 5\n+        obsQuantile = iStat.quantile(0.81)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_quantile_100perc(self):\n+        lValues = [0, 2.64, 1.64, -1, 5]\n+        iStat = Stat(lValues)\n+        expQuantile = 5\n+        obsQuantile = iStat.quantile(1)\n+        self.assertEquals(expQuantile, obsQuantile)\n+        \n+    def test_N50(self):\n+        lValues = [10, 10, 2, 16, 3, 4, 5]\n+        iStat = Stat(lValues)\n+        expN50 = 10\n+        obsN50 = iStat.N50()\n+        self.assertEquals(expN50, obsN50)\n+\n+    def test_N50SpecialValues(self):\n+        lValues = [1, 100, 2, 3]\n+        iStat = Stat(lValues)\n+        expN50 = 100\n+        obsN50 = iStat.N50()\n+        self.assertEquals(expN50, obsN50)\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
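The collapsed hunk above adds Test_Stat.py, whose cases document the Stat API exercised here (add, getSum, getMin/getMax, median, quantile, N50, kurtosis, sort). A minimal usage sketch, reusing the value list from the N50 test and assuming commons.core.stat.Stat is importable from the installed package:

    from commons.core.stat.Stat import Stat

    iStat = Stat([10, 10, 2, 16, 3, 4, 5])
    print iStat.getSum()   # 50
    print iStat.median()   # 5 (middle value of the sorted list)
    print iStat.N50()      # 10, as asserted by test_N50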
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/test/Test_LoggerFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/test/Test_LoggerFactory.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,93 @@
+import unittest
+import logging
+from commons.core.LoggerFactory import LoggerFactory
+
+class Test_LoggerFactory( unittest.TestCase ):
+    
+    def test_logger_debug(self):
+        iLogger = LoggerFactory.createLogger("test")
+        isMethodExecuted = True
+        try:
+            iLogger.debug("message")
+        except:
+            isMethodExecuted = False
+        self.assertTrue(isMethodExecuted)
+    
+    def test_logger_info(self):
+        iLogger = LoggerFactory.createLogger("test")
+        isMethodExecuted = True
+        try:
+            iLogger.info("message")
+        except:
+            isMethodExecuted = False
+        self.assertTrue(isMethodExecuted)
+    
+    def test_logger_warning(self):
+        iLogger = LoggerFactory.createLogger("test")
+        isMethodExecuted = True
+        try:
+            iLogger.warning("message")
+        except:
+            isMethodExecuted = False
+        self.assertTrue(isMethodExecuted)
+    
+    def test_logger_error(self):
+        iLogger = LoggerFactory.createLogger("test")
+        isMethodExecuted = True
+        try:
+            iLogger.error("message")
+        except:
+            isMethodExecuted = False
+        self.assertTrue(isMethodExecuted)
+    
+    def test_logger_level_debug(self):
+        iLogger = LoggerFactory.createLogger("test")
+        LoggerFactory.setLevel(iLogger, 4)
+        expLevel = logging.DEBUG
+        obsLevel = iLogger.getEffectiveLevel()
+        self.assertEquals(expLevel, obsLevel)
+    
+    def test_logger_level_info(self):
+        iLogger = LoggerFactory.createLogger("test")
+        LoggerFactory.setLevel(iLogger, 3)
+        expLevel = logging.INFO
+        obsLevel = iLogger.getEffectiveLevel()
+        self.assertEquals(expLevel, obsLevel)
+    
+    def test_logger_level_warning(self):
+        iLogger = LoggerFactory.createLogger("test")
+        LoggerFactory.setLevel(iLogger, 2)
+        expLevel = logging.WARNING
+        obsLevel = iLogger.getEffectiveLevel()
+        self.assertEquals(expLevel, obsLevel)
+    
+    def test_logger_level_error(self):
+        iLogger = LoggerFactory.createLogger("test")
+        LoggerFactory.setLevel(iLogger, 1)
+        expLevel = logging.ERROR
+        obsLevel = iLogger.getEffectiveLevel()
+        self.assertEquals(expLevel, obsLevel)
+    
+    def test_logger_default_level(self):
+        iLogger = LoggerFactory.createLogger("test")
+        expLevel = logging.ERROR
+        obsLevel = iLogger.getEffectiveLevel()
+        self.assertEquals(expLevel, obsLevel)
+    
+    def test_logger_quiet(self):
+        iLogger = LoggerFactory.createLogger("test")
+        LoggerFactory.setLevel(iLogger, 0)
+        self.assertTrue(iLogger.disabled)
+        
+    def test_logger_noduplicate_handler(self):
+        iLogger = LoggerFactory.createLogger("test")
+        iLogger2 = LoggerFactory.createLogger("test")
+        
+        expNbHandlers = 1
+        obsNbHandlers = len(iLogger2.handlers)
+        self.assertEquals(expNbHandlers, obsNbHandlers)
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_LoggerFactory ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file
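The tests above pin down the LoggerFactory verbosity convention: 4 maps to DEBUG, 3 to INFO, 2 to WARNING, 1 to ERROR, 0 disables the logger, and repeated createLogger calls on the same name reuse a single handler. A minimal usage sketch, assuming commons.core.LoggerFactory is importable from the installed package:

    from commons.core.LoggerFactory import LoggerFactory

    # verbosity levels as exercised by the tests: 4=DEBUG, 3=INFO, 2=WARNING, 1=ERROR, 0=quiet
    iLogger = LoggerFactory.createLogger("demo")
    LoggerFactory.setLevel(iLogger, 3)
    iLogger.info("starting analysis")
    iLogger.debug("filtered out at level 3")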
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/tree/Tree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/tree/Tree.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,122 @@
+import os, re, sys
+
+class Tree:
+
+    def __init__( self, inFileName="" ):
+        self.tree = None
+        self.inFileName = inFileName
+        if self.inFileName != "":
+            self.loadTree()
+            
+    def loadTree( self, verbose=0 ):
+        inF = open( self.inFileName, "r" )
+        lines = inF.readlines()
+        inF.close()
+        line = "".join(lines).replace("\n","")
+        self.tree = self.parseTree( line )
+        if verbose > 0:
+            print "nb of leaves: %i" % ( self.getNbOfLeaves( self.tree ) )
+        
+    def parseTree( self, sTree ):
+        if "," not in sTree:
+            name, length = sTree.split(":")
+            return self.makeLeaf( name, float(length) )
+        
+        distPattern = re.compile(r'(?P<tree>\(.+\))\:(?P<length>[e\-\d\.]+)$')
+        m = distPattern.search( sTree )
+        length = 0
+        if m:
+            if m.group('length'): length = float( m.group('length') )
+            sTree = m.group('tree')
+        if length == "": length = 0
+        
+        lhs, rhs = self.parseSubTree( sTree )
+        
+        return { "name": "internal",
+                       "left": self.parseTree( lhs ),
+                       "right": self.parseTree( rhs ),
+                       "length": length }
+        
+    def makeLeaf( self, name, length ):
+        return { "left":None, "right":None, "name":name, "length":length }
+    
+    def parseSubTree( self, sTree ):
+        """
+        Parse a newick-formatted string of type 'a,b' into [a,b]
+        """
+        chars = list( sTree[1:-1] )
+        count = 0
+        isLhs = True
+        leftS = ""
+        rightS = ""
+        for c in chars:
+            if c == "(":
+                count += 1
+            elif c == ")":
+                count -= 1
+            elif (c == ",") and (count == 0) and (isLhs) :
+                isLhs = False
+                continue
+            if isLhs: leftS += c
+            else: rightS += c
+        return [ leftS, rightS ]
+    
+    def toNewick( self, tree ):
+        newString = ""
+        if tree["name"] is not "internal":
+            newString += tree["name"]
+        else:
+            newString += "("
+            newString += self.toNewick( tree["left"] )
+            newString += ","
+            newString += self.toNewick( tree["right"] )
+            newString += ")"
+        if tree["length"]:
+            newString += ":"
+            newString += "%f" % ( tree["length"] )
+        return newString
+    
+    def saveTree( self, outFileName ):
+        outF = open( outFileName, "w" )
+        outF.write( self.toNewick( self.tree ) )
+        outF.close()
+        
+    def replaceHeaderViaPrefixSearch( self, tree, dNew2Init ):
+        if dNew2Init.has_key( tree["name"] ):
+            tree["name"] = dNew2Init[ tree["name"] ].replace(" ","_").replace("::","-").replace(",","-")
+        if tree["left"] != None:
+            self.replaceHeaderViaPrefixSearch( tree["left"], dNew2Init )
+        if tree["right"] != None:
+            self.replaceHeaderViaPrefixSearch( tree["right"], dNew2Init )
+            
+    def retrieveInitialSequenceHeaders( self, dNew2Init, outFileName  ):
+        tree = self.tree
+        self.replaceHeaderViaPrefixSearch( tree, dNew2Init )
+        self.tree = tree
+        self.saveTree( outFileName )
+
+    def getNbOfChildNodes( self, tree, nbNodes ):
+        if tree["left"] is not None:
+            nbNodes += 1
+            nbNodes = self.getNbOfChildNodes( tree["left"], nbNodes )
+        if tree["right"] is not None:
+            nbNodes += 1
+            nbNodes = self.getNbOfChildNodes( tree["right"], nbNodes )
+        return nbNodes
+    
+    def getNbOfNodes( self ):
+        nbNodes = 0
+        return self.getNbOfChildNodes( self.tree, nbNodes )
+    
+    def getNbOfChildLeaves( self, tree, nbLeaves ):
+        if tree["name"] != "internal":
+            nbLeaves += 1
+        if tree["left"] is not None:
+            nbLeaves = self.getNbOfChildLeaves( tree["left"], nbLeaves )
+        if tree["right"] is not None:
+            nbLeaves = self.getNbOfChildLeaves( tree["right"], nbLeaves )
+        return nbLeaves
+    
+    def getNbOfLeaves( self ):
+        nbLeaves = 0
+        return self.getNbOfChildLeaves( self.tree, nbLeaves )
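Tree.parseTree() accepts a Newick string built from 'name:length' leaves nested in parentheses and returns nested dictionaries with 'name', 'length', 'left' and 'right' keys; toNewick() performs the reverse conversion. A small round-trip sketch using the same string as the tests below, assuming the module is importable:

    from commons.core.tree.Tree import Tree

    iTree = Tree()
    iTree.tree = iTree.parseTree("(seq4:0.012511,(seq3:0.005340,seq2:0.002201))")
    print iTree.getNbOfLeaves()       # 3
    print iTree.getNbOfNodes()        # 4
    print iTree.toNewick(iTree.tree)  # prints the tree back in Newick format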
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/tree/test/Test_Tree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/tree/test/Test_Tree.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,90 @@
+import unittest
+import os
+import time
+from commons.core.tree.Tree import Tree
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_Tree( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._tree = Tree()
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
+        
+        
+    def test_parseTree_oneLeaf( self ):
+        inString = "seq1:0.0023"
+        obs = self._tree.parseTree( inString )
+        exp = { "left":None, "right":None, "name":"seq1", "length":0.0023 }
+        self.assertEqual( obs, exp )
+        
+        
+    def test_parseTree_twoLeaves( self ):
+        inString = "(seq1:0.0023,seq2:0.0017)"
+        obs = self._tree.parseTree( inString )
+        exp = {'length':0, 'right':{'length':0.0016999999999999999, 'right':None, 'name':'seq2', 'left':None}, 'name':'internal', 'left':{'length':0.0023, 'right':None, 'name':'seq1', 'left':None}}
+        self.assertEqual( obs, exp )
+        
+##     def test_parseTree_threeLeaves( self ):
+##         inString = "(seq1:0.0023,(seq2:0.0017,seq3:0.0009))"
+##         obs = self._tree.parseTree( inString )
+##         print obs
+##         exp = {'length':0, 'right':{'length':0.0016999999999999999, 'right':None, 'name':'seq2', 'left':None}, 'name':'internal', 'left':{'length':0.0023, 'right':None, 'name':'seq1', 'left':None}}
+##         self.assertEqual( obs, exp )
+        
+        
+    def test_parseSubTree( self ):
+        inString = "(seq1:0.0023,seq2:0.0017)"
+        lExp = [ "seq1:0.0023", "seq2:0.0017" ]
+        lObs = self._tree.parseSubTree( inString )
+        self.assertEqual( lObs, lExp )
+        
+        
+    def test_saveTree( self ):
+        inFileName = "dummyInFile_%s" % ( self._uniqId )
+        inF = open( inFileName, "w" )
+        inF.write( "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))" )
+        inF.close()
+        self._tree = Tree( inFileName )
+        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
+        self._tree.saveTree( obsFileName )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, inFileName ) )
+        for f in [ inFileName, obsFileName ]:
+            os.remove( f )
+            
+            
+    def test_retrieveInitialSequenceHeaders( self ):
+        inString = "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))"
+        self._tree.tree = self._tree.parseTree( inString )
+        dNew2Init = { "seq2":"consensus524::215 dmel_chr4 142..765", "seq3":"DmelChr4-B-G387-MAP16", "seq4":"1360|1cl-3gr" }
+        expFileName = "dummyExpFile_%s"  % ( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write( "(1360|1cl-3gr:0.012511,(DmelChr4-B-G387-MAP16:0.005340,consensus524-215_dmel_chr4_142..765:0.002201))" )
+        expF.close()
+        obsFileName = "dummyObsFile_%s"  % ( self._uniqId )
+        self._tree.retrieveInitialSequenceHeaders( dNew2Init, obsFileName )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ expFileName, obsFileName ]:
+            os.remove( f )
+            
+            
+    def test_getNbOfLeaves( self ):
+        inString = "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))"
+        self._tree.tree = self._tree.parseTree( inString )
+        exp = 3
+        obs = self._tree.getNbOfLeaves()
+        self.assertEqual( obs, exp )
+        
+        
+    def test_getNbOfNodes( self ):
+        inString = "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))"
+        self._tree.tree = self._tree.parseTree( inString )
+        exp = 4
+        obs = self._tree.getNbOfNodes()
+        self.assertEqual( obs, exp )
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_Tree ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/tree/test/treeTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/tree/test/treeTestSuite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,16 @@
+import unittest
+import sys
+import Test_Tree
+
+
+
+def main():
+
+        commonsTestSuite = unittest.TestSuite() 
+        commonsTestSuite.addTest(unittest.makeSuite(Test_Tree.Test_Tree,'test'))
+        runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+        runner.run(commonsTestSuite)
+
+
+if __name__ == '__main__':
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/utils/RepetOptionParser.pyc
Binary file commons/core/utils/RepetOptionParser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/utils/__init__.pyc
Binary file commons/core/utils/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/utils/test/TestSuite_utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/test/TestSuite_utils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_FileUtils
+
+
+def main():
+    
+    TestSuite_utils = unittest.TestSuite() 
+    
+    TestSuite_utils.addTest( unittest.makeSuite( Test_FileUtils.Test_FileUtils, "test" ) )
+    
+    runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+    runner.run( TestSuite_utils )
+    
+    
+if __name__ == "__main__":
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/utils/test/Test_FileUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/test/Test_FileUtils.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,883 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import unittest\n+import time\n+import shutil\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_FileUtils( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n+        \n+    def tearDown( self ):\n+        self._uniqId = ""\n+        \n+    def test_getNbLinesInSingleFile_non_empty( self ):\n+        file = "dummyFile_%s" % ( self._uniqId )\n+        f = open( file, "w" )\n+        f.write( "line1\\n" )\n+        f.write( "line2\\n" )\n+        f.write( "line3" )\n+        f.close()\n+        exp = 3\n+        obs = FileUtils.getNbLinesInSingleFile( file )\n+        self.assertEquals( exp, obs )\n+        os.remove( file )\n+        \n+    def test_getNbLinesInSingleFile_non_empty_endEmptyLine( self ):\n+        file = "dummyFile_%s" % ( self._uniqId )\n+        f = open( file, "w" )\n+        f.write( "line1\\n" )\n+        f.write( "line2\\n" )\n+        f.write( "line3\\n" )\n+        f.write( "\\n" )\n+        f.close()\n+        exp = 3\n+        obs = FileUtils.getNbLinesInSingleFile( file )\n+        self.assertEquals( exp, obs )\n+        os.remove( file )\n+        \n+    def test_getNbLinesInSingleFile_empty( self ):\n+        file = "dummyFile_%s" % ( self._uniqId )\n+        os.system( "touch %s" % ( file ) )\n+        exp = 0\n+        obs = FileUtils.getNbLinesInSingleFile( file )\n+        self.assertEquals( exp, obs )\n+        os.remove( file )\n+        \n+    def test_getNbLinesInFileList_non_empty( self ):\n+        f = open("dummy1.txt", "w")\n+        f.write("line1\\n")\n+        f.write("line2\\n")\n+        f.write("line3")\n+        f.close()\n+        f = open("dummy2.txt", "w")\n+        f.write("line1\\n")\n+        f.write("line2\\n")\n+        f.write("line3")\n+        f.close()\n+        f = 
open("dummy3.txt", "w")\n+        f.write("line1\\n")\n+        f.write("line2\\n")\n+        f.write("line3")\n+        f.close()\n+        lFiles = [ "dummy1.txt", "dummy2.txt", "dummy3.txt" ]\n+        exp = 9\n+        obs = FileUtils.getNbLinesInFileList( lFiles )\n+        self.assertEqual( exp, obs )\n+        for f in lFiles:\n+            os.remove( f )\n+            \n+    def test_catFilesByPattern( self ):\n+        f = open("dummy1.txt", "w")\n+        f.write("line11\\n")\n+        f.write("line12\\n")\n+        f.write("line13")\n+        f.close()\n+    '..b'ead()\n+        \n+        self.assertEqual(exp1, obs1)\n+        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+        FileUtils.removeFilesByPattern("dummy*")\n+            \n+    def test_splitFileAccordingToLineNumber_more_maxLines_than_lines(self):\n+        inputFile = "dummy.txt"\n+        obsFile1 = "dummy-1.txt"\n+        \n+        f = open(inputFile, "w")\n+        f.write("line1\\n")\n+        f.write("line2\\n")\n+        f.write("line3\\n")\n+        f.close()\n+\n+        exp1 = "line1\\nline2\\nline3\\n"\n+        \n+        FileUtils.splitFileAccordingToLineNumber(inputFile, 10)\n+        \n+        obs1 = open(obsFile1).read()\n+\n+        self.assertEqual(exp1, obs1)\n+        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+        FileUtils.removeFilesByPattern("dummy*")\n+            \n+    def test_splitFileAccordingToLineNumber_empty_file(self):\n+        inputFile = "dummy.txt"\n+        obsFile1 = "dummy-1.txt"\n+\n+        os.system( "touch %s" % ( inputFile ) )\n+\n+        exp1 = ""\n+        \n+        FileUtils.splitFileAccordingToLineNumber(inputFile, 10)\n+        \n+        obs1 = open(obsFile1).read()\n+        \n+        self.assertEqual(exp1, obs1)\n+        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+        FileUtils.removeFilesByPattern("dummy*")\n+            \n+    def test_splitFileAccordingToLineNumber_0_lines(self):\n+        inputFile = "dummy.txt"\n+        obsFile1 = "dummy-1.txt"\n+        \n+        f = open(inputFile, "w")\n+        f.write("line1\\n")\n+        f.write("line2\\n")\n+        f.write("line3\\n")\n+        f.close()\n+\n+        exp1 = "line1\\nline2\\nline3\\n"\n+        \n+        FileUtils.splitFileAccordingToLineNumber(inputFile, 0)\n+        \n+        obs1 = open(obsFile1).read()\n+        \n+        self.assertEqual(exp1, obs1)\n+        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+        FileUtils.removeFilesByPattern("dummy*")\n+    \n+    def _writeFile( self, fileName ):\n+        inFile = open(fileName, \'w\')\n+        inFile.write(">Sequence_de_reference\\n")\n+        inFile.write("ATTTTGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+        inFile.write(">Lignee1_mismatch\\n")\n+        inFile.write("ATTTTGCAGACTTATTCGAG-----GCCATTGCT\\n")\n+        inFile.write(">Lignee2_insertion\\n")\n+        inFile.write("ATTTTGCAGTCTTATTCGAGATTACGCCATTGCT\\n")\n+        inFile.write(">Lignee3_deletion\\n")\n+        inFile.write("A---TGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+        inFile.close()      \n+        \n+    def _writeFileWithEmptyLine( self, fileName ):\n+        fileWithEmptyLine = open(fileName, \'w\')\n+        fileWithEmptyLine.write(">Sequence_de_reference\\n")\n+        fileWithEmptyLine.write("ATTTTGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+        fileWithEmptyLine.write("\\n\\n")\n+        fileWithEmptyLine.write(">Lignee1_mismatch\\n")\n+        
fileWithEmptyLine.write("ATTTTGCAGACTTATTCGAG-----GCCATTGCT\\n")\n+        fileWithEmptyLine.write("\\n\\n")\n+        fileWithEmptyLine.write(">Lignee2_insertion\\n")\n+        fileWithEmptyLine.write("ATTTTGCAGTCTTATTCGAGATTACGCCATTGCT\\n")\n+        fileWithEmptyLine.write("\\n")\n+        fileWithEmptyLine.write(">Lignee3_deletion\\n")\n+        fileWithEmptyLine.write("A---TGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+        fileWithEmptyLine.close() \n+        \n+    def _writeFileWithRepeatedBlanks( self, fileName ):\n+        fileWithRepeatedBlanks = open(fileName, \'w\')\n+        fileWithRepeatedBlanks.write(">Sequ  ence_de     _reference\\n")\n+        fileWithRepeatedBlanks.write("ATTTT  GCAGTCTT TTCGAG-  ----GCCATT  GCT\\n")\n+        fileWithRepeatedBlanks.close() \n+        \n+    def _writeFileWithoutRepeatedBlanks( self, fileName ):\n+        fileWithoutRepeatedBlanks = open(fileName, \'w\')\n+        fileWithoutRepeatedBlanks.write(">Sequ ence_de _reference\\n")\n+        fileWithoutRepeatedBlanks.write("ATTTT GCAGTCTT TTCGAG- ----GCCATT GCT\\n")\n+        fileWithoutRepeatedBlanks.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/BedWriter.pyc
Binary file commons/core/writer/BedWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/CsvWriter.pyc
Binary file commons/core/writer/CsvWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/EmblWriter.pyc
Binary file commons/core/writer/EmblWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/FastaWriter.pyc
Binary file commons/core/writer/FastaWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/FastqWriter.pyc
Binary file commons/core/writer/FastqWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/GbWriter.pyc
Binary file commons/core/writer/GbWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/Gff2Writer.pyc
Binary file commons/core/writer/Gff2Writer.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/Gff3Writer.pyc
Binary file commons/core/writer/Gff3Writer.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/GtfWriter.pyc
Binary file commons/core/writer/GtfWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/MapWriter.pyc
Binary file commons/core/writer/MapWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/MySqlTranscriptWriter.pyc
Binary file commons/core/writer/MySqlTranscriptWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/SamWriter.pyc
Binary file commons/core/writer/SamWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/SequenceListWriter.pyc
Binary file commons/core/writer/SequenceListWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/TranscriptListWriter.pyc
Binary file commons/core/writer/TranscriptListWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/TranscriptWriter.pyc
Binary file commons/core/writer/TranscriptWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/UcscWriter.pyc
Binary file commons/core/writer/UcscWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/WigWriter.pyc
Binary file commons/core/writer/WigWriter.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/WriterChooser.pyc
Binary file commons/core/writer/WriterChooser.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/__init__.pyc
Binary file commons/core/writer/__init__.pyc has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/test/Test_Gff3Writer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/test/Test_Gff3Writer.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,101 @@
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+import unittest
+import os
+from SMART.Java.Python.misc import Utils
+
+class Test_Gff3Writer(unittest.TestCase):
+  
+    def test_writer(self):
+        obsFileName = "testGffWriter1.gff3"
+        writer = Gff3Writer(obsFileName)
+        
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setDirection("+")
+        transcript.setTagValue("ID", "test1.1-1")
+        transcript.setTagValue("occurrence", 1)
+        transcript.setTagValue("nbOccurrences", 2)
+        
+        exon1 = Interval()
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setDirection("+")
+        
+        exon2 = Interval()
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setDirection("+")
+        
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+        
+        writer.addTranscript(transcript)
+        writer.write()
+        writer.close()
+        
+        expFileName = "expFile.gff3"
+        f = open(expFileName, "w")
+        f.write("arm_X\tS-MART\ttranscript\t1000\t4000\t.\t+\t.\tnbOccurrences=2;ID=test1.1-1;occurrence=1;Name=test1.1\n")
+        f.write("arm_X\tS-MART\texon\t1000\t2000\t.\t+\t.\tID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1\n")
+        f.write("arm_X\tS-MART\texon\t3000\t4000\t.\t+\t.\tID=test1.1-1-exon2;Name=test1.1-exon2;Parent=test1.1-1\n")
+        f.close()
+        
+        self.assertTrue(Utils.diff(expFileName, obsFileName))
+        
+        os.remove(expFileName)
+        os.remove(obsFileName)
+        
+    def test_writerAltNames(self):
+        obsFileName = "testGffWriter1.gff3"
+        writer = Gff3Writer(obsFileName,title="ALTSOURCE", feature="Match", featurePart="Match-Part")
+        
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setDirection("+")
+        transcript.setTagValue("ID", "test1.1-1")
+        transcript.setTagValue("occurrence", 1)
+        transcript.setTagValue("nbOccurrences", 2)
+        
+        exon1 = Interval()
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setDirection("+")
+        
+        exon2 = Interval()
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setDirection("+")
+        
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+        
+        writer.addTranscript(transcript)
+        writer.write()
+        writer.close()
+        
+        expFileName = "expFile.gff3"
+        f = open(expFileName, "w")
+        f.write("arm_X\tALTSOURCE\tMatch\t1000\t4000\t.\t+\t.\tnbOccurrences=2;ID=test1.1-1;occurrence=1;Name=test1.1\n")
+        f.write("arm_X\tALTSOURCE\tMatch-Part\t1000\t2000\t.\t+\t.\tID=test1.1-1-Match-Part1;Name=test1.1-Match-Part1;Parent=test1.1-1\n")
+        f.write("arm_X\tALTSOURCE\tMatch-Part\t3000\t4000\t.\t+\t.\tID=test1.1-1-Match-Part2;Name=test1.1-Match-Part2;Parent=test1.1-1\n")
+        f.close()
+        
+        self.assertTrue(Utils.diff(expFileName, obsFileName))
+        
+        os.remove(expFileName)
+        os.remove(obsFileName)
+
+if __name__ == '__main__':
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/core/writer/test/Test_MapWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/test/Test_MapWriter.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,61 @@
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+import unittest
+import os
+from SMART.Java.Python.misc import Utils
+from commons.core.writer.MapWriter import MapWriter
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_MapWriter(unittest.TestCase):
+    
+    def setUp(self):
+        self.expFileName = "expMapWriter.map"
+        self.obsFileName = "testMapWriter1.map"
+        
+    def tearDown(self):
+        os.remove(self.expFileName)
+        os.remove(self.obsFileName)
+        
+    def test_writer(self):
+        self.write_ExpMapFileName()
+        writer = MapWriter(self.obsFileName)
+        
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setDirection("+")
+        transcript.setTagValue("ID", "test1.1-1")
+        transcript.setTagValue("occurrence", 1)
+        transcript.setTagValue("nbOccurrences", 2)
+        
+        exon1 = Interval()
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setDirection("+")
+        
+        exon2 = Interval()
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setDirection("+")
+        
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+        
+        writer.addTranscript(transcript)
+        writer.write()
+        writer.close()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(self.expFileName, self.obsFileName))
+        
+
+    def write_ExpMapFileName(self):
+        f = open(self.expFileName, "w")
+        f.write("test1.1\tarm_X\t1000\t4001\n")
+        f.close()
+
+if __name__ == '__main__':
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/BlatClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/BlatClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+##@file
+# Launch BlatProgramLauncher on several files in parallel on a cluster.
+
+
+from pyRepet.launcher.AbstractClusterLauncher import *
+from commons.launcher.BlatProgramLauncher import BlatProgramLauncher
+from commons.core.coord.AlignUtils import AlignUtils
+from commons.tools import srptBlasterMatcher
+
+
+class BlatClusterLauncher( AbstractClusterLauncher ):
+    """
+    Launch Blat on several files in parallel on a cluster.
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractClusterLauncher.__init__( self )
+        AbstractClusterLauncher.setAcronym( self, "Blat" )
+        
+        self._cmdLineSpecificOptions = "s:p:A"
+        
+        self._exeWrapper = "BlatProgramLauncher.py"
+        self._prgLauncher = BlatProgramLauncher()
+        self._prgLauncher.setInputFile( GENERIC_IN_FILE )
+        self._prgLauncher.setClean()
+        self._prgLauncher.setVerbosityLevel( 1 )
+        self._prgLauncher.setListFilesToKeep()
+        self._prgLauncher.setListFilesToRemove()
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (format='fasta')"
+        string += "\n     -p: parameters for '%s'" % ( self._prgLauncher.getProgramName() )
+        string += "\n     -Z: concatenate output files"
+        string += "\n     -A: same sequences (all-by-all)"
+        return string
+    
+    
+    def getSubjectFile( self ):
+        return self._prgLauncher.getSubjectFile()
+    
+    
+    def getProgramParameters( self ):
+        return self._prgLauncher.getProgramParameters()
+    
+    
+    def processOutputFile( self, tmpFile, outFile ):
+        sortFile = "%s.sort" % ( tmpFile )
+        AlignUtils.sortAlignFile( tmpFile, sortFile )
+        if self._prgLauncher.getAllByAll():
+            srptBlasterMatcher.filterRedundantMatches( sortFile,
+                                                                  outFile )
+            os.remove( sortFile )
+        else:
+            os.rename( sortFile, outFile )
+            
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        if o =="-s":
+            self._prgLauncher.setSubjectFile( a )
+            
+if __name__ == "__main__":
+    i = BlatClusterLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/BlatProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/BlatProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+
+##@file
+# Launch Blat (pairwise alignment).
+#
+# options:
+#      -h: this help
+#      -i: name of the input file (queries, format='fasta')
+#      -s: name of the subject file (format='fasta')
+#      -p: parameters for 'blat' (default='-d 2')
+#      -o: name of the output file (format='align', default=inFile+'.align')
+#      -c: clean
+#      -v: verbosity level (default=0/1)
+
+
+import os
+import sys
+
+from commons.pyRepetUnit.components.AbstractProgramLauncher import  AbstractProgramLauncher
+
+
+class BlatProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch Blat (pairwise alignment).
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "blat"
+        self._formatInFile = "fasta"
+        self._sbjFile = ""
+        self._prgParam = ""
+        self._allByAll = False
+        
+        
+    def getHelpAsString( self ):
+        string = AbstractProgramLauncher.getHelpAsString(self)
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (database, format='fasta')"
+        string += "\n     -p: parameters for '%s'" % ( self.getProgramName() )
+        string += "\n     -A: same sequences (all-by-all)"
+        string += "\n     -o: name of the output file (format='align', default=inFile+'.align')"
+        return string
+    
+    def getCmdLineOptions(self):
+        return AbstractProgramLauncher.getCmdLineOptions(self) + "s:p:Ao:"
+
+    def setAttributesFromCmdLine( self, o, a = "" ):
+        AbstractProgramLauncher.setAttributesFromCmdLine(self, o, a)
+        if o == "-s":
+            self.setSubjectFile( a )
+        elif o == "-p":
+            self.setProgramParameters( a )
+        elif o == "-A":
+            self.setAllByAll()
+        elif o == "-o":
+            self.setOutputFile( a )
+            
+            
+    def setSubjectFile( self, arg ):
+        self._sbjFile = arg
+        
+        
+    def getSubjectFile( self ):
+        return self._sbjFile
+    
+    
+    def setAllByAll( self ):
+        self._allByAll = True
+        
+        
+    def getAllByAll( self ):
+        return self._allByAll
+    
+    
+    def check( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        AbstractProgramLauncher.check(self)
+        if self._sbjFile == "":
+            string = "ERROR: missing subject file (-s)"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+            
+            
+    def setWrapperCommandLine( self ):
+        """
+        Set the command-line of the wrapper.
+        Required for BlatClusterLauncher.
+        """
+        self._wrpCmdLine = self.getWrapperName()
+        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
+        self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
+        if self.getProgramParameters() != "":
+            self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
+        if self.getAllByAll():
+            self._wrpCmdLine += " -A"
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
+        if self.getClean():
+            self._wrpCmdLine += " -c"
+        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+        
+        
+    def setProgramCommandLine( self ):
+        """
+        Set the command-line of the program.
+        """
+        self._prgCmdLine = self.getProgramName()
+        self._prgCmdLine += " %s" % ( self.getSubjectFile() )
+        self._prgCmdLine += " %s" % ( self.getInputFile() )
+        if self.getProgramParameters() != "":
+            self._prgCmdLine += " %s" % ( self.getProgramParameters() )
+        self._prgCmdLine += " -out=blast8"
+        self._prgCmdLine += " %s.blast" % ( self.getInputFile() )
+        
+        
+    def setListFilesToKeep( self ):
+        """
+        Set the list of files to keep.
+        """
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+        
+        
+    def setListFilesToRemove( self ):
+        """
+        Set the list of files to remove.
+        """
+        self.appendFileToRemove( "%s.blast" % ( self.getInputFile() ) )
+        
+        
+    def convertBlastIntoAlign( self ):
+        """
+        Convert a 'blast' file into the 'align' format.
+        """
+        cmd = os.environ["REPET_PATH"] + "/bin/blast2align.py"
+        cmd += " -i %s.blast" % ( self.getInputFile() )
+        cmd += " -o %s" % ( self.getOutputFile() )
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR while converting 'blast' file into 'align' format"
+            print string
+            sys.exit(1)
+            
+            
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
+        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
+        if self.getAllByAll():
+            self._summary += "\nall-by-all"
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+        
+        
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
+            print string
+            sys.exit(1)
+            
+        self.convertBlastIntoAlign()
+        
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = BlatProgramLauncher()
+    i.checkAttributesFromCmdLine()
+    i.run()
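setProgramCommandLine() shows the exact command the launcher builds: 'blat <subject> <query> [parameters] -out=blast8 <query>.blast', after which the blast8 output is converted to the 'align' format via $REPET_PATH/bin/blast2align.py. A minimal sketch of driving it from Python, assuming blat is on the PATH, REPET_PATH is set, and setInputFile() is provided by AbstractProgramLauncher as used in BlatClusterLauncher above:

    from commons.launcher.BlatProgramLauncher import BlatProgramLauncher

    iLauncher = BlatProgramLauncher()
    iLauncher.setInputFile("queries.fa")         # -i: query sequences (fasta)
    iLauncher.setSubjectFile("subject.fa")       # -s: database sequences (fasta)
    iLauncher.setOutputFile("queries.fa.align")  # -o: defaults to <input>.align if omitted
    iLauncher.run()  # runs blat, then converts the blast8 output into 'align'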
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchBlastclust.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchBlastclust.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,372 @@\n+#!/usr/bin/env python\n+\n+"""\n+Launch Blastclust on nucleotide sequences and return a fasta file.\n+"""\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import os\n+import sys\n+import subprocess\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders\n+\n+class LaunchBlastclust(object):\n+    """\n+    Launch Blastclust on nucleotide sequences and return a fasta file.\n+    """\n+    \n+    def __init__(self, input = "", outFilePrefix = "", clean = False, verbose = 0):\n+        """\n+        Constructor.\n+        """\n+        self._inFileName = input\n+        self._identityThreshold = 95\n+        self._coverageThreshold = 0.9\n+        self._bothSeq = "T"\n+        self._filterUnclusteredSeq = False\n+        self._outFilePrefix = outFilePrefix\n+        self._isBlastToMap = False\n+        self._isHeaderForTEdenovo = False\n+        self._nbCPUs = 1\n+        self._clean = clean\n+        self._verbose = verbose\n+        self._tmpFileName = ""\n+        \n+    def setAttributesFromCmdLine(self):\n+        """\n+        Set the attributes from the command-line.\n+        """\n+        \n+        description = "Launch Blastclust on nucleotide sequences and return a fasta file."\n+        usage = "LaunchBlastclust.py -i inputFileName [options]"\n+        \n+        examples = "\\nExample 1: launch Blastclust with default options, highest verbose and clean temporary files.\\n"\n+        examples += "\\t$ python ./LaunchBlastclust.py -i MyBank.fa -v 2 -c"\n+        examples += "\\n\\t"\n+        examples += "\\t\\nExample 2: launch Blastclust with an identity threshold of 90%, rename output files and generate a map file corresponding to the fasta output.\\n"\n+        examples += "\\t$ python ./LaunchBlastclust.py -i 
MyBank.fa -S 90 -o SpecialOutputName -m"\n+        examples += "\\n\\tWARNING: Please refer to -m option limitations in the description above.\\n"\n+        \n+        #TODO: check if the optionParser can handle \'\\\' into strings for a better code readability in -m option\n+        \n+        parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)\n+        parser.add_option("-i", "--input",          dest = "inFileName",            type = "string",    help = "name of the input fasta file (nucleoti'..b'ine == "":\n+                break\n+            data = line.split("\\t")\n+            dNew2Init[data[0]] = data[1]\n+            line = linkFile.readline()\n+        linkFile.close()\n+        return dNew2Init\n+    \n+    \n+    def retrieveInitHeaders(self, dNewH2InitH):\n+        tmpFaFile = "%s.shortH_Blastclust.fa" % (self._inFileName)\n+        tmpFaFileHandler = open(tmpFaFile, "r")\n+        outFaFile = "%s_Blastclust.fa" % (self._outFilePrefix)\n+        outFaFileHandler = open(outFaFile, "w")\n+        while True:\n+            line = tmpFaFileHandler.readline()\n+            if line == "":\n+                break\n+            if line[0] == ">":\n+                tokens = line[1:-1].split("_")\n+                initHeader = dNewH2InitH[tokens[1]]\n+                if self._isHeaderForTEdenovo:\n+                    classif = initHeader.split("_")[0]\n+                    consensusName = "_".join(initHeader.split("_")[1:])\n+                    clusterId = tokens[0].split("Cluster")[1].split("Mb")[0]\n+                    newHeader = "%s_Blc%s_%s" % (classif, clusterId, consensusName)\n+                else:\n+                    newHeader = "%s_%s" % (tokens[0], initHeader)\n+                outFaFileHandler.write(">%s\\n" % (newHeader))\n+            else:\n+                outFaFileHandler.write(line)\n+        tmpFaFileHandler.close()\n+        outFaFileHandler.close()\n+        if self._clean:\n+            os.remove(tmpFaFile)\n+\n+\n+    def blastclustToMap(self, blastclustFastaOut):\n+        """\n+        Write a map file from blastclust fasta output.\n+        Warning: only works if blastclust\'s fasta input headers are formated like LTRharvest fasta output.\n+        """\n+        fileDb = open(blastclustFastaOut , "r")\n+        mapFilename = "%s.map" % (os.path.splitext(blastclustFastaOut)[0])\n+        fileMap = open(mapFilename, "w")\n+        seq = Bioseq()\n+        numseq = 0\n+        while 1:\n+            seq.read(fileDb)\n+            if seq.sequence == None:\n+                break\n+            numseq = numseq + 1\n+            ID = seq.header.split(\' \')[0].split(\'_\')[0]\n+            chunk = seq.header.split(\' \')[0].split(\'_\')[1]\n+            start = seq.header.split(\' \')[-1].split(\',\')[0][1:]\n+            end = seq.header.split(\' \')[-1].split(\',\')[1][:-1]\n+            line= \'%s\\t%s\\t%s\\t%s\' % (ID, chunk, start, end)\n+            fileMap.write(line + "\\n")\n+    \n+        fileDb.close()\n+        fileMap.close()\n+        print "saved in %s" % mapFilename \n+        \n+        \n+    def start(self):\n+        """\n+        Useful commands before running the program.\n+        """\n+        self.checkAttributes()\n+        if self._verbose > 0:\n+            print "START %s" % (type(self).__name__)\n+            \n+            \n+    def end(self):\n+        """\n+        Useful commands before ending the program.\n+        """\n+        if self._verbose > 0:\n+            print 
"END %s" % (type(self).__name__)\n+            \n+            \n+    def run(self):\n+        """\n+        Run the program.\n+        """\n+        self.start()\n+        \n+        iCSH = ChangeSequenceHeaders(inFile = self._inFileName, format = "fasta", step = 1, outFile = "%s.shortH" % self._inFileName, linkFile = "%s.shortHlink" % self._inFileName)\n+        iCSH.run()\n+        \n+        self.launchBlastclust("%s.shortH" % (self._inFileName))\n+        \n+        self.getClusteringResultsInFasta("%s.shortH" % (self._inFileName))\n+        \n+        dNewH2InitH = self.getLinkInitNewHeaders()\n+        self.retrieveInitHeaders(dNewH2InitH)\n+        \n+        if self._isBlastToMap:\n+            blastclustFileName = "%s_Blastclust.fa" % (self._outFilePrefix)\n+            self.blastclustToMap(blastclustFileName)\n+        \n+        if self._clean:\n+            os.remove("%s.shortH" % (self._inFileName))\n+            os.remove("%s.shortHlink" % (self._inFileName))\n+        \n+        self.end()\n+        \n+if __name__ == "__main__":\n+    i = LaunchBlastclust()\n+    i.setAttributesFromCmdLine()\n+    i.run()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchLastZ.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchLastZ.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,133 @@
+#! /usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import sys
+import os
+import optparse
+from commons.core.checker.CheckerUtils import CheckerUtils
+from commons.core.utils.FileUtils import FileUtils
+from subprocess import Popen
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+import subprocess
+import tempfile
+  
+class LaunchLastZ(object):
+
+    def __init__(self,queryFileName="", refFileName ="", outputFileName=None,outputFileFormat="axt",noTransition=True, ambiguous=None, step=1,gfextend=False, chain=False, verbosity=1 ):
+        self.queryFileName = queryFileName
+        self.refFileName = refFileName
+        self.outputFileName = outputFileName
+        self.outputFileFormat = outputFileFormat
+        self.noTransition = noTransition
+        self.step = step
+        self.ambiguous = ambiguous
+        self.gfextend = gfextend
+        self.chain = chain
+        self.verbosity = verbosity
+        
+    def setAttributesFromCmdLine(self):
+        description = "LaunchLastZ runs the LastZ program ."    
+        parser = RepetOptionParser(description = description)
+        parser.add_option("-q", "--query", dest="queryFileName", default = None,  action="store", type="string", help="input query file [compulsory] [format: fasta]")
+        parser.add_option("-r", "--ref", dest="refFileName", default = None,  action="store", type="string", help="input ref file [compulsory] [format: fasta]")
+        parser.add_option("-o", "--output", dest="outputFileName", default = None,  action="store", type="string", help="output file [compulsory] ")
+        parser.add_option("-f", "--format", dest="outputFileFormat", default = "axt",  action="store", type="string", help="output file format[optional] ")
+        parser.add_option("-n", "--notransition", dest="noTransition", action="store_false", default=True, help="noTransition (default True) [optional] ")
+        parser.add_option("-a", "--ambiguous", dest="ambiguous", action="store",  type="string", help="ambiguous [optional] ")
+        parser.add_option("-s", "--step", dest="step", default = 1,  action="store", type="int", help="stepsize (default 1) [optional] ")
+        parser.add_option("-g", "--gfextend", dest="gfextend",  action="store_true",  help="extend gf (default false)[optional] ")
+        parser.add_option("-c", "--chain", dest="chain",  action="store_true", help="chain (default false)[optional] ")
+        parser.add_option("-v", "--verbosity", dest="verbosity", default = 1,  action="store", type="int", help="verbosity [optional] ")
+        (self._options, args) = parser.parse_args()
+        self._setAttributesFromOptions(self._options)
+
+    def _setAttributesFromOptions(self, options):
+        self.queryFileName = options.queryFileName
+        self.refFileName = options.refFileName
+        self.outputFileName = options.outputFileName
+        self.outputFileFormat = options.outputFileFormat
+        self.ambiguous = options.ambiguous
+        self.noTransition = options.noTransition
+        self.step = options.step
+        self.gfextend = options.gfextend
+        self.chain = options.chain
+        self.verbosity = options.verbosity
+      
+    def checkOptions(self):       
+        if self.queryFileName != "":
+            if not FileUtils.isRessourceExists(self.queryFileName):
+                raise Exception("ERROR: Query file does not exist!")
+        else:
+            raise Exception("ERROR: No specified --query option!")
+        if self.refFileName != "":
+            if not FileUtils.isRessourceExists(self.refFileName):
+                raise Exception("ERROR: Ref file does not exist!")
+        else:
+            raise Exception("ERROR: No specified --ref option!")
+        if self.outputFileName == None:
+            self.outputFileName = "%s_%s.axt" % (os.path.basename(self.queryFileName), os.path.basename(self.refFileName))
+                                
+    def run(self):
+        if not CheckerUtils.isExecutableInUserPath("lastz") :
+            print ("ERROR: LastZ must be in your path")
+        else:
+            self.checkOptions()
+            
+            transition = "" 
+            if self.noTransition:
+                transition = "--notransition"
+            ambiguous = ""    
+            if self.ambiguous is not None:
+                ambiguous =  "--ambiguous=%s" % self.ambiguous
+                
+            gfextend = ""
+            if self.gfextend:
+                gfextend = "--gfextend"
+                
+            chain = ""
+            if self.chain:
+                chain = "--chain"
+                
+            cmd = "lastz %s[format=fasta] %s[format=fasta] --output=%s --format=%s %s %s --step=%i %s %s" % (self.refFileName, self.queryFileName, self.outputFileName
+                                                                           , self.outputFileFormat, ambiguous, transition,self.step, gfextend, chain)
+            if self.verbosity>0:
+                print("Running LastZ with following commands : %s" %cmd)
+            sys.stdout.flush()
+            cmd = cmd.split()
+            process = subprocess.Popen(cmd)
+            process.wait()
+            return process.returncode
+
+if __name__ == "__main__":
+    iLaunchLastZ = LaunchLastZ()
+    iLaunchLastZ.setAttributesFromCmdLine()
+    iLaunchLastZ.run()
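For illustration, a minimal programmatic call mirroring the __main__ block above; the file names are hypothetical and this is only a sketch:

    # Hedged sketch: run LastZ on two fasta files, writing axt output and
    # enabling the --gfextend and --chain stages.
    from commons.launcher.LaunchLastZ import LaunchLastZ

    iLaunchLastZ = LaunchLastZ(queryFileName="query.fa", refFileName="ref.fa",
                               outputFileName="query_vs_ref.axt", outputFileFormat="axt",
                               gfextend=True, chain=True, verbosity=1)
    returnCode = iLaunchLastZ.run()   # builds and runs the "lastz ..." command shown above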
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchMCL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchMCL.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,239 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.core.coord.MatchUtils import MatchUtils\n+import subprocess\n+import os\n+import time\n+import shutil\n+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders\n+\n+LOG_DEPTH = "repet.base"\n+\n+##Launch MCL\n+#\n+class LaunchMCL(object):\n+    \n+    def __init__(self, fastaFileName = "", outFilePrefix = "", inflate = 1.5, covThres = 0.0, isJoined = False, isCluster2Map = False, isClusterConsensusHeaders = False, doClean = False, verbosity = 0):\n+        self._fastaFileName = fastaFileName\n+        self.setOutFilePrefix(outFilePrefix)\n+        self._inflate = inflate\n+        self._coverageThreshold = covThres\n+        self._isJoined = isJoined\n+        self._isCluster2Map = isCluster2Map\n+        self._isClusterConsensusHeaders = isClusterConsensusHeaders\n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Launch MCL clustering program."\n+        epilog = "\\nExample: launch without verbosity and keep temporary files.\\n"\n+        epilog += "\\t$ python LaunchMCL.py -i file.fa -v 0\\n"\n+        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parser.add_option("-i", "--fasta",      dest = "fastaFileName", action = "store",       type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")\n+        parser.add_option("-o", "--out",        dest = "outFilePrefix", action = "store",       type = "string", help = "prefix of the output files [default=input 
fasta file name]",    default = "")\n+        parser.add_option("-I", "--inflate",    dest = "inflate",       action = "store",       type = "float",  help = "inflate parameter of MCL [optional] [default: 1.5]", default = 1.5)\n+        parser.add_option("-T", "--coverage",   dest = "coverageThreshold", action = "store",   type = "float",  help = "length coverage threshold (default=0.0, 0.0 <= value <= 1.0)",              default = 0.0)\n+        parser.add_option("-j", "--join",       dest = "isJoined",          actio'..b'= " -s %s" % linkToFastaFile\n+        cmd += " -a"\n+        cmd += " 1>&2 >> blasterMatcher.log"\n+        process = subprocess.Popen(cmd, shell = True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        outBlasterFileName = "%s.align" % fastaFileNameShorten\n+        \n+        cmd = "matcher"\n+        cmd += " -m %s" % outBlasterFileName\n+        cmd += " -q %s" % fastaFileNameShorten\n+        cmd += " -s %s" % linkToFastaFile\n+        cmd += " -a"\n+        if self._isJoined:\n+            cmd += " -j"\n+        cmd += " 1>&2 >> blasterMatcher.log"\n+        process = subprocess.Popen(cmd, shell=True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        self._log.info("END Blaster-Matcher (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))\n+        \n+        outMatcherFileName = "%s.match.tab" % outBlasterFileName\n+        inputABCFileName = "%s.shortH.abc" % os.path.splitext(fastaFileNameShorten)[0]\n+        MatchUtils.convertMatchFileIntoABCFileOnQueryCoverage(outMatcherFileName, inputABCFileName, coverage = self._coverageThreshold)\n+        outMCLPreprocessFileName = "MCLPreprocess.out"\n+        \n+        self._log.info("START MCL (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))\n+        cmd = "mcxload"\n+        cmd += " -abc %s" % inputABCFileName\n+        cmd += " --stream-mirror"\n+        cmd += " --stream-neg-log10"\n+        cmd += " -stream-tf \'ceil(200)\'"\n+        cmd += " -o %s" % outMCLPreprocessFileName\n+        cmd += " -write-tab %s.tab" % outMCLPreprocessFileName\n+        cmd += " 1>&2 > MCLpreprocess.log"\n+        process = subprocess.Popen(cmd, shell = True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        \n+        outMCLFileName = "out.shortH.mcl"\n+        cmd = "mcl"\n+        cmd += " %s" % outMCLPreprocessFileName\n+        cmd += " -I %s" % self._inflate\n+        cmd += " -use-tab %s.tab" % outMCLPreprocessFileName\n+        cmd += " -o %s" % outMCLFileName\n+        cmd += " 1>&2 > MCL.log"\n+        process = subprocess.Popen(cmd, shell = True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        self._log.info("END MCL (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))\n+\n+        outFastaFileNameShorten = "%s.fa" % os.path.splitext(outMCLFileName)[0]\n+\n+        FastaUtils.convertClusterFileToFastaFile(outMCLFileName, fastaFileNameShorten, outFastaFileNameShorten, "MCL", verbosity = self._verbosity - 1)\n+        \n+        outFastaFileName = "%s_MCL.fa" % 
self._outFilePrefix\n+        linkFileName = "%s.newHlink" % self._fastaFileName\n+        headerStyle = "A"\n+        if self._isClusterConsensusHeaders:\n+            headerStyle = "B"\n+        iChangeSequenceHeaders = ChangeSequenceHeaders(inFile=outFastaFileNameShorten, format="fasta", step=2, outFile=outFastaFileName, linkFile=linkFileName, whichCluster = headerStyle, verbosity=self._verbosity - 1)\n+        iChangeSequenceHeaders.run()\n+        \n+        if self._isCluster2Map:\n+            outMapFileName = "%s_MCL.map" % self._outFilePrefix\n+            FastaUtils.convertClusteredFastaFileToMapFile(outFastaFileName, outMapFileName)\n+            shutil.move(outMapFileName, "..")\n+\n+        shutil.move(outFastaFileName, "..")\n+        os.chdir("..")\n+        if self._doClean:\n+            self._log.warning("Working directory will be cleaned")\n+            shutil.rmtree(workingDir)\n+        self._log.info("END Launch MCL")\n+\n+if __name__ == "__main__":\n+    iLaunch = LaunchMCL()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()\n'
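Following the epilog example in the code above, a hedged sketch of the equivalent programmatic call; the file name and the 0.8 coverage threshold are hypothetical choices:

    # Hedged sketch: cluster a consensus fasta file with MCL, keeping matches that
    # cover at least 80% of the query, and also write the clusters as a .map file.
    from commons.launcher.LaunchMCL import LaunchMCL

    iLaunch = LaunchMCL(fastaFileName="consensus.fa", outFilePrefix="consensus",
                        inflate=1.5, covThres=0.8, isCluster2Map=True, verbosity=2)
    iLaunch.run()   # expected outputs: consensus_MCL.fa and consensus_MCL.map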
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchMap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchMap.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,186 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders\n+from commons.core.utils.FileUtils import FileUtils\n+import os\n+import subprocess\n+\n+LOG_DEPTH = "repet.tools"\n+\n+##Reference launcher implementation\n+#\n+class LaunchMap(object):\n+    \n+    def __init__(self, fastaFileName = "", outFileName = "", gapSize = 50, mismatchPenalty = -8, gapOpenPenalty = 16, gapExtendPenalty = 4, doClean = False, verbosity = 0):\n+        self._fastaFileName = fastaFileName\n+        self.setOutFileName(outFileName)\n+        self._gapSize = gapSize\n+        self._mismatchPenalty = mismatchPenalty\n+        self._gapOpenPenalty = gapOpenPenalty\n+        self._gapExtendPenalty = gapExtendPenalty\n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+#        description = "Launch template to create a launcher."\n+#        epilog = "\\nExample 1: launch without verbosity and keep temporary files.\\n"\n+#        epilog += "\\t$ python LaunchTemplate.py -i file.fa -v 0"\n+#        epilog += "\\n\\t"\n+#        epilog += "\\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\\n"\n+#        epilog += "\\t$ python LaunchTemplate.py -i file.fa -c -v 2"\n+#        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parser = RepetOptionParser(description = "", epilog = "")\n+        parser.add_option("-i", "--fasta",     
 dest = "fastaFileName", action = "store",       type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")\n+        parser.add_option("-o", "--out",        dest = "outFileName",   action = "store",       type = "string", help = "output file name [default: <input>.fa_aln]", default = "")\n+        parser.add_option("-s", "--gapSize",    dest = "gapSize",       action = "store",       type = "int",    help = "size above which a gap is not penalized anymore [optional] [default: 50]", defaul'..b'elf.setGapOpenPenalty(options.gapOpen)\n+        self.setGapExtendPenalty(options.gapExtend)\n+        self.setDoClean(options.doClean)\n+        self.setVerbosity(options.verbosity)\n+\n+    def setFastaFileName(self, fastaFileName):\n+        self._fastaFileName = fastaFileName\n+        \n+    def setOutFileName(self, outFileName):\n+        if outFileName == "":\n+            self._outFileName = "%s.fa_aln" % self._fastaFileName\n+        else:\n+            self._outFileName = outFileName\n+        \n+    def setGapSize(self, gapSize):\n+        self._gapSize = gapSize\n+\n+    def setMismatchPenalty(self, mismatchPenalty):\n+        self._mismatchPenalty = mismatchPenalty\n+        \n+    def setGapOpenPenalty(self, gapOpenPenalty):\n+        self._gapOpenPenalty = gapOpenPenalty\n+        \n+    def setGapExtendPenalty(self, gapExtendPenalty):\n+        self._gapExtendPenalty = gapExtendPenalty\n+        \n+    def setDoClean(self, doClean):\n+        self._doClean = doClean\n+        \n+    def setVerbosity(self, verbosity):\n+        self._verbosity = verbosity\n+        \n+    def _checkOptions(self):\n+        if self._fastaFileName == "":\n+            self._logAndRaise("ERROR: Missing input fasta file name")\n+        if not FileUtils.isRessourceExists(self._fastaFileName):\n+            self._logAndRaise("ERROR: Input fasta file name %s doesn\'t exist." 
% self._fastaFileName)\n+            \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+    \n+    def getMapCmd(self):\n+        cmd = "rpt_map"\n+        cmd += " %s.shortH" % self._fastaFileName\n+        cmd += " %i" % self._gapSize\n+        cmd += " %i" % self._mismatchPenalty\n+        cmd += " %i" % self._gapOpenPenalty\n+        cmd += " %i" % self._gapExtendPenalty\n+        cmd += " > %s.shortH.fa_aln" % self._fastaFileName\n+        return cmd\n+                    \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self._checkOptions()\n+        self._log.info("START LaunchMap")\n+        self._log.debug("Fasta file name: %s" % self._fastaFileName)\n+        \n+        lInitHeaders = FastaUtils.dbHeaders(self._fastaFileName, self._verbosity - 1)\n+        \n+        csh = ChangeSequenceHeaders()\n+        csh.setInputFile(self._fastaFileName)\n+        csh.setFormat("fasta")\n+        csh.setStep(1)\n+        csh.setPrefix("seq")\n+        csh.setLinkFile("%s.shortHlink" % self._fastaFileName)\n+        csh.setOutputFile("%s.shortH" % self._fastaFileName)\n+        csh.setVerbosityLevel(self._verbosity - 1)\n+        csh.run()\n+        \n+        cmd = self.getMapCmd()\n+        process = subprocess.Popen(cmd, shell = True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+            \n+        csh.setInputFile("%s.shortH.fa_aln" % self._fastaFileName)\n+        csh.setFormat("fasta")\n+        csh.setStep(2)\n+        csh.setLinkFile("%s.shortHlink" % self._fastaFileName)\n+        csh.setOutputFile("%s.shortH.fa_aln.initH" % self._fastaFileName)\n+        csh.setVerbosityLevel(self._verbosity - 1)\n+        csh.run()\n+        \n+        absDB = AlignedBioseqDB("%s.shortH.fa_aln.initH" % self._fastaFileName)\n+        outFileHandler = open(self._outFileName, "w")\n+        for header in lInitHeaders:\n+            bs = absDB.fetch(header)\n+            bs.upCase()\n+            bs.write(outFileHandler)\n+        outFileHandler.close()\n+        if self._doClean:\n+            os.remove("%s.shortH" % self._fastaFileName)\n+            os.remove("%s.shortHlink" % self._fastaFileName)\n+            os.remove("%s.shortH.fa_aln" % self._fastaFileName)\n+            os.remove("%s.shortH.fa_aln.initH" % self._fastaFileName)\n+        self._log.info("END Launch")\n+\n+if __name__ == "__main__":\n+    iLaunch = LaunchMap()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
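As an illustration of the rpt_map wrapper above, a hedged programmatic call with a hypothetical input file, equivalent to "python LaunchMap.py -i family.fa -v 2":

    # Hedged sketch: align the sequences of one fasta file with the rpt_map command
    # built by getMapCmd(), using the default gap and mismatch penalties.
    from commons.launcher.LaunchMap import LaunchMap

    iLaunch = LaunchMap(fastaFileName="family.fa", verbosity=2)
    iLaunch.run()   # writes the default output file, family.fa.fa_aln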
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchMatcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchMatcher.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,168 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+import subprocess\n+\n+LOG_DEPTH = "repet.tools"\n+\n+##Launch MATCHER\n+#\n+class LaunchMatcher(object):\n+\n+    def __init__(self, align="", queryFileName="", subjectFileName="", evalue="1e-10", doJoin=False, keepConflict=False, prefix="", doClean = False, verbosity = 0):\n+        self._alignFileName = align\n+        self._queryFileName = queryFileName\n+        self.setSubjectFileName(subjectFileName)\n+        self.setOutPrefix(prefix)\n+        self._doJoin = doJoin\n+        self._eValue = evalue\n+        self._keepConflict = keepConflict\n+        \n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Launch Matcher."\n+        epilog = "\\nExample 1: launch without verbosity and keep temporary files.\\n"\n+        epilog += "\\t$ python LaunchMatcher.py -a in.align -v 0"\n+        epilog += "\\n\\t"\n+        epilog += "\\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\\n"\n+        epilog += "\\t$ python LaunchMatcher.py -a in.align -q query.fa -s nr.fa -c -v 2"\n+        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parser.add_option("-a", "--align",      dest = "align",         action = "store",       type = "string", help = "input align file name [compulsory] [format: align]",       default = "")\n+        parser.add_option("-q", "--query",      dest = "query",         action = "store",       type = "string", help = "query fasta file name [optional] [format: fasta]",         default = 
"")\n+        parser.add_option("-s", "--subject",    dest = "subject",       action = "store",       type = "string", help = "subject fasta file name [optional] [format: fasta]",       default = "")\n+        parser.add_option("-e", "--evalue",     dest = "evalue",        action = "store",       type = "string", help = "E-value filter [default: 1e-10]",                          default = "1e-10")\n+        parser.add_option("-j", "--join",       dest = "doJoin",        action = "store_true",                   help = "j'..b' "string", help = "output file prefix [default: align file name]",            default = "")\n+        parser.add_option("-c", "--clean",      dest = "doClean",       action = "store_true",                   help = "clean temporary files [default: False]",                   default = False)\n+        parser.add_option("-v", "--verbosity",  dest = "verbosity",     action = "store",       type = "int",    help = "verbosity [default: 1]",                                   default = 1)\n+        options = parser.parse_args()[0]\n+        self._setAttributesFromOptions(options)\n+        \n+    def _setAttributesFromOptions(self, options):\n+        self.setAlignFileName(options.align)\n+        self.setQueryFileName(options.query)\n+        self.setSubjectFileName(options.subject)\n+        self.setEvalue(options.evalue)\n+        self.setDoJoin(options.doJoin)\n+        self.setKeepConflicts(options.keepConflict)\n+        self.setOutPrefix(options.outPrefix)\n+        self.setDoClean(options.doClean)\n+        self.setVerbosity(options.verbosity)\n+        \n+    def setAlignFileName(self, alignFileName):\n+        self._alignFileName = alignFileName\n+        \n+    def setQueryFileName(self, queryFileName):\n+        self._queryFileName = queryFileName\n+        \n+    def setSubjectFileName(self, subjectFileName):\n+        self._subjectFileName = subjectFileName\n+        \n+    def setEvalue(self, evalue):\n+        self._eValue = evalue\n+        \n+    def setDoJoin(self, doJoin):\n+        self._doJoin = doJoin\n+        \n+    def setKeepConflicts(self, keepConflict):\n+        self._keepConflict = keepConflict\n+        \n+    def setOutPrefix(self, outPrefix):\n+        if outPrefix == "":\n+            self._outPrefix = self._alignFileName\n+        else:\n+            self._outPrefix = outPrefix\n+        \n+    def setDoClean(self, doClean):\n+        self._doClean = doClean\n+        \n+    def setVerbosity(self, verbosity):\n+        self._verbosity = verbosity\n+        \n+    def _checkOptions(self):\n+        if self._alignFileName == "":\n+            self._logAndRaise("ERROR: Missing input align file name")\n+        \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+\n+    def _getMatcherCmd(self):\n+        lArgs = []\n+        lArgs.append("-m %s" % self._alignFileName)\n+        if self._queryFileName:\n+            lArgs.append("-q %s" % self._queryFileName)\n+        if self._subjectFileName:\n+            lArgs.append("-s %s" % self._subjectFileName)\n+        if self._doJoin:\n+            lArgs.append("-j")\n+        lArgs.append("-E %s" % self._eValue)\n+        lArgs.append("-B %s" % self._outPrefix)\n+        if self._keepConflict:\n+            lArgs.append("-a")\n+        lArgs.append("-v %i" % (self._verbosity - 1))\n+        return self._getSystemCommand("matcher", lArgs)\n+    \n+    def _getSystemCommand(self, prg, lArgs):\n+        systemCmd = prg \n+        for arg in 
lArgs:\n+            systemCmd += " " + arg\n+        return systemCmd\n+                    \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self._checkOptions()\n+        self._log.info("START LaunchMatcher")\n+        self._log.debug("Align file name: %s" % self._alignFileName)\n+        self._log.debug("Query file name: %s" % self._queryFileName)\n+        self._log.debug("Subject file name: %s" % self._subjectFileName)\n+        #TODO: clean files\n+#        if self._doClean:\n+#            self._log.warning("Files will be cleaned")\n+        cmd = self._getMatcherCmd()\n+        process = subprocess.Popen(cmd, shell = True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        self._log.info("END LaunchMatcher")\n+\n+if __name__ == "__main__":\n+    iLaunch = LaunchMatcher()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
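Following the epilog examples in the code above, a hedged programmatic equivalent with hypothetical file names:

    # Hedged sketch: run matcher on an existing align file, giving the query and
    # subject fasta files so that both sets of coordinates can be reported.
    from commons.launcher.LaunchMatcher import LaunchMatcher

    iLaunch = LaunchMatcher(align="in.align", queryFileName="query.fa",
                            subjectFileName="nr.fa", verbosity=2)
    iLaunch.run()   # runs: matcher -m in.align -q query.fa -s nr.fa -E 1e-10 -B in.align -v 1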
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchMummerPlot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchMummerPlot.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,150 @@
+#! /usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.checker.CheckerUtils import CheckerUtils
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+import subprocess
+from commons.core.LoggerFactory import LoggerFactory
+import os
+import shutil
+  
+LOG_DEPTH = "repet.tools"  
+  
+class LaunchMummerPlot(object):
+
+    def __init__(self, inputFileName="", queryFileName="", refFileName ="", prefix = None, fat=False, filter=False,clean=False, verbosity=0):
+        self._inputFileName = inputFileName
+        self._queryFileName = queryFileName
+        self._refFileName = refFileName
+        self._prefix = prefix
+        self._fat = fat
+        self._filter = filter
+        self.doClean = clean
+        self.verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
+             
+    def setAttributesFromCmdLine(self):
+        description = "LaunchMummerPlot runs the MummerPlot program (part of the mummer package) ."    
+        parser = RepetOptionParser(description = description)
+        parser.add_option("-i", "--input", dest="inputFileName", default = None,  action="store", type="string", help="input file[mandatory] [format: delta]")      
+        parser.add_option("-q", "--Qfile", dest="queryFileName", default = None,  action="store", type="string", help="Plot an ordered set of reference sequences from Qfile [optional] [format: fasta]")
+        parser.add_option("-r", "--Rfile", dest="refFileName", default = None,  action="store", type="string", help="Plot an ordered set of reference sequences from Rfile [optional] [format: fasta]")
+        parser.add_option("-p", "--prefix", dest="prefix", default = None,  action="store", type="string", help="prefix name [mandatory]")
+        parser.add_option("-o","--fat", dest="fat",action="store_true", help="Layout sequences using fattest alignment only[optional] ")
+        parser.add_option("-s","--filter", dest="filter",action="store_true", help="Only display .delta alignments which represent the 'best' hit [optional] ")      
+        parser.add_option("-c", "--clean", dest = "clean", help = "clean temporary files", default = False, action="store_true")  
+        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0,  action="store", type="int", help="verbosity [optional] ")
+        
+        (self._options, args) = parser.parse_args()
+        self._setAttributesFromOptions(self._options)
+
+    def _setAttributesFromOptions(self, options):
+        self._inputFileName = options.inputFileName
+        self._queryFileName = options.queryFileName
+        self._refFileName = options.refFileName
+        self._prefix = options.prefix
+        self._fat = options.fat
+        self._filter = options.filter
+        self.verbosity = options.verbosity
+        
+    def _logAndRaise(self, errorMsg):
+        self._log.error(errorMsg)
+        raise Exception(errorMsg)
+            
+    def checkOptions(self):  
+        if self._inputFileName:
+            if not FileUtils.isRessourceExists(self._inputFileName):
+                self._logAndRaise("ERROR: Input file: %s does not exist!" % self._inputFileName)
+        else:
+            self._logAndRaise("ERROR: No specified --input option!")
+        
+        if self._queryFileName:
+            if not FileUtils.isRessourceExists(self._queryFileName):
+                self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)
+        
+        if self._refFileName:
+            if not FileUtils.isRessourceExists(self._refFileName):
+                self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)
+                
+    def clean(self):
+        try:
+            os.remove("%s.filter" % self._prefix)
+        except Exception as inst: 
+            self._log.error(inst)
+        try:
+            os.remove("%s.fplot" % self._prefix)
+        except Exception as inst: 
+            self._log.error(inst)
+        try:
+            os.remove("%s.rplot" % self._prefix)
+        except Exception as inst: 
+            self._log.error(inst)
+                                
+    def run(self):
+        if not CheckerUtils.isExecutableInUserPath("mummerplot") :
+            self._logAndRaise("ERROR: mummerplot must be in your path")
+        self.checkOptions()
+        
+        ref=""
+        if self._refFileName != "":
+            ref = "-R %s"  % self._refFileName
+           
+        query=""
+        if self._queryFileName != "":
+            query = "-Q %s"  % self._queryFileName
+           
+        fat = ""
+        if self._fat:
+            fat =  "--fat"
+            
+        filter = ""
+        if self._filter:
+            filter = "-f" 
+            
+        prefix = ""
+        if self._prefix is not None:
+            prefix = "--prefix=%s" %(self._prefix)
+            
+        cmd = "mummerplot %s %s %s %s %s %s --png" % (self._inputFileName, prefix, ref, query, fat, filter)
+        self._log.debug("Running mummerplot with following commands : %s" %cmd)
+        cmd = cmd.split()         
+        process = subprocess.Popen(cmd)
+        process.wait()
+        
+        self.clean()
+        return process.returncode
+            
+if __name__ == "__main__":
+    iLaunchNucmer = LaunchMummerPlot()
+    iLaunchNucmer.setAttributesFromCmdLine()
+    iLaunchNucmer.run()
\ No newline at end of file
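For illustration, a hedged programmatic call matching the options above; file names are hypothetical:

    # Hedged sketch: plot a nucmer/promer .delta file as a PNG, keeping only the
    # best alignments (-f) and ordering sequences from the given fasta files.
    from commons.launcher.LaunchMummerPlot import LaunchMummerPlot

    iLaunch = LaunchMummerPlot(inputFileName="out.delta", queryFileName="query.fa",
                               refFileName="ref.fa", prefix="out", filter=True, verbosity=1)
    iLaunch.run()   # runs: mummerplot out.delta --prefix=out -R ref.fa -Q query.fa -f --png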
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchNucmer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchNucmer.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,158 @@\n+#! /usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.checker.CheckerUtils import CheckerUtils\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+import subprocess\n+from commons.core.LoggerFactory import LoggerFactory\n+import os\n+  \n+LOG_DEPTH = "repet.tools"  \n+  \n+class LaunchNucmer(object):\n+\n+    def __init__(self,queryFileName="", refFileName ="", prefix = None, genCoords=False, showCoords = False, mum=False, maxGaps=90, minMatch=20, nooptimize=False,mincluster=65, minIdentity=50, minLength=100, verbosity=0):\n+        self._queryFileName = queryFileName\n+        self._refFileName = refFileName\n+        self._prefix = prefix\n+        self._genCoords = genCoords\n+        self._showCoords = showCoords\n+        self._mum = mum\n+        self._maxgaps = maxGaps\n+        self._minMatch = minMatch\n+        self._nooptimize = nooptimize\n+        self._mincluster = mincluster\n+        self._minIdentity = minIdentity\n+        self._minLength = minLength\n+        self.verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)\n+        \n+    def setMincluster(self, value):\n+        self._mincluster = value\n+    def getMincluster(self):\n+        return self._mincluster\n+    \n+    mincluster = property(getMincluster, setMincluster)\n+         \n+    def setAttributesFromCmdLine(self):\n+        description = "LaunchNucmer runs the Nucmer program (part of the mummer package) ."    
\n+        parser = RepetOptionParser(description = description)\n+        parser.add_option("-q", "--query", dest="queryFileName", default = "",  action="store", type="string", help="input query file [compulsory] [format: fasta]")\n+        parser.add_option("-r", "--ref", dest="refFileName", default = "",  action="store", type="string", help="input ref file [compulsory] [format: fasta]")\n+        parser.add_option("-p", "--prefix", dest="prefix", default = None,  action="store", type="string", help="prefix name [optional]")\n+        parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")\n+        parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 10'..b'ize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ")  \n+        parser.add_option("-j", "--mincluster", dest="mincluster", default = 65,  action="store", type="int", help="Minimum length of a cluster of matches (default 65) [optional] ")\n+        \n+        parser.add_option("-i", "--minIdentity", dest="minIdentity", default = 50,  action="store", type="int", help="Minimum identity for show_coords (default 50) [optional] ")\n+        parser.add_option("-u", "--minLength", dest="minLength", default = 100,  action="store", type="int", help="Minimum alignment length for show_coords (default 100) [optional] ")\n+        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0,  action="store", type="int", help="verbosity [optional] ")\n+        \n+        (self._options, args) = parser.parse_args()\n+        self._setAttributesFromOptions(self._options)\n+\n+    def _setAttributesFromOptions(self, options):\n+        self._queryFileName = options.queryFileName\n+        self._refFileName = options.refFileName\n+        self._prefix = options.prefix\n+        self._genCoords = options.genCoords\n+        self._showCoords = options.showCoords\n+        self._mum = options.mum  \n+        self._maxgaps = options.maxgaps\n+        self._minMatch = options.minMatch\n+        self._nooptimize =  options.nooptimize\n+        self._mincluster = options.mincluster\n+        \n+        self._minIdentity = options.minIdentity\n+        self._minLength = options.minLength\n+        \n+        self.verbosity = options.verbosity\n+        \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+            \n+    def checkOptions(self):       \n+        if self._queryFileName != "":\n+            if not FileUtils.isRessourceExists(self._queryFileName):\n+                self._logAndRaise("ERROR: Query file: %s does not exist!" 
% self._queryFileName)\n+        else:\n+            self._logAndRaise("ERROR: No specified --query option!")\n+        \n+        if self._refFileName != "":\n+            if not FileUtils.isRessourceExists(self._refFileName):\n+                self._logAndRaise("ERROR: Ref file does not exist!"% self._refFileName)\n+        else:\n+            self._logAndRaise("ERROR: No specified --ref option!")\n+                                \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self.verbosity)\n+        if not CheckerUtils.isExecutableInUserPath("nucmer") :\n+            self._logAndRaise("ERROR: nucmer must be in your path")\n+        self.checkOptions()\n+        \n+        genCoords = ""\n+        if self._genCoords:\n+            genCoords = "-o"     \n+        mum = ""\n+        if self._mum:\n+            mum =  "--mum"\n+        nooptimize = "--optimize"\n+        if self._nooptimize:\n+            nooptimize = "--nooptimize" \n+        prefix = ""\n+        if self._prefix is not None:\n+            prefix = "--prefix=%s" %(self._prefix) \n+        cmd = "nucmer %s %s %s %s %s -g=%d -l=%d %s -c=%d" % (self._refFileName,self._queryFileName, prefix, genCoords, mum, self._maxgaps, self._minMatch, nooptimize, self._mincluster)\n+        self._log.debug("Running nucmer with following commands : %s" %cmd)\n+        cmd = cmd.split()\n+        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+        process.wait()\n+        \n+        if self._showCoords:\n+            #use of os.system because redirect on process is broken in python < 3.0\n+            cmd = "show-coords -r -c -l -d -I %d -L %d -T %s.delta > %s.coords" % (self._minIdentity, self._minLength, self._prefix, self._prefix)\n+            self._log.debug("Running show-coords with following commands : %s" %cmd)\n+            os.system(cmd)\n+\n+            \n+        return process.returncode\n+            \n+if __name__ == "__main__":\n+    iLaunchNucmer = LaunchNucmer()\n+    iLaunchNucmer.setAttributesFromCmdLine()\n+    iLaunchNucmer.run()\n\\ No newline at end of file\n'
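A hedged sketch of the nucmer plus show-coords steps above, called programmatically; file names and thresholds are hypothetical:

    # Hedged sketch: align query against reference with nucmer, then write a
    # filtered coordinates table with show-coords (-I / -L thresholds).
    from commons.launcher.LaunchNucmer import LaunchNucmer

    iLaunch = LaunchNucmer(queryFileName="query.fa", refFileName="ref.fa",
                           prefix="query_vs_ref", showCoords=True,
                           minIdentity=80, minLength=200, verbosity=2)
    iLaunch.run()   # expected outputs: query_vs_ref.delta and query_vs_ref.coords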
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchPhyML.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchPhyML.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,177 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders\n+import subprocess\n+import os\n+from commons.core.seq.Bioseq import Bioseq\n+import shutil\n+\n+LOG_DEPTH = "repet.core.launchers"\n+\n+\n+\n+class LaunchPhyML(object):\n+    """\n+    Launch \'PhyML\'\n+    """\n+    def __init__(self, inputFileName="", outFileName="",dataType= "nt", interleavedFormat= True, nbDataSets=1, nbBootDataSets=0, substModel="HKY85", ratioTsTv=4.0, propInvSites= 0.0, nbCat=1, gammaParam=1.0, startTree="BIONJ", paramOptimisation = "tlr", clean=False, verbosity=3 ):\n+        self.inputFileName = inputFileName\n+        self.outFileName=outFileName\n+        self.dataType = dataType                    #"nt or aa"\n+        self._setSeqFormat(interleavedFormat)       #if False -q"\n+        self.nbDataSets = nbDataSets\n+        self.nbBootDataSets = nbBootDataSets\n+        self.substModel = substModel\n+        self.ratioTsTv = ratioTsTv\n+        self.propInvSites = propInvSites            # propInvSites="e" replaced by 0.0; should be in [0-1]\n+        self.nbCat = nbCat                          # Number of categories less than four or higher than eight are not recommended.\n+        self.gammaParam = gammaParam\n+        self.startTree = startTree                  #by default is BIONJ used reformatedInputFileName+"_phyml_tree.txt" instead\n+        self.paramOptimisation = paramOptimisation  # used instead of self.optTopology="y", self.optBranchRate="y" \n+                                                    #This option focuses on specific parameter optimisation.\n+                                                    #tlr : tree topology (t), branch length (l) and rate parameters (r) are optimised.\n+             
                                       #tl  : tree topology and branch length are optimised.\n+                                                    #lr  : branch length and rate parameters are optimised.\n+                                                    #l   : branch length are optimised.\n+                                                    #r   : rate parameters are optimised.\n+                                                    #n   : no parameter is optimised.\n+\n+        self._clean = clean\n+        self._v'..b'ns(options)\n+        \n+    def _setAttributesFromOptions(self, options):\n+        self.inputFileName = options.inputFileName\n+        self.setOutFileName = options.outFileName\n+        self._verbosity = options.verbosity\n+\n+    def _checkOptions(self):\n+        if self.inputFileName == "":\n+            self._logAndRaise("ERROR: Missing input file name")\n+    \n+        if self.outFileName == "":\n+            self.outFileName = "%s_phyml.newick" % (self.inputFileName)\n+            \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+         \n+    def _shortenHeaders(self):\n+        self.csh = ChangeSequenceHeaders()\n+        self.csh.setInputFile(self.inputFileName)\n+        self.csh.setFormat("fasta")\n+        self.csh.setStep(1)\n+        self.csh.setPrefix("seq")\n+        self.csh.setLinkFile(self.inputFileName+".shortHlink")\n+        self.csh.setOutputFile(self.inputFileName+".shortH")\n+        self.csh.setVerbosityLevel(self._verbosity-1)\n+        self.csh.run()\n+        self.shortInputFileName = self.inputFileName+".shortH" \n+        \n+    def _renameHeaders(self):\n+        self.csh.setInputFile(self.phyml_tree)\n+        self.csh.setFormat("newick")\n+        self.csh.setStep(2)\n+        self.csh.setLinkFile(self.inputFileName+".shortHlink" )\n+        self.csh.setOutputFile(self.outFileName)\n+        self.csh.setVerbosityLevel(self._verbosity-1)\n+        self.csh.run()\n+          \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self._checkOptions()\n+        self._log.info("START LaunchPhyML")\n+        self._log.debug("building a multiple alignment from \'%s\'..." 
% ( self.inputFileName))\n+        \n+        inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName))\n+        if not os.path.exists(inputFileName):\n+            os.symlink(self.inputFileName, inputFileName)\n+        self.inputFileName = inputFileName\n+        \n+        self._shortenHeaders()\n+\n+        cmd = "sreformat phylip %s" % (self.shortInputFileName)\n+        \n+        with open (self.reformatedInputFileName, "w") as fPhylip :\n+            \n+            process = subprocess.Popen(cmd.split(\' \'), stdout= fPhylip , stderr=subprocess.PIPE)\n+            self._log.debug("Running : %s" % cmd)\n+            output = process.communicate()\n+            self._log.debug("Output:\\n%s" % output[0])\n+            if process.returncode != 0:\n+                self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        \n+        self.reformatedInputFileName = "%s.phylip" % self.shortInputFileName\n+        self.phyml_tree = "%s_phyml_tree.txt" %self.reformatedInputFileName \n+        cpyPhyml_tree = "%s_cpy" %self.phyml_tree\n+        shutil.copyfile(self.phyml_tree,cpyPhyml_tree)\n+                          \n+        cmd = "phyml -i %s -d %s%s -n %d -b %d -m %s -t %f -v %f -c %d -a %f -u %s -o %s" % (self.reformatedInputFileName, self.dataType, self.seqFormat, self.nbDataSets,self.nbBootDataSets,self.substModel, self.ratioTsTv, self.propInvSites,self.nbCat,self.gammaParam, cpyPhyml_tree , self.paramOptimisation )\n+        print cmd    \n+        process = subprocess.Popen(cmd.split(\' \'), stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+        self._log.debug("Running : %s" % cmd)\n+        output = process.communicate()\n+        self._log.debug("Output:\\n%s" % output[0])\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        \n+        self._renameHeaders()\n+        \n+        if self._clean:\n+            for f in [ self.shortInputFileName, self.inputFileName+".shortHlink", self.inputFileName+".shortH.phylip",self.inputFileName+".shortH.phylip_phyml_lk.txt", self.phyml_tree ]:\n+                os.remove(f)\n+            os.system( "mv %s.phylip_phyml_stat.txt %s_phyml.txt" % ( self.shortInputFileName, self.inputFileName ) )\n+        \n+        self._log.info("Finished running LaunchPhyML")\n+        \n+        \n'
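A hedged sketch of the intended use of this wrapper (file names are hypothetical): reformat the alignment to phylip with sreformat, then run phyml as in the command shown above.

    # Hedged sketch: build a tree from a nucleotide multiple alignment with PhyML,
    # optimising topology, branch lengths and rate parameters ("tlr").
    from commons.launcher.LaunchPhyML import LaunchPhyML

    iLaunch = LaunchPhyML(inputFileName="family.fa_aln", outFileName="family_phyml.newick",
                          dataType="nt", substModel="HKY85", paramOptimisation="tlr",
                          verbosity=3)
    iLaunch.run()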
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchPromer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchPromer.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,147 @@
+#! /usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.checker.CheckerUtils import CheckerUtils
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+import subprocess
+from commons.core.LoggerFactory import LoggerFactory
+import os
+  
+LOG_DEPTH = "repet.tools"  
+  
+class LaunchPromer(object):
+
+    def __init__(self,queryFileName="", refFileName ="", prefix = None, genCoords=False, showCoords = False, mum=False, maxGaps=30, minMatch=6, nooptimize=False,mincluster=20, verbosity=0):
+        self._queryFileName = queryFileName
+        self._refFileName = refFileName
+        self._prefix = prefix
+        self._genCoords = genCoords
+        self._showCoords = showCoords
+        self._mum = mum
+        self._maxgaps = maxGaps
+        self._minMatch = minMatch
+        self._nooptimize = nooptimize
+        self._mincluster = mincluster
+        self.verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
+        
+    def setMincluster(self, value):
+        self._mincluster = value
+    def getMincluster(self):
+        return self._mincluster
+    
+    mincluster = property(getMincluster, setMincluster)
+         
+    def setAttributesFromCmdLine(self):
+        description = "LaunchPromer runs the Promer program (part of the MUMmer package)."
+        parser = RepetOptionParser(description = description)
+        parser.add_option("-q", "--query", dest="queryFileName", default = "",  action="store", type="string", help="input query file [compulsory] [format: fasta]")
+        parser.add_option("-r", "--ref", dest="refFileName", default = "",  action="store", type="string", help="input ref file [compulsory] [format: fasta]")
+        parser.add_option("-p", "--prefix", dest="prefix", default = None,  action="store", type="string", help="prefix name [optional]")
+        parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")
+        parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ")        
+        parser.add_option("-m", "--mum", dest="mum",  action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ")      
+        parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 30,  action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 30) [optional] ")
+        parser.add_option("-l", "--minmatch", dest="minMatch", default = 6,  action="store", type="int", help="Minimum length of an maximal exact match (default 6) [optional] ")
+        parser.add_option("-n", "--nooptimize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ")  
+        parser.add_option("-j", "--mincluster", dest="mincluster", default = 20,  action="store", type="int", help="Minimum length of a cluster of matches (default 20) [optional] ")
+        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0,  action="store", type="int", help="verbosity [optional] ")
+        
+        (self._options, args) = parser.parse_args()
+        self._setAttributesFromOptions(self._options)
+
+    def _setAttributesFromOptions(self, options):
+        self._queryFileName = options.queryFileName
+        self._refFileName = options.refFileName
+        self._prefix = options.prefix
+        self._genCoords = options.genCoords
+        self._showCoords = options.showCoords
+        self._mum = options.mum  
+        self._maxgaps = options.maxgaps
+        self._minMatch = options.minMatch
+        self._nooptimize =  options.nooptimize
+        self._mincluster = options.mincluster
+        self.verbosity = options.verbosity
+        
+    def _logAndRaise(self, errorMsg):
+        self._log.error(errorMsg)
+        raise Exception(errorMsg)
+            
+    def checkOptions(self):       
+        if self._queryFileName != "":
+            if not FileUtils.isRessourceExists(self._queryFileName):
+                self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)
+        else:
+            self._logAndRaise("ERROR: No specified --query option!")
+        
+        if self._refFileName != "":
+            if not FileUtils.isRessourceExists(self._refFileName):
+                self._logAndRaise("ERROR: Ref file does not exist!"% self._refFileName)
+        else:
+            self._logAndRaise("ERROR: No specified --ref option!")
+                                
+    def run(self):
+        if not CheckerUtils.isExecutableInUserPath("promer") :
+            self._logAndRaise("ERROR: promer must be in your path")
+        self.checkOptions()
+        
+        genCoords = ""
+        if self._genCoords:
+            genCoords = "-o"     
+        mum = ""
+        if self._mum:
+            mum =  "--mum"
+        nooptimize = "--optimize"
+        if self._nooptimize:
+            nooptimize = "--nooptimize" 
+        prefix = ""
+        if self._prefix is not None:
+            prefix = "--prefix=%s" %(self._prefix)
+            
+        cmd = "promer %s %s %s %s %s -g=%d -l=%d %s -c=%d" % (self._refFileName,self._queryFileName, prefix, genCoords, mum, self._maxgaps, self._minMatch, nooptimize, self._mincluster)
+        self._log.debug("Running promer with following commands : %s" %cmd)
+        cmd = cmd.split()         
+        process = subprocess.Popen(cmd)
+        process.wait()
+        
+        if self._showCoords:
+            #use of os.system because redirect on process is broken in python < 3.0
+            cmd = "show-coords -r -c -l -d -I 50 -L 100 -T %s.delta > %s.coords" % (self._prefix, self._prefix)
+            os.system(cmd)
+
+        return process.returncode
+            
+if __name__ == "__main__":
+    iLaunchPromer = LaunchPromer()
+    iLaunchPromer.setAttributesFromCmdLine()
+    iLaunchPromer.run()
\ No newline at end of file
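
For orientation, a minimal usage sketch of the LaunchPromer class added above, based on its constructor and __main__ block; the FASTA file names and output prefix are hypothetical, and the promer and show-coords binaries must be on the PATH:

    from commons.launcher.LaunchPromer import LaunchPromer

    # Hypothetical inputs: align query.fa against ref.fa, writing promer_out.delta and promer_out.coords
    iLaunchPromer = LaunchPromer(queryFileName="query.fa", refFileName="ref.fa",
                                 prefix="promer_out", genCoords=True, showCoords=True)
    iLaunchPromer.run()
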
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchRefAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchRefAlign.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from commons.core.checker.ConfigChecker import ConfigRules
+from commons.core.checker.ConfigChecker import ConfigChecker
+import subprocess
+import os
+from commons.core.seq.Bioseq import Bioseq
+
+LOG_DEPTH = "repet.core.launchers"
+
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+
+
+class LaunchRefAlign(object):
+    """
+    Launch 'refalign' to build a master-slave multiple sequence alignment.
+    """
+    def __init__(self, inputFileName="", outFileName="", gapSize=10, match=10, mismatch=8, gapOpen=16, gapExtend=4, refseqName="", keepRefseq =False, verbosity=3 ):
+        self.inputFileName = inputFileName
+        self.outFileName=outFileName
+        self.gapSize = gapSize
+        self.match = match
+        self.mismatch = mismatch
+        self.gapOpen = gapOpen
+        self.gapExtend = gapExtend
+        self.gapExtend = gapExtend
+        self.refseqName = refseqName
+        self.keepRefseq = keepRefseq
+        self._verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
+
+    def setAttributesFromCmdLine(self):
+        description = "usage: LaunchRefalign.py [ options ]"
+        epilog = "\n -h: this help\n"
+        epilog += "\t -i: name of the input file (refseq is first, format='fasta')"
+        epilog += "\t -r: keep the reference sequence"
+        epilog += "\t -o: name of the output file (default=inFileName+'.fa_aln')"
+        epilog += "\t -v: verbosity (default=0)"
+        epilog += "\n\t"
+        parser = RepetOptionParser(description = description, epilog = epilog)
+        parser.add_option("-i", "--fasta",      dest = "inputFileName", action = "store",       type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
+        parser.add_option("-o", "--out",        dest = "outFileName",   action = "store",       type = "string", help = "output file name [default: <input>.out]", default = "")
+        parser.add_option("-r", "--keepRefseq", dest = "keepRefseq",    action = "store_true",                   help = "keep reference sequence [optional] [default: False]", default = False)
...
+    def _shortenHeaders(self):
+        self.csh = ChangeSequenceHeaders()
+        self.csh.setInputFile(self.inputFileName)
+        self.csh.setFormat("fasta")
+        self.csh.setStep(1)
+        self.csh.setPrefix("seq")
+        self.csh.setLinkFile(self.inputFileName+".shortHlink")
+        self.csh.setOutputFile(self.inputFileName+".shortH")
+        self.csh.setVerbosityLevel(self._verbosity-1)
+        self.csh.run()
+
+        bsDB = BioseqDB(self.inputFileName+".shortH")
+        bsDB.upCase()
+        bsDB.save(self.inputFileName+".shortHtmp")
+        del bsDB
+        os.rename(self.inputFileName+".shortHtmp", self.inputFileName+".shortH")
+
+    def _renameHeaders(self):
+        self.csh.setInputFile(self.inputFileName+".shortH.fa_aln")
+        self.csh.setFormat("fasta")
+        self.csh.setStep(2)
+        self.csh.setLinkFile(self.inputFileName+".shortHlink" )
+        self.csh.setOutputFile(self.outFileName)
+        self.csh.setVerbosityLevel(self._verbosity-1)
+        self.csh.run()
+
+    def run(self):
+        LoggerFactory.setLevel(self._log, self._verbosity)
+        self._checkOptions()
+        self._log.info("START LaunchRefAlign")
+        self._log.debug("building a multiple alignment from '%s'..." % ( self.inputFileName))
+
+        inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName))
+        if not os.path.exists(inputFileName):
+            os.symlink(self.inputFileName, inputFileName)
+        self.inputFileName = inputFileName
+
+        self._shortenHeaders()
+        if self.keepRefseq:
+            self.refseqName="seq1"
+        self._prepareRefAlign()
+
+        if self._numseq > 1:
+            cmd = "refalign %s %s -m %d -l %d -d %d -g %d -e %d" % (self.refFileName, self.cpyFileName, self.match, self.gapSize, self.mismatch, self.gapOpen, self.gapExtend)
+
+            process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            self._log.debug("Running : %s" % cmd)
+            output = process.communicate()
+            self._log.debug("Output:\n%s" % output[0])
+            if process.returncode != 0:
+                self._logAndRaise("ERROR when launching '%s'" % cmd)
+            refseqNameParam = ""
+            if self.refseqName != "":
+                refseqNameParam = "-r %s " % (self.refseqName)
+            outFileName = self.inputFileName+".shortH.fa_aln"
+            #self.cpyFileName = os.path.join(os.getcwd(),os.path.basename(self.cpyFileName))
+
+            self._log.info("Copy file path %s " % self.cpyFileName)
+            print("Copy file path %s " % self.cpyFileName)
+            cmd = "refalign2fasta.py -i %s.aligner %s-g d -o %s -v 1" % (self.cpyFileName, refseqNameParam, outFileName)
+            self._log.debug("Running : %s" % cmd)
+            process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            output = process.communicate()
+            self._log.debug("Output:\n%s" % output[0])
+
+            if process.returncode != 0:
+                self._logAndRaise("ERROR when launching '%s'" % cmd)
+
+            cmd = "rm -f "+ self.refFileName + " " + self.cpyFileName + " " + self.cpyFileName + ".aligner " + self.cpyFileName + ".oriented " + self.cpyFileName + ".refalign.stat"
+            os.system(cmd)
+
+        else:
+            self._logAndRaise("Only one sequence available")
+            cmd = "echo empty"
+
+        self._renameHeaders()
+
+        for fileName in [self.inputFileName + ".shortH", self.inputFileName + ".shortHlink", self.inputFileName + ".shortH.fa_aln"]:
+            os.remove(fileName)
+        self._log.info("END LaunchRefAlign")
+        return 0
+
+
+if __name__ == "__main__":
+    iLaunchRefAlign = LaunchRefAlign()
+    iLaunchRefAlign.setAttributesFromCmdLine()
+    iLaunchRefAlign.run()
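
A minimal usage sketch of the LaunchRefAlign class above, assuming refalign and refalign2fasta.py are on the PATH; the file names are hypothetical and the reference sequence is expected to come first in the input FASTA:

    from commons.launcher.LaunchRefAlign import LaunchRefAlign

    # Hypothetical input: seqs.fa with the reference sequence first
    iLaunchRefAlign = LaunchRefAlign(inputFileName="seqs.fa", outFileName="seqs.fa_aln",
                                     keepRefseq=True, verbosity=3)
    iLaunchRefAlign.run()
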
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchRefalign_old.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchRefalign_old.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+
+import pyRepet.launcher.programLauncher
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+
+
+def help():
+    print
+    print "usage: launchRefalign.py [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (refseq is first, format='fasta')"
+    print "     -r: keep the reference sequence"
+    print "     -o: name of the output file (default=inFileName+'.fa_aln')"
+    print "     -v: verbose (default=0)"
+    print
+
+
+def main():
+    """
+    Launch 'refalign' to build a master-slave multiple sequence alignment.
+    """
+    inFileName = ""
+    keepRefseq = False
+    outFileName = ""
+    verbose = 0
+    try:
+        opts,args=getopt.getopt(sys.argv[1:],"hi:ro:v:")
+    except getopt.GetoptError, err:
+            print str(err); help(); sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-r":
+            keepRefseq = True
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+    if inFileName == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+        
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    if verbose > 0:
+        print "build a multiple alignment from '%s'..." % ( inFileName )
+        sys.stdout.flush()
+        
+    if outFileName == "":
+        outFileName = "%s.fa_aln" % ( inFileName )
+        
+    csh = ChangeSequenceHeaders()
+    csh.setInputFile( inFileName )
+    csh.setFormat( "fasta" )
+    csh.setStep( 1 )
+    csh.setPrefix( "seq" )
+    csh.setLinkFile(  inFileName+".shortHlink" )
+    csh.setOutputFile( inFileName+".shortH" )
+    csh.setVerbosityLevel( verbose - 1 )
+    csh.run()
+    
+    bsDB = BioseqDB( inFileName+".shortH" )
+    bsDB.upCase()
+    bsDB.save( inFileName+".shortHtmp" )
+    del bsDB
+    os.rename( inFileName+".shortHtmp", inFileName+".shortH" )
+    
+    pL = pyRepet.launcher.programLauncher.programLauncher( inFileName+".shortH" )
+    if keepRefseq:
+        pL.launchRefalign( outFileName=inFileName+".shortH.fa_aln", refseqName="seq1", verbose=verbose )
+    else:
+        pL.launchRefalign( outFileName=inFileName+".shortH.fa_aln", verbose=verbose )
+        
+    csh.setInputFile( inFileName+".shortH.fa_aln" )
+    csh.setFormat( "fasta" )
+    csh.setStep( 2 )
+    csh.setLinkFile(  inFileName+".shortHlink" )
+    csh.setOutputFile(  outFileName )
+    csh.setVerbosityLevel( verbose - 1 )
+    csh.run()
+    
+    for f in [ inFileName+".shortH", inFileName+".shortHlink", inFileName+".shortH.fa_aln" ]:
+            os.remove( f )
+            
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    return 0
+
+
+if __name__ == "__main__":
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchRepeatMasker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchRepeatMasker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,113 @@
+#! /usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.checker.CheckerUtils import CheckerUtils
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+import subprocess
+  
+class LaunchRepeatMasker(object):
+
+    def __init__(self,queryFileName="", libFileName ="",sensitivity="", engine="wu", cutOff=225, outputDir = ".",verbosity=0):
+        self._queryFileName = queryFileName
+        self._libFileName = libFileName
+        self._engine = engine
+        self._sensitivity = sensitivity
+        self._cutOff = cutOff
+        self._outputDir = outputDir
+        self._verbosity = verbosity
+         
+    def setAttributesFromCmdLine(self):
+        description = "LaunchRepeatMasker runs the RepeatMasker program."
+        parser = RepetOptionParser(description = description)
+        parser.add_option("-q", "--query", dest="queryFileName", default = None,  action="store", type="string", help="input query file [compulsory] [format: fasta]")
+        parser.add_option("-l", "--libFileName", dest="libFileName", default = None,  action="store", type="string", help="custom library [optional]")  
+        parser.add_option("-n", "--outputDir", dest="outputDir", default=".", action="store", type="string", help="outputDir (default : current directory) [optional] ")  
+        parser.add_option("-c", "--cutOff", dest="cutOff", default=225, action="store", type="int", help="Sets cutoff score for masking repeats when using -lib (default 225) [optional] ")
+        parser.add_option("-e", "--engine", dest="engine", default = "wu",  action="store", type="string", help="engine  [optional] ")    
+        parser.add_option("-u", "--sensitivity", dest="sensitivity", default = "",  action="store", type="string", help="sensitivity  can be s, q, qq[optional] ")    
+        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0,  action="store", type="int", help="verbosity [optional] ")    
+
+        (options, args) = parser.parse_args()
+        self._setAttributesFromOptions(options)
+
+    def _setAttributesFromOptions(self, options):
+        self._queryFileName = options.queryFileName
+        self._libFileName = options.libFileName
+        self._outputDir = options.outputDir
+        self._engine = options.engine
+        self._sensitivity = options.sensitivity
+        self._cutOff = options.cutOff
+        self._verbosity = options.verbosity
+        
+    def checkOptions(self):       
+        if self._queryFileName != "":
+            if not FileUtils.isRessourceExists(self._queryFileName):
+                raise Exception("ERROR: Query file: %s does not exist!" % self._queryFileName)
+        else:
+            raise Exception("ERROR: No specified --query option!")
+                                
+    def run(self):
+        if not CheckerUtils.isExecutableInUserPath("RepeatMasker") :
+            print "ERROR: RepeatMasker must be in your path"
+        else:
+            self.checkOptions()
+            
+            engine = ""
+            if self._engine == "wu":
+                engine = "-e wublast"
+            elif self._engine == "cm":
+                engine = "-e crossmatch"
+            sensitivity = ""
+            if self._sensitivity:
+                sensitivity = "-%s" % self._sensitivity     
+            libFileName = ""
+            if self._libFileName != "":
+                libFileName = "-lib %s" % self._libFileName
+
+            cmd = "RepeatMasker %s -dir %s -pa 1 -gccalc -no_is -nolow %s %s %s" % (self._queryFileName,self._outputDir,engine,sensitivity,libFileName)
+            cmd = cmd.split() 
+            
+            if self._verbosity>0:
+                print("Running RepeatMasker with following commands : %s" %cmd)
+            
+            process = subprocess.Popen(cmd, stdout=subprocess.PIPE,stderr = subprocess.PIPE)
+#            process.wait()
+            output= process.communicate()
+            if self._verbosity>0:
+                print("".join(output))
+            return process.returncode
+            
+if __name__ == "__main__":
+    iLaunchRepeatMasker = LaunchRepeatMasker()
+    iLaunchRepeatMasker.setAttributesFromCmdLine()
+    iLaunchRepeatMasker.run()
\ No newline at end of file
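
A minimal usage sketch of the LaunchRepeatMasker class above, assuming RepeatMasker is on the PATH; the file names and output directory are hypothetical:

    from commons.launcher.LaunchRepeatMasker import LaunchRepeatMasker

    # Hypothetical inputs: mask genome.fa with a custom library using the WU-BLAST engine
    iLaunchRepeatMasker = LaunchRepeatMasker(queryFileName="genome.fa", libFileName="TE_lib.fa",
                                             engine="wu", outputDir="rm_out", verbosity=1)
    iLaunchRepeatMasker.run()
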
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchTRF.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchTRF.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from commons.core.checker.ConfigChecker import ConfigRules
+from commons.core.checker.ConfigChecker import ConfigChecker
+import subprocess
+import glob
+import os
+
+LOG_DEPTH = "repet.launchers"
+
+class LaunchTRF(object):
+    
+    def __init__(self, inFileName = "", outFileName = "", maxPeriod=15, doClean = False, verbosity = 0):
+        self.inFileName = inFileName
+        self.setOutFileName(outFileName)
+        self.maxPeriod=maxPeriod
+        self._doClean = doClean
+        self._verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
+        
+    def setAttributesFromCmdLine(self):
+        description = "Launch TRF to detect micro-satellites in sequences."
+        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
+        epilog += "\t$ python LaunchTemplate.py -i file.fa -v 0"
+        epilog += "\n\t"
+        parser = RepetOptionParser(description = description, epilog = epilog)
+        parser.add_option("-i", "--in",      dest = "inFileName", action = "store",       type = "string", help = "input file name [compulsory] [format: fasta]", default = "")
+        parser.add_option("-o", "--out",        dest = "outFileName",   action = "store",       type = "string", help = "output file name [default: <input>.TRF.set]", default = "")
+        parser.add_option("-m", "--maxPeriod",  dest = "maxPeriod",     action = "store",       type = "int",    help = " maximum period size to report  [default: 15]", default = 15)
+        parser.add_option("-c", "--clean",      dest = "doClean",       action = "store_true",                   help = "clean temporary files [optional] [default: False]", default = False)
+        parser.add_option("-v", "--verbosity",  dest = "verbosity",     action = "store",       type = "int",    help = "verbosity [optional] [default: 1]", default = 1)
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+        
+    def _setAttributesFromOptions(self, options):
+        self.setInFileName(options.inFileName)
+        self.setOutFileName(options.outFileName)
+        self.maxPeriod = options.maxPeriod
+        self.setDoClean(options.doClean)
+        self.setVerbosity(options.verbosity)
+                
+    def setInFileName(self, inFileName):
+        self.inFileName = inFileName
+        
+    def setOutFileName(self, outFileName):
+        if outFileName == "":
+            self._outFileName = "%s.TRF.set" % self.inFileName
+        else:
+            self._outFileName = outFileName
+        
+    def setDoClean(self, doClean):
+        self._doClean = doClean
+        
+    def setVerbosity(self, verbosity):
+        self._verbosity = verbosity
+        
+    def _checkOptions(self):
+        if self.inFileName == "":
+            self._logAndRaise("ERROR: Missing input file name")
+            
+    def _logAndRaise(self, errorMsg):
+        self._log.error(errorMsg)
+        raise Exception(errorMsg)
+    
+    
+    def _launchTRF(self):
+        cmd = "trf %s 2 3 5 80 10 20 %d -h -d" % (self.inFileName, self.maxPeriod)
+        self._log.debug("Running : %s" % cmd)
+        process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        output = process.communicate()
+        self._log.debug("Output:\n%s" % output[0])
+        
+    def _parseTRF(self):
+        self._log.debug("Parsing TRF output")
+        with open( "%s.2.3.5.80.10.20.%d.dat" % (self.inFileName, self.maxPeriod),'r') as inFile:
+            with open(self._outFileName,'w') as outFile:
+                nbPatterns = 0
+                nbInSeq = 0
+                for line in inFile.readlines():
+                    if line == "":
+                        break
+                    data = line.split(" ")
+                    if len(data) > 1 and "Sequence:" in data[0]:
+                        nbInSeq += 1
+                        seqName = data[1][:-1]
+                    if len(data) < 14:
+                        continue
+                    nbPatterns += 1
+                    consensus = data[13]
+                    copyNb = int( float(data[3]) + 0.5 )
+                    start = data[0]
+                    end = data[1]
+                    outFile.write( "%i\t(%s)%i\t%s\t%s\t%s\n" % ( nbPatterns, consensus, copyNb, seqName, start, end ) )
+        self._log.debug("Finished Parsing TRF output")
+    
+    def _clean(self):
+        try:
+            os.remove("%s.2.3.5.80.10.20.%d.dat" % (self.inFileName, self.maxPeriod))
+        except:pass
+        
+
+    def run(self):
+        """
+        Launch TRF to detect micro-satellites in sequences.
+        """
+        LoggerFactory.setLevel(self._log, self._verbosity)
+        self._checkOptions()
+        self._log.info("START Launch")
+        self._log.debug("Input file name: %s" % self.inFileName)
+        
+        self._launchTRF()
+        self._parseTRF()
+        
+        if self._doClean:
+            self._log.warning("Files will be cleaned")
+            self._clean()
+        self._log.info("END Launch")
+  
+if __name__ == "__main__":
+    iLaunchTRF = LaunchTRF()
+    iLaunchTRF.setAttributesFromCmdLine()
+    iLaunchTRF.run()        
+
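
A minimal usage sketch of the LaunchTRF wrapper above, assuming the trf binary is on the PATH; the input file name is hypothetical:

    from commons.launcher.LaunchTRF import LaunchTRF

    # Hypothetical input: scan genome.fa for tandem repeats with period <= 15,
    # writing genome.fa.TRF.set and removing the intermediate .dat file
    iLaunchTRF = LaunchTRF(inFileName="genome.fa", maxPeriod=15, doClean=True, verbosity=1)
    iLaunchTRF.run()
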
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/LaunchTallymer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchTallymer.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,328 @@
+#!/usr/bin/env python
+
+"""
+Launch Tallymer's sub programs, generate map file, and convert output to gff and wig, as well as visual (RPlot) data
+"""
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import shutil
+import subprocess
+import time
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from commons.core.LoggerFactory import LoggerFactory
+from SMART.Java.Python.convertTranscriptFile import ConvertTranscriptFile
+from commons.core.seq.BioseqUtils import BioseqUtils
+from commons.core.seq.BioseqDB import BioseqDB
+from Tallymer_pipe.PlotBenchMarkGFFFiles import PlotBenchMarkGFFFiles
+
+LOG_DEPTH = "repet.tools"
+
+
+class LaunchTallymer(object):
+    """
+    Launch Tallymer's sub programs, generate map file, and convert output to
+    gff and wig, as well as visual (RPlot) data
+    """
+
+    _lValidFormats = ["gff", "gff3", "wig", "bed", "map"]
+
+    def __init__(self, inputFasta="", indexationFasta=None, merSize=20, minOccs=4, outputFormats="gff", nLargestScaffoldsToPlot=0, clean=False, verbosity=0):
+        self.inputFasta = inputFasta
+        self.indexationFasta = indexationFasta if indexationFasta != None else inputFasta
+        self.merSize = merSize
+        self.minOccs = minOccs
+        self.outputFormats = outputFormats
+        self.nLargestScaffoldsToPlot = nLargestScaffoldsToPlot
+        self.doClean = clean
+        self.verbosity = verbosity
+
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
+        self._workdir = os.path.join(os.getcwd(), "launchTallymer_%s" % time.strftime("%Y%m%d%H%M%S"))
+        self._tmpSearchFileName = None
+        self._tmpMapFileName = None
+        self._tmpStatFileName = None
+        self._tmpPngFileName = None
+        self._plot_data = {}
+        self._plot_data2 = {}
+
+    def setAttributesFromCmdLine(self):
+        description = "Generates stats from the results of the tallymer search ."
+        parser = RepetOptionParser(description=description)
+        parser.add_option("-i", "--inputFasta", dest="inputFasta", default = "",  action="store", type="string", help="input fasta file [compulsory] [format: fasta]")
+        parser.add_option("-u", "--indexationFasta", dest="indexationFasta", default = "",  action="store", type="string", help="input indexation fasta file used to generate
...
+                statFile.write("%s\t%i\t%.10f\n" % (kmer, dKmer2Occ[kmer], dKmer2Occ[kmer] / float(totalNbOcc)))
+
+    def _writeOutputFiles(self):
+        for format in self.outputFormats:
+            self._log.info("Generating %s file" % format)
+            outputFileName = "%s.tallymer.%s" % (os.path.splitext(self.inputFasta)[0], format)
+            try:
+                iConvertTranscriptFile = ConvertTranscriptFile(inputFileName=self._tmpMapFileName, name="Tallymer",\
+                         inputFormat="map", outputFileName=outputFileName, outputFormat=format,feature= "Match", featurePart="Match-part", verbosity=0) #self.verbosity
+                iConvertTranscriptFile.run()
+            except Exception as inst:
+                self._log.error("Error: %s - Failed to generate %s format ouput, skipping" % (inst, format))
+            shutil.copy2(outputFileName, "../.")
+
+
+class ConvertUtils(object):
+
+    def convertTallymerFormatIntoMapFormatAndGenerateData(fastaFileName, searchFileName, mapFileName):
+        dIndex2NameLengthList = ConvertUtils._createDictOfNameLengthAccordingToSeqOrder(fastaFileName)
+        plotData = {}
+        plotData2 = {}
+        with open(searchFileName, "r") as talFile:
+            with open(mapFileName, "w") as mapFile:
+                totalNbOcc = 0
+                dKmer2Occ = {}
+                line = talFile.readline()
+                while line:
+                    data = line[:-1].split("\t")
+                    name = "%s_%s" % (data[3], data[2])
+                    nbOccs = int(data[2])
+                    chrname = dIndex2NameLengthList[int(data[0])][0]
+                    if data[1][0] == "+":
+                        start = int(data[1][1:]) + 1
+                        end = start + len(data[3])
+                    elif data[1][0] == "-":
+                        start_revcomp = int(data[1][1:])
+                        start = dIndex2NameLengthList[int(data[0])][1] - start_revcomp - 1
+                        end = end - len(data[3]) + 1
+                    mapLine = "%s\t%s\t%s\t%s\t%i\n" % (name, chrname, start, end, nbOccs)
+                    mapFile.write(mapLine)
+
+                    if plotData2.get(chrname,None) is None:
+                        plotData2[chrname] = {}
+                    if plotData2[chrname].get(start, None) is None:
+                        plotData2[chrname][start]=0
+                    plotData2[chrname][start] += nbOccs
+
+                    totalNbOcc += 1
+                    if dKmer2Occ.has_key(data[3]):
+                        dKmer2Occ[data[3]] += 1
+                    else:
+                        dKmer2Occ[data[3]] = 1
+                    plotData[start] = nbOccs
+                    line = talFile.readline()
+        return totalNbOcc, dKmer2Occ, plotData, plotData2
+
+    convertTallymerFormatIntoMapFormatAndGenerateData = staticmethod(convertTallymerFormatIntoMapFormatAndGenerateData)
+
+    def _createDictOfNameLengthAccordingToSeqOrder(fastaFileName):
+        with open(fastaFileName) as fastaFile:
+            line = fastaFile.readline()
+            i = 0
+            length = 0
+            dIndex2Name = {}
+            while line:
+                if line[0] == ">":
+                    dIndex2Name[i] = [line[1:-1]]
+                    if i > 0:
+                        dIndex2Name[i - 1].append(length)
+                        length = 0
+                    i += 1
+                else:
+                    length += len(line[:-1])
+                line = fastaFile.readline()
+            dIndex2Name[i - 1].append(length)
+        return dIndex2Name
+
+    _createDictOfNameLengthAccordingToSeqOrder = staticmethod(_createDictOfNameLengthAccordingToSeqOrder)
+
+if __name__ == "__main__":
+    iLaunchTallymer = LaunchTallymer()
+    iLaunchTallymer.setAttributesFromCmdLine()
+    iLaunchTallymer.run()
\ No newline at end of file
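
A minimal usage sketch of the LaunchTallymer wrapper above, assuming the Tallymer binaries it drives are installed; the input file name is hypothetical and the default output format (gff) is used:

    from commons.launcher.LaunchTallymer import LaunchTallymer

    # Hypothetical input: index genome.fa and report 20-mers occurring at least 4 times
    iLaunchTallymer = LaunchTallymer(inputFasta="genome.fa", merSize=20, minOccs=4,
                                     clean=True, verbosity=1)
    iLaunchTallymer.run()
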
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/MafftClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/MafftClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+##@file
+# Launch MafftProgramLauncher on several files in parallel on a cluster.
+
+
+from pyRepet.launcher.AbstractClusterLauncher import *
+from commons.launcher.MafftProgramLauncher import MafftProgramLauncher
+
+
+class MafftClusterLauncher( AbstractClusterLauncher ):
+    """
+    Launch Mafft on several files in parallel on a cluster.
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractClusterLauncher.__init__( self )
+        AbstractClusterLauncher.setAcronym( self, "Mafft" )
+        
+        self._cmdLineSpecificOptions = "p:"
+        
+        self._exeWrapper = "MafftProgramLauncher.py"
+        self._prgLauncher = None
+        self._prgLauncher = self.getProgramLauncherInstance()
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -p: parameters for 'mafft' (default='--auto')"
+        return string
+    
+    
+    def getProgramParameters( self ):
+        return self._prgLauncher.getProgramParameters()
+    
+    
+    def getProgramLauncherInstance( self ):
+        if self._prgLauncher == None:
+            self._prgLauncher = MafftProgramLauncher()
+            self._prgLauncher.setInputFile( GENERIC_IN_FILE )
+            self._prgLauncher.setOutputFile( "%s.fa_aln" % ( GENERIC_IN_FILE ) )
+            self._prgLauncher.setClean()
+            self._prgLauncher.setVerbosityLevel( 1 )
+            self._prgLauncher.setListFilesToKeep()
+            self._prgLauncher.setListFilesToRemove()
+        return self._prgLauncher
+    
+    
+if __name__ == "__main__":
+    i = MafftClusterLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/MafftProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/MafftProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+
+##@file
+# Launch Mafft (multiple alignment).
+#
+# options:
+#      -h: this help
+#      -i: name of the input file (format='fasta')
+#      -p: parameters for 'mafft' (default='--auto')
+#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
+#      -c: clean
+#      -v: verbosity level (default=0/1)
+
+
+import os
+import sys
+import getopt
+import exceptions
+
+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
+from pyRepet.seq.fastaDB import *
+from commons.core.seq.FastaUtils import FastaUtils
+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+
+
+class MafftProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch Mafft (multiple alignment).
+    """
+    
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "mafft"
+        self._formatInFile = "fasta"
+        self._prgParam = "--auto"
+        self._cmdLineSpecificOptions = "p:o:"
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -p: parameters for '%s' (default='--auto')" % ( self.getProgramName() )
+        string += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
+        return string
+    
+    
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        """
+        Set a specific attribute from the command-line arguments.
+        """
+        if o == "-p":
+            self.setProgramParameters( a )
+        elif o == "-o":
+            self.setOutputFile( a )
+            
+            
+    def checkSpecificAttributes( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )
+            
+            
+    def setWrapperCommandLine( self ):
+        """
+        Set the command-line of the wrapper.
+        Required for MafftClusterLauncher.
+        """
+        self._wrpCmdLine = self.getWrapperName()
+        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
+        self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )
+        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
+        if self.getClean():
+            self._wrpCmdLine += " -c"
+        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+        
+        
+    def setProgramCommandLine( self ):
+        """
+        Set the command-line of the program.
+        """
+        self._prgCmdLine = self.getProgramName()
+        self._prgCmdLine += " %s" % ( self.getProgramParameters() )
+        if self.getVerbosityLevel() == 0 and "--quiet" not in self._prgCmdLine:
+            self._prgCmdLine += " --quiet"
+        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
+        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )
+        if self._verbose < 2:
+            self._prgCmdLine += " 2> /dev/null"
+            
+            
+    def setListFilesToKeep( self ):
+        """
+        Set the list of files to keep.
+        """
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+        
+        
+    def setListFilesToRemove( self ):
+        """
+        Set the list of files to remove.
+        """
+        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )
+        
+        
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+        
+        
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        
+        lInitHeaders = FastaUtils.dbHeaders( self.getInputFile(), self.getVerbosityLevel()-1 )
+        
+        csh = ChangeSequenceHeaders()
+        csh.setInputFile( self.getInputFile() )
+        csh.setFormat( "fasta" )
+        csh.setStep( 1 )
+        csh.setPrefix( "seq" )
+        csh.setLinkFile( "%s.shortHlink" % ( self.getInputFile() ) )
+        csh.setOutputFile( "%s.shortH" % ( self.getInputFile() ) )
+        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
+        csh.run()
+        
+        bsDB = BioseqDB( "%s.shortH" % ( self.getInputFile() ) )
+        bsDB.upCase()
+        bsDB.save( "%s.shortHtmp" % ( self.getInputFile() ) )
+        del bsDB
+        os.rename( "%s.shortHtmp" % ( self.getInputFile() ),
+                   "%s.shortH" % ( self.getInputFile() ) )
+        
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
+            print string
+            sys.exit(1)
+            
+        csh.setInputFile( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
+        csh.setFormat( "fasta" )
+        csh.setStep( 2 )
+        csh.setLinkFile( "%s.shortHlink" % ( self.getInputFile() ) )
+        csh.setOutputFile( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) )
+        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
+        csh.run()
+        
+        absDB = AlignedBioseqDB( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) )
+        outFileHandler = open( self.getOutputFile(), "w" )
+        for header in lInitHeaders:
+            bs = absDB.fetch( header )
+            bs.upCase()
+            bs.write( outFileHandler )
+        outFileHandler.close()
+        os.remove( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) )
+        
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = MafftProgramLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
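
A minimal usage sketch of MafftProgramLauncher above, assuming mafft is on the PATH; the input file name is hypothetical. The launcher shortens the sequence headers, runs mafft, then restores the original headers in the aligned output:

    from commons.launcher.MafftProgramLauncher import MafftProgramLauncher

    # Hypothetical input: align seqs.fa and write seqs.fa.fa_aln
    i = MafftProgramLauncher()
    i.setInputFile("seqs.fa")
    i.setOutputFile("seqs.fa.fa_aln")
    i.setVerbosityLevel(1)
    i.run()
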
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/MapClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/MapClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+##@file
+# Launch MapProgramLauncher.py on several files in parallel on a cluster.
+
+
+import os
+import sys
+import getopt
+import exceptions
+
+from pyRepet.launcher.AbstractClusterLauncher import *
+from commons.launcher.MapProgramLauncher import MapProgramLauncher
+
+
+class MapClusterLauncher( AbstractClusterLauncher ):
+    """
+    Launch Map on several files in parallel on a cluster.
+    """
+    
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractClusterLauncher.__init__( self )
+        AbstractClusterLauncher.setAcronym( self, "Map" )
+        
+        self._cmdLineSpecificOptions = "s:m:O:e:"
+        
+        self._exeWrapper = "MapProgramLauncher.py"
+        self._prgLauncher = None
+        self._prgLauncher = self.getProgramLauncherInstance()
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: size above which a gap is not penalized anymore (default='%i')" % ( self.getGapSize() )
+        string += "\n     -m: penalty for a mismatch (default='%i')" % ( self.getMismatchPenalty() )
+        string += "\n     -O: penalty for a gap openning (default='%i')" % ( self.getGapOpenPenalty() )
+        string += "\n     -e: penalty for a gap extension (default='%i')" % ( self.getGapExtendPenalty() )
+        return string
+    
+    
+    def getGapSize( self ):
+        return self._prgLauncher.getGapSize()
+        
+        
+    def getMismatchPenalty( self ):
+        return self._prgLauncher.getMismatchPenalty()
+        
+        
+    def getGapOpenPenalty( self ):
+        return self._prgLauncher.getGapOpenPenalty()
+        
+        
+    def getGapExtendPenalty( self ):
+        return self._prgLauncher.getGapExtendPenalty()
+    
+    
+    def getProgramLauncherInstance( self ):
+        if self._prgLauncher == None:
+            self._prgLauncher = MapProgramLauncher()
+            self._prgLauncher.setInputFile( GENERIC_IN_FILE )
+            self._prgLauncher.setOutputFile( "%s.fa_aln" % ( GENERIC_IN_FILE ) )
+            # Test from SATannot: check whether clean should be set to True
+            #self._prgLauncher.setClean()
+            self._prgLauncher.setVerbosityLevel( 1 )
+            self._prgLauncher.setListFilesToKeep()
+            self._prgLauncher.setListFilesToRemove()
+        return self._prgLauncher
+    
+    
+if __name__ == "__main__":
+    i = MapClusterLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/MapProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/MapProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+
+##@file
+# Launch Map (multiple alignment).
+#
+# options:
+#      -h: this help
+#      -i: name of the input file (format='fasta')
+#      -s: size above which a gap is not penalized anymore (default='50')
+#      -m: penalty for a mismatch (default='-8')
+#      -O: penalty for a gap opening (default='16')
+#      -e: penalty for a gap extension (default='4')
+#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
+#      -c: clean
+#      -v: verbosity level (default=0/1)
+
+
+import sys
+import os
+
+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
+from commons.core.seq.FastaUtils import FastaUtils
+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+
+
+class MapProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch Map (multiple alignment).
+    """
+
+
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "rpt_map"
+        self._formatInFile = "fasta"
+        self._cmdLineSpecificOptions = "s:m:O:e:o:"
+        self._gapSize = 50
+        self._mismatchPenalty = -8
+        self._gapOpenPenalty = 16
+        self._gapExtendPenalty = 4
+        self._outFile = ""
+
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: size above which a gap is not penalized anymore (default='%i')" % ( self.getGapSize() )
+        string += "\n     -m: penalty for a mismatch (default='%i', match=10)" % ( self.getMismatchPenalty() )
+        string += "\n     -O: penalty for a gap opening (default='%i')" % ( self.getGapOpenPenalty() )
+        string += "\n     -e: penalty for a gap extension (default='%i')" % ( self.getGapExtendPenalty() )
+        string += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
+        return string
+
+
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        """
+        Set a specific attribute from the command-line arguments.
+        """
+        if o == "-s":
+            self.setGapSize( a )
+        elif o == "-m":
+            self.setMismatchPenalty( a )
+        elif o == "-O":
+            self.setGapOpenPenalty( a )
+        elif o == "-e":
+            self.setGapExtendPenalty( a )
+        elif o == "-o":
+            self.setOutputFile( a )
+
+
+    def setGapSize( self, arg ):
+        self._gapSize = int(arg)
+
+
+    def setMismatchPenalty( self, arg ):
+        self._mismatchPenalty = int(arg)
+
+
+    def setGapOpenPenalty( self, arg ):
+        self._gapOpenPenalty = int(arg)
+
+
+    def setGapExtendPenalty( self, arg ):
+        self._gapExtendPenalty = int(arg)
+
+
+    def getGapSize( self ):
+        return self._gapSize
+
+
+    def getMismatchPenalty( self ):
+        return self._mismatchPenalty
+
+
+    def getGapOpenPenalty( self ):
+        return self._gapOpenPenalty
+
+
+    def getGapExtendPenalty( self ):
+        return self._gapExtendPenalty
+
+
+    def checkSpecificAttributes( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        if self.getGapSize() <= 0:
+            string = "ERROR: gap size should be > 0"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getMismatchPenalty() >= 0:
+            string = "ERROR: mismatch penalty should be < 0"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getGapOpenPenalty() < 0:
...
+            self._wrpCmdLine += " -c"
+        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+
+
+    def setProgramCommandLine( self ):
+        """
+        Set the command-line of the program.
+        """
+        self._prgCmdLine = self.getProgramName()
+        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
+        self._prgCmdLine += " %i" % ( self.getGapSize() )
+        self._prgCmdLine += " %i" % ( self.getMismatchPenalty() )
+        self._prgCmdLine += " %i" % ( self.getGapOpenPenalty() )
+        self._prgCmdLine += " %i" % ( self.getGapExtendPenalty() )
+        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )
+
+
+    def setListFilesToKeep( self ):
+        """
+        Set the list of files to keep.
+        """
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+
+
+    def setListFilesToRemove( self ):
+        """
+        Set the list of files to remove.
+        """
+        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )
+
+
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        self._summary += "\ngap size: %i" % ( self.getGapSize() )
+        self._summary += "\nmismatch penalty: %i" % ( self.getMismatchPenalty() )
+        self._summary += "\ngap openning penalty: %i" % ( self.getGapOpenPenalty() )
+        self._summary += "\ngap extension penalty: %i" % ( self.getGapExtendPenalty() )
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+
+
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+
+        lInitHeaders = FastaUtils.dbHeaders( self.getInputFile(), self.getVerbosityLevel()-1 )
+
+        csh = ChangeSequenceHeaders()
+        csh.setInputFile( self.getInputFile() )
+        csh.setFormat( "fasta" )
+        csh.setStep( 1 )
+        csh.setPrefix( "seq" )
+        csh.setLinkFile( "%s.shortHlink" % ( self.getInputFile() ) )
+        csh.setOutputFile( "%s.shortH" % ( self.getInputFile() ) )
+        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
+        csh.run()
+
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        returnStatus = os.system( cmd )
+        if returnStatus != 0:
+            string = "ERROR: program '%s' returned status '%i'" % ( self.getProgramName(), returnStatus )
+            print string
+            sys.exit(1)
+
+        csh.setInputFile( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
+        csh.setFormat( "fasta" )
+        csh.setStep( 2 )
+        csh.setLinkFile( "%s.shortHlink" % ( self.getInputFile() ) )
+        csh.setOutputFile( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) )
+        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
+        csh.run()
+
+        absDB = AlignedBioseqDB( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) )
+        outFileHandler = open( self.getOutputFile(), "w" )
+        for header in lInitHeaders:
+            bs = absDB.fetch( header )
+            bs.upCase()
+            bs.write( outFileHandler )
+        outFileHandler.close()
+        if self.getClean():
+            os.remove( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) )
+
+        self.end()
+
+
+if __name__ == "__main__":
+    i = MapProgramLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/NWalignProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/NWalignProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+
+##@file
+# Launch NWalign (pairwise alignment).
+#
+# options:
+#      -h: this help
+#      -i: name of the input file (queries, format='fasta')
+#      -s: name of the subject file (format='fasta')
+#      -p: parameters for 'NWalign' (default='-d 2')
+#      -o: name of the output file (format='align', default=inFile+'.align')
+#      -c: clean
+#      -v: verbosity level (default=0/1)
+
+
+import os
+import sys
+
+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
+from commons.core.coord.AlignUtils import AlignUtils
+
+
+class NWalignProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch NWalign (pairwise alignment).
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "NWalign"
+        self._formatInFile = "fasta"
+        self._sbjFile = ""
+        self._prgParam = ""
+        self._cmdLineSpecificOptions = "s:p:o:"
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (format='fasta')"
+        string += "\n     -p: parameters for '%s'" % ( self.getProgramName() )
+        string += "\n     -o: name of the output file (format='align', default=inFile+'.align')"
+        return string
+    
+    
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        """
+        Set a specific attribute from the command-line arguments.
+        """
+        if o =="-s":
+            self.setSubjectFile( a )
+        elif o == "-p":
+            self.setProgramParameters( a )
+        elif o == "-o":
+            self.setOutputFile( a )
+            
+            
+    def setSubjectFile( self, arg ):
+        self._sbjFile = arg
+        
+        
+    def getSubjectFile( self ):
+        return self._sbjFile
+    
+    
+    def checkSpecificAttributes( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        if self._sbjFile == "":
+            string = "ERROR: missing subject file (-s)"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+            
+            
+    def setWrapperCommandLine( self ):
+        """
+        Set the command-line of the wrapper.
+        Required for NWalignClusterLauncher.
+        """
+        self._wrpCmdLine = self.getWrapperName()
+        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
+        self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
+        if self.getProgramParameters() != "":
+            self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
+        if self.getClean():
+            self._wrpCmdLine += " -c"
+        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+        
+        
+    def setProgramCommandLine( self ):
+        """
+        Set the command-line of the program.
+        """
+        self._prgCmdLine = self.getProgramName()
+        if self.getProgramParameters() != "":
+            self._prgCmdLine += " %s" % ( self.getProgramParameters() )
+        self._prgCmdLine += " -o %s.afa" % ( self.getInputFile() )
+        self._prgCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+        self._prgCmdLine += " %s" % ( self.getSubjectFile() )
+        self._prgCmdLine += " %s" % ( self.getInputFile() )
+        
+        
+    def setListFilesToKeep( self ):
+        """
+        Set the list of files to keep.
+        """
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.afa" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+        
+        
+    def postprocess( self ):
+        absDB = AlignedBioseqDB( "%s.afa" % ( self.getInputFile() ) )
+        lHeaders = absDB.getHeaderList()
+        queryHeader = lHeaders[0]
+        subjectHeader = lHeaders[1]
+        queryLength = absDB.fetch( queryHeader ).getLength()
+        subjectLength = absDB.fetch( subjectHeader ).getLength()
+        lAligns = absDB.getAlignList( queryHeader, subjectHeader )
+        for i in lAligns:
+            if "re-oriented" in i.getQueryName():
+                i.setQueryName( queryHeader.replace(" re-oriented","") )
+                start = i.getQueryStart()
+                end = i.getQueryEnd()
+                i.setQueryStart( queryLength - end + 1 )
+                i.setQueryEnd( queryLength - start + 1 )
+            if "re-oriented" in i.getSubjectName():
+                i.setSubjectName( subjectHeader.replace(" re-oriented","") )
+                start = i.getSubjectStart()
+                end = i.getSubjectEnd()
+                i.setSubjectEnd( subjectLength - end + 1 )
+                i.setSubjectStart( subjectLength - start + 1 )
+            if not i.isQueryOnDirectStrand():
+                i.reverse()
+        AlignUtils.writeListInFile( lAligns, self.getOutputFile() )
+        os.remove( "%s.afa" % ( self.getInputFile() ) )
+        
+        
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
+        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+        
+        
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
+            print string
+            sys.exit(1)
+            
+        self.postprocess()
+        
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = NWalignProgramLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
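
A minimal usage sketch for the launcher above, assuming the REPET/S-MART environment and the NWalign binary are installed; 'query.fa' and 'subject.fa' are hypothetical file names:

    from commons.launcher.NWalignProgramLauncher import NWalignProgramLauncher

    launcher = NWalignProgramLauncher()
    launcher.setInputFile( "query.fa" )        # hypothetical query FASTA
    launcher.setSubjectFile( "subject.fa" )    # hypothetical subject FASTA
    launcher.setOutputFile( "query.fa.align" )
    launcher.setVerbosityLevel( 1 )
    launcher.run()    # runs NWalign, then postprocess() re-orients coordinates and writes the 'align' file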
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/RepeatMaskerClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/RepeatMaskerClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+##@file
+# Launch RepeatMaskerProgramLauncher on several files in parallel on a cluster.
+
+
+import os
+import sys
+import getopt
+import exceptions
+
+from pyRepet.launcher.AbstractClusterLauncher import *
+from commons.launcher.RepeatMaskerProgramLauncher import RepeatMaskerProgramLauncher
+
+
+class RepeatMaskerClusterLauncher( AbstractClusterLauncher ):
+    """
+    Launch RepeatMasker on several files in parallel on a cluster.
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractClusterLauncher.__init__( self )
+        AbstractClusterLauncher.setAcronym( self, "RM" )
+        
+        self._cmdLineSpecificOptions = "s:n:gbl"
+        
+        self._exeWrapper = "RepeatMaskerProgramLauncher.py"
+        self._prgLauncher = RepeatMaskerProgramLauncher()
+        self._prgLauncher.setInputFile( GENERIC_IN_FILE )
+        self._prgLauncher.setOutputFile( GENERIC_IN_FILE )
+        self._prgLauncher.setClean()
+        self._prgLauncher.setVerbosityLevel( 1 )
+        self._prgLauncher.setListFilesToKeep()
+        self._prgLauncher.setListFilesToRemove()
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (repeats, format='fasta')"
+        string += "\n     -n: nb of processors to use in parallel (default=1)"
+        string += "\n     -g: calculate the GC content"
+        string += "\n     -b: skip bacterial insertion element check"
+        string += "\n     -l: mask low-complexity DNA or simple repeats"
+        return string
+    
+    
+    def getSubjectFile( self ):
+        return self._prgLauncher.getSubjectFile()
+    
+    
+    def getNbProcessors( self ):
+        return self._prgLauncher.getNbProcessors()
+    
+    
+    def getCalculateGCcontent( self ):
+        return self._prgLauncher.getCalculateGCcontent()
+    
+    
+    def getSkipBacterialIsCheck( self ):
+        return self._prgLauncher.getSkipBacterialIsCheck()
+    
+    
+    def getMaskSsr( self ):
+        return self._prgLauncher.getMaskSsr()
+    
+    
+if __name__ == "__main__":
+    i = RepeatMaskerClusterLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
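
A minimal sketch of the delegation pattern above: the cluster launcher's getters simply read from the wrapped RepeatMaskerProgramLauncher ('repeats.fa' is a hypothetical repeat library; no cluster job is submitted here):

    from commons.launcher.RepeatMaskerClusterLauncher import RepeatMaskerClusterLauncher

    cl = RepeatMaskerClusterLauncher()
    cl._prgLauncher.setSubjectFile( "repeats.fa" )   # hypothetical repeat library
    cl._prgLauncher.setNbProcessors( 4 )
    print cl.getSubjectFile(), cl.getNbProcessors()  # both values come from the wrapped launcher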
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/RepeatMaskerProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/RepeatMaskerProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+
+##@file
+# Launch RepeatMasker (pairwise alignment for repeat detection).
+
+
+import os
+import sys
+
+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
+
+
+class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch RepeatMasker (pairwise alignment for repeat detection).
+    """
+
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "RepeatMasker"
+        self._formatInFile = "fasta"
+        self._sbjFile = ""
+        self._nbProc = 1
+        self._calcGc = False
+        self._skipIs = False
+        self._maskSsr = True
+        self._onlySsr = False
+        self._cmdLineSpecificOptions = "s:n:gblmo:"
+
+
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (repeats, format='fasta')"
+        string += "\n     -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() )
+        string += "\n     -g: calculate the GC content"
+        string += "\n     -b: skip bacterial insertion element check"
+        string += "\n     -l: does not mask low-complexity DNA or simple repeats"
+        string += "\n     -m: only masks low complex/simple repeats (no interspersed repeats)"
+        string += "\n     -o: name of the output file"
+        string += "\n         with -s: format='align', default=inFile+'.cat.align')"
+        string += "\n         with -m: format='path', default=inFile+'.cat.path')"
+        return string
+
+
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        """
+        Set a specific attribute from the command-line arguments.
+        """
+        if o =="-s":
+            self.setSubjectFile( a )
+        elif o == "-n":
+            self.setNbProcessors( a )
+        elif o == "-g":
+            self.setCalculateGCcontent()
+        elif o == "-b":
+            self.setSkipBacterialIsCheck()
+        elif o == "-l":
+            self.unsetMaskSsr()
+        elif o == "-m":
+            self.setOnlySsr()
+        elif o == "-o":
+            self.setOutputFile( a )
+
+
+    def setSubjectFile( self, arg ):
+        self._sbjFile = arg
+
+
+    def setNbProcessors( self, arg ):
+        self._nbProc = int(arg)
+
+
+    def setCalculateGCcontent( self ):
+        self._calcGc = True
+
+
+    def setSkipBacterialIsCheck( self ):
+        self._skipIs = True
+
+
+    def unsetMaskSsr( self ):
+        self._maskSsr = False
+
+
+    def setOnlySsr( self ):
+        self._onlySsr = True
+
+
+    def getSubjectFile( self ):
+        return self._sbjFile
+
+
+    def getNbProcessors( self ):
+        return self._nbProc
+
+
+    def getCalculateGCcontent( self ):
+        return self._calcGc
+
+
+    def getSkipBacterialIsCheck( self ):
+        return self._skipIs
+
+
+    def getMaskSsr( self ):
+        return self._maskSsr
+
+
+    def getOnlySsr( self ):
+        return self._onlySsr
+
+
+    def checkSpecificAttributes( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        if ( self.getSubjectFile() == "" and not self.getOnlySsr() ) \
+               or ( self.getSubjectFile() != "" and self.getOnlySsr() ):
+            string = "ERROR: need to specify -s or -m"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getOutputFile() == "":
+            if not self.getOnlySsr():
+                self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) )
+            else:
+                self.s
[...]
+        if self.getOutputFile() == "":
+            if not self.getOnlySsr():
+                self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) )
+            else:
+                self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )
+
+
+    def setListFilesToRemove( self ):
+        """
+        Set the list of files to remove.
+        """
+        self.appendFileToRemove( "%s.stderr" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.tbl" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.ori.out" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.masked" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.out" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.log" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.ref" % ( self.getInputFile() ) )
+
+
+    def convertCatIntoAlign( self ):
+        """
+        Convert a 'cat' file into the 'align' format.
+        """
+        cmd = os.environ["REPET_PATH"] + "/bin/RMcat2align.py"
+        cmd += " -i %s.cat" % ( self.getInputFile() )
+        cmd += " -o %s.cat.align" % ( self.getInputFile() )
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR while converting 'cat' file into 'align' format"
+            print string
+            sys.exit(1)
+
+
+    def convertCatIntoPath( self ):
+        """
+        Convert a 'cat' file into the 'path' format.
+        """
+        cmd = os.environ["REPET_PATH"] + "/bin/RMcat2path.py"
+        cmd += " -i %s.cat" % ( self.getInputFile() )
+        cmd += " -o %s.cat.path" % ( self.getInputFile() )
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR while converting 'cat' file into 'path' format"
+            print string
+            sys.exit(1)
+
+
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        if self.getSubjectFile() != "":
+            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
+        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
+        if self.getCalculateGCcontent():
+            self._summary += "\ncalculate the GC content"
+        if self.getSkipBacterialIsCheck():
+            self._summary += "\nskip bacterial insertion element check"
+        if self.getMaskSsr():
+            self._summary += "\nmask low-complexity DNA or simple repeats"
+        if self.getOnlySsr():
+            self._summary = "\nonly masks low complex/simple repeats (no interspersed repeats)"
+        if self.getOutputFile() == "":
+            if not self.getMaskSsr():
+                self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) )
+            else:
+                self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+
+
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
+            print string
+            sys.exit(1)
+
+        if not self.getOnlySsr():
+            self.convertCatIntoAlign()
+        else:
+            self.convertCatIntoPath()
+
+        self.end()
+
+
+if __name__ == "__main__":
+    i = RepeatMaskerProgramLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
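
A minimal usage sketch for this launcher, assuming RepeatMasker and the REPET conversion scripts (RMcat2align.py / RMcat2path.py) are installed; 'chunk.fa' and 'repeats.fa' are hypothetical file names:

    from commons.launcher.RepeatMaskerProgramLauncher import RepeatMaskerProgramLauncher

    rm = RepeatMaskerProgramLauncher()
    rm.setInputFile( "chunk.fa" )       # hypothetical genomic chunk
    rm.setSubjectFile( "repeats.fa" )   # hypothetical repeat library; use setOnlySsr() instead for SSR-only masking
    rm.setNbProcessors( 2 )
    rm.setVerbosityLevel( 1 )
    rm.run()    # runs RepeatMasker, then converts the '.cat' output to the 'align' format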
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/YassClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/YassClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+##@file
+# Launch YassProgramLauncher on several files in parallel on a cluster.
+
+
+from pyRepet.launcher.AbstractClusterLauncher import *
+from commons.core.coord.AlignUtils import AlignUtils
+from commons.launcher.YassProgramLauncher import YassProgramLauncher
+from commons.tools import srptBlasterMatcher
+
+
+class YassClusterLauncher( AbstractClusterLauncher ):
+    """
+    Launch Yass on several files in parallel on a cluster.
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractClusterLauncher.__init__( self )
+        AbstractClusterLauncher.setAcronym( self, "Yass" )
+        
+        self._cmdLineSpecificOptions = "s:p:A"
+        
+        self._exeWrapper = "YassProgramLauncher.py"
+        self._prgLauncher = None
+        self._prgLauncher = self.getProgramLauncherInstance()
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (format='fasta')"
+        string += "\n     -p: parameters for 'yass'"
+        string += "\n     -Z: concatenate output files"
+        string += "\n     -A: same sequences (all-by-all)"
+        return string
+    
+    
+    def getSubjectFile( self ):
+        return self._prgLauncher.getSubjectFile()
+    
+    
+    def getProgramParameters( self ):
+        return self._prgLauncher.getProgramParameters()
+    
+    
+    def getProgramLauncherInstance( self ):
+        if self._prgLauncher == None:
+            self._prgLauncher = YassProgramLauncher()
+            self._prgLauncher.setInputFile( GENERIC_IN_FILE )
+            self._prgLauncher.setClean()
+            self._prgLauncher.setVerbosityLevel( 1 )
+            self._prgLauncher.setListFilesToKeep()
+            self._prgLauncher.setListFilesToRemove()
+        return self._prgLauncher
+    
+    
+    def processOutputFile( self, tmpFile, outFile ):
+        sortFile = "%s.sort" % ( tmpFile )
+        AlignUtils.sortAlignFile( tmpFile, sortFile )
+        if self._prgLauncher.getAllByAll():
+            srptBlasterMatcher.filterRedundantMatches( sortFile,
+                                                                  outFile )
+            os.remove( sortFile )
+        else:
+            os.rename( sortFile, outFile )
+            
+            
+if __name__ == "__main__":
+    i = YassClusterLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/YassProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/YassProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+
+##@file
+# Launch Yass (pairwise alignment).
+#
+# options:
+#      -h: this help
+#      -i: name of the input file (queries, format='fasta')
+#      -s: name of the subject file (format='fasta')
+#      -p: parameters for 'yass' (default='-d 2')
+#      -o: name of the output file (format='align', default=inFile+'.align')
+#      -c: clean
+#      -v: verbosity level (default=0/1)
+
+
+import os
+import sys
+
+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
+
+
+class YassProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch Yass (pairwise alignment).
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "yass"
+        self._formatInFile = "fasta"
+        self._sbjFile = ""
+        self._prgParam = ""
+        self._allByAll = False
+        self._cmdLineSpecificOptions = "s:p:Ao:"
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (format='fasta')"
+        string += "\n     -p: parameters for '%s'" % ( self.getProgramName() )
+        string += "\n     -A: same sequences (all-by-all)"
+        string += "\n     -o: name of the output file (format='align', default=inFile+'.align')"
+        return string
+    
+    
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        """
+        Set a specific attribute from the command-line arguments.
+        """
+        if o =="-s":
+            self.setSubjectFile( a )
+        elif o == "-p":
+            self.setProgramParameters( a )
+        elif o == "-A":
+            self.setAllByAll()
+        elif o == "-o":
+            self.setOutputFile( a )
+            
+            
+    def setSubjectFile( self, arg ):
+        self._sbjFile = arg
+        
+        
+    def getSubjectFile( self ):
+        return self._sbjFile
+    
+    
+    def setAllByAll( self ):
+        self._allByAll = True
+        
+        
+    def getAllByAll( self ):
+        return self._allByAll
+    
+    
+    def checkSpecificAttributes( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        if self._sbjFile == "":
+            string = "ERROR: missing subject file (-s)"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+            
+            
+    def setWrapperCommandLine( self ):
+        """
+        Set the command-line of the wrapper.
+        Required for YassClusterLauncher.
+        """
+        self._wrpCmdLine = self.getWrapperName()
+        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
+        self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
+        if self.getProgramParameters() != "":
+            self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
+        if self.getAllByAll():
+            self._wrpCmdLine += " -A"
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
+        if self.getClean():
+            self._wrpCmdLine += " -c"
+        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+        
+        
+    def setProgramCommandLine( self ):
+        """
+        Set the command-line of the program.
+        """
+        self._prgCmdLine = self.getProgramName()
+        self._prgCmdLine += " -d 2"
+        if self.getProgramParameters() != "":
+            self._prgCmdLine += " %s" % ( self.getProgramParameters() )
+        self._prgCmdLine += " -o %s.blast" % ( self.getInputFile() )
+        self._prgCmdLine += " %s" % ( self.getInputFile() )
+        self._prgCmdLine += " %s" % ( self.getSubjectFile() )
+        
+        
+    def setListFilesToKeep( self ):
+        """
+        Set the list of files to keep.
+        """
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+        
+        
+    def setListFilesToRemove( self ):
+        """
+        Set the list of files to remove.
+        """
+        pass
+    
+    
+    def convertBlastIntoAlign( self ):
+        """
+        Convert a 'blast' file into the 'align' format.
+        """
+        cmd = os.environ["REPET_PATH"] + "/bin/blast2align.py"
+        cmd += " -i %s.blast" % ( self.getInputFile() )
+        cmd += " -o %s" % ( self.getOutputFile() )
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR while converting 'blast' file into 'align' format"
+            print string
+            sys.exit(1)
+            
+            
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
+        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
+        if self.getAllByAll():
+            self._summary += "\nall-by-all"
+        if self.getOutputFile() == "":
+            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+        
+        
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
+            print string
+            sys.exit(1)
+            
+        self.convertBlastIntoAlign()
+        
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = YassProgramLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()
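
A minimal usage sketch, assuming 'yass' and REPET's blast2align.py are installed; 'queries.fa' and 'subjects.fa' are hypothetical file names:

    from commons.launcher.YassProgramLauncher import YassProgramLauncher

    yass = YassProgramLauncher()
    yass.setInputFile( "queries.fa" )     # hypothetical query FASTA
    yass.setSubjectFile( "subjects.fa" )  # hypothetical subject FASTA; call setAllByAll() when both files are the same
    yass.setVerbosityLevel( 1 )
    yass.run()    # runs yass, then converts the 'blast' output to the 'align' format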
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchBlasterMatcherPerQuery.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchBlasterMatcherPerQuery.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,196 @@
+#!/usr/bin/env python
+
+"""
+This program splits the input fasta file in a given number of files, launch Blaster and/or Matcher on them in parallel and collect the results afterwards.
+"""
+
+import os
+import sys
+import getopt
+import exceptions
+import logging
+import ConfigParser
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "*** Error: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+import pyRepet.launcher.programLauncher
+import pyRepet.seq.fastaDB
+
+#-----------------------------------------------------------------------------
+
+def help():
+
+    """
+    Give the list of the command-line options.
+    """
+
+    print
+    print "usage:",sys.argv[0]," [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -q: fasta filename of the queries"
+    print "     -s: fasta filename of the subjects (same as queries if not specified)"
+    print "     -Q: queue name on the cluster"
+    print "     -d: absolute path to the temporary directory"
+    print "     -C: configuration file"
+    print "     -n: max. number of jobs (default=10,given a min. of 1 query per job)"
+    print "     -m: mix of Blaster and/or Matcher"
+    print "         1: launch Blaster only"
+    print "         2: launch Matcher only (on '*.align' query files)"
+    print "         3: launch Blaster+Matcher in the same job (default)"
+    print "     -B: parameters for Blaster (e.g. \"-a -n tblastx\")"
+    print "     -M: parameters for Matcher (e.g. \"-j\")"
+    print "     -Z: collect all the results into a single file (format 'align', 'path' or 'tab')"
+    print "     -c: clean"
+    print "     -v: verbose (default=0/1/2)"
+    print
+
+#-----------------------------------------------------------------------------
+
+def main():
+
+    """
+    This program splits the input fasta file in a given number of files, launch Blaster and/or Matcher on them in parallel and collect the results afterwards.
+    """
+
+    qryFileName = ""
+    sbjFileName = ""
+    queue = ""
+    tmpDir = ""
+    configFileName = ""
+    maxNbJobs = 10
+    minQryPerJob = 1
+    mix = "3"
+    paramBlaster = ""
+    paramMatcher = ""
+    collectFormat = ""
+    clean = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hq:s:Q:d:C:n:m:B:M:Z:cv:")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-q":
+            qryFileName = a 
+        elif o == "-s":
+            sbjFileName = a
+        elif o == "-Q":
+            queue = a
+        elif o == "-d":
+            tmpDir = a
+        elif o == "-C":
+            configFileName = a
+        elif o == "-n":
+            maxNbJobs = int(a)
+        elif o == "-m":
+            mix = a
+        elif o == "-B":
+            paramBlaster = a
+        elif o == "-M":
+            paramMatcher = a
+        elif o == "-Z":
+            collectFormat = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if qryFileName == "" or configFileName == "" or collectFormat == "":
+        print "*** Error: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "\nbeginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    if not os.path.exists( qryFileName ):
+        print "*** Error: query file '%s' doesn't exist" % ( qryFileName )
+        sys.exit(1)
+    if sbjFileName != "":
+        if not os.path.exists( sbjFileName ):
+            print "*** Error: subject file '%s' doesn't exist" % ( sbjFileName )
+            sys.exit(1)
+    else:
+        sbjFileName = qryFileName
+
+    pL = pyRepet.launcher.programLauncher.programLauncher()
+
+    nbSeqQry = pyRepet.seq.fastaDB.dbSize( qryFileName )
+    qryPerJob = nbSeqQry / float(maxNbJobs)
+
+    # split the input query file in single files into a new directory
+    prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py"
+    cmd = prg
+    cmd += " -i %s" % ( qryFileName )
+    if qryPerJob <= 1.0:
+        cmd += " -n %i" % ( minQryPerJob )
+    else:
+        cmd += " -n %i" % ( qryPerJob + 1 )
+    cmd += " -d"
+    pL.launch( prg, cmd )
+
+    # prepare the subject databank
+    if sbjFileName != qryFileName:
+        prg = "blaster"
+        cmd = prg
+        cmd += " -q %s" % ( sbjFileName )
+        cmd += " -P"
+        pL.launch( prg, cmd )
+
+    # launch Blaster+Matcher in parallel
+    prg = "srptBlasterMatcher.py"
+    cmd = prg
+    cmd += " -g %s_vs_%s" % ( qryFileName, sbjFileName )
+    cmd += " -q %s/batches" % ( os.getcwd() )
+    cmd += " -s %s/%s" % ( os.getcwd(), sbjFileName )
+    cmd += " -Q '%s'" % ( queue )
+    if tmpDir != "":
+        cmd += " -d %s" % ( tmpDir )
+    cmd += " -m %s" % ( mix )
+    if paramBlaster != "":
+        cmd += " -B \"%s\"" % ( paramBlaster )
+    if paramMatcher != "":
+        cmd += " -M \"%s\"" % ( paramMatcher )
+    cmd += " -Z %s" % ( collectFormat )
+    cmd += " -C %s" % ( configFileName )
+    if clean == True:
+        cmd += " -c"
+    cmd += " -v %i" % ( verbose - 1 )
+    pL.launch( prg, cmd )
+
+    suffix = ""
+    if mix in ["2","3"]:
+        if "-a" in paramMatcher:
+            suffix = "match.%s" % ( collectFormat )
+        else:
+            suffix = "clean_match.%s" % ( collectFormat )
+        os.system( "mv %s_vs_%s.%s %s_vs_%s.align.%s" % ( qryFileName, sbjFileName, collectFormat, qryFileName, sbjFileName, suffix ) )
+
+    # clean
+    if clean == True:
+        prg = "rm"
+        cmd = prg
+        cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % ( sbjFileName, sbjFileName )
+        pL.launch( prg, cmd )
+
+    if verbose > 0:
+        print "%s finished successfully\n" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+#----------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
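
A minimal invocation sketch for this script, using only options documented in help(); the file names, configuration file and queue name are hypothetical, and REPET's bin directory is assumed to be on the PATH:

    import subprocess

    cmd = ("launchBlasterMatcherPerQuery.py"
           " -q queries.fa -s subjects.fa"   # hypothetical query/subject FASTA files
           " -C repet.cfg -Q long.q"         # hypothetical configuration file and cluster queue
           " -m 3 -Z align -c -v 1")         # Blaster+Matcher, collect results in 'align' format
    subprocess.call(cmd, shell=True)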
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchMafft.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchMafft.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+# DEPRECATED
+
+import user, os, sys, getopt, exceptions
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "*** Error: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+import pyRepet.launcher.programLauncher
+import pyRepet.seq.fastaDB
+from pyRepet.seq.BioseqDB import *
+
+#------------------------------------------------------------------------------
+
+def help():
+
+    print
+    print "DEPRECATED"
+    print
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -o: name of the output file (default=inFileName+'.fa_aln')"
+    print "     -v: verbose (default=0/1)"
+    print
+
+#------------------------------------------------------------------------------
+
+def main():
+    
+    """
+    This program launches MAFFT to build a multiple sequence alignment.
+    """
+
+    inFileName = ""
+    outFileName = ""
+    verbose = 0
+    
+    try:
+        opts,args=getopt.getopt(sys.argv[1:],"hi:o:v:")
+    except getopt.GetoptError:
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+            
+    if inFileName == "":
+        print "*** Error: missing compulsory options"
+        help()
+        sys.exit(1)
+        
+    if verbose > 0:
+        print "beginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    if verbose > 0:
+        print "build a multiple alignment from '%s'..." % ( inFileName )
+        sys.stdout.flush()
+        
+    pyRepet.seq.fastaDB.shortenSeqHeaders( inFileName )
+    
+    bsDB = BioseqDB( inFileName+".shortH" )
+    bsDB.upCase()
+    bsDB.save( inFileName+".shortHtmp" )
+    del bsDB
+    os.rename( inFileName+".shortHtmp", inFileName+".shortH" )
+    
+    pL = pyRepet.launcher.programLauncher.programLauncher( inFileName+".shortH" )
+    pL.launchMafft( outFileName=inFileName+".shortH.fa_aln", verbose=verbose )
+    
+    pyRepet.seq.fastaDB.retrieveInitSeqHeaders( inFileName+".shortH.fa_aln",
+                                                inFileName+".shortHlink",
+                                                inFileName+".shortH.fa_aln.initH",
+                                                verbose-1 )
+    
+    if outFileName == "":
+        outFileName = "%s.fa_aln" % ( inFileName )
+    os.system( "mv %s.shortH.fa_aln.initH %s" % ( inFileName, outFileName ) )
+    
+    for f in [inFileName+".shortH",inFileName+".shortH.fa_aln",inFileName+".shortHlink"]:
+        os.remove( f )
+        
+    if verbose > 0:
+        print "%s finished successfully" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    return 0
+
+#------------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchMreps.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchMreps.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.core.parsing.MrepsToSet import MrepsToSet
+import subprocess
+import os
+import sys
+import getopt
+
+def help():
+    """
+    Give the list of the command-line options.
+    """
+    print
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -o: name of the output file (default=inFileName+'.Mreps.set')"
+    print "     -f: error filter (default=1.0)"
+    print "     -c: clean"
+    print "     -v: verbosity level (default=0/1)"
+    print
+
+def main():
+    """
+    Launch Mreps.
+    """
+    inFileName = ""
+    outFileName = ""
+    errorFilter = 1.0
+    clean = False
+    verbose = 0
+
+    try:
+        opts=getopt.getopt(sys.argv[1:],"hi:o:f:cv:")[0]
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-f":
+            errorFilter = float(a)
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if inFileName == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "beginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    # Mreps 2.5 doesn't fully support IUPAC nomenclature
+    if verbose > 0:
+        print "* check IUPAC symbols"; sys.stdout.flush()
+    tmpInFileName = "%s.tmp%i" % ( inFileName, os.getpid() )
+    if os.path.exists( tmpInFileName ):
+        os.system( "rm -f %s" % ( tmpInFileName ) )
+    bsDB = BioseqDB( inFileName )
+    for bs in bsDB.db:
+        if verbose > 0:
+            print bs.header; sys.stdout.flush()
+        bs.partialIUPAC()
+        onlyN = True
+        for nt in ["A","T","G","C"]:
+            if nt in bs.sequence:
+                onlyN = False
+        if onlyN == True:
+            if verbose > 0:
+                print "** Warning: only Ns"; sys.stdout.flush()
+        else:
+            bsDB.save( tmpInFileName )
+
+    if not os.path.exists( tmpInFileName ):
+        sys.exit(0)
+
+    if verbose > 0:
+        print "* remove N stretches"; sys.stdout.flush()
+    prg = os.environ["REPET_PATH"] + "/bin/cutterDB"
+    cmd = prg
+    cmd += " -l 200000"
+    cmd += " -o 0"
+    cmd += " -w 11"
+    cmd += " %s" % ( tmpInFileName )
+    if verbose > 0:
+        print cmd; sys.stdout.flush()
+    log = os.system( cmd )
+    if log != 0:
+        print "ERROR: %s returned %i" % ( prg, log )
+        sys.exit(1)
+
+    # launch Mreps on the input file
+    MrepsOutFileName = "%s.Mreps.xml" % ( tmpInFileName )
+    prg = "mreps"
+    cmd = prg
+    cmd += " -res 3"
+    cmd += " -exp 3.0"
+    cmd += " -maxsize 50"
+    cmd += " -xmloutput %s" % MrepsOutFileName
+    cmd += " -fasta %s_cut" % tmpInFileName
+    process = subprocess.Popen(cmd, shell = True)
+    process.communicate()
+    if process.returncode != 0:
+        raise Exception("ERROR when launching '%s'" % cmd)
+
+    if outFileName == "":
+        outFileName = inFileName + ".Mreps.set"
+
+    # parse Mreps results in xml format
+    iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter)
+    iMrepsToSet.run()
+    if clean:
+        iMrepsToSet.clean()
+
+    # remove temporary input filename
+    os.remove(tmpInFileName)
+    os.remove("%s_cut" % tmpInFileName)
+    os.remove("%s.Nstretch.map" % tmpInFileName)
+
+    if verbose > 0:
+        print "%s finished successfully\n" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == '__main__':
+    main()
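
A minimal invocation sketch, assuming 'mreps' and REPET's cutterDB are installed; 'genome.fa' is a hypothetical input file:

    import subprocess

    # -f sets the error filter passed to MrepsToSet; -c removes the intermediate Mreps files
    subprocess.call("launchMreps.py -i genome.fa -o genome.fa.Mreps.set -f 1.0 -c -v 1",
                    shell=True)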
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchPhyML.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchPhyML.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+
+from pyRepet.launcher.programLauncher import programLauncher
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+
+
+def help():
+    print
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (aligned fasta)"
+    print "     -c: clean"
+    print "     -v: verbose (default=0)"
+    print
+    
+    
+def main():
+    
+    inFileName = ""
+    clean = False
+    verbose = 0
+    try:
+        opts,args=getopt.getopt(sys.argv[1:],"hi:cv:")
+    except getopt.GetoptError, err:
+        print str(err)
+        help(); sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help(); sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+    if inFileName == "":
+        print "ERROR: missing compulsory options"
+        help(); sys.exit(1)
+
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    csh = ChangeSequenceHeaders()
+    csh.setInputFile( inFileName )
+    csh.setFormat( "fasta" )
+    csh.setStep( 1 )
+    csh.setPrefix( "seq" )
+    csh.setLinkFile( inFileName+".shortHlink" )
+    csh.setOutputFile( inFileName+".shortH" )
+    csh.run()
+    
+    pL = programLauncher( inFileName+".shortH" )
+    
+    pL.launchSreformat( outFormat="phylip", outFileName=inFileName+".shortH.phylip", verbose=verbose )
+    
+    pL.reset( inFileName+".shortH.phylip" )
+    
+    pL.launchPhyML( verbose=verbose )
+    
+    csh.setInputFile( inFileName+".shortH.phylip_phyml_tree.txt" )
+    csh.setFormat( "newick" )
+    csh.setStep( 2 )
+    csh.setLinkFile( inFileName+".shortHlink" )
+    csh.setOutputFile( inFileName+"_phyml.newick" )
+    csh.run()
+    
+    if clean:
+        for f in [ inFileName+".shortH", inFileName+".shortHlink", inFileName+".shortH.phylip",
+                   inFileName+".shortH.phylip_phyml_lk.txt", inFileName+".shortH.phylip_phyml_tree.txt" ]:
+            os.remove( f )
+        os.system( "mv %s.shortH.phylip_phyml_stat.txt %s_phyml.txt" % ( inFileName, inFileName ) )
+
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
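
A minimal invocation sketch, assuming PhyML and sreformat are available; 'family.fa_aln' is a hypothetical aligned FASTA file, and the resulting tree is written to 'family.fa_aln_phyml.newick':

    import subprocess

    # headers shortened -> phylip conversion -> PhyML -> original headers restored in the newick tree
    subprocess.call("launchPhyML.py -i family.fa_aln -c -v 1", shell=True)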
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchPrank.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchPrank.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+
+from pyRepet.launcher.programLauncher import programLauncher
+
+
+def help():
+    print
+    print "usage: launchPrank.py [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format=fasta)"
+    print "     -o: name of the output file (format=aligned fasta, default='inFileName'+fa_aln)"
+    print "     -P: Prank's parameters"
+    print "     -c: clean"
+    print "     -v: verbose (default=0/1)"
+    print
+
+
+def main():
+    """
+    Launch PRANK.
+    """
+    inFileName = ""
+    outFileName = ""
+    parameters = ""
+    clean = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hi:o:P:cv:" )
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-P":
+            parameters = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if inFileName == "":
+        print "ERROR: missing input file (-i)"
+        help()
+        sys.exit(1)
+
+    if not os.path.exists( inFileName ):
+        print "ERROR: can't find file '%s'" % ( inFileName )
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "START %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+
+    if outFileName == "":
+        outFileName = "%s.fa_aln" % ( inFileName )
+
+    pL = programLauncher( inFileName )
+    returnStatus = pL.launchPrank( outFileName, parameters, "yes", verbose )
+    if returnStatus != 0:
+        print "ERROR: launchPrank() returned '%i'" % ( returnStatus )
+        sys.exit(1)
+
+    if verbose > 0:
+        print "END %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
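
A minimal invocation sketch, assuming PRANK is installed; 'family.fa' is a hypothetical input file, and the alignment defaults to 'family.fa.fa_aln':

    import subprocess

    # -P would pass parameters through to PRANK verbatim (omitted here)
    subprocess.call("launchPrank.py -i family.fa -v 1", shell=True)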
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchTCoffee.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchTCoffee.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+import exceptions
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "ERROR: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+from pyRepet.launcher.programLauncher import programLauncher
+
+
+def help():
+    print
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -P: parameters"
+    print "     -o: name of the output file (format='aligned fasta', default='inFileName'+fa_aln)"
+    print "     -c: clean"
+    print "     -v: verbosity level (default=0/1)"
+    print
+
+
+def main():
+
+    inFileName = ""
+    parameters = ""
+    outFileName = ""
+    clean = False
+    verbose = 0
+    
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:P:o:cv:")
+    except getopt.GetoptError, err:
+        print str(err); help(); sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help(); sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-P":
+            parameters = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = "yes"
+            
+    if inFileName == "" and parameters == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+        
+    if outFileName == "":
+        outFileName = "%s.fa_aln" % ( inFileName )
+        
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    pL = programLauncher( inFileName )
+    pL.launchTcoffee( outFileName, parameters )
+    
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+    
+    return 0
+
+
+if __name__ == "__main__":
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/launchTEclass.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchTEclass.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+import glob
+import shutil
+
+
+def help():
+    print
+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -o: name of the output file (format='map', default=inFileName+'.map')"
+    print "     -c: clean"
+    print "     -v: verbosity level (default=0/1)"
+    print
+
+def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
+    tmpHandler = open( inFile, "r" )
+    outHandler = open( outFile, "w" )
+    dClassif2Count = {}
+    header = ""
+    classif = ""
+    while True:
+        line = tmpHandler.readline()
+        if line == "":
+            break
+        if line[0] == ">":
+            header = line[1:].split("|")[0]
+            classif = line[1:-1].split(": ")[1].split("|")[0]
+            if not dClassif2Count.has_key( classif ):
+                dClassif2Count[ classif ] = 0
+            dClassif2Count[ classif ] += 1
+        else:
+            seqLength = len(line[:-1])
+            outHandler.write( "%s\t%s\t%i\t%i\n" % ( classif, header, 1, seqLength ) )
+    tmpHandler.close()
+    outHandler.close()
+    if verbose > 0:
+        for classif in dClassif2Count.keys():
+            print "%s: %i sequences" % ( classif, dClassif2Count[ classif ] )
+            sys.stdout.flush()
+            
+            
+def main():
+    """
+    Launch TEclass to classify TE sequences.
+    """
+    inFileName = ""
+    outFileName = ""
+    clean = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+            
+    if inFileName == "":
+        print "ERROR: missing input file (-i)"
+        help()
+        sys.exit(1)
+    if not os.path.exists( inFileName ):
+        print "ERROR: can't find input file '%s'" % ( inFileName )
+        help()
+        sys.exit(1)
+    if outFileName == "":
+        outFileName = "%s.TEclass.map" % ( inFileName )
+        
+    if verbose > 0:
+        print "START %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+        
+    if verbose > 0:
+        print "launch TEclass..."
+        sys.stdout.flush()
+    prg = "test_consensi_2.1.pl"
+    cmd = prg
+    cmd += " %s" % ( inFileName )
+    returnValue = os.system( cmd )
+    if returnValue != 0:
+        print "ERROR: '%s' returned %i" % ( prg, returnValue )
+        sys.exit(1)
+        
+    lOut1 = glob.glob( "%s_*" % ( inFileName ) )
+    outDir = ""
+    for i in lOut1:
+        if os.path.isdir( i ):
+            lOut2 = glob.glob( "%s/*" % ( i ) )
+            if len(lOut2) == 4 and "%s/%s.lib" % ( i, inFileName ) in lOut2:
+                outDir = i
+                break
+    if outDir == "":
+        print "ERROR: can't find output directory"
+        sys.exit(1)
+    os.chdir( outDir )
+    
+    if verbose > 0:
+        print "parse the results..."
+        sys.stdout.flush()
+    parseFastaFileFromTEclass( "%s.lib" % ( inFileName ),
+                               outFileName,
+                               verbose )
+    os.system( "mv %s .." % ( outFileName ) )
+    os.chdir( ".." )
+    
+    if clean:
+        if verbose > 0:
+            print "clean the temporary files..."
+            sys.stdout.flush()
+        shutil.rmtree( outDir )
+        
+    if verbose > 0:
+        print "END %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+        
+    return 0
+
+
+if __name__ == "__main__":
+    main()
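
The map conversion performed by parseFastaFileFromTEclass() above can be illustrated on a single hypothetical TEclass header: the classification is the token between ': ' and the next '|', and one map line is written per sequence line, using the line length as the end coordinate:

    line = ">consensus1|500: LTR|forward\n"              # hypothetical TEclass header line
    header = line[1:].split("|")[0]                      # -> "consensus1"
    classif = line[1:-1].split(": ")[1].split("|")[0]    # -> "LTR"
    print "%s\t%s\t%i\t%i" % ( classif, header, 1, 60 )  # map line for a 60-nt sequence line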
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/MockDataBankForBlat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/MockDataBankForBlat.py Tue Apr 30 14:33:21 2013 -0400
b"@@ -0,0 +1,299 @@\n+class MockDataBankForBlat(object):\n+\t\n+\tdef write(self, inFileName):\n+\t\tf = open(inFileName, 'w')\n+\t\tf.write('>Vein_ctg_2286\\n')\n+\t\tf.write('AGCAAAAaCGTATTTTCTAAGCTAAAATTTAGCGTAGAAGCTTGGACTCGCTtATTTTTT\\n')\n+\t\tf.write('ATCCTAGGAATTAGTAGAATCTCTCTATAAATATTTATTTTTTATCCGCGCAAAGCTAGG\\n')\n+\t\tf.write('AAACTATAGAATCTTATATATTAGTATATTCTAGCGATTATCTAGAGATTGTaGGGGGGG\\n')\n+\t\tf.write('ggAAAGGTAGTAATAGGCGTGTAAGAGAGATAAAGAGTTtATCTTAGAATCTCCCTACGC\\n')\n+\t\tf.write('CAAGCGAGATTCCTAAGGTAAGAATCCTAAGGTAAAGAATCTAAGCGAGAATTTAGAATC\\n')\n+\t\tf.write('ACGTGCTCGCTTCCTACTACCGTAATTTTACGGTTATAGAGTACGACAGTTCCTATATAT\\n')\n+\t\tf.write('AACCTATACTTATAACAACGAATATAaTATAAGACGTTAATTTGATTTTTTACCGTGTTA\\n')\n+\t\tf.write('CTTATAATAAGGTGTCCCAAAGTTGTTCGAATTTTTTTTGTCCAAGATTGAATACGGTGT\\n')\n+\t\tf.write('TGGTCATGTGATAGTATTTTCACGGTATTTAAACGGAAATTTTACCTACGGCCCCCcGCC\\n')\n+\t\tf.write('TATGTACATCATGAGGATGCACGATGGAGATGAAATGCTTCGTTTCATACTGAGTGGCCG\\n')\n+\t\tf.write('ATTGATGGACGCGTGAACGTCAAAGTAACTTCCATTTGTTTCCTGTCGTTGTCCGGTAGC\\n')\n+\t\tf.write('CTGGCAAATAGTGACATGTTTGATGTCTTTGGAACAGGGTGGAAACTTTTTTTGGGTtGC\\n')\n+\t\tf.write('AAATCGTCAAGAGTCACGTGTCCACACAGTGGCGCAAGTGGGGGCTTAAAATGGTGGACT\\n')\n+\t\tf.write('TGCGCGAGGGCGATTTCTTTTCTTCCCCAACCACAATCTATCGTGATCAAAGAAGCACCC\\n')\n+\t\tf.write('ATCCGGTTGTTGAGGTATGCCAGCCAGCATCCAATGATGCGCTCAAACAACAACTAACTC\\n')\n+\t\tf.write('TCATACAGATCGCCATGCAGATTTTCGTGAAGACCCTCACGGGCAAGACTATCACCCTcG\\n')\n+\t\tf.write('AGGTGGAGTCCTCCGACACCATTGACAATGTCAAGACCAAGATTCAGGGTACGTGGCCTC\\n')\n+\t\tf.write('GCGCAAGCATTTGTAACCATCTTCTAACCCTCTGCCGTAGACAAGGAAGgAATCCCCCCA\\n')\n+\t\tf.write('GACCAGCAGCGCTTGATCTTCGCCGGCAAGCAGCTCGAAGATGGACGCACCCTATCGGAC\\n')\n+\t\tf.write('TACAACATCCAGAAGGAGTCGACCCTCCACCTCGTCCTCCGACTCCGTGGTGGTATGGCC\\n')\n+\t\tf.write('AAGAaGCGCAAGAAGAAGGTCTACACCACCCcAAAGAaGATCAAGCACAAGCGCAAGAaG\\n')\n+\t\tf.write('ACCAAGTTGGCTGTCcTCAAGTACTACAAGGTGGATGGCGATGGAAAgaTCGAGcGCcTC\\n')\n+\t\tf.write('CGACGAGAGTGCCcAGCTGACGAGTGTGGTgCCGGTgTCTTCATGGCTGCCATGCACAAC\\n')\n+\t\tf.write('CGtcAATACTGCGGAAAGTGCCACCTCACCtACGTTTTCGACGAGAGCAAGTAGACGGTT\\n')\n+\t\tf.write('CTGGAAAGCATACAATGACAACCATGATTATGCTGTGGGATGATGGATGCTGAACAAAGG\\n')\n+\t\tf.write('GAGGGAATAGCGACGGACACCGCGTTCTGTAGCTACACAATCAAGCAAAACCTTCACCTC\\n')\n+\t\tf.write('GATAGCGGGCCTCGATCCATGTACTATCTTATGATATGTGACATTGAATGTGCGAGTGTT\\n')\n+\t\tf.write('CATTATTCTTTCGCCGTAGCTTCGTTCTCTCTGTGTTCACCCAACCTCCAAGAATCCCGA\\n')\n+\t\tf.write('CAGTTTCTGGCGATGTTCAAGAATGATATTGCCAGAAATTTATCGGTACCGAACCCTAGA\\n')\n+\t\tf.write('TAAAAGGATGGGTTCGGAGTTAACGTAATTTCGAAGGGCAGATGCCATGTGCTTGGGTGC\\n')\n+\t\tf.write('CGCGTTACGAGAATTACTATGATGGTGCTGTGTATCTTCACAATGGCAACATGGCAGAAC\\n')\n+\t\tf.write('AACGCCAAGAAGTTTCTCCAATGTCAACCCAAGGTCTCCGGACGAATCTAGATGCACCCT\\n')\n+\t\tf.write('GACTGCATCGTTATCGGTCATGCAAAAGCACGCCTAATGTGCAGCGTGATGACAAGGTGG\\n')\n+\t\tf.write('AAAaGGAGGCAAATGCTGGTCTAGTGTCTTGTGGCAGAAGTCAGAGTCACGATGAGCAAG\\n')\n+\t\tf.write('CATGGCGCATGAGGGTTGCCTCCGCAAGCATCCGACAGAGTAGAGACAGAGAAAACACCA\\n')\n+\t\tf.write('CAGCAAAAGATGTCACGGCACTCTCGATTCGCTTGTGACGGCCAAGTCTAGGTCCAGGTC\\n')\n+\t\tf.write('CAGGTCCAGGATACGGATACAAGGCGAATGTGGACACCCCAGCCTCCCGATTAGGGAACT\\n')\n+\t\tf.write('ATACAGCGGGCTGGGTTTATGAATAATGAATCCAATTGCAGCATGAGATCATAGCGTTTG\\n')\n+\t\tf.write('TCTAAGAGGCTTTTTGCTACTGTACATGGCGATTGGCGAGTGTTGGGGGTTAGGTGAGAC\\n')\n+\t\tf.write('GGTCTTGGCTGGCAACCCTGTGAGCGCAAAGATGGAGAAGGGAATGGCAGGCGATCAGGA\\n')\n+\t\tf.write('CGATTGTTTCATGGATGAGCAATGGGCATCTCAAaGAAACACGGATCGTTTCTGATGGAC\\n')\n+\t\tf.write('AGACGGGCCAAGACTGACCAAGCATTTGTGAAATTGGGACAGGAGAAGGGATGGTGG
CTT\\n')\n+\t\tf.write('CGACGTGCTGCAGCTGCATACTGTGTAGCTGCGTGTTGATAGCTGCACAACTGCATTGGC\\n')\n+\t\tf.write('TCGACCAGACACATTCCCGGAGCGTTATGCATCCAGCGCCTGATTCGCTTGGGACTTGGG\\n')\n+\t\tf.write('TCGCGACTCGCGAGAGAATTGGTACTCGTAGTCGGTACCTAAGCTGCACCTTGTCCCCGA\\n')\n+\t\tf.write('AGTAGACTGTCGAGACTGTATAGtAGAGGTCGAGGTATATTCTATGCTATACTGTACATT\\n')\n+\t\tf.write('ATTGAAGTGCTCCCATCATATCACCAACCCTCCCGCCTCGTCTTCCATGTCcGTTTCCGC\\n')\n+\t\tf.write('CTTCACATTTCAAAAAGTCGTTGAGTGGTCCTAGTGCTCAATTAAATTGCTTACCAGAGC\\n')\n+\t\tf.write('GTCAGAGCTCTCCTGGAGTCCTGGCAAAAGCCAaaGCTCAACGACCAATTGCCCCATCCT\\n')\n+\t\tf.write('CTCTCCTACCGAGTACTCCGTACCGCTCATCTCGTCAACCACCACCGCAGACAACAATCG\\n')\n+"..b"CCA\\n')\n+\t\tf.write('AAAATTGGGCGCTTTGAAAAAACGGAACTTCCCTATCTCTTAGGATCGACTAACCCTGGA\\n')\n+\t\tf.write('CCAACTGATGTTCTCCAGGAACCTTTCTCCACTTCAGTCTTCAAAGTTCTCATTTGAATA\\n')\n+\t\tf.write('TTTGCTACTACCACCAAGATCTGCACTAGAGGCTGTTTCACTCCGGTTCACACCAAGAGC\\n')\n+\t\tf.write('TTCTTAACAGTTTATAAAAACCTCCACGCCTGCCTACTCGTTATTGCTTCGCTTTTACAA\\n')\n+\t\tf.write('TAACGGCAGAGTATGGGTAACACGCTTAAGCGCCATCCATTTTCAGGGctAGTTCATTCG\\n')\n+\t\tf.write('GCAGGTGAGTTGTTACACACTCCTTAGCGGATTCCGACTTCCATGGCCACCGTCCTGCTG\\n')\n+\t\tf.write('TCTAGATGAACTAACACCTTTTGTGGTGTCTGATGAGCGTGTATTCCGGCACCTTAACTC\\n')\n+\t\tf.write('TGCGTTCGGTTCATCCCGCATCGCCAGTTCTGCTTACCAAAAaTGGCCCACTAGAAACTC\\n')\n+\t\tf.write('TGCATTCAATGACCTGCTTCAATTAAGCAAACAGGTCGTCTTACATATTTAAAGTTTGAG\\n')\n+\t\tf.write('AGTGGTTGAAGGGCGTTTAGCCCCCCGAGACCCCAATCATTCGCTTTACCACATAAAACT\\n')\n+\t\tf.write('GCGTATAAGTTTCTGCTATCCTGAGGGAAACTTCGGCAGGAACCAGCTACTAGATGGTTC\\n')\n+\t\tf.write('GATTAGtCTTTcGCCCCTATaCCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC\\n')\n+\t\tf.write('GAGCCTCCACCAGAGTTTCCTCTGGCTTCACCCTATTCAGGCATAGTTCACCATCTTTCG\\n')\n+\t\tf.write('GGTCCCATCATTAGTGCTTTGTCTCGGTCAATTCAGTATAAAACGTCAGCGCCGGACGAT\\n')\n+\t\tf.write('ACTGCCTCCTTAATGGATTCGTATCAATCAGTTTCCTTACGCATATGGGTTTGGCACCCA\\n')\n+\t\tf.write('AATACTCGCACTAATGGTGGACTCCTTGGTCCGTGTTTCAAGACGGGTCATTTAGAGTCA\\n')\n+\t\tf.write('TTAAGCCAACAACCTAAGCGAATAGAAGTATAACCAAAAAGATCAACCTTGATACCGTAG\\n')\n+\t\tf.write('TACCTCAGAAAACCTTCCTGGAAAACTCGCCAATAAGCATTCGCTGCGTTCCTCAATCCA\\n')\n+\t\tf.write('ACCCAAGGTATTTTCTAAGGGACTATAACACCCACAAGTGGGCCACATTTCCCCTAGTTT\\n')\n+\t\tf.write('TTTCCCTCAAGTCAAATTGTCGTTGGCAGGCATAGCCTGCAAGTGCATCCAGGCCGAAGC\\n')\n+\t\tf.write('CTAGATTGATTACAGACAAGCCAGTCTGGCTCCAAACGGTTCCCTTTTAACAATTTCACA\\n')\n+\t\tf.write('TACTGTTTAACTCTCTTTTCAAAGTTCTTTTCATCTTTCCCTCACGGTACTTGTTCGCTA\\n')\n+\t\tf.write('TCGGTTTCTCGCCAATATTTAGCTTTAGGTGAGATTTACCACCCAATTTAGGCTGCATTC\\n')\n+\t\tf.write('CCAAACAACCTGACTCTTTGAAAGCGTATCACAAAAGGCAAATGCTCAAGCCAAAGACGG\\n')\n+\t\tf.write('GATTCTCACCCTCTATGATGCCCTGTTCCAAAGGACTTATTTACTCGGCTTGCCTGGAAA\\n')\n+\t\tf.write('ACACTTCTACAGTCTACAATCCGGTTTAGCTAGGCCAAACAGGTTCCAACTTTGAGCTCT\\n')\n+\t\tf.write('TTCCTCTTCACTCGCCGTTACTAGGGAAATCATTGTTATTTTCTTTTCCTCCGCTTATTG\\n')\n+\t\tf.write('ATATGCTTAAGTTCAGCGGGTAATCCCACCTGACTTCAGATCATAGTTTGAAAGTTACTG\\n')\n+\t\tf.write('GATTATACTCTTGTACTTTACTTCCTGGGCGAACCAAAAAAAAaGATCCTGAGACCAGCG\\n')\n+\t\tf.write('TAATATTCCTGCCTAGCAAGCCAGACAGAAAATCACACACATTTTAGGTGCTCACTGTAA\\n')\n+\t\tf.write('TAAAACAGCGATGCGACCCATCACCACATAAACAAATGTTATGTGTGGGTTTGTGATGAT\\n')\n+\t\tf.write('ACTGAAGCAGGCGTACTCTATAGAAAAACCAT\\n')\n+\t\tf.write('>Vein_ctg_10639\\n')\n+\t\tf.write('ACAAACAGACAAACAGACAAACAGACAAACAGACAAACAGACAAACAGAGAGGCAGACAA\\n')\n+\t\tf.write('ACAGAGAGGCAGACAAGCAGACAAACTTAACATAATGCTTGCATACAAGTATCCTTGAAG\\n')\n+\t\tf.write('ATCAGAAGCCAAGTGTCAAACTGCTAAAACTGAATTACATAAGTGAATCTAGATAAAGAA\\n')\n+\t\tf.write('TCACATGTGGGGGAAGAACATTAA
ACTAATACTGTTTACATAAAAAAAAaGCAAAAAAAA\\n')\n+\t\tf.write('ATAATATTTAATTTGTATAGCTGAAAGTGTTCCCGTAAGGAACAAATTCAATGACAAGGG\\n')\n+\t\tf.write('CTTAATCTCAGTACATCGTAGCAACAAAGGCTACTCTAGTACTTACAATACCCCGTCCAT\\n')\n+\t\tf.write('TTCATGTCGTCTGCATGCGATTTATCACTTTGATCATTTGCATTATCATCACAGGGTAGT\\n')\n+\t\tf.write('GAATCACAGCATTTCCGCTGCAAAGCCTATCCCGCAAGTAAGGTTTTCAAGCCGAAGCTT\\n')\n+\t\tf.write('TATTTGTACACAACTAGTACAATCAAAGCACAGTAGTATCGCTTCCAGCATGGATTCTGA\\n')\n+\t\tf.write('CTTAGAGGCGTTCAGCCATTATCCAGCAGATGGTAGCTTCGCGGCATTGGCCTTTCAACC\\n')\n+\t\tf.write('AGCCGCAAATACCAATTATCTGAATGAAGGGTTCCTCTCGTACTAACTTCAATTACTGTT\\n')\n+\t\tf.write('GCGATACCAATACCATCAGTAGGGTAAAACTAACCTGTCTCACGACGGTCTAAACCCAGC\\n')\n+\t\tf.write('TCACGTTCCCTATTAGTGGGTGAACAATCCAACACTTAATGAATTCTGCTTCATTATGAT\\n')\n+\t\tf.write('AGGAAGAGCCGACATCGAAGAATCAAAAAGCAACGTCGCTATGAACGCTTGGCTGCCACA\\n')\n+\t\tf.write('AGCCAGTTATCCCTGTGGTAACTTTTCTGGCACCTCTAGCCTCAAATCTTGAGATTCTAA\\n')\n+\t\tf.write('AGGATCGATAGGCCACACTTTCATGGTTTGTATTCACACTGAAAATCAAAATCAAGGGGA\\n')\n+\t\tf.write('CTTTTACCCTTTTATTCTACAGGAGATTTCTGTTCTCCTTGAGTCCCCCTTAGGACACCT\\n')\n+\t\tf.write('GCGTTATCTTTTAACAGATGTGCCGCCCCAGCCAAACTCCCCACTTGACAATGTCAATAA\\n')\n+\t\tf.write('CATGGGTCGCACCCTAATGGATGCTTAAAGCTAGAAGGTGAGTCTTGCGACTCAATCCCA\\n')\n+\t\tf.write('CTTAATTATTTAAGTAAAAAAaCAATAGA\\n')\n+\t\tf.close() \n"
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/MockESTBankForBlat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/MockESTBankForBlat.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5996 @@
+class MockESTBankForBlat(object):
+	
+	def write(self, inFileName):
+		f = open(inFileName, 'w')
+		f.write(">gi|226792376|gb|GO546081.1|GO546081 Mdas9010M17_e784.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_850886.1| expressed protein [Arabidopsis thaliana], mRNA sequence\n")
+		[... mock Apple EST FASTA records (headers and sequence lines), truncated in the changeset view ...]
+		f.close() 
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/MockOutputForBlat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/MockOutputForBlat.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,33 @@
+class MockOutputForBlat(object):
+
+ def write(self, inFileName):
+ f = open(inFileName, 'w')
+ f.write('gi|226792305|gb|GO545799.1|GO545799 290 551 Vein_ctg_10639 674 935 1.0e-130 463.0 93.51\n')
+ f.write('gi|226792305|gb|GO545799.1|GO545799 138 283 Vein_ctg_10639 519 664 1.0e-50 198.0 82.88\n')
+ f.write('gi|226792305|gb|GO545799.1|GO545799 577 686 Vein_ctg_10639 961 1070 1.4e-46 184.0 90.91\n')
+ f.write('gi|226792305|gb|GO545799.1|GO545799 566 572 Vein_ctg_10639 944 950 3.1e+05 13.0 100.00\n')
+ f.write('gi|226792293|gb|GO546055.1|GO546055 107 193 Vein_ctg_10638 87 1 2.2e-42 170.0 100.00\n')
+ f.write('gi|226792293|gb|GO546055.1|GO546055 34 91 Vein_ctg_10638 161 104 2.5e-22 103.0 94.83\n')
+ f.write('gi|226792293|gb|GO546055.1|GO546055 521 611 Vein_ctg_10639 1075 985 2.9e-38 156.0 92.31\n')
+ f.write('gi|226790810|gb|GO546215.1|GO546215 57 520 Vein_ctg_5197 1862 1399 8.7e-128 454.0 72.84\n')
+ f.write('gi|226790791|gb|GO546209.1|GO546209 135 199 Vein_ctg_5197 1994 1930 7.4e-25 112.0 93.85\n')
+ f.write('gi|226790791|gb|GO546209.1|GO546209 36 82 Vein_ctg_5197 2126 2080 1.9e-06 50.0 76.60\n')
+ f.write('gi|226790483|gb|GO545656.1|GO545656 280 541 Vein_ctg_10639 674 935 1.0e-130 463.0 93.51\n')
+ f.write('gi|226790483|gb|GO545656.1|GO545656 128 273 Vein_ctg_10639 519 664 1.0e-50 198.0 82.88\n')
+ f.write('gi|226790483|gb|GO545656.1|GO545656 567 640 Vein_ctg_10639 961 1034 1.1e-25 114.0 87.84\n')
+ f.write('gi|226790483|gb|GO545656.1|GO545656 556 562 Vein_ctg_10639 944 950 3.1e+05 13.0 100.00\n')
+ f.write('gi|226790458|gb|GO545644.1|GO545644 1 95 Vein_ctg_10638 1369 1275 3.5e-40 163.0 92.63\n')
+ f.write('gi|226790458|gb|GO545644.1|GO545644 146 185 Vein_ctg_10638 1225 1186 3.7e-07 53.0 82.50\n')
+ f.write('gi|226790049|gb|GO546186.1|GO546186 177 263 Vein_ctg_10638 87 1 5.6e-39 159.0 96.55\n')
+ f.write('gi|226790049|gb|GO546186.1|GO546186 106 161 Vein_ctg_10638 159 104 3.8e-21 99.0 94.64\n')
+ f.write('gi|226790011|gb|GO546174.1|GO546174 138 283 Vein_ctg_10639 519 664 1.0e-50 198.0 82.88\n')
+ f.write('gi|226789634|gb|GO545875.1|GO545875 135 306 Vein_ctg_10638 1446 1275 7.3e-80 294.0 91.86\n')
+ f.write('gi|226789634|gb|GO545875.1|GO545875 451 496 Vein_ctg_10638 1132 1087 1.8e-18 90.0 100.00\n')
+ f.write('gi|226789634|gb|GO545875.1|GO545875 357 393 Vein_ctg_10638 1225 1189 1.8e-05 47.0 81.08\n')
+ f.write('gi|226789597|gb|GO546106.1|GO546106 432 537 Vein_ctg_2286 1166 1061 5.0e-40 162.0 86.79\n')
+ f.write('gi|226789597|gb|GO546106.1|GO546106 538 632 Vein_ctg_2286 1008 914 6.0e-33 139.0 85.26\n')
+ f.write('gi|226789571|gb|GO546096.1|GO546096 353 474 Vein_ctg_5197 2426 2305 4.1e-46 182.0 84.43\n')
+ f.write('gi|226789566|gb|GO545840.1|GO545840 73 232 Vein_ctg_10639 776 935 2.5e-75 279.0 92.50\n')
+ f.write('gi|226789566|gb|GO545840.1|GO545840 258 372 Vein_ctg_10639 961 1075 4.2e-50 196.0 92.17\n')
+ f.write('gi|226789566|gb|GO545840.1|GO545840 247 253 Vein_ctg_10639 944 950 3.1e+05 13.0 100.00\n')
+ f.close() 
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_BlatClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_BlatClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,34 @@
+import unittest
+from commons.launcher.BlatClusterLauncher import BlatClusterLauncher
+
+
+class Test_BlatClusterLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = BlatClusterLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (format='fasta')"
+        exp += "\n     -p: parameters for 'blat'"
+        exp += "\n     -Z: concatenate output files"
+        exp += "\n     -A: same sequences (all-by-all)"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_BlatClusterLauncher ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_BlatProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_BlatProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,98 @@
+import unittest
+from commons.launcher.BlatProgramLauncher import BlatProgramLauncher
+
+
+class Test_BlatProgramLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = BlatProgramLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getHelpAsString( self ):
+        exp = ""
+        exp += "usage: BlatProgramLauncher.py [options]" 
+        exp += "\ngeneric options:"
+        exp += "\n     -h: this help"
+        exp += "\n     -i: name of the input file (format='fasta')"
+        exp += "\n     -c: clean"
+        exp += "\n     -v: verbosity level (default=0/1)"
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (database, format='fasta')"
+        exp += "\n     -p: parameters for 'blat'"
+        exp += "\n     -A: same sequences (all-by-all)"
+        exp += "\n     -o: name of the output file (format='align', default=inFile+'.align')"
+        obs = self._i.getHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setAttributesFromCmdLine( self ):
+        self._i.setAttributesFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+        self._i.setAttributesFromCmdLine( "-o", "dummyOutFile.align" )
+        self.assertEqual( "dummyOutFile.align", self._i.getOutputFile() )
+        
+        
+    def test_setWrapperCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "BlatProgramLauncher.py"
+        exp += " -i %s" % ( "dummyInFile.fa" )
+        exp += " -s %s" % ( "dummySubjectFile.fa" )
+        exp += " -o %s.align" % ( "dummyInFile.fa" )
+        exp += " -v 0"
+        self._i.setWrapperCommandLine()
+        obs = self._i.getWrapperCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setProgramCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        self._i.setProgramParameters( "-minIdentity=90" )
+        exp = "blat dummySubjectFile.fa dummyInFile.fa -minIdentity=90 -out=blast8 dummyInFile.fa.blast"
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setListFilesToKeep( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        lExp = [ "dummyInFile.fa.align" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setListFilesToRemove( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        lExp = [ "dummyInFile.fa.blast" ]
+        self._i.setListFilesToRemove()
+        lObs = self._i.getListFilesToRemove()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setSummary( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "input file: %s" % ( self._i.getInputFile() )
+        exp += "\nsubject file: %s" % ( self._i.getSubjectFile() )
+        exp += "\nparameters: %s" % ( self._i.getProgramParameters() )
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.align" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_BlatProgramLauncher ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_BlatProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_BlatProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,139 @@
+import os
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+from devTools.tests.MockFastaSupctg30 import MockFastaSupctg30
+from devTools.tests.MockMiniProtBankSupctg30 import MockMiniProtBankSupctg30
+from commons.launcher.tests.MockDataBankForBlat import MockDataBankForBlat
+from commons.launcher.tests.MockESTBankForBlat import MockESTBankForBlat
+from commons.launcher.tests.MockOutputForBlat import MockOutputForBlat
+from commons.launcher.BlatProgramLauncher import BlatProgramLauncher
+
+class Test_F_BlatProgramLauncher(unittest.TestCase):
+
+    def test_run_empty_result(self):
+        queryFileName = "smallProtBank_supctg30.fa"
+        mock = MockMiniProtBankSupctg30()
+        mock.write(queryFileName)
+        subjectFileName = "subjectBank.fa"
+        mock = MockFastaSupctg30()
+        mock.write(subjectFileName)
+        
+        expOutputFileName = "expBlatOutput.blast.align"
+        self.writeEmptyExpOutputFile(expOutputFileName)
+        
+        obsOutputFileName = "obsBlatOutput.blast.align"
+
+        bpl = BlatProgramLauncher()
+        bpl.setInputFile(queryFileName)
+        bpl.setSubjectFile(subjectFileName)
+        bpl.setOutputFile(obsOutputFileName)
+        bpl.setProgramParameters("")
+        bpl.run()
+        
+        blastFileName = queryFileName + ".blast"
+        
+        self.assertTrue(os.path.exists(obsOutputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        
+        os.remove(queryFileName)
+        os.remove(subjectFileName)
+        os.remove(blastFileName)
+        os.remove(expOutputFileName)
+        os.remove(obsOutputFileName)
+
+    def test_as_script_empty_result(self):
+        queryFileName = "smallProtBank_supctg30.fa"
+        mock = MockMiniProtBankSupctg30()
+        mock.write(queryFileName)
+        subjectFileName = "subjectBank.fa"
+        mock = MockFastaSupctg30()
+        mock.write(subjectFileName)
+        
+        expOutputFileName = "expBlatOutput.blast.align"
+        self.writeEmptyExpOutputFile(expOutputFileName)
+        
+        obsOutputFileName = "obsBlatOutput.blast.align"
+        
+        cmd2Launch = "python ../BlatProgramLauncher.py -s " + subjectFileName + " -i " + queryFileName + " -p '' -o " + obsOutputFileName
+        
+        os.system(cmd2Launch)
+        
+        blastFileName = queryFileName + ".blast"
+                
+        self.assertTrue(os.path.exists(obsOutputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        
+        os.remove(queryFileName)
+        os.remove(subjectFileName)
+        os.remove(blastFileName)
+        os.remove(expOutputFileName)
+        os.remove(obsOutputFileName)
+
+    def test_run(self):
+        queryFileName = "smallESTBank.fa"
+        mock = MockESTBankForBlat()
+        mock.write(queryFileName)
+        subjectFileName = "subjectBank.fa"
+        mock = MockDataBankForBlat()
+        mock.write(subjectFileName)
+        
+        expOutputFileName = "expBlatOutput.blast.align"
+        mock = MockOutputForBlat()
+        mock.write(expOutputFileName)
+        
+        obsOutputFileName = "obsBlatOutput.blast.align"
+
+        bpl = BlatProgramLauncher()
+        bpl.setInputFile(queryFileName)
+        bpl.setSubjectFile(subjectFileName)
+        bpl.setOutputFile(obsOutputFileName)
+        bpl.setProgramParameters("")
+        bpl.run()
+        
+        blastFileName = queryFileName + ".blast"
+        
+        self.assertTrue(os.path.exists(obsOutputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        
+        os.remove(queryFileName)
+        os.remove(subjectFileName)
+        os.remove(blastFileName)
+        os.remove(expOutputFileName)
+        os.remove(obsOutputFileName)
+
+    def test_as_script(self):
+        queryFileName = "smallESTBank.fa"
+        mock = MockESTBankForBlat()
+        mock.write(queryFileName)
+        subjectFileName = "subjectBank.fa"
+        mock = MockDataBankForBlat()
+        mock.write(subjectFileName)
+        
+        expOutputFileName = "expBlatOutput.blast.align"
+        mock = MockOutputForBlat()
+        mock.write(expOutputFileName)
+        
+        obsOutputFileName = "obsBlatOutput.blast.align"
+        
+        cmd2Launch = "python ../BlatProgramLauncher.py -s " + subjectFileName + " -i " + queryFileName + " -p '' -o " + obsOutputFileName
+        
+        os.system(cmd2Launch)
+        
+        blastFileName = queryFileName + ".blast"
+                
+        self.assertTrue(os.path.exists(obsOutputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        
+        os.remove(queryFileName)
+        os.remove(subjectFileName)
+        os.remove(blastFileName)
+        os.remove(expOutputFileName)
+        os.remove(obsOutputFileName)
+        
+    def writeEmptyExpOutputFile(self, outputFileName):
+        f = open(outputFileName, "w")
+        f.write("")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchBlastclust.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchBlastclust.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,103 @@
+import unittest
+import os
+from commons.launcher.LaunchBlastclust import LaunchBlastclust
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_LaunchBlastclust(unittest.TestCase):
+
+    def setUp(self):
+        self._inputFileName = "DmelChr4_LTRharvest_expected_with_default_params.fa"
+        os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], self._inputFileName), self._inputFileName)
+        self._outputFileName = "%s_Blastclust.fa" % self._inputFileName
+        self._outBlastclustFileName = "%s_blastclust.txt" % self._inputFileName
+        
+    def tearDown(self):
+        if os.path.exists(self._outputFileName):
+            os.remove(self._outputFileName)
+        if os.path.exists(self._outBlastclustFileName):
+            os.remove(self._outBlastclustFileName)
+        os.remove(self._inputFileName)
+
+    def test_run(self):
+        iLaunchBlastclust = LaunchBlastclust(self._inputFileName, clean = True)
+        iLaunchBlastclust.run()
+        expFileName = "%s/Tools/DmelChr4_LTRharvest_Blastclust_expected.fa" % os.environ["REPET_DATA"]
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        
+    def test_run_with_tweaked_params(self):
+        iLaunchBlastclust = LaunchBlastclust(self._inputFileName, clean = True)
+        iLaunchBlastclust.setBothSequences("F")
+        iLaunchBlastclust.setIdentityThreshold(0)
+        iLaunchBlastclust.setIsBlastToMap()
+        iLaunchBlastclust.run()
+        
+        faExpFileName = "%s/Tools/DmelChr4_LTRharvest_Blastclust_expected_with_tweaked_params.fa" % os.environ["REPET_DATA"]
+        mapExpFileName = "%s/Tools/DmelChr4_LTRharvest_Blastclust_expected_with_tweaked_params.map" % os.environ["REPET_DATA"]
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(faExpFileName, self._outputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(mapExpFileName, "%s.map" % os.path.splitext(self._outputFileName)[0]))
+        
+        os.remove("%s.map" % os.path.splitext(self._outputFileName)[0])
+      
+    def test_run_with_header_options(self):
+        inFileName = "dummyHeaderForTEdenovo.fa"
+        expFileName = "expDummyHeaderForTEdenovo.fa"
+        self._writeInputFile_header_options(inFileName)
+        self._writeExpFile_header_options(expFileName)
+        obsFileName = "%s_Blastclust.fa" % inFileName
+        
+        iLaunchBlastclust = LaunchBlastclust(inFileName, clean = True)
+        iLaunchBlastclust.setIsHeaderForTEdenovo(True)
+        iLaunchBlastclust.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(inFileName)
+        os.remove(expFileName)
+        os.remove(obsFileName)
+        os.remove("%s_blastclust.txt" % inFileName)
+          
+    def test_run_as_script(self):
+        cmd = "LaunchBlastclust.py -i %s" % self._inputFileName
+        os.system(cmd)
+        expFileName = "%s/Tools/DmelChr4_LTRharvest_Blastclust_expected.fa" % os.environ["REPET_DATA"]
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        
+    def _writeInputFile_header_options(self, inFileName):
+        f = open(inFileName,"w")
+        f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n")
+        f.write("CATTAGATTCAAGGCATCATGGATCAGCACATTTACACAGATATCCTGGAAAATGTGATG\n")
+        f.write("CTGCCATATGCCGGGGATGAAATGCCGTTGGTTTGGACATTTCAACAGGATAACGATTCA\n")
+        f.write("AAACACACGAGCAAGAAAGCTTGAAAGTGGTTTGAGCAGAAATCGATCCGAGTAATGAAA\n")
+        f.write("TGGCCTGCTCTGTCATCCGACTTGAATCCAATCGAAAACCTTTGGGCGGACGTGGAAAAA\n")
+        f.write(">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n")
+        f.write("CATTAGATTCAAGGCATCATGGATCAGCACATTTACACAGATATCCTGGAAAATGTGATG\n")
+        f.write("CTGCCATATGCCGGGGATGAAATGCCGTTGGTTTGGACATTTCAACAGGATAACGATTCA\n")
+        f.write("AAACACACGAGCAAGAAAGCTTGAAAGTGGTTTGAGCAGAAATCGATCCGAGTAATGAAA\n")
+        f.write("TGGCCTGCTCTGTCATCCGACTTGAATCCAATCGAAAACCTTTGGGCGGACGTGGAAAAA\n")
+        f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
+        f.write("TACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACGATTT\n")
+        f.write("TTTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAG\n")
+        f.write("AGCGGAGAGCGCTACAGCGAACAGCTCTTTTCAACGCATAAAGTGATAGCAGACAACTGT\n")
+        f.close()
+        
+    def _writeExpFile_header_options(self, expFileName):
+        f = open(expFileName,"w")
+        f.write(">DTX-incomp_Blc1_DmelChr4-B-R9-Map3_reversed\n")
+        f.write("CATTAGATTCAAGGCATCATGGATCAGCACATTTACACAGATATCCTGGAAAATGTGATG\n")
+        f.write("CTGCCATATGCCGGGGATGAAATGCCGTTGGTTTGGACATTTCAACAGGATAACGATTCA\n")
+        f.write("AAACACACGAGCAAGAAAGCTTGAAAGTGGTTTGAGCAGAAATCGATCCGAGTAATGAAA\n")
+        f.write("TGGCCTGCTCTGTCATCCGACTTGAATCCAATCGAAAACCTTTGGGCGGACGTGGAAAAA\n")
+        f.write(">DTX-incomp_Blc1_DmelChr4-B-R10-Map3_reversed\n")
+        f.write("CATTAGATTCAAGGCATCATGGATCAGCACATTTACACAGATATCCTGGAAAATGTGATG\n")
+        f.write("CTGCCATATGCCGGGGATGAAATGCCGTTGGTTTGGACATTTCAACAGGATAACGATTCA\n")
+        f.write("AAACACACGAGCAAGAAAGCTTGAAAGTGGTTTGAGCAGAAATCGATCCGAGTAATGAAA\n")
+        f.write("TGGCCTGCTCTGTCATCCGACTTGAATCCAATCGAAAACCTTTGGGCGGACGTGGAAAAA\n")
+        f.write(">PotentialHostGene-chim_Blc2_DmelChr4-B-R4-Map5_reversed\n")
+        f.write("TACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACGATTT\n")
+        f.write("TTTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAG\n")
+        f.write("AGCGGAGAGCGCTACAGCGAACAGCTCTTTTCAACGCATAAAGTGATAGCAGACAACTGT\n")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchLastZ.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchLastZ.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,31 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchLastZ import LaunchLastZ
+
+class Test_F_LaunchLastZ(unittest.TestCase):
+    
+    def setUp(self):
+        self._inFastaFileName = "%s/Tools/input_TestFLastZ.fa" % os.environ["REPET_DATA"]
+        self._expAxtFileName  = "%s/Tools/exp_TestFLastZ.axt" % os.environ["REPET_DATA"] 
+        self._obsAxtFileName = "obsLastZ.axt"
+        
+    def tearDown(self):
+        try:
+            os.remove(self._obsAxtFileName)
+        except:pass
+        
+    def test_run(self):
+        ilauncher = LaunchLastZ(self._inFastaFileName, self._inFastaFileName, self._obsAxtFileName,verbosity=1)
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expAxtFileName, self._obsAxtFileName))
+        
+    def test_run_as_script(self):
+        #FIXME : change path 
+        cmd = 'python %s/commons/launcher/LaunchLastZ.py -q %s -r %s -o %s' % (os.environ["REPET_PATH"], self._inFastaFileName, self._inFastaFileName, self._obsAxtFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expAxtFileName, self._obsAxtFileName))

+                       
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchMCL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchMCL.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,87 @@
+from commons.core.utils.FileUtils import FileUtils
+import unittest
+import os
+import shutil
+from commons.launcher.LaunchMCL import LaunchMCL
+
+class Test_F_LaunchMCL(unittest.TestCase):
+# TODO: test query coverage filter
+
+    def setUp(self):
+        self._inputFileName = "DmelChr4_LTRharvest_expected_with_default_params.fa"
+        try:
+            shutil.rmtree("MCLtmpDirectory")
+            os.remove(self._inputFileName)
+        except:
+            pass
+        try:
+            os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], self._inputFileName), self._inputFileName)
+        except:
+            pass
+        self._outputFileName = "%s_MCL.fa" % os.path.splitext(self._inputFileName)[0]
+        
+    def tearDown(self):
+        try:
+            os.remove(self._inputFileName)
+            os.remove(self._outputFileName)
+            shutil.rmtree("MCLtmpDirectory")
+        except:
+            pass
+        try:
+            os.remove("DmelChr4_LTRharvest_expected_with_default_params_MCL.map")
+        except:
+            pass    
+        
+    def test_run(self):
+        iLaunchMCL = LaunchMCL(self._inputFileName, doClean = True, verbosity = 1)
+        iLaunchMCL.run()
+        expFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_expected.fa" % os.environ["REPET_DATA"]
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+
+    def test_run_with_map_file(self):
+        iLaunchMCL = LaunchMCL(self._inputFileName, isCluster2Map = True, doClean = False, verbosity = 1)
+#        iLaunchMCL.setCoverageThreshold(0.60)
+        iLaunchMCL.run()
+        expFastaFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_expected.fa" % os.environ["REPET_DATA"]
+        expMapFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_expected.map" % os.environ["REPET_DATA"]
+        obsMapFileName = "%s.map" % os.path.splitext(self._outputFileName)[0]
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFastaFileName, self._outputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expMapFileName, obsMapFileName))
+        
+        os.remove(obsMapFileName)
+
+    def test_run_clusterConsensus_headers(self):
+        os.remove(self._inputFileName)
+        self._inputFileName = "DmelChr4_LTRharvest_MCL_ClusterConsHeaders_input.fa"
+        os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], self._inputFileName), self._inputFileName)
+        iLaunchMCL = LaunchMCL(self._inputFileName, isClusterConsensusHeaders = True, doClean = True, verbosity = 1)
+        iLaunchMCL.run()
+        expFastaFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_ClusterConsHeaders_expected.fa" % os.environ["REPET_DATA"]
+        self._outputFileName = "%s_MCL.fa" % os.path.splitext(self._inputFileName)[0]
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFastaFileName, self._outputFileName))
+      
+    def test_run_as_script(self):
+        cmd = "LaunchMCL.py -i %s -j -v 6" % self._inputFileName
+        os.system(cmd)
+        expFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_expected.fa" % os.environ["REPET_DATA"]
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        self.assertTrue(FileUtils.isRessourceExists("MCLtmpDirectory"))
+
+    def test_run_as_script_allParams(self):
+        cmd = "LaunchMCL.py -i %s -j -I 1.5 -T 0.0 -m -v 6" % self._inputFileName
+        os.system(cmd)
+        expFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_expected.fa" % os.environ["REPET_DATA"]
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        self.assertTrue(FileUtils.isRessourceExists("MCLtmpDirectory"))
+
+    def test_run_as_script_without_join(self):
+        cmd = "LaunchMCL.py -i %s -v 2" % self._inputFileName
+        os.system(cmd)
+        expFileName = "%s/Tools/DmelChr4_LTRharvest_MCL_expected.fa" % os.environ["REPET_DATA"]
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        self.assertTrue(FileUtils.isRessourceExists("MCLtmpDirectory"))
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchMap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchMap.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,41 @@
+import unittest
+import os
+import subprocess
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchMap import LaunchMap
+
+class Test_F_LaunchMap(unittest.TestCase):
+
+    def setUp(self):
+        self._fastaFileName = "LaunchMap_seqCluster1.fa"
+        fastaFilePath = "%s/Tools/%s" % (os.environ["REPET_DATA"], self._fastaFileName)
+        os.symlink(fastaFilePath, self._fastaFileName)
+        self._obsFileName = "%s.fa_aln" % self._fastaFileName
+
+    def tearDown(self):
+        os.remove(self._fastaFileName)
+        try:
+            os.remove(self._obsFileName)
+        except: pass
+
+    def test_run_as_script(self):
+        expFileName = "%s/Tools/expLaunchMap_seqCluster1.fa.fa_aln" % os.environ["REPET_DATA"]
+        obsFileName = "obs.fa_aln"
+        cmd = "LaunchMap.py "
+        cmd += "-i %s " % self._fastaFileName
+        cmd += "-o %s " % obsFileName
+        cmd += "-c "
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        
+    def test_run(self):
+        expFileName = "%s/Tools/expLaunchMap_seqCluster1.fa.fa_aln" % os.environ["REPET_DATA"]
+        iLaunchMap = LaunchMap(self._fastaFileName)
+        iLaunchMap.setDoClean(True)
+        iLaunchMap.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._obsFileName))
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchMatcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchMatcher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,63 @@
+from commons.core.utils.FileUtils import FileUtils
+import unittest
+import os
+import subprocess
+from commons.launcher.LaunchMatcher import LaunchMatcher
+
+class Test_F_LaunchMatcher(unittest.TestCase):
+
+    def setUp(self):
+        self._inFileName = "DmelChr4.align"
+        self._fastaFileName = "DmelChr4.fa"
+        inFilePath = "%s/Tools/%s" % (os.environ["REPET_DATA"], self._inFileName)
+        inFastaPath = "%s/Tools/%s" % (os.environ["REPET_DATA"], self._fastaFileName)
+        try:
+            os.remove(self._inFileName)
+        except:
+            pass
+        os.symlink(inFilePath, self._inFileName)
+        os.symlink(inFastaPath, self._fastaFileName)
+        self._iLaunchMatcher = LaunchMatcher(align=self._inFileName, queryFileName=self._fastaFileName, subjectFileName=self._fastaFileName, doJoin=True, evalue="1e-20", keepConflict=True)
+        self._iLaunchMatcher.setVerbosity(2)
+
+    def tearDown(self):
+        try:
+            FileUtils.removeFilesByPattern("%s*" % self._inFileName)
+            os.remove(self._fastaFileName)
+        except:
+            pass
+
+    def test_run_as_class_1_file(self):
+        expPathFileName = "%s/Tools/DmelChr4.align.match.path" % os.environ["REPET_DATA"]
+        expTabFileName = "%s/Tools/DmelChr4.align.match.tab" % os.environ["REPET_DATA"]
+        
+        obsPathFileName = "%s.match.path" % self._inFileName
+        obsTabFileName = "%s.match.tab" % self._inFileName
+        
+        self._iLaunchMatcher.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expPathFileName, obsPathFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expTabFileName, obsTabFileName))
+
+    def test_run_as_script(self):
+        expPathFileName = "%s/Tools/DmelChr4.align.match.path" % os.environ["REPET_DATA"]
+        expTabFileName = "%s/Tools/DmelChr4.align.match.tab" % os.environ["REPET_DATA"]
+
+        cmd = "LaunchMatcher.py "
+        cmd += "-a %s " % self._inFileName
+        cmd += "-q %s " % self._fastaFileName
+        cmd += "-s %s " % self._fastaFileName
+        cmd += "-o DmelChr4.align "
+        cmd += "-j "
+        cmd += "-k "
+        cmd += "-e 1e-20 "
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        obsPathFileName = "%s.match.path" % self._inFileName
+        obsTabFileName = "%s.match.tab" % self._inFileName
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expPathFileName, obsPathFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expTabFileName, obsTabFileName))
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchMummerPlot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchMummerPlot.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,166 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchMummerPlot import LaunchMummerPlot
+
+class Test_F_LaunchMummerPlot(unittest.TestCase):
+    
+    def setUp(self):
+        self._queryFastaFileName = "query.fa"
+        self._refFastaFileName = "ref.fa"
+        self._writeInputFastaFiles()
+        
+        self._inputDeltaFileName = "inputNucmer.delta"
+        self._writeInputDeltaFileName()
+        
+        self._expGpFileName = "expMummerPlot.gp"
+        self._obsGpFileName = "obsMummerPlot.gp"
+        self._obsPngFileName = "obsMummerPlot.png"
+        
+    def tearDown(self):
+        [try/except removal of the query, ref, delta, .gp and .png files created by the test; truncated in the changeset view]
+    
+    def test_run(self):
+        self._writeExpGpFile()
+        ilaunchMummerPlot = LaunchMummerPlot(self._inputDeltaFileName, self._refFastaFileName, self._queryFastaFileName,"obsMummerPlot",fat=True, filter=True, clean=True, verbosity=1)
+        ilaunchMummerPlot.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGpFileName, self._obsGpFileName))
+        
+    def test_run_as_script(self):
+        self._writeExpGpFile()
+        cmd = 'LaunchMummerPlot.py --input %s --Qfile %s --Rfile %s --prefix %s --fat --filter --clean' % (self._inputDeltaFileName, self._refFastaFileName, self._queryFastaFileName, "obsMummerPlot")
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGpFileName, self._obsGpFileName))
+        
+    def _writeInputFastaFiles(self):
+        [writes query.fa and ref.fa with mock SINE consensus sequences; truncated in the changeset view]
+        
+    def _writeExpGpFile(self):
+        [writes the expected gnuplot script expMummerPlot.gp; truncated in the changeset view]
+        
+    def _writeInputDeltaFileName(self):
+        [writes the input NUCMER delta file inputNucmer.delta; truncated in the changeset view]
+        
+if __name__ == "__main__":
+    unittest.main()
+        
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchNucmer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchNucmer.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,153 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchNucmer import LaunchNucmer
+
+class Test_F_LaunchNucmer(unittest.TestCase):
+    
+    def setUp(self):
+        self._queryFastaFileName = "query.fa"
+        self._refFastaFileName = "ref.fa"
+        self._writeInputFastaFiles()
+        self._expDeltaFileName = "expNucmer.delta"
+        self._writeExpDeltaFile()
+        self._obsDeltaFileName = "obsNucmer.delta"
+        self._obsCoordFileName= "obsNucmer.coords"
+        self._expCoordFileName= "expNucmer.coords"
+        
+    def tearDown(self):
+        try:
+            os.remove(self._queryFastaFileName)
+        except:pass
+        try:
+            os.remove(self._refFastaFileName)
+        except:pass
+        try:
+            os.remove(self._expDeltaFileName)
+        except:pass
+        try:
+            os.remove(self._obsDeltaFileName)
+        except:pass
+        
+    def test_run(self):
+        ilauncher = LaunchNucmer(self._refFastaFileName, self._queryFastaFileName,"obsNucmer",verbosity=1)
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expDeltaFileName, self._obsDeltaFileName))
+        
+    def test_run_as_script(self):
+        cmd = 'LaunchNucmer.py --query %s --ref %s --prefix %s' % (self._refFastaFileName, self._queryFastaFileName, "obsNucmer")
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expDeltaFileName, self._obsDeltaFileName))
+        
+    def test_run_as_script_gencoords_output(self):
+        self._writeExpGenCoordsFile()
+        cmd = 'LaunchNucmer.py --query %s --ref %s --prefix %s --gencoords' % (self._refFastaFileName, self._queryFastaFileName, "obsNucmer")
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expCoordFileName, self._obsCoordFileName))
+        os.remove(self._obsCoordFileName)
+        os.remove(self._expCoordFileName)
+    
+    def test_run_as_script_with_mincluster(self):
+        cmd = 'LaunchNucmer.py --query %s --ref %s --prefix %s --mincluster 30' % (self._refFastaFileName, self._queryFastaFileName, "obsNucmer")
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expDeltaFileName, self._obsDeltaFileName))
+        
+    def test_run_as_script_showcoords_output(self):
+        self._writeExpShowCoordsFile()
+        cmd = 'LaunchNucmer.py --query %s --ref %s --prefix %s --showcoords' % (self._refFastaFileName, self._queryFastaFileName, "obsNucmer")
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expCoordFileName, self._obsCoordFileName))
+        os.remove(self._obsCoordFileName)
+        os.remove(self._expCoordFileName)
+        
+    def _writeInputFastaFiles(self):
+        InputFile = open(self._queryFastaFileName, 'w')
+        InputFile.write('>Bovc-tA2:classI:SINE:SINE2/tRNA\n')
+        InputFile.write('GGGCTTCCCTGGTAGCTCAGCTGGTAAAGAATCCGCCTGCAATGCAGGAGACCCCGGTTC\n')
+        InputFile.write('GATTCCTGGGTCGGGAAGATCCCCTGGAGAAGGGATAGGCTACCCACTCCAGTATTCTTG\n')
+        InputFile.write('GGCTTCCCTGGTGGCTCAGACGGTAAAGAATCCGCCTGCAATGCGGGAGACCTGGGTTCG\n')
+        InputFile.write('ATCCCTGGGTTGGGAAGATCCCCTGGAGGAGGGCATGGCAACCCACTCCAGTATTCTTGC\n')
+        InputFile.write('CTGGAGAATCCCCATGGACAGAGGAGCCTGGCGGGCTACAGTCCATGGGGTCGCAAAGAG\n')
+        InputFile.write('TCGGACACGACTGAGCGACTAAGCACAGCACAG\n')
+        InputFile.write('>SUSINE2:classI:SINE:SINE2/tRNA\n')
+        InputFile.write('GGGAGTTCTCTGATGGCCTAGCGGGTTGAGGCTCCTGCGTTCTCACCGCTGTGGCTCTGG\n')
+        InputFile.write('TTGCTGCTGTGCGGCGTAGGTTCAATCCCTGGCCCAGGAATTCCCACATACTGCCTGTGT\n')
+        InputFile.write('GGCAAAAAAGAAAAAAAAAAAAAATACAAAAAAAAAAAAAACAAGAGAGAACCTGAAATA\n')
+        InputFile.write('AACGTTGCAACTCTCATTNAAAAAAAAAAA\n')
+        InputFile.write('>SINE1A_SS:classI:SINE:SINE2/tRNA\n')
+        InputFile.write('GGGAGTTCTCTTGTGGCACAGCAGGTTAAG
[...]
GTGAGTGTGAGAGAGAA\n')
+        InputFile.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC\n')
+        InputFile.write('CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCGCGACCCTCAGTGGAGGAA\n')
+        InputFile.write('CAAGCGGTAGAAAGTGAGTGAGTGAGTGA\n')
+        InputFile.write('>SINEC1B2_CF:classI:SINE:?\n')
+        InputFile.write('GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA\n')
+        InputFile.write('GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG\n')
+        InputFile.write('TGTCTCTGCCTCTCTCTCTCTCTGTGTCTCTCATGAATAAATAA\n')
+        InputFile.write('>ALPINE1:classI:SINE:SINE2/tRNA\n')
+        InputFile.write('GGGGAGGGTATAGCTCAGTGGTAGAGCGCATGCTTAGCATGCACGAGGTCCTGGGTTCAA\n')
+        InputFile.write('TCCCCAGTACCTCCATTAAAAATAAATAAATAAATAAACCTAATTACCTCCCCCACCAAA\n')
+        InputFile.write('AAAAAAA\n')
+        InputFile.close()
+        
+        InputFile = open(self._refFastaFileName, 'w')
+        InputFile.write('>SINE_FR2:classI:SINE:?\n')
+        InputFile.write('GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC\n')
+        InputFile.write('GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT\n')
+        InputFile.write('GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGA\n')
+        InputFile.write('TTGGGGATTAGGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA\n')
+        InputFile.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC\n')
+        InputFile.write('CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCG\n')
+        InputFile.write('CAAGCGGTAGAAAGTGAGTGAGTGAGTGA\n')
+        InputFile.write('>SINEC1B2_CF:classI:SINE:?\n')
+        InputFile.write('GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA\n')
+        InputFile.write('GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG\n')
+        InputFile.write('TGTCTCTGCCTCTCTCTCTCTCTGTGTCTCTCATGAATAAA\n')
+        InputFile.close()
+        
+    def _writeExpDeltaFile(self):
+        f = open(self._expDeltaFileName, 'w')
+        f.write("%s %s\n" %(os.path.abspath(self._queryFastaFileName), os.path.abspath(self._refFastaFileName)))
+        f.write('NUCMER\n')
+        f.write('>SINE_FR2:classI:SINE:? SINE_FR2:classI:SINE:? 389 371\n')
+        f.write('1 343 1 343 0 0 0\n')
+        f.write('0\n')
+        f.write('>SINEC1B2_CF:classI:SINE:? SINEC1B2_CF:classI:SINE:? 164 161\n')
+        f.write('1 161 1 161 0 0 0\n')
+        f.write('0\n')
+        f.close()
+        
+    def _writeExpGenCoordsFile(self):
+        f = open(self._expCoordFileName, 'w')
+        f.write("%s %s\n" %(os.path.abspath(self._queryFastaFileName), os.path.abspath(self._refFastaFileName)))
+        f.write('NUCMER\n\n')
+        f.write('    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  | [TAGS]\n')
+        f.write('=====================================================================================\n')
+        f.write('       1      161  |        1      161  |      161      161  |   100.00  | SINEC1B2_CF:classI:SINE:?\tSINEC1B2_CF:classI:SINE:?\n')
+        f.write('       1      343  |        1      343  |      343      343  |   100.00  | SINE_FR2:classI:SINE:?\tSINE_FR2:classI:SINE:?\n')
+        f.close()
+        
+    def _writeExpShowCoordsFile(self):
+        f = open(self._expCoordFileName, 'w')
+        f.write("%s %s\n" %(os.path.abspath(self._queryFastaFileName), os.path.abspath(self._refFastaFileName)))
+        f.write('NUCMER\n\n')
+        f.write('[S1]\t[E1]\t[S2]\t[E2]\t[LEN 1]\t[LEN 2]\t[% IDY]\t[LEN R]\t[LEN Q]\t[COV R]\t[COV Q]\t[FRM]\t[TAGS]\n')
+        f.write('1\t161\t1\t161\t161\t161\t100.00\t164\t161\t98.17\t100.00\t1\t1\tSINEC1B2_CF:classI:SINE:?\tSINEC1B2_CF:classI:SINE:?\n')
+        f.write('1\t343\t1\t343\t343\t343\t100.00\t389\t371\t88.17\t92.45\t1\t1\tSINE_FR2:classI:SINE:?\tSINE_FR2:classI:SINE:?\n')
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
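Note: LaunchNucmer itself is not touched by this changeset, so the command it assembles is not visible here. Judging from the options the tests above exercise (--prefix, --mincluster, --gencoords, --showcoords) and the .delta/.coords files they compare, the wrapper presumably drives MUMmer roughly as in the following sketch; the function names and defaults are illustrative only, not the wrapper's API.

    # Hypothetical sketch of the MUMmer calls exercised by Test_F_LaunchNucmer.
    import subprocess

    def run_nucmer(ref_fasta, query_fasta, prefix, mincluster=None, coords=False):
        cmd = ["nucmer", "--prefix", prefix]
        if mincluster is not None:
            cmd += ["--mincluster", str(mincluster)]   # exercised by test_run_as_script_with_mincluster
        if coords:
            cmd.append("--coords")                     # writes <prefix>.coords next to <prefix>.delta
        cmd += [ref_fasta, query_fasta]
        subprocess.check_call(cmd)

    def show_coords(prefix):
        # show-coords renders <prefix>.delta as the tab-separated table
        # expected by _writeExpShowCoordsFile() above
        with open(prefix + ".coords", "w") as out:
            subprocess.check_call(["show-coords", "-T", "-l", "-c", prefix + ".delta"], stdout=out)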
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchPhyML.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchPhyML.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,83 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from shutil import rmtree
+from commons.launcher.LaunchPhyML import LaunchPhyML
+
+
+class Test_F_LaunchPhyML(unittest.TestCase):
+    
+    def setUp(self):
+        self._curTestDir = os.getcwd()
+        self._testPrefix = 'test_LRA'
+        try:
+            os.makedirs(self._testPrefix)
+        except:pass
+        os.chdir(self._testPrefix)
+        
+
+        self._inputFastaFileName = "%s_input.fa" % self._testPrefix 
+        self._obsOutputFileName = self._inputFastaFileName +'_obs.phylip'
+        self._expOutputFileName = "%s_exp.phylip" % self._testPrefix 
+        self._writeInputFile()
+        
+#    def tearDown(self):
+#        os.chdir(self._curTestDir)
+#        try:
+#            rmtree(self._testPrefix)
+#        except:pass
+
+    def test_run(self):
+        # PHYLIP input requires sequences of identical length
+        self._writeExpFile()
+        ilauncher = LaunchPhyML(self._inputFastaFileName,verbosity=7)
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))
+        
+        
+#    def test_run_as_script(self):
+#        self._writeExpOutputFile_wo_refseq()
+#        cmd = 'LaunchPhyML.py -i %s -o %s -v 5' % (self._inputFastaFileName, self._obsOutputFileName)
+#        os.system(cmd)
+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))    
+
+               
+    def _writeInputFile(self):        
+        InputFile = open(self._inputFastaFileName, 'w')
+        InputFile.write(""">sequence1
+GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC
+GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT
+GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAG
+>sequence2
+GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA
+GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG
+GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGG
+>sequence3
+ATCTTAGTTTTGCTGAGCGCCTTCATGGCTGCTTGACTATCAGACAGTATAGCAATGTCC
+TTGCCATGATAGTTCCTTTTCAGATTAAACTCTGCACAGCGTCCAATAGCACAGACTTCA
+GCTTGAATGCTGGTGTATCTGCCCATTGATTCGTGGTATTTCAACCTGGG
+""")
+        InputFile.close()    
+        
+    def _writeExpFile(self):        
+        InputFile = open(self._expOutputFileName, 'w')
+        InputFile.write(""" 3  170
+sequence1      GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGG
+sequence2      GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCG
+sequence3      ATCTTAGTTTTGCTGAGCGCCTTCATGGCTGCTTGACTATCAGACAGTAT
+
+CCCCGGGTTCGATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGG
+TGATCCTGGAGACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAG
+AGCAATGTCCTTGCCATGATAGTTCCTTTTCAGATTAAACTCTGCACAGC
+
+AGTTTGCATGTTCTCCCTGTGCCTGCGTGGGTTCTCTCCGGGTACTCCGG
+CCTGCTTCTCCCTCTGCCTGGGGCGGCACGGTGGTGTGGTGGTTAGCACT
+GTCCAATAGCACAGACTTCAGCTTGAATGCTGGTGTATCTGCCCATTGAT
+
+CTTCCTCCCACAGTCCAAAG
+GTTGCCTCACAGCAAGAAGG
+TCGTGGTATTTCAACCTGGG""")
+        InputFile.close()    
+                                   
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
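The expected file written above pins down the output layout: an interleaved PHYLIP alignment with a " <nseq>  <ncols>" header and 50-column blocks, sequence names appearing only in the first block. The conversion itself presumably happens inside LaunchPhyML, which is not part of this changeset; the sketch below only illustrates that layout.

    # Minimal stand-alone sketch of the interleaved PHYLIP layout shown in _writeExpFile().
    def fasta_to_phylip(records, block=50, name_width=15):
        # records: list of (name, sequence) pairs of identical length;
        # name_width is inferred from the expected file above, not from LaunchPhyML
        ncols = len(records[0][1])
        lines = [" %d  %d" % (len(records), ncols)]
        for start in range(0, ncols, block):
            for name, seq in records:
                prefix = name.ljust(name_width) if start == 0 else ""
                lines.append(prefix + seq[start:start + block])
            lines.append("")
        return "\n".join(lines).rstrip("\n")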
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchPromer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchPromer.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,231 @@\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.launcher.LaunchPromer import LaunchPromer\n+\n+\n+class Test_F_LaunchPromer(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self._queryFastaFileName = "query.fa"\n+        self._refFastaFileName = "ref.fa"\n+        self._writeInputFastaFiles()\n+        self._expDeltaFileName = "expPromer.delta"\n+        self._writeExpDeltaFile()\n+        self._obsDeltaFileName = "obsPromer.delta"\n+        self._obsCoordFileName= "obsPromer.coords"\n+        self._expCoordFileName= "expPromer.coords"\n+        \n+    def tearDown(self):\n+        try:\n+            os.remove(self._queryFastaFileName)\n+        except:pass\n+        try:\n+            os.remove(self._refFastaFileName)\n+        except:pass\n+        try:\n+            os.remove(self._expDeltaFileName)\n+        except:pass\n+        try:\n+            os.remove(self._obsDeltaFileName)\n+        except:pass\n+        \n+    def test_run(self):\n+        ilauncher = LaunchPromer(self._refFastaFileName, self._queryFastaFileName,"obsPromer",verbosity=1)\n+        ilauncher.run()\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expDeltaFileName, self._obsDeltaFileName))\n+        \n+    def test_run_as_script(self):\n+        cmd = \'LaunchPromer.py --query %s --ref %s --prefix %s\' % (self._refFastaFileName, self._queryFastaFileName, "obsPromer")\n+        os.system(cmd)\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expDeltaFileName, self._obsDeltaFileName))\n+        \n+    def test_run_as_script_gencoords_output(self):\n+        self._writeExpGenCoordsFile()\n+        cmd = \'LaunchPromer.py --query %s --ref %s --prefix %s --gencoords\' % (self._refFastaFileName, self._queryFastaFileName, "obsPromer")\n+        os.system(cmd)\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expCoordFileName, self._obsCoordFileName))\n+        os.remove(self._obsCoordFileName)\n+        os.remove(self._expCoordFileName)\n+    \n+    def test_run_as_script_with_mincluster(self):\n+        cmd = \'LaunchPromer.py --query %s --ref %s --prefix %s --mincluster 30\' % (self._refFastaFileName, self._queryFastaFileName, "obsPromer")\n+        os.system(cmd)\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expDeltaFileName, self._obsDeltaFileName))      \n+        \n+    def test_run_as_script_showcoords_output(self):\n+        self._writeExpShowCoordsFile()\n+        cmd = \'LaunchPromer.py --query %s --ref %s --prefix %s --showcoords\' % (self._refFastaFileName, self._queryFastaFileName, "obsPromer")\n+        os.system(cmd)\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expCoordFileName, self._obsCoordFileName))\n+        os.remove(self._obsCoordFileName)\n+        os.remove(self._expCoordFileName) \n+        \n+    def _writeInputFastaFiles(self):\n+        InputFile = open(self._queryFastaFileName, \'w\')\n+        InputFile.write(\'>Bovc-tA2:classI:SINE:SINE2/tRNA\\n\')\n+        InputFile.write(\'GGGCTTCCCTGGTAGCTCAGCTGGTAAAGAATCCGCCTGCAATGCAGGAGACCCCGGTTC\\n\')\n+        InputFile.write(\'GATTCCTGGGTCGGGAAGATCCCCTGGAGAAGGGATAGGCTACCCACTCCAGTATTCTTG\\n\')\n+        InputFile.write(\'GGCTTCCCTGGTGGCTCAGACGGTAAAGAATCCGCCTGCAATGCGGGAGACCTGGGTTCG\\n\')\n+        InputFile.write(\'ATCCCTGGGTTGGGAAGATCCCCTGGAGGAGGGCATGGCAACCCACTCCAGTATTCTTGC\\n\')\n+        InputFile.write(\'CTGGAGAATCCCCATGGACAGAGGAGCCTGGCGGGCTACAGTCCATGGGGTCGCAAAGAG\\n\')\n+        
InputFile.write(\'TCGGACACGACTGAGCGACTAAGCACAGCACAG\\n\')\n+        InputFile.write(\'>SUSINE2:classI:SINE:SINE2/tRNA\\n\')\n+        InputFile.write(\'GGGAGTTCTCTGATGGCCTAGCGGGTTGAGGCTCCTGCGTTCTCACCGCTGTGGCTCTGG\\n\')\n+        InputFile.write(\'TTGCTGCTGTGCGGCGTAGGTTCAATCCCTGGCCCAGGAATTCCCACATACTGCCTGTGT\\n\')\n+        InputFile.write(\'GGCAAAAAAGAAAAAAAAAAAAAATACAAAAAAAAAAAAAACAAGAGAGAACCTGAAATA\\n\')\n+        InputFile.write(\'AACGTTGCAACTCTCATTNAAAAAAAAAAA\\n\')\n+        InputFile.write(\'>SINE1A_SS:classI:SINE:SINE2/tRNA\\n\')\n+        InputFile.write(\'GGGAGTTCTCTTGTGGCACAGCAGGTTA'..b'.00   100.00     1.89  |  3  3  SINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'     159        1  |      159        1  |      159      159  |   100.00   100.00     3.77  | -3 -3  SINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'     160        2  |      160        2  |      159      159  |   100.00   100.00     1.89  | -2 -2  SINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'     161        3  |      161        3  |      159      159  |   100.00   100.00     0.00  | -1 -1  SINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'       1      387  |        1      369  |      387      369  |    95.35    95.35     3.88  |  1  1  SINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'       2      388  |        2      370  |      387      369  |    95.35    95.35     3.10  |  2  2  SINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'       3      389  |        3      371  |      387      369  |    95.35    95.35     6.20  |  3  3  SINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'     387        1  |      369        1  |      387      369  |    95.35    95.35     1.94  | -3 -3  SINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'     388        2  |      370        2  |      387      369  |    95.35    95.35     2.33  | -2 -2  SINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'     389        3  |      371        3  |      387      369  |    95.35    95.35     1.55  | -1 -1  SINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+    \n+        \n+    def _writeExpShowCoordsFile(self):\n+        with open(self._expCoordFileName, \'w\') as f:\n+            f.write("%s %s\\n" %(os.path.abspath(self._queryFastaFileName), os.path.abspath(self._refFastaFileName)))\n+            f.write(\'PROMER\\n\\n\')\n+            f.write(\'[S1]\\t[E1]\\t[S2]\\t[E2]\\t[LEN 1]\\t[LEN 2]\\t[% IDY]\\t[% SIM]\\t[% STP]\\t[LEN R]\\t[LEN Q]\\t[COV R]\\t[COV Q]\\t[FRM]\\t[TAGS]\\n\')\n+            f.write(\'1\\t159\\t1\\t159\\t159\\t159\\t100.00\\t100.00\\t1.89\\t164\\t161\\t96.95\\t98.76\\t1\\t1\\tSINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'2\\t160\\t2\\t160\\t159\\t159\\t100.00\\t100.00\\t3.77\\t164\\t161\\t96.95\\t98.76\\t2\\t2\\tSINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'3\\t161\\t3\\t161\\t159\\t159\\t100.00\\t100.00\\t1.89\\t164\\t161\\t96.95\\t98.76\\t3\\t3\\tSINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'159\\t1\\t159\\t1\\t159\\t159\\t100.00\\t100.00\\t3.77\\t164\\t161\\t96.95\\t98.76\\t-3\\t-3\\tSINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            
f.write(\'160\\t2\\t160\\t2\\t159\\t159\\t100.00\\t100.00\\t1.89\\t164\\t161\\t96.95\\t98.76\\t-2\\t-2\\tSINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'161\\t3\\t161\\t3\\t159\\t159\\t100.00\\t100.00\\t0.00\\t164\\t161\\t96.95\\t98.76\\t-1\\t-1\\tSINEC1B2_CF:classI:SINE:?\\tSINEC1B2_CF:classI:SINE:?\\n\')\n+            f.write(\'1\\t387\\t1\\t369\\t387\\t369\\t95.35\\t95.35\\t3.88\\t389\\t371\\t99.49\\t99.46\\t1\\t1\\tSINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'2\\t388\\t2\\t370\\t387\\t369\\t95.35\\t95.35\\t3.10\\t389\\t371\\t99.49\\t99.46\\t2\\t2\\tSINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'3\\t389\\t3\\t371\\t387\\t369\\t95.35\\t95.35\\t6.20\\t389\\t371\\t99.49\\t99.46\\t3\\t3\\tSINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'387\\t1\\t369\\t1\\t387\\t369\\t95.35\\t95.35\\t1.94\\t389\\t371\\t99.49\\t99.46\\t-3\\t-3\\tSINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'388\\t2\\t370\\t2\\t387\\t369\\t95.35\\t95.35\\t2.33\\t389\\t371\\t99.49\\t99.46\\t-2\\t-2\\tSINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+            f.write(\'389\\t3\\t371\\t3\\t387\\t369\\t95.35\\t95.35\\t1.55\\t389\\t371\\t99.49\\t99.46\\t-1\\t-1\\tSINE_FR2:classI:SINE:?\\tSINE_FR2:classI:SINE:?\\n\')\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchRefAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchRefAlign.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,72 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchRefAlign import LaunchRefAlign
+
+
+class Test_F_LaunchRefAlign(unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFastaFileName = "input.fa"
+        self._obsOutputFileName = "obs"
+        self._expOutputFileName = "exp"
+        self._writeInputFile()
+        self._writeExpOutputFile()
+        
+#    def tearDown(self):
+#        try:
+#            os.remove(self._inputFastaFileName)
+#        except:pass
+#    
+#        try:
+#            os.remove(self._expOutputFileName)
+#        except:pass
+#        try:
+#            os.remove(self._obsOutputFileName)
+#        except:pass
+#        try:
+#            os.remove("input.fa.2.3.5.80.10.20.15.dat")
+#        except:pass
+    
+    def test_run(self):
+        ilauncher = LaunchRefAlign(self._inputFastaFileName,verbosity=5)
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))
+        
+#    def test_run_as_script(self):
+#        cmd = 'LaunchTRF.py -i %s -o %s -v 3' % (self._inputFastaFileName, self._obsTRFOutputFileName)
+#        os.system(cmd)
+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expTRFOutputFileName, self._obsTRFOutputFileName))    
+
+        
+    def _writeInputFile(self):        
+        InputFile = open(self._inputFastaFileName, 'w')
+        InputFile.write('>sequence\n')
+        InputFile.write('GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC\n')
+        InputFile.write('GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT\n')
+        InputFile.write('GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGA\n')
+        InputFile.write('TTGGGGATTAGGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA\n')
+        InputFile.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC\n')
+        InputFile.write('CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCG\n')
+        InputFile.write('CAAGCGGTAGAAAGTGAGTGAGTGAGTGA\n')
+        InputFile.write('>sequence2\n')
+        InputFile.write('GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA\n')
+        InputFile.write('GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG\n')
+        InputFile.write('TGTCTCTGCCTCTCTCTCTCTCTGTGTCTCTCATGAATAAA\n')
+        InputFile.close()    
+        
+    def _writeExpOutputFile(self):
+        with open(self._expOutputFileName,'w') as expOutputFile:
+            expOutputFile.write("""1\t(GTGGT)2\tsequence\t11\t21
+2\t(GGT)5\tsequence\t10\t23
+3\t(GTGTGA)4\tsequence\t222\t242
+4\t(GTCCA)2\tsequence\t282\t292
+5\t(AGTG)4\tsequence\t355\t371
+6\t(GCCTGTCTCTCCTCT)4\tsequence2\t100\t152
+7\t(CTCTGCCTGTGT)3\tsequence2\t112\t151
+8\t(TC)23\tsequence2\t107\t152
+""")
+
+                       
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
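Nearly every assertion in these functional tests goes through FileUtils.are2FilesIdentical from commons/core/utils/FileUtils.py, which this changeset does not modify. The stand-in below only illustrates the exact-content comparison the tests appear to rely on; the real helper may be more lenient (for example toward line ordering).

    # Assumed behaviour of the helper used by the assertions above; sketch only.
    import filecmp

    def are_2_files_identical(expected_path, observed_path):
        return filecmp.cmp(expected_path, observed_path, shallow=False)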
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchRefalign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchRefalign.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,104 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchRefAlign import LaunchRefAlign
+from shutil import rmtree
+
+
+class Test_F_LaunchRefAlign(unittest.TestCase):
+    
+    def setUp(self):
+        self._curTestDir = os.getcwd()
+        self._testPrefix = 'test_LRA'
+        try:
+            os.makedirs(self._testPrefix)
+        except:pass
+        os.chdir(self._testPrefix)
+        
+
+        self._inputFastaFileName = "%s_input.fa" % self._testPrefix 
+        self._obsOutputFileName = self._inputFastaFileName +'.fa_aln'
+        self._expOutputFileName = "%s_exp.fa_aln" % self._testPrefix 
+        self._writeInputFile()
+        
+    def tearDown(self):
+        os.chdir(self._curTestDir)
+        try:
+            rmtree(self._testPrefix)
+        except:pass
+
+    def test_run(self):
+        self._writeExpOutputFile_wo_refseq()
+        ilauncher = LaunchRefAlign(self._inputFastaFileName,verbosity=5)
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))
+        
+    def test_run_KeepRefSeq(self):
+        self._writeExpOutputFile_with_refseq()
+        ilauncher = LaunchRefAlign(self._inputFastaFileName,verbosity=5, keepRefseq=True)
+        ilauncher.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))
+        
+    def test_run_as_script(self):
+        self._writeExpOutputFile_wo_refseq()
+        cmd = 'LaunchRefAlign.py -i %s -o %s -v 5' % (self._inputFastaFileName, self._obsOutputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))    
+
+    def test_run_as_script_KeepRefSeq(self):
+        self._writeExpOutputFile_with_refseq()
+        cmd = 'LaunchRefAlign.py -i %s -r -o %s -v 5' % (self._inputFastaFileName, self._obsOutputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName, self._obsOutputFileName))   
+               
+    def _writeInputFile(self):        
+        InputFile = open(self._inputFastaFileName, 'w')
+        InputFile.write('>sequence\n')
+        InputFile.write('GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC\n')
+        InputFile.write('GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT\n')
+        InputFile.write('GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGA\n')
+        InputFile.write('TTGGGGATTAGGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA\n')
+        InputFile.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC\n')
+        InputFile.write('CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCG\n')
+        InputFile.write('CAAGCGGTAGAAAGTGAGTGAGTGAGTGA\n')
+        InputFile.write('>sequence2\n')
+        InputFile.write('GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA\n')
+        InputFile.write('GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG\n')
+        InputFile.write('TGTCTCTGCCTCTCTCTCTCTCTGTGTCTCTCATGAATAAA\n')
+        InputFile.close()    
+        
+    def _writeExpOutputFile_wo_refseq(self):
+        with open(self._expOutputFileName,'w') as expOutputFile:
+            expOutputFile.write(""">sequence2
+GGGCAGC-CGGTGGTGCGGT--TTAGCGCTGTTGGCCCAGGGCGTGATGGACCCGGGATC
+GATCCCACATCGGG--------------CTCCCTGCATGGAGCCTGC---TTCTCCCTCT
+GCCTGTGTCTCTTCTCTC------------------------------------------
+------------------------------------------------------------
+----------TCTCTCTGTGTCTCTCATGA-ATAAA------------------------
+------------------------------------------------------------
+-----------
+""")
+            
+    def _writeExpOutputFile_with_refseq(self):
+        with open(self._expOutputFileName,'w') as expOutputFile:
+            expOutputFile.write(""">sequence
+GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC
+GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT
+GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGA
+TTGGGGATTAGGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA
+TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC
+CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCGCAAGCGGTAGAAAGTGAG
+TGAGTGAGTGA
+>sequence2
+GGGCAGC-CGGTGGTGCGGT--TTAGCGCTGTTGGCCCAGGGCGTGATGGACCCGGGATC
+GATCCCACATCGGG--------------CTCCCTGCATGGAGCCTGC---TTCTCCCTCT
+GCCTGTGTCTCTTCTCTC------------------------------------------
+------------------------------------------------------------
+----------TCTCTCTGTGTCTCTCATGA-ATAAA------------------------
+------------------------------------------------------------
+-----------
+""")
+                               
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchRepeatMasker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchRepeatMasker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,110 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchRepeatMasker import LaunchRepeatMasker
+
+
+class Test_F_RepeatMasker(unittest.TestCase):
+    
+    def setUp(self):
+        self._queryFastaFileName = "query.fa"
+        self._libFastaFileName = "lib.fa"
+        self._expMaskedQueryFileName = "expMaskedQuery.fa"
+        self._obsMaskedQueryFileName = "%s.masked" % self._queryFastaFileName
+        
+        self._expCatFileName = "expCat.fa"
+        self._obsCatFileName = "%s.cat" % self._queryFastaFileName
+        
+        self._writeInputFastaFiles()
+        self._writeExpMaskedQueryFile()
+        self._writeExpCatFile()
+   
+    def tearDown(self):
+        os.remove(self._queryFastaFileName)
+        os.remove(self._libFastaFileName)
+        os.remove(self._expMaskedQueryFileName)
+        os.remove(self._obsMaskedQueryFileName)
+        os.remove(self._expCatFileName)
+        os.remove(self._obsCatFileName)
+        os.remove("%s.tbl" % self._queryFastaFileName)
+        os.remove("%s.out" % self._queryFastaFileName)
+        os.remove("%s.ori.out" % self._queryFastaFileName)
+        
+    def test_run(self):
+        ilauncher = LaunchRepeatMasker(self._queryFastaFileName, self._libFastaFileName,verbosity=1 )
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._obsMaskedQueryFileName, self._expMaskedQueryFileName))
+        
+    def test_run_as_script(self):
+        #FIXME : change path to launcher
+        cmd = 'python %s/commons/launcher/LaunchRepeatMasker.py -q %s -l %s' % (os.environ["REPET_PATH"],self._queryFastaFileName, self._libFastaFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._obsMaskedQueryFileName, self._expMaskedQueryFileName))
+        
+    def _writeInputFastaFiles(self):
+        InputFile = open(self._queryFastaFileName, 'w')
+        InputFile.write('>Bovc-tA2:classI:SINE:SINE2/tRNA\n')
+        InputFile.write('GGGCTTCCCTGGTAGCTCAGCTGGTAAAGAATCCGCCTGCAATGCAGGAGACCCCGGTTC\n')
+        InputFile.write('GATTCCTGGGTCGGGAAGATCCCCTGGAGAAGGGATAGGCTACCCACTCCAGTATTCTTG\n')
+        InputFile.write('GGCTTCCCTGGTGGCTCAGACGGTAAAGAATCCGCCTGCAATGCGGGAGACCTGGGTTCG\n')
+        InputFile.write('ATCCCTGGGTTGGGAAGATCCCCTGGAGGAGGGCATGGCAACCCACTCCAGTATTCTTGC\n')
+        InputFile.write('CTGGAGAATCCCCATGGACAGAGGAGCCTGGCGGGCTACAGTCCATGGGGTCGCAAAGAG\n')
+        InputFile.write('TCGGACACGACTGAGCGACTAAGCACAGCACAGGTCTTTCCTGCCACTTACCTCCTTCCA\n')
+        InputFile.write('GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC\n')
+        InputFile.write('GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT\n')
+        InputFile.write('GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGA\n')
+        InputFile.write('TTGGGGATTAGGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA\n')
+        InputFile.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC\n')
+        InputFile.write('CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCGCGACCCTCAGTGGAGGAA\n')
+        InputFile.write('GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA\n')
+        InputFile.write('GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG\n')
+        InputFile.write('GGGGAGGGTATAGCTCAGTGGTAGAGCGCATGCTTAGCATGCACGAGGTCCTGGGTTCAA\n')
+        InputFile.write('TCCCCAGTACCTCCATTAAAAATAAATAAATAAATAAACCTAATTACCTCCCCCACCAAA\n')
+        InputFile.write('AAAAAAA\n')
+        InputFile.close()
+        
+        InputFile = open(self._libFastaFileName, 'w')
+        InputFile.write('>RefLib\n')
+        InputFile.write('GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC\n')
+        InputFile.write('GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT\n')
+        InputFile.close()
+        
+    def _writeExpMaskedQueryFile(self):
+        f = open(self._expMaskedQueryFileName, 'w')  
+        f.write('>Bovc-tA2:classI:SINE:SINE2/tRNA\n')
+        f.write('GGGCTTCCCTGGTAGCTCAGCTGGTAAAGAATCCGCCTGCAATGCAGGAG\n')
+        f.write('ACCCCGGTTCGATTCCTGGGTCGGGAAGATCCCCTGGAGAAGGGATAGGC\n')
+        f.write('TACCCACTCCAGTATTCTTGGGCTTCCCTGGTGGCTCAGACGGTAAAGAA\n')
+        f.write('TCCGCCTGCAATGCGGGAGACCTGGGTTCGATCCCTGGGTTGGGAAGATC\n')
+        f.write('CCCTGGAGGAGGGCATGGCAACCCACTCCAGTATTCTTGCCTGGAGAATC\n')
+        f.write('CCCATGGACAGAGGAGCCTGGCGGGCTACAGTCCATGGGGTCGCAAAGAG\n')
+        f.write('TCGGACACGACTGAGCGACTAAGCACAGCACAGGTCTTTCCTGCCACTTA\n')
+        f.write('CCTCCTTCCANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n')
+        f.write('NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n')
+        f.write('NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCCTGCGTGGGTTCTCTCCG\n')
+        f.write('GGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGATTGGGGATTA\n')
+        f.write('GGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA\n')
+        f.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCC\n')
+        f.write('AGGGTGTACCCTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCC\n')
+        f.write('CGCGACCCTCAGTGGAGGAAGGGCAGCCTGGGTGGCTCAGCGGTTTAGCG\n')
+        f.write('CCTGCCTTTGGCCCAGGGCGTGATCCTGGAGACCCGGGATCGAGTCCCAC\n')
+        f.write('ATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTGGGGGAGGGTA\n')
+        f.write('TAGCTCAGTGGTAGAGCGCATGCTTAGCATGCACGAGGTCCTGGGTTCAA\n')
+        f.write('TCCCCAGTACCTCCATTAAAAATAAATAAATAAATAAACCTAATTACCTC\n')
+        f.write('CCCCACCAAAAAAAAAA\n')
+        f.close()
+        
+    def _writeExpCatFile(self):
+        f = open(self._expCatFileName, 'w')  
+        f.write('958 0.00 0.00 0.00 Bovc-tA2:classI:SINE:SINE2/tRNA 361 480 (487) RefLib 1 120 (0) 5\n')
+        f.write('## Total Sequences: 1\n')
+        f.write('## Total Length: 967\n')
+        f.write('## Total NonMask ( excluding >20bp runs of N/X bases ): 967\n')
+        f.write('## Total NonSub ( excluding all non ACGT bases ):967\n')
+        f.write('RepeatMasker version open-3.2.6 , default mode\n')
+        f.write('run with blastp version 2.0MP-WashU\n')
+        f.write('RepBase Update 20080801, RM database version 20080801\n')
+        f.close()
+                       
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
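LaunchRepeatMasker is exercised but not defined in this changeset. The files removed in tearDown() (.masked, .cat, .tbl, .out, .ori.out) are what RepeatMasker leaves behind when run with a custom library, so the wrapper presumably issues a command along these lines (sketch only).

    # Hypothetical custom-library RepeatMasker run matching the outputs cleaned up above.
    import subprocess

    def run_repeatmasker(query_fasta, lib_fasta):
        # -lib masks against a user-supplied repeat library instead of the default database
        subprocess.check_call(["RepeatMasker", "-lib", lib_fasta, query_fasta])
        return query_fasta + ".masked"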
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchTRF.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchTRF.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,84 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher.LaunchTRF import LaunchTRF
+
+
+class Test_F_LaunchTRF(unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFastaFileName = "input.fa"
+        self._obsTRFOutputFileName = "obsTRF.TRF.set"
+        self._expTRFOutputFileName = "expTRF.TRF.set"
+        self._writeInputFile()
+        self._writeExpTRFOutputFile()
+        
+    def tearDown(self):
+        try:
+            os.remove(self._inputFastaFileName)
+        except:pass
+    
+        try:
+            os.remove(self._expTRFOutputFileName)
+        except:pass
+        try:
+            os.remove(self._obsTRFOutputFileName)
+        except:pass
+        try:
+            os.remove("input.fa.2.3.5.80.10.20.15.dat")
+        except:pass
+    
+    def test_run(self):
+        ilauncher = LaunchTRF(self._inputFastaFileName,self._obsTRFOutputFileName,verbosity=5)
+        ilauncher.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expTRFOutputFileName, self._obsTRFOutputFileName))
+        
+    def test_run_as_script(self):
+        cmd = 'LaunchTRF.py -i %s -o %s -v 3' % (self._inputFastaFileName, self._obsTRFOutputFileName)
+        os.system(cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expTRFOutputFileName, self._obsTRFOutputFileName))    
+
+    def test_run_as_scriptNoOutputGiven(self):
+        cmd = 'LaunchTRF.py -i %s -v 3' % (self._inputFastaFileName)
+        os.system(cmd)
+        self._obsTRFOutputFileName = "input.fa.TRF.set"
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expTRFOutputFileName, self._obsTRFOutputFileName))  
+        
+    def test_run_as_scriptNoOutputGivenDoClean(self):
+        cmd = 'LaunchTRF.py -i %s -c -v 3' % (self._inputFastaFileName)
+        os.system(cmd)
+        self._obsTRFOutputFileName = "input.fa.TRF.set"
+        datFilePath = os.path.join(os.getcwd(),"%s.2.3.5.80.10.20.15.dat" % self._inputFastaFileName)
+        self.assertFalse(os.path.exists(datFilePath))
+        
+    def _writeInputFile(self):        
+        InputFile = open(self._inputFastaFileName, 'w')
+        InputFile.write('>sequence\n')
+        InputFile.write('GGGCGGCACGGTGGTGTGGTGGTTAGCACTGTTGCCTCACAGCAAGAAGGCCCCGGGTTC\n')
+        InputFile.write('GATCCCCGGTTGGGACTGAGGCTGGGGACTTTCTGTGTGGAGTTTGCATGTTCTCCCTGT\n')
+        InputFile.write('GCCTGCGTGGGTTCTCTCCGGGTACTCCGGCTTCCTCCCACAGTCCAAAGACATGCATGA\n')
+        InputFile.write('TTGGGGATTAGGCTAATTGGAAACTCTAAAATTGCCCGTAGGTGTGAGTGTGAGAGAGAA\n')
+        InputFile.write('TGGTTGTTTGTCTATATGTGTTAGCCCTGCGATTGACTGGCGTCCAGTCCAGGGTGTACC\n')
+        InputFile.write('CTGCCTCCGCCCATTGTGCTGGGATAGGCTCCAGTCCCCCCG\n')
+        InputFile.write('CAAGCGGTAGAAAGTGAGTGAGTGAGTGA\n')
+        InputFile.write('>sequence2\n')
+        InputFile.write('GGGCAGCCTGGGTGGCTCAGCGGTTTAGCGCCTGCCTTTGGCCCAGGGCGTGATCCTGGA\n')
+        InputFile.write('GACCCGGGATCGAGTCCCACATCGGGCTCCCTGCATGGAGCCTGCTTCTCCCTCTGCCTG\n')
+        InputFile.write('TGTCTCTGCCTCTCTCTCTCTCTGTGTCTCTCATGAATAAA\n')
+        InputFile.close()    
+        
+    def _writeExpTRFOutputFile(self):
+        with open(self._expTRFOutputFileName,'w') as expTRFOutputFile:
+            expTRFOutputFile.write("""1\t(GTGGT)2\tsequence\t11\t21
+2\t(GGT)5\tsequence\t10\t23
+3\t(GTGTGA)4\tsequence\t222\t242
+4\t(GTCCA)2\tsequence\t282\t292
+5\t(AGTG)4\tsequence\t355\t371
+6\t(GCCTGTCTCTCCTCT)4\tsequence2\t100\t152
+7\t(CTCTGCCTGTGT)3\tsequence2\t112\t151
+8\t(TC)23\tsequence2\t107\t152
+""")
+
+                       
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
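The intermediate file cleaned up above, input.fa.2.3.5.80.10.20.15.dat, follows Tandem Repeats Finder's output naming, so the parameters LaunchTRF passes can be read off the name: match 2, mismatch 3, indel 5, match probability 80, indel probability 10, minimum score 20, maximum period 15. The wrapper itself is not in this changeset; the invocation below is a hedged sketch of what it presumably runs before reformatting the hits into the tab-separated .TRF.set rows shown in _writeExpTRFOutputFile().

    # Assumed TRF invocation inferred from the .dat file name; sketch only.
    import subprocess

    def run_trf(fasta_path):
        params = ["2", "3", "5", "80", "10", "20", "15"]
        subprocess.call(["trf", fasta_path] + params + ["-d", "-h"])
        return "%s.%s.dat" % (fasta_path, ".".join(params))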
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_F_LaunchTallymer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_F_LaunchTallymer.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,151 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+import shutil
+from commons.launcher.LaunchTallymer import LaunchTallymer
+
+class Test_F_LaunchTallymer(unittest.TestCase):
+
+    def setUp(self):
+        self._inFastaFileName = "%s/Tools/DmelChr4.fa" % os.environ["REPET_DATA"]
+
+    def test_run_output_as_gff_cleaned(self):
+        iLaunchTallymer = LaunchTallymer(self._inFastaFileName,  clean = True, verbosity=2)
+        iLaunchTallymer.run()
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k20.gff" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff"
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        os.remove("DmelChr4.tallymer.stat")
+        
+    def test_run_with_remote_dir(self):
+        os.makedirs("sourceDir")
+        changedInFastaFileName = os.path.basename(self._inFastaFileName)
+        changedInFastaFilepath = os.path.join("sourceDir",changedInFastaFileName)
+        shutil.copy2(self._inFastaFileName, changedInFastaFilepath)
+        
+        iLaunchTallymer = LaunchTallymer(changedInFastaFilepath,  clean = True, verbosity=2)
+        iLaunchTallymer.run()
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k20.gff" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff"
+        
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        obsSourceDirFiles = os.listdir("sourceDir")
+        expSourceDirFiles = [changedInFastaFileName,]
+        self.assertEquals(expSourceDirFiles, obsSourceDirFiles)
+        
+        os.remove(obsFileName)
+        os.remove("DmelChr4.tallymer.stat")
+        shutil.rmtree("sourceDir")
+
+
+    def test_run_output_as_gff_cleaned_changed_minOccs2(self):
+        iLaunchTallymer = LaunchTallymer(self._inFastaFileName, minOccs=2, clean = True, verbosity=2)
+        iLaunchTallymer.run()
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k20_minOcc2.gff" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff"
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        os.remove("DmelChr4.tallymer.stat")
+        
+    def test_run_output_as_gff_cleaned_relative_input(self):
+        srcFastaFileName = self._inFastaFileName
+        dstFastaFileName = os.path.basename(self._inFastaFileName)
+        shutil.copy2(srcFastaFileName, dstFastaFileName)
+        iLaunchTallymer = LaunchTallymer(dstFastaFileName,  clean = True, verbosity=2)
+        iLaunchTallymer.run()
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k20.gff" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff"
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(obsFileName)
+        os.remove("DmelChr4.tallymer.stat")
+        os.remove(dstFastaFileName)
+   
+    def test_run_as_script_output_as_gff_cleaned_relative_input_(self):
+        srcFastaFileName = self._inFastaFileName
+        dstFastaFileName = os.path.basename(self._inFastaFileName)
+        shutil.copy2(srcFastaFileName, dstFastaFileName)
+        
+        cmd = 'LaunchTallymer.py -i %s -s 20 -c -v 3' % (self._inFastaFileName)
+        os.system(cmd)
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k20.gff" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff"
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(obsFileName)
+        os.remove("DmelChr4.tallymer.stat")
+        os.remove(dstFastaFileName)
+        
+    def test_run_output_as_wig_cleaned(self):
+        iLaunchTallymer = LaunchTallymer(self._inFastaFileName, outputFormats="wig", clean=True)
+        iLaunchTallymer.run()
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k20.wig" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.wig"
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(obsFileName)
+        os.remove("DmelChr4.tallymer.stat")
+
+    def test_run_output_as_gff_and_wig_k15_cleaned(self):
+        iLaunchTallymer = LaunchTallymer(self._inFastaFileName, merSize=15, outputFormats="wig,gff3", clean=True)
+        iLaunchTallymer.run()
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k15.wig" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.wig"
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k15.gff3" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff3"
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        
+        os.remove("DmelChr4.tallymer.stat")
+
+    def test_run_as_script_run_output_as_gff_and_wig_k15_cleaned(self):
+        logFileName = "test.log"
+        cmd = 'LaunchTallymer.py -i %s -s 15 -f gff,wig,vero -c -v 3 > %s' % (self._inFastaFileName, logFileName)
+        os.system(cmd)
+               
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k15.wig" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.wig"
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        
+        expFileName = "%s/Tools/DmelChr4.fa.tallymer_k15.gff3" % os.environ["REPET_DATA"]
+        obsFileName = "DmelChr4.tallymer.gff"
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(obsFileName)
+        
+        expLogFileName = "exp.log"
+        with open(expLogFileName, "w") as f:
+            f.write(" LaunchTallymer - WARNING - Warning: ignoring invalid formats: <vero>\n")
+            f.write(" LaunchTallymer - INFO - Starting to run tallymer search of sequence DmelChr4.fa \n")
+            f.write(" LaunchTallymer - INFO - Finished running tallymer scan of sequence DmelChr4.fa \n")
+            f.write(" LaunchTallymer - INFO - Starting to run tallymer search to map conversion\n")
+            f.write(" LaunchTallymer - INFO - Finished tallymer search to map conversion\n")
+            f.write(" LaunchTallymer - INFO - Generating wig file\n")
+            f.write(" LaunchTallymer - INFO - Generating gff file\n")
+        obsLogFileName = "obs.log"
+        os.system("cut -d'-' -f4,5,6 %s > %s" % (logFileName, obsLogFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expLogFileName, obsLogFileName))
+        
+        os.remove(logFileName)
+        os.remove(expLogFileName)
+        os.remove(obsLogFileName)
+        os.remove("DmelChr4.tallymer.stat")
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
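LaunchTallymer is run against the REPET_DATA DmelChr4 reference outputs but is not defined in this changeset. The four-column search output parsed by ConvertUtils in Test_LaunchTallymer below suggests a genometools pipeline roughly like the sketch here, with the mer size and minimum occurrence mirroring the merSize/minOccs arguments used above; the exact gt flags are an assumption.

    # Hypothetical genometools (gt) pipeline behind LaunchTallymer; sketch only.
    import subprocess

    def run_tallymer(fasta_path, mer_size=20, min_occ=1):
        esa = fasta_path + ".esa"
        subprocess.check_call(["gt", "suffixerator", "-dna", "-pl", "-tis", "-suf", "-lcp",
                               "-db", fasta_path, "-indexname", esa])
        tyr = "%s.tyr_k%d" % (fasta_path, mer_size)
        subprocess.check_call(["gt", "tallymer", "mkindex", "-mersize", str(mer_size),
                               "-minocc", str(min_occ), "-counts", "-pl",
                               "-indexname", tyr, "-esa", esa])
        # qseqnum/qpos/counts/sequence matches the 4-column file parsed by ConvertUtils below
        with open(fasta_path + ".tallymer", "w") as out:
            subprocess.check_call(["gt", "tallymer", "search", "-tyr", tyr, "-q", fasta_path,
                                   "-output", "qseqnum", "qpos", "counts", "sequence"],
                                  stdout=out)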
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_LaunchBlastclust.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_LaunchBlastclust.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,237 @@\n+import unittest\n+import time\n+import os\n+from commons.launcher.LaunchBlastclust import LaunchBlastclust\n+from commons.core.utils.FileUtils import FileUtils\n+\n+class Test_LaunchBlastclust( unittest.TestCase ):\n+\n+    def setUp(self):\n+        self._iLaunchBlastclust = LaunchBlastclust()\n+        self._iLaunchBlastclust.setClean()\n+        self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S"), os.getpid())\n+\n+    def tearDown(self):\n+        self._iLaunchBlastclust = None\n+        self._uniqId = None\n+\n+    def test_getClustersFromTxtFile(self):\n+        inFileName = "dummyInFile_%s"  % self._uniqId\n+        inF = open(inFileName, "w")\n+        inF.write("seq1 seq3 seq4 \\n")\n+        inF.write("seq2 seq5 \\n")\n+        inF.close()\n+        dExp = {1:["seq1","seq3","seq4"], 2:["seq2","seq5"]}\n+        self._iLaunchBlastclust.setTmpFileName(inFileName)\n+        dObs = self._iLaunchBlastclust.getClustersFromTxtFile()\n+        self.assertEqual(dObs, dExp)\n+        os.remove(inFileName)\n+        \n+    def test_getClusteringResultsInFasta_without_filtering(self):\n+        inFileName = "dummyInFile_%s"  % self._uniqId\n+        inF = open(inFileName, "w")\n+        inF.write(">seq1\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq2\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq3\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq4\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq5\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.close()\n+        \n+        tmpFileName = "%s_blastclust.txt" % self._uniqId\n+        inF = open(tmpFileName, "w")\n+        inF.write("seq1 seq3 seq4 \\n")\n+        inF.write("seq2 seq5 \\n")\n+        inF.close()\n+        self._iLaunchBlastclust.setTmpFileName(tmpFileName)\n+        \n+        fileExp = "getClusteringResultsInFastaExpected.fa"\n+        outF = open(fileExp, "w")\n+        outF.write(">BlastclustCluster1Mb1_seq1\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster1Mb2_seq3\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster1Mb3_seq4\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster2Mb1_seq2\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster2Mb2_seq5\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.close()\n+        \n+        self._iLaunchBlastclust.getClusteringResultsInFasta(inFileName)\n+        fileObs = "%s_Blastclust.fa" % os.path.splitext(inFileName)[0]\n+        \n+        if not FileUtils.are2FilesIdentical(fileObs, fileExp):\n+            print "Files are different"\n+            return\n+        else:\n+            print "Files are identical\\n"\n+\n+        os.remove(inFileName)\n+        os.remove(tmpFileName)\n+        os.remove(fileExp)\n+        os.remove(fileObs)\n+\n+    def test_getClusteringResultsInFasta_with_filtering(self):\n+        inFileName = "dummyInFile_%s"  % self._uniqId\n+        inF = open(inFileName, "w")\n+        inF.write(">seq1\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq2\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq3\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq4\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">seq5\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.close()\n+        \n+        
tmpFileName = "%s_blastclust.txt" % self._uniqId\n+        inF = open(tmpFileName, "w")\n+        inF.write("seq1 seq3 seq4 \\n")\n+        inF.write("seq2\\n")\n+        inF.write("seq5\\n")\n+        inF.close()\n+        self._iLaunchBlastclust.setTmpFileName(tmpFileName)\n+        \n+        fileExp = "getClusteringResultsInFastaExpected.fa"\n+        outF = open(fileExp, "w")\n+        outF.write(">BlastclustCluster1Mb1_seq1\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster1Mb2_seq3\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">Bl'..b'Headers()\n+        dExp = {"seq1":"Header1", "seq2":"Header2", "seq3":"Header3", "seq4":"Header4"}\n+        \n+        self.assertEqual(dObs, dExp)\n+        os.remove(inFileName)\n+        \n+    def test_retrieveInitHeaders(self):\n+        dIn = {"seq1":"Header1", "seq2":"Header2", "seq3":"Header3", "seq4":"Header4"}\n+        \n+        inFileName = "dummyInFile_%s"  % self._uniqId\n+        outFilePrefix = self._uniqId\n+        \n+        tmpFileName = "%s_blastclust.txt" % outFilePrefix\n+        inF = open(tmpFileName, "w")\n+        inF.write("seq1 seq3 seq4\\n")\n+        inF.write("seq2\\n")\n+        inF.close()\n+        \n+        shortHFile = "%s.shortH_Blastclust.fa"  % inFileName\n+        shF = open(shortHFile, "w")\n+        shF.write(">BlastclustCluster1Mb1_seq1\\n")\n+        shF.write("gaattgtttactta\\n")\n+        shF.write(">BlastclustCluster1Mb2_seq3\\n")\n+        shF.write("gaattgtttactta\\n")\n+        shF.write(">BlastclustCluster1Mb3_seq4\\n")\n+        shF.write("gaattgtttactta\\n")\n+        shF.write(">BlastclustCluster2Mb1_seq2\\n")\n+        shF.write("gaattgtttactta\\n")\n+        shF.close()\n+        \n+        fileExp = "retrieveInitHeadersExpected.fa"\n+        outF = open(fileExp, "w")\n+        outF.write(">BlastclustCluster1Mb1_Header1\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster1Mb2_Header3\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster1Mb3_Header4\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.write(">BlastclustCluster2Mb1_Header2\\n")\n+        outF.write("gaattgtttactta\\n")\n+        outF.close()\n+\n+        self._iLaunchBlastclust.setInputFileName(inFileName)\n+        self._iLaunchBlastclust.setTmpFileName(tmpFileName)\n+        self._iLaunchBlastclust.setOutputFilePrefix(outFilePrefix)\n+        self._iLaunchBlastclust.retrieveInitHeaders(dIn)\n+        fileObs = "%s_Blastclust.fa" % outFilePrefix\n+        \n+        if not FileUtils.are2FilesIdentical(fileObs, fileExp):\n+            print "Files are different"\n+            return\n+        else:\n+            print "Files are identical\\n"\n+        \n+        os.remove(fileObs)\n+        os.remove(fileExp)\n+        os.remove(tmpFileName)\n+\n+    def test_filterUnclusteredSequences(self):\n+        dClusterId2SeqHeaders = {1: ["seq1","seq2"], 2: ["seq3"]}\n+        dExp = {1: ["seq1","seq2"]}\n+        dObs = self._iLaunchBlastclust.filterUnclusteredSequences(dClusterId2SeqHeaders)\n+        self.assertEqual(dObs, dExp)\n+        \n+    def test_blastclustToMap(self):\n+        inFileName = "dummyBlastclustOut_%s.fa"  % self._uniqId\n+        inF = open(inFileName, "w")\n+        inF.write(">BlastclustCluster1Mb1_chunk1 (dbseq-nr 1) [1,14]\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">BlastclustCluster1Mb2_chunk1 (dbseq-nr 1) [30,44]\\n")\n+        
inF.write("gaattgtttactta\\n")\n+        inF.write(">BlastclustCluster2Mb1_chunk2 (dbseq-nr 1) [100,114]\\n")\n+        inF.write("gaattgtttactta\\n")\n+        inF.write(">BlastclustCluster3Mb1_chunk5 (dbseq-nr 8) [1000,1014]\\n")\n+        inF.write("gaattgtttactta")\n+        inF.close()\n+        \n+        fileExp = "blastclustToMapExpected.map"\n+        outF = open(fileExp, "w")\n+        outF.write("BlastclustCluster1Mb1\\tchunk1\\t1\\t14\\n")\n+        outF.write("BlastclustCluster1Mb2\\tchunk1\\t30\\t44\\n")\n+        outF.write("BlastclustCluster2Mb1\\tchunk2\\t100\\t114\\n")\n+        outF.write("BlastclustCluster3Mb1\\tchunk5\\t1000\\t1014\\n")\n+        outF.close()\n+        \n+        self._iLaunchBlastclust.blastclustToMap(inFileName)\n+        fileObs = "%s.map" % os.path.splitext(inFileName)[0]\n+        \n+        if not FileUtils.are2FilesIdentical(fileObs, fileExp):\n+            print "Files are different"\n+            return\n+        else:\n+            print "Files are identical\\n"\n+        \n+        os.remove(inFileName)\n+        os.remove(fileObs)\n+        os.remove(fileExp)\n+\n+if __name__ == "__main__":\n+        unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_LaunchTallymer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_LaunchTallymer.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,121 @@
+from commons.core.utils.FileUtils import FileUtils
+import os
+import unittest
+from commons.launcher.LaunchTallymer import ConvertUtils
+
+class Test_ConvertUtils(unittest.TestCase):
+
+    def test_convertTallymerFormatIntoMapFormatAndGenerateData(self):
+        inputFastaFileName = "input.fa"
+        inputTallymerSearchFileName = "input.tallymer"
+        expMapFileName = "exp.map"
+        obsMapFileName = "obs.map"
+        self._writeInputFasta(inputFastaFileName)
+        self._writeInputTallymerSearch(inputTallymerSearchFileName)
+        self._writeExpMap(expMapFileName)
+        
+        expOccNb = 4
+        expDKmer2Occ = {"caagatgcgtaacggccata":1,
+                        "aagatgcgtaacggccatac":1,
+                        "agatgcgtaacggccataca":1,
+                        "gatgcgtaacggccatacat":1}
+        expPlotData = {4379: 17,
+                       4380: 17,
+                       4381: 16,
+                       4382: 16}
+        
+        obsOccNb, obsDKmer2Occ, obsPlotData, obsPlotData2 = ConvertUtils.convertTallymerFormatIntoMapFormatAndGenerateData(inputFastaFileName, inputTallymerSearchFileName, obsMapFileName)
+        
+        self.assertEquals(expOccNb, obsOccNb)
+        self.assertEquals(expDKmer2Occ, obsDKmer2Occ)
+        self.assertEquals(expPlotData, obsPlotData)
+        self.assertTrue(FileUtils.are2FilesIdentical(expMapFileName, obsMapFileName))
+        
+        os.remove(inputFastaFileName)
+        os.remove(inputTallymerSearchFileName)
+        os.remove(expMapFileName)
+        os.remove(obsMapFileName)
+        
+    def test_convertTallymerFormatIntoMapFormatAndGenerateDataMultiFasta(self):
+        inputFastaFileName = "input.fa"
+        inputTallymerSearchFileName = "input.tallymer"
+        expMapFileName = "exp.map"
+        obsMapFileName = "obs.map"
+        self._writeInputMultiFasta(inputFastaFileName)
+        self._writeInputTallymerSearchMultiFasta(inputTallymerSearchFileName)
+        self._writeExpMapMultiFasta(expMapFileName)
+        
+        expOccNb = 6
+        expDKmer2Occ = {"caagatgcgtaacggccata":1,
+                        "aagatgcgtaacggccatac":1,
+                        "agatgcgtaacggccataca":1,
+                        "gatgcgtaacggccatacat":1,
+                        "agatgcgtaacggccataca":2,
+                        "gatgcgtaacggccatacat":2}
+        expPlotData = {4379: 17, 4380: 17, 4381: 16, 4382: 35}
+        
+        obsOccNb, obsDKmer2Occ, obsPlotData, obsPlotData2 = ConvertUtils.convertTallymerFormatIntoMapFormatAndGenerateData(inputFastaFileName, inputTallymerSearchFileName, obsMapFileName)
+        
+        self.assertEquals(expOccNb, obsOccNb)
+        self.assertEquals(expDKmer2Occ, obsDKmer2Occ)
+        self.assertEquals(expPlotData, obsPlotData)
+        self.assertTrue(FileUtils.are2FilesIdentical(expMapFileName, obsMapFileName))
+        
+        os.remove(inputFastaFileName)
+        os.remove(inputTallymerSearchFileName)
+        os.remove(expMapFileName)
+        os.remove(obsMapFileName)
+    
+    def _writeInputTallymerSearch(self, tallymerFormatFileName):
+        with open(tallymerFormatFileName, 'w') as f:
+            f.write("0\t+4378\t17\tcaagatgcgtaacggccata\n")
+            f.write("0\t+4379\t17\taagatgcgtaacggccatac\n")
+            f.write("0\t+4380\t16\tagatgcgtaacggccataca\n")
+            f.write("0\t+4381\t16\tgatgcgtaacggccatacat\n")
+             
+    def _writeInputFasta(self, fastaFileName):
+        with open(fastaFileName, 'w') as f:
+            f.write(">Seq1\n")
+            f.write("caagatgcgtaacggccata\n")
+            f.write("aagatgcgtaacggccatac\n")
+            f.write("agatgcgtaacggccataca\n")
+            f.write("gatgcgtaacggccatacat\n")
+            
+    def _writeExpMap(self, mapFileName):
+        with open(mapFileName, 'w') as f:
+            f.write("caagatgcgtaacggccata_17\tSeq1\t4379\t4399\t17\n")
+            f.write("aagatgcgtaacggccatac_17\tSeq1\t4380\t4400\t17\n")
+            f.write("agatgcgtaacggccataca_16\tSeq1\t4381\t4401\t16\n")
+            f.write("gatgcgtaacggccatacat_16\tSeq1\t4382\t4402\t16\n")
+            
+    def _writeInputTallymerSearchMultiFasta(self, tallymerFormatFileName):
+        with open(tallymerFormatFileName, 'w') as f:
+            f.write("0\t+4378\t17\tcaagatgcgtaacggccata\n")
+            f.write("0\t+4379\t17\taagatgcgtaacggccatac\n")
+            f.write("0\t+4380\t16\tagatgcgtaacggccataca\n")
+            f.write("0\t+4381\t16\tgatgcgtaacggccatacat\n")
+            f.write("1\t+4380\t16\tagatgcgtaacggccataca\n")
+            f.write("1\t+4381\t35\tgatgcgtaacggccatacat\n")
+            
+    def _writeInputMultiFasta(self, fastaFileName):
+        with open(fastaFileName, 'w') as f:
+            f.write(">Seq1\n")
+            f.write("caagatgcgtaacggccata\n")
+            f.write("aagatgcgtaacggccatac\n")
+            f.write("agatgcgtaacggccataca\n")
+            f.write("gatgcgtaacggccatacat\n")
+            f.write(">Seq2\n")
+            f.write("agatgcgtaacggccataca\n")
+            f.write("gatgcgtaacggccatacat\n")
+            
+    def _writeExpMapMultiFasta(self, mapFileName):
+        with open(mapFileName, 'w') as f:
+            f.write("caagatgcgtaacggccata_17\tSeq1\t4379\t4399\t17\n")
+            f.write("aagatgcgtaacggccatac_17\tSeq1\t4380\t4400\t17\n")
+            f.write("agatgcgtaacggccataca_16\tSeq1\t4381\t4401\t16\n")
+            f.write("gatgcgtaacggccatacat_16\tSeq1\t4382\t4402\t16\n")
+            f.write("agatgcgtaacggccataca_16\tSeq2\t4381\t4401\t16\n")
+            f.write("gatgcgtaacggccatacat_35\tSeq2\t4382\t4402\t35\n")
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
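The expected .map rows above fully determine the conversion rule: a search hit '<seqnum> +<pos> <count> <kmer>' becomes '<kmer>_<count> <header> <pos+1> <pos+1+len(kmer)> <count>', with the header looked up from the n-th sequence of the input FASTA. The function below is an illustrative re-implementation, not the ConvertUtils code.

    # Illustrative one-hit converter matching the expected rows in _writeExpMap().
    def tallymer_line_to_map(line, seq_headers):
        seq_num, pos, count, kmer = line.split()
        start = int(pos.lstrip("+-")) + 1
        name = seq_headers[int(seq_num)]          # e.g. {0: "Seq1", 1: "Seq2"}
        return "%s_%s\t%s\t%d\t%d\t%s" % (kmer, count, name, start, start + len(kmer), count)

    # tallymer_line_to_map("0\t+4378\t17\tcaagatgcgtaacggccata", {0: "Seq1"})
    # -> "caagatgcgtaacggccata_17\tSeq1\t4379\t4399\t17"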
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_MafftClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_MafftClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,28 @@
+import unittest
+from commons.launcher.MafftClusterLauncher import MafftClusterLauncher
+
+
+class Test_MafftClusterLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = MafftClusterLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -p: parameters for 'mafft' (default='--auto')"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-p", "dummyPrgParam" )
+        self.assertEqual( "dummyPrgParam", self._i.getProgramParameters() )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_MafftProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_MafftProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,105 @@
+import unittest
+from commons.launcher.MafftProgramLauncher import MafftProgramLauncher
+
+class Test_MafftProgramLauncher( unittest.TestCase ):
+    
+    
+    def setUp( self ):
+        self._i = MafftProgramLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -p: parameters for '%s' (default='--auto')" % ( "mafft" )
+        exp += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-p", "'--auto --maxiterate 500'" )
+        self.assertEqual( "'--auto --maxiterate 500'", self._i.getProgramParameters() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-o", "dummyOutFile.fa_aln" )
+        self.assertEqual( "dummyOutFile.fa_aln", self._i.getOutputFile() )
+        
+        
+    def test_setWrapperCommandLine( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        exp = "MafftProgramLauncher.py"
+        exp += " -i %s" % ( inFile )
+        exp += " -p '%s'" % ( "--auto" )
+        exp += " -o %s.fa_aln" % ( inFile )
+        exp += " -v 0"
+        self._i.setWrapperCommandLine()
+        obs = self._i.getWrapperCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setProgramCommandLine( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        exp = self._i.getProgramName()
+        exp += " %s" % ( "--auto" )
+        exp += " --quiet"
+        exp += " %s.shortH" % ( inFile )
+        exp += " > %s.shortH.fa_aln" % ( inFile )
+        exp += " 2> /dev/null"
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setListFilesToKeep( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        self._i.setListFilesToKeep()
+        lExp = [ "dummyInFile.fa.fa_aln" ]
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        self._i._lFilesToKeep = []
+        
+        outFile = "dummyOutFile.fa_aln"
+        self._i.setOutputFile( outFile )
+        self._i.setListFilesToKeep()
+        lExp = [ outFile ]
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setListFilesToRemove( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        self._i.setListFilesToRemove()
+        lExp = [ "dummyInFile.fa.shortH", \
+                 "dummyInFile.fa.shortH.fa_aln", \
+                 "dummyInFile.fa.shortHlink" ]
+        lObs = self._i.getListFilesToRemove()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setSummary( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        exp = "input file: %s" % ( "dummyInFile.fa" )
+        exp += "\nparameters: %s" % ( "--auto" )
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.fa_aln" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+        
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
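
For orientation, the test above pins down the whole launcher workflow; the sketch below is not part of the changeset and only strings together the methods exercised by the assertions ("consensus.fa" is a hypothetical input file).

# Sketch of driving MafftProgramLauncher the way Test_MafftProgramLauncher expects it to behave.
from commons.launcher.MafftProgramLauncher import MafftProgramLauncher

launcher = MafftProgramLauncher()
launcher.setInputFile("consensus.fa")
launcher.setProgramCommandLine()
# per test_setProgramCommandLine, the command should look like:
#   mafft --auto --quiet consensus.fa.shortH > consensus.fa.shortH.fa_aln 2> /dev/null
print(launcher.getProgramCommandLine())
launcher.setListFilesToKeep()      # by default only consensus.fa.fa_aln is kept
launcher.setListFilesToRemove()    # the intermediate .shortH files are scheduled for removal
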
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_MapClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_MapClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,41 @@
+import unittest
+from commons.launcher.MapClusterLauncher import MapClusterLauncher
+
+
+class Test_MapClusterLauncher( unittest.TestCase ):
+    
+    
+    def setUp( self ):
+        self._i = MapClusterLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: size above which a gap is not penalized anymore (default='%i')" % ( self._i.getGapSize() )
+        exp += "\n     -m: penalty for a mismatch (default='%i')" % ( self._i.getMismatchPenalty() )
+        exp += "\n     -O: penalty for a gap openning (default='%i')" % ( self._i.getGapOpenPenalty() )
+        exp += "\n     -e: penalty for a gap extension (default='%i')" % ( self._i.getGapExtendPenalty() )
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "42" )
+        self.assertEqual( 42, self._i.getGapSize() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-m", "-7" )
+        self.assertEqual( -7, self._i.getMismatchPenalty() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-O", "15" )
+        self.assertEqual( 15, self._i.getGapOpenPenalty() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-e", "5" )
+        self.assertEqual( 5, self._i.getGapExtendPenalty() )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_MapProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_MapProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,115 @@
+import unittest
+from commons.launcher.MapProgramLauncher import MapProgramLauncher
+
+class Test_MapProgramLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = MapProgramLauncher()
+        
+    def tearDown( self ):
+        self._i = None
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: size above which a gap is not penalized anymore (default='%i')" % ( self._i.getGapSize() )
+        exp += "\n     -m: penalty for a mismatch (default='%i', match=10)" % ( self._i.getMismatchPenalty() )
+        exp += "\n     -O: penalty for a gap opening (default='%i')" % (  self._i.getGapOpenPenalty())
+        exp += "\n     -e: penalty for a gap extension (default='%i')" % ( self._i.getGapExtendPenalty() )
+        exp += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "40" )
+        self.assertEqual( 40, self._i.getGapSize() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-m", "-7" )
+        self.assertEqual( -7, self._i.getMismatchPenalty() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-O", "15" )
+        self.assertEqual( 15, self._i.getGapOpenPenalty() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-e", "5" )
+        self.assertEqual( 5, self._i.getGapExtendPenalty() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-o", "dummyOutFile.fa_aln" )
+        self.assertEqual( "dummyOutFile.fa_aln", self._i.getOutputFile() )
+        
+    def test_setWrapperCommandLine( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        exp = "MapProgramLauncher.py"
+        exp += " -i %s" % ( inFile )
+        exp += " -s 50"
+        exp += " -m -8"
+        exp += " -O 16"
+        exp += " -e 4"
+        exp += " -o %s.fa_aln" % ( inFile )
+        exp += " -v 0"
+        self._i.setWrapperCommandLine()
+        obs = self._i.getWrapperCommandLine()
+        self.assertEqual( exp, obs )
+        
+    def test_setProgramCommandLine( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        exp = self._i.getProgramName()
+        exp += " %s.shortH" % ( inFile )
+        exp += " 50"
+        exp += " -8"
+        exp += " 16"
+        exp += " 4"
+        exp += " > %s.shortH.fa_aln" % ( inFile )
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+    def test_setListFilesToKeep( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        self._i.setListFilesToKeep()
+        lExp = [ "dummyInFile.fa.fa_aln" ]
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        self._i._lFilesToKeep = []
+        
+        outFile = "dummyOutFile.fa_aln"
+        self._i.setOutputFile( outFile )
+        self._i.setListFilesToKeep()
+        lExp = [ outFile ]
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+    def test_getListFilesToRemove( self ):
+        inFile = "dummyInFile.fa"
+        self._i.setInputFile( inFile )
+        self._i.setListFilesToRemove()
+        lExp = [ "dummyInFile.fa.shortH", \
+                 "dummyInFile.fa.shortH.fa_aln", \
+                 "dummyInFile.fa.shortHlink" ]
+        lObs = self._i.getListFilesToRemove()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+    def test_setSummary( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setGapSize( 104 )
+        exp = "input file: %s" % ( "dummyInFile.fa" )
+        exp += "\ngap size: %i" % ( self._i.getGapSize() )
+        exp += "\nmismatch penalty: %i" % ( self._i.getMismatchPenalty() )
+        exp += "\ngap openning penalty: %i" % ( self._i.getGapOpenPenalty() )
+        exp += "\ngap extension penalty: %i" % ( self._i.getGapExtendPenalty() )
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.fa_aln" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_NWalignProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_NWalignProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,78 @@
+import unittest
+from commons.launcher.NWalignProgramLauncher import NWalignProgramLauncher
+
+
+class Test_NWalignProgramLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = NWalignProgramLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (format='fasta')"
+        exp += "\n     -p: parameters for 'NWalign'"
+        exp += "\n     -o: name of the output file (format='align', default=inFile+'.align')"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-o", "dummyOutFile.align" )
+        self.assertEqual( "dummyOutFile.align", self._i.getOutputFile() )
+        
+        
+    def test_setWrapperCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "NWalignProgramLauncher.py"
+        exp += " -i %s" % ( "dummyInFile.fa" )
+        exp += " -s %s" % ( "dummySubjectFile.fa" )
+        exp += " -o %s.align" % ( "dummyInFile.fa" )
+        exp += " -v 0"
+        self._i.setWrapperCommandLine()
+        obs = self._i.getWrapperCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setProgramCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        self._i.setProgramParameters( "-m 2" )
+        exp = "NWalign -m 2 -o dummyInFile.fa.afa -v 0 dummySubjectFile.fa dummyInFile.fa"
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setListFilesToKeep( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        lExp = [ "dummyInFile.fa.afa" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setSummary( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "input file: %s" % ( self._i.getInputFile() )
+        exp += "\nsubject file: %s" % ( self._i.getSubjectFile() )
+        exp += "\nparameters: %s" % ( self._i.getProgramParameters() )
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.align" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_RepeatMaskerClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_RepeatMaskerClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,45 @@
+import unittest
+from commons.launcher.RepeatMaskerClusterLauncher import RepeatMaskerClusterLauncher
+
+
+class Test_RepeatMaskerClusterLauncher( unittest.TestCase ):
+    
+    
+    def setUp( self ):
+        self._i = RepeatMaskerClusterLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (repeats, format='fasta')"
+        exp += "\n     -n: nb of processors to use in parallel (default=1)"
+        exp += "\n     -g: calculate the GC content"
+        exp += "\n     -b: skip bacterial insertion element check"
+        exp += "\n     -l: mask low-complexity DNA or simple repeats"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-n", "2" )
+        self.assertEqual( 2, self._i.getNbProcessors() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-g" )
+        self.assertTrue( self._i.getCalculateGCcontent() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-b" )
+        self.assertTrue( self._i.getSkipBacterialIsCheck() )
+
+        self._i.setASpecificAttributeFromCmdLine( "-l" )
+        self.assertFalse( self._i.getMaskSsr() )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_RepeatMaskerProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_RepeatMaskerProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,171 @@
+import unittest
+from commons.launcher.RepeatMaskerProgramLauncher import RepeatMaskerProgramLauncher
+
+
+class Test_RepeatMaskerProgramLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = RepeatMaskerProgramLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (repeats, format='fasta')"
+        exp += "\n     -n: nb of processors to use in parallel (default='%i')" % ( self._i.getNbProcessors() )
+        exp += "\n     -g: calculate the GC content"
+        exp += "\n     -b: skip bacterial insertion element check"
+        exp += "\n     -l: does not mask low-complexity DNA or simple repeats"
+        exp += "\n     -m: only masks low complex/simple repeats (no interspersed repeats)"
+        exp += "\n     -o: name of the output file"
+        exp += "\n         with -s: format='align', default=inFile+'.cat.align')"
+        exp += "\n         with -m: format='path', default=inFile+'.cat.path')"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-n", "2" )
+        self.assertEqual( 2, self._i.getNbProcessors() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-g" )
+        self.assertEqual( True, self._i.getCalculateGCcontent() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-b" )
+        self.assertEqual( True, self._i.getSkipBacterialIsCheck() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-l" )
+        self.assertEqual( False, self._i.getMaskSsr() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-o", "dummyOutFile.align" )
+        self.assertEqual( "dummyOutFile.align", self._i.getOutputFile() )
+        
+        
+    def test_setWrapperCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "RepeatMaskerProgramLauncher.py"
+        exp += " -i %s" % ( "dummyInFile.fa" )
+        exp += " -s %s" % ( "dummySubjectFile.fa" )
+        exp += " -n 1"
+        self._i.setWrapperCommandLine()
+        obs = self._i.getWrapperCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setProgramCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        self._i.setCalculateGCcontent()
+        self._i.setSkipBacterialIsCheck()
+        exp = "RepeatMasker -dir . -pa 1 -gccalc -no_is -nolow -lib dummySubjectFile.fa dummyInFile.fa"
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setProgramCommandLine_withoutSubjects_withMaskSsr( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setCalculateGCcontent()
+        self._i.setSkipBacterialIsCheck()
+        exp = "RepeatMasker -dir . -pa 1 -gccalc -no_is -nolow dummyInFile.fa"
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setListFilesToKeep_onlyInFile( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setOnlySsr()
+        lExp = [ "dummyInFile.fa.cat", "dummyInFile.fa.cat.path" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setListFilesToKeep_withSubjectFile( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySbjFile.fa" )
+        lExp = [ "dummyInFile.fa.cat", "dummyInFile.fa.cat.align" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setListFilesToKeep_inFileAndOutFile( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setOutputFile( "dummyOutFile.align" )
+        lExp = [ "dummyInFile.fa.cat", "dummyOutFile.align" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setListFilesToKeep_inFileAndOutFileAndSsr( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.unsetMaskSsr()
+        self._i.setOutputFile( "dummyOutFile.path" )
+        lExp = [ "dummyInFile.fa.cat", "dummyOutFile.path" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setListFilesToRemove( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        lExp = [ "dummyInFile.fa.out", \
+                 "dummyInFile.fa.log", \
+                 "dummyInFile.fa.stderr", \
+                 "dummyInFile.fa.ori.out", \
+                 "dummyInFile.fa.masked", \
+                 "dummyInFile.fa.tbl", \
+                 "dummyInFile.fa.ref" ]
+        self._i.setListFilesToRemove()
+        lObs = self._i.getListFilesToRemove()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setSummary( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "input file: %s" % ( self._i.getInputFile() )
+        exp += "\nsubject file: %s" % ( self._i.getSubjectFile() )
+        exp += "\nnb processors: %i" % ( self._i.getNbProcessors() )
+        exp += "\nmask low-complexity DNA or simple repeats"
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.cat.path" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setSummary_noMaskSsr( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        self._i.unsetMaskSsr()
+        exp = "input file: %s" % ( self._i.getInputFile() )
+        exp += "\nsubject file: %s" % ( self._i.getSubjectFile() )
+        exp += "\nnb processors: %i" % ( self._i.getNbProcessors() )
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.cat.align" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
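
Taken together, the tests above fix how the launcher assembles the RepeatMasker call. A minimal sketch of the same sequence of calls (not part of the changeset; file names are hypothetical):

from commons.launcher.RepeatMaskerProgramLauncher import RepeatMaskerProgramLauncher

launcher = RepeatMaskerProgramLauncher()
launcher.setInputFile("chunk.fa")
launcher.setSubjectFile("TE_library.fa")   # custom repeat library, passed via '-lib'
launcher.setCalculateGCcontent()           # adds '-gccalc'
launcher.setSkipBacterialIsCheck()         # adds '-no_is'
launcher.setProgramCommandLine()
# per test_setProgramCommandLine, the command should look like:
#   RepeatMasker -dir . -pa 1 -gccalc -no_is -nolow -lib TE_library.fa chunk.fa
print(launcher.getProgramCommandLine())
launcher.setListFilesToKeep()              # keeps chunk.fa.cat and chunk.fa.cat.align
launcher.setListFilesToRemove()            # .out, .log, .masked, .tbl, ... are scheduled for removal
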
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_YassClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_YassClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,30 @@
+import unittest
+from commons.launcher.YassClusterLauncher import YassClusterLauncher
+
+class Test_YassClusterLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = YassClusterLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (format='fasta')"
+        exp += "\n     -p: parameters for 'yass'"
+        exp += "\n     -Z: concatenate output files"
+        exp += "\n     -A: same sequences (all-by-all)"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_YassProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_YassProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,79 @@
+import unittest
+from commons.launcher.YassProgramLauncher import YassProgramLauncher
+
+
+
+class Test_YassProgramLauncher( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = YassProgramLauncher()
+        
+        
+    def tearDown( self ):
+        self._i = None
+        
+        
+    def test_getSpecificHelpAsString( self ):
+        exp = ""
+        exp += "\nspecific options:"
+        exp += "\n     -s: name of the subject file (format='fasta')"
+        exp += "\n     -p: parameters for 'yass'"
+        exp += "\n     -A: same sequences (all-by-all)"
+        exp += "\n     -o: name of the output file (format='align', default=inFile+'.align')"
+        obs = self._i.getSpecificHelpAsString()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setASpecificAttributeFromCmdLine( self ):
+        self._i.setASpecificAttributeFromCmdLine( "-s", "dummySubjectFile.fa" )
+        self.assertEqual( "dummySubjectFile.fa", self._i.getSubjectFile() )
+        
+        self._i.setASpecificAttributeFromCmdLine( "-o", "dummyOutFile.align" )
+        self.assertEqual( "dummyOutFile.align", self._i.getOutputFile() )
+        
+        
+    def test_setWrapperCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "YassProgramLauncher.py"
+        exp += " -i %s" % ( "dummyInFile.fa" )
+        exp += " -s %s" % ( "dummySubjectFile.fa" )
+        exp += " -o %s.align" % ( "dummyInFile.fa" )
+        exp += " -v 0"
+        self._i.setWrapperCommandLine()
+        obs = self._i.getWrapperCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setProgramCommandLine( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "yass -d 2 -o dummyInFile.fa.blast dummyInFile.fa dummySubjectFile.fa"
+        self._i.setProgramCommandLine()
+        obs = self._i.getProgramCommandLine()
+        self.assertEqual( exp, obs )
+        
+        
+    def test_setListFilesToKeep( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        lExp = [ "dummyInFile.fa.align" ]
+        self._i.setListFilesToKeep()
+        lObs = self._i.getListFilesToKeep()
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_setSummary( self ):
+        self._i.setInputFile( "dummyInFile.fa" )
+        self._i.setSubjectFile( "dummySubjectFile.fa" )
+        exp = "input file: %s" % ( self._i.getInputFile() )
+        exp += "\nsubject file: %s" % ( self._i.getSubjectFile() )
+        exp += "\nparameters: %s" % ( self._i.getProgramParameters() )
+        exp += "\noutput file: %s" % ( "dummyInFile.fa.align" )
+        self._i.setSummary()
+        obs = self._i.getSummary()
+        self.assertEqual( exp, obs )
+         
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/launcher/tests/Test_launchTEclass.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/tests/Test_launchTEclass.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,34 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.launcher import launchTEclass
+
+
+class Test_launchTEclass( unittest.TestCase ):
+    
+    def test_parseFastaFileFromTEclass( self ):
+        inFile = "dummyLibFile.fa"
+        inHandler = open( inFile, "w" )
+        inHandler.write( ">1360|TEclass result: DNA|ORFs: 1360..2226:+1\n" )
+        inHandler.write( "AGCATGACGCA\n" )
+        inHandler.write( ">FB|TEclass result: unclear\n" )
+        inHandler.write( "GCATGCAGCGACGCGAC\n" )
+        inHandler.close()
+        
+        expFile = "dummyExpFile.map"
+        expHandler = open( expFile, "w" )
+        expHandler.write( "DNA\t1360\t1\t11\n" )
+        expHandler.write( "unclear\tFB\t1\t17\n" )
+        expHandler.close()
+        
+        obsFile = "dummyObsFile.map"
+        
+        launchTEclass.parseFastaFileFromTEclass( inFile, obsFile )
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )
+        
+        for f in [ inFile, expFile, obsFile ]:
+            os.remove( f )
+
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/AlignList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/AlignList.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,51 @@
+## class handling a list of Align instances
+#
+class AlignList:
+
+    
+    def __init__( self ):
+        self.list = []
+
+    ## append align instance in the align instance list
+    #
+    # @param AlignInstance instance of align object
+    #
+    def append(self, AlignInstance):
+        self.list.append(AlignInstance)
+       
+    ## get length of list of align instance
+    #
+    #@return length integer length of list
+    #
+    def len(self):
+        return len(self.list)   
+    
+    ## get list of align instance
+    #
+    #@return list of align instance
+    #
+    def getList(self):
+        return self.list
+    
+    ## get item in list of align instance according to index
+    #
+    #@param index integer index of list
+    #@return align instance item of list of align instance
+    #
+    def get(self, index):
+        return self.list[index]
+    
+    ## extend align instance in the align instance list
+    #
+    # @param AlignInstance instance of align object
+    #
+    def extend(self, AlignInstance):
+        self.list.extend(AlignInstance)
+        
+    ## remove an align instance from the align instance list
+    #
+    # @param AlignInstance instance of align object
+    #
+    def remove(self, AlignInstance):
+        self.list.remove(AlignInstance)
\ No newline at end of file
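
A minimal usage sketch of this wrapper (not part of the changeset); it only calls the methods defined above and assumes, as AlignListUtils below also does, that commons.core.coord.Align.Align can be constructed without arguments:

from commons.core.coord.Align import Align
from commons.pyRepetUnit.align.AlignList import AlignList

lAligns = AlignList()
lAligns.append(Align())               # add one Align instance
lAligns.extend([Align(), Align()])    # add several at once
print(lAligns.len())                  # 3
first = lAligns.get(0)                # random access by index
lAligns.remove(first)                 # drop an instance
print(lAligns.getList())              # the underlying Python list
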
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/AlignListUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/AlignListUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,53 @@
+import os
+import commons.core.coord.Align
+import commons.pyRepetUnit.align.AlignList
+from commons.core.utils.FileUtils import FileUtils
+
+class AlignListUtils:  
+  
+    ##read a file in align format and return a AlignList
+    #
+    # @param inFileName string name of file
+    # @return listAlignInstance list list of align instance
+    #
+    def read( inFileName ):
+        alignInstance = commons.core.coord.Align.Align() 
+        listAlignInstance = commons.pyRepetUnit.align.AlignList.AlignList()
+        f = open( inFileName , "r")
+        while alignInstance.read( f ):
+            listAlignInstance.append(alignInstance)
+            alignInstance = commons.core.coord.Align.Align()
+        f.close()
+        return (listAlignInstance)
+    
+    read = staticmethod( read )
+    
+    ## write a file in align format from an AlignList
+    #
+    # @param alignListInstance list list of align instance object
+    # @param  inFileName string name of file
+    def write( alignListInstance, inFileName ):
+        f = open( inFileName , "w")
+        for alignInstance in alignListInstance.getList():
+            alignInstance.write( f )
+        f.close()
+        
+    write = staticmethod( write )
+    
+    ## Filter an AlignList by removing all alignments whose score is <= minScore
+    #
+    # @param listAlignInstance list list of align instance object
+    # @param minScore integer minimum score to keep in result
+    # 
+    def filterOnAMinimalScore( listAlignInstance, minScore ):
+        listAlignInstanceOld = commons.pyRepetUnit.align.AlignList.AlignList() 
+        for alignInstance in listAlignInstance.getList():  
+            listAlignInstanceOld.append(alignInstance)
+        for alignInstance in listAlignInstanceOld.getList():
+            if alignInstance.score <= minScore:
+                listAlignInstance.remove(alignInstance)
+        
+    filterOnAMinimalScore = staticmethod( filterOnAMinimalScore )    
+    
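
The three static methods above form a small read / filter / write pipeline; a sketch of the round trip (not part of the changeset, file names hypothetical):

from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils

lAligns = AlignListUtils.read("hits.align")           # parse an .align file into an AlignList
AlignListUtils.filterOnAMinimalScore(lAligns, 20)     # drop alignments with score <= 20, in place
AlignListUtils.write(lAligns, "hits.filtered.align")  # write the remaining alignments back out
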
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/HmmpfamOutput2align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/HmmpfamOutput2align.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,32 @@
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing import HmmpfamOutputProcessing
+
+## data processor: read an output from hmmpfam and transform it into an .align file
+#    
+class HmmpfamOutput2align( object ):
+
+    ## constructor
+    #
+    def __init__(self):
+        self.hmmpfamOutputProcess = HmmpfamOutputProcessing()
+        self._inputFile = "" 
+        self._outputFile =  ""    
+    
+    ## set the input file
+    #
+    # @param input string name of the input file
+    #
+    def setInputFile(self, input):
+        self._inputFile = input
+
+    ## set the output file
+    #
+    # @param output string name of the output file
+    #
+    def setOutputFile(self, output):
+        self._outputFile = output
+    
+    
+    ## read a hmmpfam output file, parse it, and write the corresponding .align file
+    #
+    def run( self ):
+        self.hmmpfamOutputProcess.readHmmOutputsAndWriteAlignFile( self._inputFile, self._outputFile )
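
The class is a thin wrapper around HmmpfamOutputProcessing; a usage sketch (not part of the changeset, file names hypothetical):

from commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align import HmmpfamOutput2align

converter = HmmpfamOutput2align()
converter.setInputFile("consensus_vs_Pfam.hmmpfam")   # raw hmmpfam report
converter.setOutputFile("consensus_vs_Pfam.align")    # .align file to produce
converter.run()   # delegates to HmmpfamOutputProcessing.readHmmOutputsAndWriteAlignFile
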
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/HmmscanOutput2align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/HmmscanOutput2align.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,28 @@
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmscanOutputProcessing import HmmscanOutputProcessing
+## data processor: read an output from hmmscan and transform it into an .align file
+#
+class HmmscanOutput2align( object ):
+    
+    ## constructor
+    #
+    def __init__(self):
+        self.hmmscanOutputProcess = HmmscanOutputProcessing()
+        self._inputFile = "" 
+        self._outputFile =  ""    
+     
+    ## set the input file
+    #
+    # @param input string name of the input file
+    #
+    def setInputFile(self, input):
+        self._inputFile = input
+
+    ## set the output file
+    # @param output string name of the output file
+    #
+    def setOutputFile(self, output):
+        self._outputFile = output
+    
+    ## read a hmmscan output file, parse it, and write the corresponding .align file
+    #    
+    def run( self ):
+        self.hmmscanOutputProcess.readHmmOutputsAndWriteAlignFile( self._inputFile, self._outputFile )
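
HmmscanOutput2align mirrors HmmpfamOutput2align for hmmscan reports; the sketch below (not part of the changeset, file names hypothetical) also chains the result into the AlignListUtils filter introduced earlier:

from commons.pyRepetUnit.align.hmmOutputParsing.HmmscanOutput2align import HmmscanOutput2align
from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils

converter = HmmscanOutput2align()
converter.setInputFile("hmmscanOutput")              # raw hmmscan report
converter.setOutputFile("hmmscanOutput.align")
converter.run()

lAligns = AlignListUtils.read("hmmscanOutput.align")
AlignListUtils.filterOnAMinimalScore(lAligns, 0)     # keep only alignments with score > 0
AlignListUtils.write(lAligns, "hmmscanOutput.filtered.align")
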
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/HmmpfamOutput2AlignTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/HmmpfamOutput2AlignTestSuite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,21 @@
+import unittest
+import sys
+from Test_F_HmmpfamOutput2align import Test_F_HmmpfamOutput2align
+from Test_F_HmmscanOutput2align import Test_F_HmmscanOutput2align
+from Test_F_Hmmerpfam2align import Test_F_Hmmerpfam2align
+from commons.tools.tests.Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script import Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script
+
+def main():
+
+        hmmerTestSuite = unittest.TestSuite() 
+        hmmerTestSuite.addTest(unittest.makeSuite(Test_F_HmmpfamOutput2align,'test'))
+        hmmerTestSuite.addTest(unittest.makeSuite(Test_F_HmmscanOutput2align,'test'))
+        hmmerTestSuite.addTest(unittest.makeSuite(Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script,'test'))
+        hmmerTestSuite.addTest(unittest.makeSuite(Test_F_Hmmerpfam2align,'test'))
+        
+        runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+        runner.run(hmmerTestSuite)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_Hmmerpfam2align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_Hmmerpfam2align.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,20 @@
+import os
+import unittest
+import commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_Hmmerpfam2align(unittest.TestCase):
+    
+    def setUp(self):
+        self.hmmpfamOutputProcess = commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing.HmmpfamOutputProcessing()
+        self._inputFile = "./datas/test_hmmpfam_output" 
+        self._outputFile =  "./datas/PostProcessResult.align" 
+        
+    def testAcceptanceHmmpfam2align(self):
+        self.hmmpfamOutputProcess.readHmmOutputsAndWriteAlignFile( self._inputFile, self._outputFile )
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._inputFile + ".align", self._outputFile))
+        os.remove(self._outputFile)
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmpfamOutput2align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmpfamOutput2align.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,22 @@
+import os
+import unittest
+from commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align import HmmpfamOutput2align
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_HmmpfamOutput2align(unittest.TestCase):
+    
+    def setUp(self):
+        self.hmmpfamOutput2align = HmmpfamOutput2align()
+        self._inputFile = "./datas/test_hmmpfam_output"
+        self._outputFile = "./datas/hmmpfam_output.align"
+        
+    def testParseHmmpfamOutput2align(self):
+        self.hmmpfamOutput2align.setInputFile(self._inputFile)
+        self.hmmpfamOutput2align.setOutputFile(self._outputFile)
+        self.hmmpfamOutput2align.run()
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._inputFile + ".align", self._outputFile))
+        os.remove( self._outputFile )
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmscanOutput2align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmscanOutput2align.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,27 @@
+import os
+import unittest
+from commons.pyRepetUnit.align.hmmOutputParsing.HmmscanOutput2align import HmmscanOutput2align
+from commons.core.utils.FileUtils import FileUtils 
+
+CURRENT_DIR = os.getcwd()
+
+class Test_F_HmmscanOutput2align(unittest.TestCase):
+
+    def setUp(self):
+        self.hmmscanOutput2align = HmmscanOutput2align()
+        self._inputFile = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput"
+        self._outputFile = "./datas/test_hmmscanOutput.align"
+        os.chdir(CURRENT_DIR)
+
+    def testParseHmmscanOutput2align(self):
+        self.hmmscanOutput2align.setInputFile(self._inputFile)
+        self.hmmscanOutput2align.setOutputFile(self._outputFile)
+        self.hmmscanOutput2align.run()
+        expectedFile = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput.align"
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertTrue(FileUtils.are2FilesIdentical(expectedFile, self._outputFile))
+        os.remove( self._outputFile )
+        
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/ConsensusTestFile_nt.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/ConsensusTestFile_nt.fsa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18 @@
+>blumeria_Grouper_590_20:NoCat
+TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA
+AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT
+ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA
+ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA
+CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC
+TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC
+TCCATAATTTCAACACTNAAGAATATTTGTA
+>blumeria_Grouper_4152_12:NoCat
+GGACCGGCCGCCACGAATTGCGCGATTGCTGCTCGCAAGTAGACTTTGATGGAGTCTACA
+AAATTTTTGTCTTCACCGGTGGAGAGCGGTTGAAGAGCTGCCTCGACACTGCTAATAGCC
+GTCGAGCATATTGTGAATTGCGCGGCTTTTTGTCTTGCCCTGCGCTCCTCCGCTTCGATC
+GCTGCAAGCAATTCTGGAGGGTGTGTATTTTTTCTGCCTGCTGCTTCTAGTGCTGGAGGC
+TGCGGGGCCAGTGGAGGGTTTTCGGCTCCTGCTGCCTTAGTGGATGGTGTTTCAGCCCTT
+TTCGCGGGCCTCACTTCTGCAGGTCGCGGTAGTGCTGGAACCGTGATGCGCTTCTCGGGT
+GCGACGACGGTTTTTCTGGGGGATCCGGTGGGATCCAAGACTTGCTCTGCGTCTTCTGGG
+CTGGAGGATGCCCAAACTGAATCGGCGAGGGTTTTTAGCTTCTCGACTTCTGCGTCCACC
+ATATCTACCTCGGGGACATTG
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/OutputHmmpfamTest
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/OutputHmmpfamTest Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,406 @@
+[406-line test data file: an hmmpfam (HMMER 2.3.2) report for the blumeria_Grouper_590_20 and blumeria_Grouper_4152_12 consensus sequences searched against a Pfam HMM library; the changeset viewer renders it only as a truncated bytes literal, so the content is not reproduced here]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/PostPostProcessTestFiltered.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/PostPostProcessTestFiltered.align Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,25 @@
+blumeria_Grouper_590_20:NoCat 271 324 DUF234 5 22 1.5 3 0.000000
+blumeria_Grouper_590_20:NoCat 331 357 DUF1414 1 9 6.3 2 0.000000
+blumeria_Grouper_590_20:NoCat 90 113 CPW_WPC 1 9 7.7 1 0.000000
+blumeria_Grouper_590_20:NoCat 119 148 DUF46 182 173 0.11 6 0.000000
+blumeria_Grouper_4152_12:NoCat 97 201 TrbL 231 285 8 0 0.000000
+blumeria_Grouper_4152_12:NoCat 172 240 TNV_CP 167 189 7.9 0 0.000000
+blumeria_Grouper_4152_12:NoCat 280 327 DGOK 283 298 1.3 0 0.000000
+blumeria_Grouper_4152_12:NoCat 337 381 Peptidase_S29 1 15 1.1 4 0.000000
+blumeria_Grouper_4152_12:NoCat 442 471 DUF1301 1 10 9.9 0 0.000000
+blumeria_Grouper_4152_12:NoCat 62 79 Toxin_18 50 55 4.4 4 0.000000
+blumeria_Grouper_4152_12:NoCat 377 412 ABC_transp_aux 276 287 5.2 1 0.000000
+blumeria_Grouper_4152_12:NoCat 428 478 DUF1602 23 39 0.39 5 0.000000
+blumeria_Grouper_4152_12:NoCat 306 368 zf-P11 1 20 1.8 3 0.000000
+blumeria_Grouper_4152_12:NoCat 378 407 V-ATPase_G 1 10 5.3 2 0.000000
+blumeria_Grouper_4152_12:NoCat 136 228 XhoI 202 172 0.014 9 0.000000
+blumeria_Grouper_4152_12:NoCat 145 165 Endomucin 267 261 6 0 0.000000
+blumeria_Grouper_4152_12:NoCat 444 497 DUF1798 52 35 3.3 3 0.000000
+blumeria_Grouper_4152_12:NoCat 294 365 DUF881 237 214 1.6 4 0.000000
+blumeria_Grouper_4152_12:NoCat 177 236 SLT 20 1 1.2 4 0.000000
+blumeria_Grouper_4152_12:NoCat 156 224 DUF2346 85 63 2.8 3 0.000000
+blumeria_Grouper_4152_12:NoCat 84 131 LBP_BPI_CETP 209 191 3.8 1 0.000000
+blumeria_Grouper_4152_12:NoCat 314 352 DUF258 305 293 3.8 1 0.000000
+blumeria_Grouper_4152_12:NoCat 230 247 SOCS_box 6 1 9.7 2 0.000000
+blumeria_Grouper_4152_12:NoCat 155 217 DUF1289 56 36 5.3 2 0.000000
+blumeria_Grouper_4152_12:NoCat 152 184 TRAP_alpha 327 317 4.1 0 0.000000
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmscanTransformedExpected.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmscanTransformedExpected.align Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,7 @@
+Polinton-1_DY:classII:Polinton 2974 3642 DNA_pol_B_2 13 252 3.7e-06 24 0.000000
+Polinton-1_DY:classII:Polinton 7897 7944 ASFV_p27 59 74 6 4 0.000000
+Polinton-1_DY:classII:Polinton 9022 9123 Ribosomal_S4 39 72 18 4 0.000000
+Polinton-1_DY:classII:Polinton 11896 11955 Homo_sperm_syn 49 68 8.8 2 0.000000
+Polinton-1_DY:classII:Polinton 5480 5932 rve 168 4 6.7e-30 103 0.000000
+Polinton-1_DY:classII:Polinton 5099 5191 Chromo 51 4 8.5e-06 24 0.000000
+Polinton-1_DY:classII:Polinton 8408 8434 GARS_A 204 196 11 2 0.000000
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/repetHmmscan.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/repetHmmscan.fa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,314 @@
+[314-line FASTA test data file with repeat consensus sequences (SAR:satellite:satellite, BARI_DM:classII:TIR, MOLLY_SN:classII:TIR, Polinton-1_DY:classII:Polinton); the changeset viewer renders it only as a truncated bytes literal, so the content is not reproduced here]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,321 @@\n+hmmpfam - search one or more sequences against HMM database\n+HMMER 2.3.2 (Oct 2003)\n+Copyright (C) 1992-2003 HHMI/Washington University School of Medicine\n+Freely distributed under the GNU General Public License (GPL)\n+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n+HMM file:                 myhmms\n+Sequence file:            test.fa\n+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n+\n+Query sequence: pkinase_full\n+Accession:      [none]\n+Description:    Dario rerio\n+\n+Scores for sequence family classification (score includes all domains):\n+Model    Description                                    Score    E-value  N \n+-------- -----------                                    -----    ------- ---\n+pkinase  Protein kinase domain                          266.6    3.3e-80   1\n+pkinase  Protein kinase domain                          265.4    7.6e-80   1\n+rrm                                                     -42.5        2.5   1\n+rrm                                                      -8.7        3.7   1\n+fn3      Fibronectin type III domain                    -45.8        3.7   1\n+fn3      Fibronectin type III domain                     -9.0        3.8   1\n+\n+Parsed for domains:\n+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value\n+-------- ------- ----- -----    ----- -----      -----  -------\n+rrm        1/1     169   180 ..    66    77 .]    -8.7      3.7\n+fn3        1/1     197   200 ..    81    84 .]    -9.0      3.8\n+rrm        1/1     256   319 ..     1    77 []   -42.5      2.5\n+pkinase    1/1     258   540 ..     1   294 []   266.6  3.3e-80\n+pkinase    1/1     258   537 ..     1   289 [.   265.4  7.6e-80\n+fn3        1/1     434   506 ..     1    84 []   -45.8      3.7\n+\n+Alignments of top-scoring domains:\n+rrm: domain 1 of 1, from 169 to 180: score -8.7, E = 3.7\n+                   *->nGkelggrklrv<-*\n+                      n++    ++l v   \n+  pkinase_fu   169    NKQDPHQHRLSV    180  \n+\n+fn3: domain 1 of 1, from 197 to 200: score -9.0, E = 3.8\n+                CS    CCCC   \n+                   *->GpeS<-*\n+                      G eS   \n+  pkinase_fu   197    GGES    200  \n+\n+rrm: domain 1 of 1, from 256 to 319: score -42.5, E = 2.5\n+                   *->lfVgNL..ppdvteedLkdlFsk.fGpivsikivkDhkektketgks\n+                         +N++  +d+ e    + F+ +        +++           +\n+  pkinase_fu   256    ---NNIeyVRDIGEGAFGRVFQArAPGL----LPTE----------P 285  \n+\n+                   kGfaFVeFeseedAekAlealnG.kelggrklrv<-*\n+                     +  V++ +ee+ ++  ++++ +  l++   +    \n+  pkinase_fu   286 FTMVAVKMLKEEASTDMQNDFQReAALMSEFDHP    319  \n+\n+pkinase: domain 1 of 1, from 258 to 540: score 266.6, E = 3.3e-80\n+                   *->yelleklGeGsfGkVykakhkd....ktgkiVAvKilkkekesikek\n+                      +e ++ +GeG+fG+V++a+     +++    VAvK+lk e+ s++  \n+  pkinase_fu   258    IEYVRDIGEGAFGRVFQARAPGllptEPFTMVAVKMLK-EEASTDMQ 303  \n+\n+                   .rflrEiqilkrLsHpNIvrligvfedtddhlylvmEymegGdLfdylrr\n+                   ++f+rE++++ +++HpNIvrl+gv+   +++++l +Eym  GdL+++lrr\n+  pkinase_fu   304 nDFQREAALMSEFDHPNIVRLLGVCA-VGKPMCLMFEYMAYGDLNEFLRR 352  \n+\n+                   ng........................gplsekeakkialQilrGleYLHs\n+                   +  +++++ ++++ ++++  +++++  pls +e ++i +Q++ G++YL +\n+  pkinase_fu   353 RCatqqpslsrdtltssslvseperyPPLSCQEQLSISKQVAAGMAYLSE 402  \n+\n+                   ngivHRDLKpeNILldendgtvKiaDFGLArlle..sssklttfvGTpwY\n+                   +++vHRDL+++N+L+ en 
 +vKiaDFGL+r ++  +++k++ +   p +\n+  pkinase_fu   403 RKFVHRDLATRNCLVAEN-LVVKIADFGLSRNIYaaDYYKASENDAIPIR 451  \n+\n+                   mmAPEvilegrgysskvDvWSlGviLyElltggplfpgadlpaftggdev\n+                   +m+PE  ++++ y+s++DvW++Gv+L+E++++g                 \n+  pkinase_fu   452 WMPPES-IFYNRYTSESDVWAYGVVLWEIFSYG----------------- 483  \n+\n+                   dqliifvlklPfsdelpktridpleelfriikrpglrlplpsncSeelkd\n+                           ++P+++       +  ee+ + +++ g  l +p+nc+ el+ \n+  pkinase_fu   484 --------MQPYYG-------'..b'III domain                    -39.7        1.2   1\n+pkinase  Protein kinase domain                           -9.2        1.8   1\n+fn3      Fibronectin type III domain                     -7.5        1.9   1\n+\n+Parsed for domains:\n+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value\n+-------- ------- ----- -----    ----- -----      -----  -------\n+fn3        1/1     143   224 ..     1    84 []   -39.7      1.2\n+pkinase    1/1     233   241 ..     1     9 [.    -9.2      1.8\n+rrm        1/1     440   456 ..     1    17 [.    -6.1     0.92\n+rrm        1/1     669   731 ..     1    77 []   -32.5     0.26\n+pkinase    1/1     906  1127 ..     1   294 []  -171.6     0.19\n+fn3        1/1     923   934 ..     1    12 [.    -7.5      1.9\n+\n+Alignments of top-scoring domains:\n+fn3: domain 1 of 1, from 143 to 224: score -39.7, E = 1.2\n+                CS    C CCCCEEEEEECCTTCCEEEEECCCCCCCCCCEEEEE.ECCCCCCC\n+                   *->P.saPtnltvtdvtstsltlsWspptgngpitgYevtyRqpknggew\n+                       + ++t+l++ dv    ++ sWs+ +g+ + tg  v  R + ng  +\n+      S13421   143    IdLEVTGLSCVDVAN--IQESWSKVSGDLKTTGSVVFQR-MINGH-P 185  \n+\n+                CS CCCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   \n+                   neltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*\n+                     +++ ++ + + l  L +    +  V  v ++ +G      \n+      S13421   186 EYQQLFRQFRDVDLDKLGESNSFVAHVFRVVAAFdGIIH    224  \n+\n+pkinase: domain 1 of 1, from 233 to 241: score -9.2, E = 1.8\n+                   *->yelleklGe<-*\n+                        +l+klGe   \n+      S13421   233    VSTLKKLGE    241  \n+\n+rrm: domain 1 of 1, from 440 to 456: score -6.1, E = 0.92\n+                   *->lfVgNLppdvteedLkd<-*\n+                      + ++ L++   + d+ d   \n+      S13421   440    VIISFLNEGLRQADIVD    456  \n+\n+rrm: domain 1 of 1, from 669 to 731: score -32.5, E = 0.26\n+                   *->lfVgNL.ppdvteedLkdlFskfGpivsikivkDhkektketgkskG\n+                        V  L+++d+    +   +  f  i   k++ D            +\n+      S13421   669    --VEELpSTDAFHYHISLVMNRFSSI--GKVIDD------------N 699  \n+\n+                   faFVeFeseedAekAlealnGkelggr.klrv<-*\n+                   + FV   ++ + e+ ++ l  k+++   +l++   \n+      S13421   700 VSFVYLLKKLGREHIKRGLSRKQFDQFvELYI    731  \n+\n+pkinase: domain 1 of 1, from 906 to 1127: score -171.6, E = 0.19\n+                   *->yelleklGeGsfGkVykakhkd........ktgkiVAvKilkkekes\n+                      ++l         G++ ++ +  ++ ++ ++ + ++VAv +     ++\n+      S13421   906    FKLMH-------GVIEEGLLQLerinpitgLSAREVAVVKQ---TWN 942  \n+\n+                   ikekrflrEiqilkrLsHpNIvrligvfedtddhlylvmEymegGdLfdy\n+                   + +            L+      ++  fe   + +  v++       +d \n+      S13421   943 LVK----------PDLMGVGMRIFKSLFE-AFPAYQAVFPKFSD-VPLDK 980  \n+\n+                   lrrng....gplsekeakkialQilrGleYLHsngivHRDLKpeNILlde\n+                   l +    +++ +s    ++   Q+                       lde\n+      S13421   981 LEDTPavgkHSISVTTKLDELIQT-----------------------LDE 1007 \n+\n+     
              ndgtvKiaDFGLArlle........sssklttfvGTpwYmmAPEvi...l\n+                   +  ++ +    LAr+l +++   + + +++++f+  +++      ++++l\n+      S13421  1008 P-ANLAL----LARQLGedhivlrvNKPMFKSFGKVLVRL-----LendL 1047 \n+\n+                   egrgysskvDvW..SlGviLyElltggplfpgadlpaftggdevdqliif\n+                    +r  s ++  W++++ vi+ ++  g                        \n+      S13421  1048 GQRFSSFASRSWhkAYDVIVEYIEEG------------------------ 1073 \n+\n+                   vlklPfsdelpktridpleelfriikrpglrlplpsncSeelkdLlkkcL\n+                    l++ +++       dp   + +  k   l  + ++++ ++l+ L +k++\n+      S13421  1074 -LQQSYKQ-------DPVTGITDAEKA--LVQESWDLLKPDLLGLGRKIF 1113 \n+\n+                   nkDPskRpGsatakeilnhpwf<-*\n+                   +k  +k p      +i     f   \n+      S13421  1114 TKVFTKHP----DYQI----LF    1127 \n+\n+fn3: domain 1 of 1, from 923 to 934: score -7.5, E = 1.9\n+                CS    CCCCCEEEEEEC   \n+                   *->PsaPtnltvtdv<-*\n+                        + t+l++++v   \n+      S13421   923    INPITGLSAREV    934  \n+\n+//\n+\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output.align Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,18 @@
+pkinase_full 169 180 rrm 66 77 3.7 -8.7 0
+pkinase_full 197 200 fn3 81 84 3.8 -9.0 0
+pkinase_full 256 319 rrm 1 77 2.5 -42.5 0
+pkinase_full 258 540 pkinase 1 294 3.3e-80 266.6 0
+pkinase_full 258 537 pkinase 1 289 7.6e-80 265.4 0
+pkinase_full 434 506 fn3 1 84 3.7 -45.8 0
+pkinase_part 169 180 rrm 66 77 3.7 -8.7 0
+pkinase_part 197 200 fn3 81 84 3.8 -9.0 0
+pkinase_part 233 299 fn3 1 84 5.9 -52.2 0
+pkinase_part 256 319 rrm 1 77 2.5 -42.5 0
+pkinase_part 258 400 pkinase 1 116 1.7e-30 101.5 0
+pkinase_part 258 400 pkinase 1 294 1.5e-05 -94.7 0
+S13421 143 224 fn3 1 84 1.2 -39.7 0
+S13421 233 241 pkinase 1 9 1.8 -9.2 0
+S13421 440 456 rrm 1 17 0.92 -6.1 0
+S13421 669 731 rrm 1 77 0.26 -32.5 0
+S13421 906 1127 pkinase 1 294 0.19 -171.6 0
+S13421 923 934 fn3 1 12 1.9 -7.5 0
b
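Each row of the expected .align fixture above corresponds to one line of the hmmpfam "Parsed for domains" table, flattened into nine whitespace-separated columns: query name, query start, query end, profile (subject) name, profile start, profile end, E-value, score and identity. The column meanings are inferred from matching these rows against the hmmpfam report above and from the Test_AlignListUtils assertions further below; the sketch here is purely illustrative (AlignListUtils.read is the parser actually used by the code):

    # Illustrative parse of one fixture row; column layout inferred from the
    # hmmpfam report above and the Test_AlignListUtils assertions below.
    def parse_align_row(line):
        f = line.split()
        return {"query_name": f[0], "query_start": int(f[1]), "query_end": int(f[2]),
                "subject_name": f[3], "subject_start": int(f[4]), "subject_end": int(f[5]),
                "e_value": float(f[6]), "score": float(f[7]), "identity": float(f[8])}

    row = parse_align_row("pkinase_full 169 180 rrm 66 77 3.7 -8.7 0")
    assert row["e_value"] == 3.7 and row["score"] == -8.7   # matches the rrm domain hit above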
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/tests/Test_AlignListUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/tests/Test_AlignListUtils.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,100 @@
+import os
+import unittest
+from commons.pyRepetUnit.align.AlignList import AlignList
+from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.coord.Range import Range
+from commons.core.coord.Align import Align
+
+class Test_AlignListUtils (unittest.TestCase):
+    
+    def setUp(self):
+        self.inputFileName = "dummyAlignFile.align"
+        self.outputFileName = "dummyAlignFile.out"
+        
+    def testRead(self):
+        f = open(self.inputFileName, "w")
+        f.write("blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t2.6e-60\t0\t3.2\n")
+        f.write("blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t0\t2.9\n")
+        f.close() 
+        alignRead = AlignListUtils()      
+        tableauAlignInstance = alignRead.read(self.inputFileName)            
+        self.assertEquals(tableauAlignInstance.get(0).range_query.seqname, "blumeria_Grouper_590_20:NoCat_1")
+        self.assertEquals(tableauAlignInstance.get(0).range_query.start, 91)
+        self.assertEquals(tableauAlignInstance.get(0).range_query.end, 108)  
+        self.assertEquals(tableauAlignInstance.get(0).range_subject.seqname, "DUF234")
+        self.assertEquals(tableauAlignInstance.get(0).range_subject.start, 5)
+        self.assertEquals(tableauAlignInstance.get(0).range_subject.end, 22)
+        self.assertEquals(tableauAlignInstance.get(0).e_value, 2.6e-60) 
+        self.assertEquals(tableauAlignInstance.get(0).score, 0)
+        self.assertEquals(tableauAlignInstance.get(0).identity, 3.2)
+        self.assertEquals(tableauAlignInstance.get(1).range_query.seqname, "blumeria_Grouper_590_20:NoCat_1")
+        self.assertEquals(tableauAlignInstance.get(1).range_query.start, 111)
+        self.assertEquals(tableauAlignInstance.get(1).range_query.end, 119)  
+        self.assertEquals(tableauAlignInstance.get(1).range_subject.seqname, "DUF1414")
+        self.assertEquals(tableauAlignInstance.get(1).range_subject.start, 1)
+        self.assertEquals(tableauAlignInstance.get(1).range_subject.end, 9)
+        self.assertEquals(tableauAlignInstance.get(1).e_value, 6.3)
+        self.assertEquals(tableauAlignInstance.get(1).score, 0)
+        self.assertEquals(tableauAlignInstance.get(1).identity, 2.9)
+        os.system("rm " + self.inputFileName)
+        
+    def testWrite(self):
+        f = open("expectedAlignFile.align", "w")
+        f.write("biduleChouetteQuery\t20\t55\tbiduleChouetteSubject\t2\t15\t0.005\t20\t0.000000\n")
+        f.write("trucBiduleQuery\t110\t155\ttrucBiduleSubject\t1\t35\t2\t1\t0.000000\n")
+        f.write("dummyQuery\t1\t65\tdummySubject\t1\t25\t2.6e-60\t50\t0.000000\n")
+        f.close()        
+        listAlignInstance = AlignList()
+        align1 = Align()
+        align1.range_query = Range("biduleChouetteQuery", 20, 55)
+        align1.range_subject = Range("biduleChouetteSubject", 2, 15)
+        align1.e_value = 0.005
+        align1.score = 20
+        listAlignInstance.append(align1)
+        align2 = Align()
+        align2.range_query = Range("trucBiduleQuery", 110, 155)
+        align2.range_subject = Range("trucBiduleSubject", 1, 35)
+        align2.e_value = 2
+        align2.score = 1.5
+        listAlignInstance.append(align2)
+        align3 = Align()
+        align3.range_query = Range("dummyQuery", 1, 65)
+        align3.range_subject = Range("dummySubject", 1, 25)
+        align3.e_value = 2.6e-60
+        align3.score = 50
+        listAlignInstance.append(align3)
+        f = open(self.outputFileName, "w")
+        alignUtils = AlignListUtils()
+        alignUtils.write(listAlignInstance, self.outputFileName)
+        self.assertTrue(FileUtils.are2FilesIdentical(self.outputFileName, "expectedAlignFile.align"))
+        os.system("rm " + self.outputFileName)        
+        os.system("rm expectedAlignFile.align")
+        
+    def testFilterOnAMinimalScore(self):
+        listAlignInstance = AlignList()
+        align1 = Align()
+        align1.range_query = Range("biduleChouetteQuery", 20, 55)
+        align1.range_subject = Range("biduleChouetteSubject", 2, 15)
+        align1.e_value = 5
+        align1.score = -0.5
+        listAlignInstance.append(align1)
+        align2 = Align()
+        align2.range_query = Range("trucBiduleQuery", 110, 155)
+        align2.range_subject = Range("trucBiduleSubject", 1, 35)
+        align2.e_value = 2
+        align2.score = -2.0
+        listAlignInstance.append(align2)
+        align3 = Align()
+        align3.range_query = Range("dummyQuery", 1, 65)
+        align3.range_subject = Range("dummySubject", 1, 25)
+        align3.e_value = 2.6e-60
+        align3.score = 50
+        listAlignInstance.append(align3)
+        expectedAlignInstance = align3
+        alignUtils = AlignListUtils()
+        alignUtils.filterOnAMinimalScore(listAlignInstance, 0)
+        self.assertEquals(listAlignInstance.get(0), expectedAlignInstance)
+        
+if __name__ == "__main__" :
+    unittest.main() 
\ No newline at end of file
b
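The test above fixes the AlignListUtils contract: read() loads a nine-column .align file into an AlignList of Align objects exposing range_query, range_subject, e_value, score and identity; write() serialises such a list back to disk; and filterOnAMinimalScore() prunes low-scoring alignments in place (only the score-50 entry survives a threshold of 0 in the test). A usage sketch, assuming the repository root is on PYTHONPATH so the commons packages resolve:

    # Usage sketch mirroring Test_AlignListUtils; assumes the repository root is on PYTHONPATH.
    from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils

    with open("dummyAlignFile.align", "w") as f:
        f.write("blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t2.6e-60\t0\t3.2\n")

    utils = AlignListUtils()
    alignments = utils.read("dummyAlignFile.align")       # AlignList of Align objects
    hit = alignments.get(0)
    assert hit.range_query.seqname == "blumeria_Grouper_590_20:NoCat_1"
    assert (hit.range_subject.start, hit.range_subject.end) == (5, 22)
    assert hit.e_value == 2.6e-60 and hit.identity == 3.2

    utils.write(alignments, "dummyAlignFile.out")         # back to the nine-column format
    utils.filterOnAMinimalScore(alignments, 0)            # drop low-scoring hits, in place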
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/tests/hmmpfamOutputParsingTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/tests/hmmpfamOutputParsingTestSuite.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,21 @@
+'''
+Created on 29 mai 2009
+
+@author: oinizan
+'''
+import unittest
+import sys
+from commons.pyRepetUnit.align.tests.Test_AlignListUtils import Test_AlignListUtils
+
+
+def main():
+
+        hmmpfamOutputParsingTestSuite = unittest.TestSuite() 
+        hmmpfamOutputParsingTestSuite.addTest(unittest.makeSuite(Test_AlignListUtils,'test'))
+        
+        runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+        runner.run(hmmpfamOutputParsingTestSuite)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,133 @@
+import os
+import sys
+from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
+from commons.core.seq.BioseqUtils import BioseqUtils
+from commons.core.checker.RepetException import RepetException
+
+### Transform amino acid query coord in an align format to nucleotide coord 
+### according to the frame specified at the end of seqName
+#
+class TransformAACoordIntoNtCoordInAlignFormat( object ):
+    
+    def __init__(self):
+        self._inFileName = None
+        self._clean = False
+        self._outFileName = None
+        self._consensusFileName = None
+        self._IsFiltered = True
+
+    ## read input file, transform it and write the output file
+    # 
+    def run(self):   
+        alignUtils = AlignListUtils()         
+        listAlignInstance = alignUtils.read(self._inFileName)
+        self.transformQueryCoord(listAlignInstance)
+        #self.getOriginalQueryNameForAlignList(listAlignInstance)
+        if self._IsFiltered:
+            alignUtils.filterOnAMinimalScore(listAlignInstance, 0)
+        alignUtils.write(listAlignInstance, self._outFileName)
+        if self._clean:
+            self.clean()
+    
+    ## Transform the amino acid query coord into nucleotides and switch subject coord if the strand is reversed
+    # @param listAlignInstance list of align object instance
+    #
+    def transformQueryCoord(self, listAlignInstance):
+        bioseqList = BioseqUtils.extractBioseqListFromFastaFile( self._consensusFileName )
+        for alignInstance in listAlignInstance.getList():
+            try:
+                frame = self.extractFrameFromSeqName(alignInstance)
+            except RepetException, e:
+                raise e
+            previousEnd = alignInstance.range_query.end                            
+            previousStart = alignInstance.range_query.start
+            alignInstance.range_query.seqname = self._getOriginalQueryNameForAlignInstance(alignInstance)
+            if frame < 4:
+                self._changeStartInAAIntoNtInPositiveFrame(alignInstance, frame, previousStart) 
+                self._changeEndInAAIntoNtInPositiveFrame(alignInstance, frame, previousEnd)                
+            else:
+                self._checkIfSeqNameIsInDNASeqFile(bioseqList, alignInstance.range_query.seqname)
+                consensusLength = BioseqUtils.getSeqLengthWithSeqName(bioseqList, alignInstance.range_query.seqname)              
+                self._changeStartInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousEnd)
+                self._changeEndInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousStart)
+                self._invertedSubjectCoord(alignInstance)
+    
+    ## remove the input file
+    #
+    def clean(self):
+        os.remove(self._inFileName)
+        
+    ## set input file name
+    #
+    # @param fileName string name of file
+    #
+    def setInFileName(self, fileName):  
+        self._inFileName = fileName  
+    
+    ## set output file name
+    #
+    # @param fileName string name of file
+    #
+    def setOutFileName(self, fileName):  
+        self._outFileName = fileName    
+        
+    ## set consensus file name
+    #
+    # @param fileName string name of file
+    #
+    def setConsensusFileName(self, fileName):  
+        self._consensusFileName = fileName     
+    
+    ## set is clean will be done
+    #
+    # @param clean boolean clean
+    #
+    def setIsClean(self, clean):
+        self._clean = clean
+        
+    ## get input file name
+    #
+    def getInFileName(self):
+        return self._inFileName
+    
+    ## set is negativ score filter will be done
+    #
+    # @param isFiltered boolean isFiltered
+    #
+    def setIsFiltered(self, isFiltered):
+        self._IsFiltered = isFiltered
+
+    def _getOriginalQueryNameForAlignInstance(self, alignInstance):
+        return alignInstance.range_query.seqname[0:len(alignInstance.range_query.seqname) - 2]
+
+    def _invertedSubjectCoord(self, alignInstance):
+        return alignInstance.range_subject.reverse()
+
+    def _changeEndInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousEnd):
+        alignInstance.range_query.end = 3 * previousEnd + frame - 1
+
+    def _changeStartInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousStart):
+        alignInstance.range_query.start = 3 * (previousStart - 1) + frame
+    
+    def _changeEndInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousStart):
+        alignInstance.range_query.end = consensusLength - 3 * (previousStart - 1) - frame + 4
+
+    def _changeStartInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousEnd):
+        alignInstance.range_query.start = consensusLength - 3 * (previousEnd - 1) - frame + 2
+
+    def extractFrameFromSeqName(self, alignInstance):
+        try:
+            frame = int(alignInstance.range_query.seqname[len(alignInstance.range_query.seqname) - 1])
+        except ValueError:
+            raise RepetException("Unable to extract frame from sequence name")
+        return frame
+
+    def _checkIfSeqNameIsInDNASeqFile(self, bioseqList, seqName):
+        isSeqNameInBioseqList = False
+        for bioseq in bioseqList:
+            if seqName == bioseq.header:
+                isSeqNameInBioseqList = True
+        if not isSeqNameInBioseqList:
+            sys.stderr.write("seqName : " + seqName + " is not in the consensus file " + self._consensusFileName + "\n")
+            sys.exit(1)
+    
\ No newline at end of file
b
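The private _change*Frame helpers above carry the whole amino-acid-to-nucleotide mapping: frames 1-3 are read on the forward strand, frames 4-6 on the reverse strand of a consensus of length L, and reverse-frame hits additionally get their subject coordinates swapped via range_subject.reverse(). A self-contained restatement of that arithmetic (illustration only; the class applies it through transformQueryCoord):

    # Same arithmetic as _changeStart/EndInAAIntoNtInPositive/NegativeFrame above.
    def aa_to_nt_forward(aa_start, aa_end, frame):
        """Frames 1-3: protein coordinates to forward-strand nucleotide coordinates."""
        return 3 * (aa_start - 1) + frame, 3 * aa_end + frame - 1

    def aa_to_nt_reverse(aa_start, aa_end, frame, consensus_length):
        """Frames 4-6: protein coordinates back to forward-strand coordinates
        on a consensus of length consensus_length (start and end swap roles)."""
        nt_start = consensus_length - 3 * (aa_end - 1) - frame + 2
        nt_end = consensus_length - 3 * (aa_start - 1) - frame + 4
        return nt_start, nt_end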
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_F_TransformAACoordIntoNtCoordAndScoreFiltering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_F_TransformAACoordIntoNtCoordAndScoreFiltering.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,38 @@
+import os
+import unittest
+from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_TransformAACoordIntoNtCoordAndScoreFiltering (unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFile = "./datas/OutputHmmpfamTest.align"
+        self._consensusFile = "./datas/ConsensusTestFile_nt.fsa"
+        self._outputFile = "./datas/alignTransformedToTest.align"
+        self._expectedFile = "./datas/PostPostProcessTest.align"
+        self._expectedFileFiltered = "./datas/PostPostProcessTestFiltered.align"
+        self._alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+    
+    def testRun_with_no_filter(self):
+        self._alignTransformation.setInFileName(self._inputFile)
+        self._alignTransformation.setOutFileName(self._outputFile)
+        self._alignTransformation.setConsensusFileName(self._consensusFile)       
+        self._alignTransformation.setIsFiltered(False)
+        self._alignTransformation.run()
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedFile, self._outputFile))
+        os.remove(self._outputFile) 
+        
+    def testRun_with_filter(self):
+        self._alignTransformation.setInFileName(self._inputFile)
+        self._alignTransformation.setOutFileName(self._outputFile)
+        self._alignTransformation.setConsensusFileName(self._consensusFile)       
+        self._alignTransformation.setIsFiltered(True)
+        self._alignTransformation.run()
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedFileFiltered, self._outputFile))
+        os.remove(self._outputFile)
+        
+            
+if __name__ == "__main__" :
+    unittest.main() 
\ No newline at end of file
b
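For reference, the call sequence this functional test drives end to end, written out as a sketch; it assumes the repository root is on PYTHONPATH and that the ./datas fixtures are reachable from the working directory, since the test resolves them relatively (i.e. it is meant to run from this tests/ folder):

    # Sketch of the run() pipeline exercised by testRun_with_filter above.
    from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat

    t = TransformAACoordIntoNtCoordInAlignFormat()
    t.setInFileName("./datas/OutputHmmpfamTest.align")       # hmmpfam hits in amino-acid coordinates
    t.setOutFileName("./datas/alignTransformedToTest.align")
    t.setConsensusFileName("./datas/ConsensusTestFile_nt.fsa")
    t.setIsFiltered(True)                                    # also drop low-scoring hits
    t.run()                                                  # expected to equal PostPostProcessTestFiltered.align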
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py Tue Apr 30 14:33:21 2013 -0400
b
b'@@ -0,0 +1,237 @@\n+import os\n+import unittest\n+from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils\n+from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Range import Range\n+from commons.core.checker.RepetException import RepetException\n+\n+class Test_TransformAACoordIntoNtCoordInAlignFormat(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self.inputFileName = "alignFile.align"\n+        self.consensusFile = "consensus.fa"\n+        self.outputFileName = "outputFile.align"\n+        self._expFileName = "expFile.align"\n+        \n+    def tearDown(self):\n+        if FileUtils.isRessourceExists(self.inputFileName):\n+            os.remove(self.inputFileName)\n+        if FileUtils.isRessourceExists(self.consensusFile):\n+            os.remove(self.consensusFile)\n+        if FileUtils.isRessourceExists(self.outputFileName):\n+            os.remove(self.outputFileName)\n+        if FileUtils.isRessourceExists(self._expFileName):\n+            os.remove(self._expFileName)\n+    \n+    def test_transformQueryCoord(self):\n+        f = open(self.inputFileName, "w")\n+        f.write("blumeria_Grouper_590_20:NoCat_1\\t91\\t108\\tDUF234\\t5\\t22\\t1.5\\t3.2\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_1\\t111\\t119\\tDUF1414\\t1\\t9\\t6.3\\t2.9\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_3\\t30\\t37\\tCPW_WPC\\t1\\t9\\t7.7\\t1.5\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_3\\t55\\t69\\tHECT\\t341\\t355\\t9.2\\t0.0\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_4\\t82\\t91\\tDUF46\\t173\\t182\\t0.11\\t6.4\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_5\\t121\\t125\\tPOC4\\t276\\t280\\t6.3\\t-1.7\\t0\\n")\n+        f.close()\n+        f = open(self.consensusFile, "w")\n+        f.write(">blumeria_Grouper_590_20:NoCat\\n")\n+        f.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\\n")\n+        f.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\\n")\n+        f.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\\n")\n+        f.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\\n")\n+        f.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\\n")\n+        f.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\\n")\n+        f.write("TCCATAATTTCAACACTNAAGAATATTTGTA")\n+        f.close() \n+        alignRead = AlignListUtils()      \n+        tableauAlignInstance = alignRead.read(self.inputFileName)\n+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()\n+        alignTransformation.setConsensusFileName(self.consensusFile)\n+        alignTransformation.transformQueryCoord(tableauAlignInstance)\n+        #check query coord\n+        self.assertEquals(tableauAlignInstance.get(0).range_query.start, 271)\n+        self.assertEquals(tableauAlignInstance.get(0).range_query.end, 324) \n+        self.assertEquals(tableauAlignInstance.get(1).range_query.start, 331)\n+        self.assertEquals(tableauAlignInstance.get(1).range_query.end, 357) \n+        self.assertEquals(tableauAlignInstance.get(2).range_query.start, 90)\n+        self.assertEquals(tableauAlignInstance.get(2).range_query.end, 113) \n+        
self.assertEquals(tableauAlignInstance.get(3).range_query.start, 165)\n+        self.assertEquals(tableauAlignInstance.get(3).range_query.end, 209) \n+        self.assertEquals(tableauAlignInstance.get(4).range_query.start, 119)\n+        self.assertEquals(tableauAlignInstance.get(4).range_query.end, 148) \n+        self.assertEquals(tableauAlignInstance.get(5).range_query.start, 16)\n+        self.assertEquals(tableauAlignInstance.get(5).range_query.end, 30) \n+        #check subject (profiles) coord\n+        #positive frame : they don\'t change\n+        self.assertEquals(tableauAlignInstance.get(0).range_subject.start, 5)\n+    '..b'nTransformation = TransformAACoordIntoNtCoordInAlignFormat()\n+        self.assertRaises(RepetException, alignTransformation.extractFrameFromSeqName, alignInstance)\n+        \n+    def test_run_no_filter(self):\n+        f = open(self.inputFileName, "w")\n+        f.write("blumeria_Grouper_590_20:NoCat_1\\t91\\t108\\tDUF234\\t5\\t22\\t1.5\\t3.2\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_1\\t111\\t119\\tDUF1414\\t1\\t9\\t6.3\\t2.9\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_3\\t30\\t37\\tCPW_WPC\\t1\\t9\\t7.7\\t1.5\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_3\\t55\\t69\\tHECT\\t341\\t355\\t9.2\\t0.0\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_4\\t82\\t91\\tDUF46\\t173\\t182\\t0.11\\t6.4\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_5\\t121\\t125\\tPOC4\\t276\\t280\\t6.3\\t-1.7\\t0\\n")\n+        f.close()\n+        f = open(self.consensusFile, "w")\n+        f.write(">blumeria_Grouper_590_20:NoCat\\n")\n+        f.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\\n")\n+        f.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\\n")\n+        f.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\\n")\n+        f.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\\n")\n+        f.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\\n")\n+        f.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\\n")\n+        f.write("TCCATAATTTCAACACTNAAGAATATTTGTA")\n+        f.close() \n+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()\n+        alignTransformation.setInFileName(self.inputFileName)\n+        alignTransformation.setOutFileName(self.outputFileName)\n+        alignTransformation.setConsensusFileName(self.consensusFile)\n+        alignTransformation.setIsFiltered(False)\n+        alignTransformation.run()\n+        self.assertTrue(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6) \n+        self.assertTrue(FileUtils.isRessourceExists(self.inputFileName)) \n+    \n+    def test_run_no_filter_clean_option(self):\n+        f = open(self.inputFileName, "w")\n+        f.write("blumeria_Grouper_590_20:NoCat_1\\t91\\t108\\tDUF234\\t5\\t22\\t1.5\\t3.2\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_1\\t111\\t119\\tDUF1414\\t1\\t9\\t6.3\\t2.9\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_3\\t30\\t37\\tCPW_WPC\\t1\\t9\\t7.7\\t1.5\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_3\\t55\\t69\\tHECT\\t341\\t355\\t9.2\\t0.0\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_4\\t82\\t91\\tDUF46\\t173\\t182\\t0.11\\t6.4\\t0\\n")\n+        f.write("blumeria_Grouper_590_20:NoCat_5\\t121\\t125\\tPOC4\\t276\\t280\\t6.3\\t-1.7\\t0\\n")\n+        f.close()\n+        f = open(self.consensusFile, "w")\n+        
f.write(">blumeria_Grouper_590_20:NoCat\\n")\n+        f.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\\n")\n+        f.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\\n")\n+        f.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\\n")\n+        f.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\\n")\n+        f.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\\n")\n+        f.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\\n")\n+        f.write("TCCATAATTTCAACACTNAAGAATATTTGTA")\n+        f.close() \n+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()\n+        alignTransformation.setInFileName(self.inputFileName)\n+        alignTransformation.setOutFileName(self.outputFileName)\n+        alignTransformation.setConsensusFileName(self.consensusFile)\n+        alignTransformation.setIsFiltered(True)\n+        alignTransformation.setIsClean(True)\n+        alignTransformation.run()\n+        self.assertTrue(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6) \n+        self.assertFalse(FileUtils.isRessourceExists(self.inputFileName))        \n+            \n+if __name__ == "__main__" :\n+    unittest.main() \n\\ No newline at end of file\n'
b
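The coordinates asserted in the unit test above can be reproduced by hand. The inline consensus is 391 nt long (six 60-nt lines plus a final 31), and the frame is the digit suffixed to the query name (NoCat_1 is frame 1, NoCat_4 is frame 4), which _getOriginalQueryNameForAlignInstance then strips off. Two of the asserted values, reworked with the formulas above:

    # Worked check of two expectations from test_transformQueryCoord
    # (consensus length 391, formulas as in TransformAACoordIntoNtCoordInAlignFormat).
    L = 391

    # NoCat_1, frame 1 (forward): amino acids 91..108 -> nucleotides 271..324
    assert 3 * (91 - 1) + 1 == 271
    assert 3 * 108 + 1 - 1 == 324

    # NoCat_4, frame 4 (reverse): amino acids 82..91 -> nucleotides 119..148
    assert L - 3 * (91 - 1) - 4 + 2 == 119   # new start is computed from the old end
    assert L - 3 * (82 - 1) - 4 + 4 == 148   # new end is computed from the old start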
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/TransformAACoordIntoNtCoordTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/TransformAACoordIntoNtCoordTestSuite.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,17 @@
+import unittest
+import sys
+import TestTransformAACoordIntoNtCoordInAlignFormat
+import TestAcceptanceTransformAACoordIntoNtCoordAndScoreFiltering
+
+def main():
+
+        TransformAlignTestSuite = unittest.TestSuite() 
+        TransformAlignTestSuite.addTest(unittest.makeSuite(TestTransformAACoordIntoNtCoordInAlignFormat.TestTransformAACoordIntoNtCoordInAlignFormat,'test'))
+        TransformAlignTestSuite.addTest(unittest.makeSuite(TestAcceptanceTransformAACoordIntoNtCoordAndScoreFiltering.TestAcceptanceTransformAACoordIntoNtCoordAndScoreFiltering,'test'))
+        
+        runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+        runner.run(TransformAlignTestSuite)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
b
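Note that this suite imports TestTransformAACoordIntoNtCoordInAlignFormat and TestAcceptanceTransformAACoordIntoNtCoordAndScoreFiltering, while the test modules added by this changeset are named Test_TransformAACoordIntoNtCoordInAlignFormat and Test_F_TransformAACoordIntoNtCoordAndScoreFiltering, so the suite as committed presumably fails at import time. A sketch of the wiring matched to the modules actually present, following the package-import style of hmmpfamOutputParsingTestSuite.py above:

    # Sketch only: suite wiring against the test modules added in this changeset.
    import sys
    import unittest
    from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.tests.Test_TransformAACoordIntoNtCoordInAlignFormat import Test_TransformAACoordIntoNtCoordInAlignFormat
    from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.tests.Test_F_TransformAACoordIntoNtCoordAndScoreFiltering import Test_F_TransformAACoordIntoNtCoordAndScoreFiltering

    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(Test_TransformAACoordIntoNtCoordInAlignFormat, 'test'))
    suite.addTest(unittest.makeSuite(Test_F_TransformAACoordIntoNtCoordAndScoreFiltering, 'test'))
    unittest.TextTestRunner(sys.stderr, 2, 2).run(suite)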
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/ConsensusTestFile_nt.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/ConsensusTestFile_nt.fsa Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,18 @@
+>blumeria_Grouper_590_20:NoCat
+TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA
+AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT
+ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA
+ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA
+CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC
+TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC
+TCCATAATTTCAACACTNAAGAATATTTGTA
+>blumeria_Grouper_4152_12:NoCat
+GGACCGGCCGCCACGAATTGCGCGATTGCTGCTCGCAAGTAGACTTTGATGGAGTCTACA
+AAATTTTTGTCTTCACCGGTGGAGAGCGGTTGAAGAGCTGCCTCGACACTGCTAATAGCC
+GTCGAGCATATTGTGAATTGCGCGGCTTTTTGTCTTGCCCTGCGCTCCTCCGCTTCGATC
+GCTGCAAGCAATTCTGGAGGGTGTGTATTTTTTCTGCCTGCTGCTTCTAGTGCTGGAGGC
+TGCGGGGCCAGTGGAGGGTTTTCGGCTCCTGCTGCCTTAGTGGATGGTGTTTCAGCCCTT
+TTCGCGGGCCTCACTTCTGCAGGTCGCGGTAGTGCTGGAACCGTGATGCGCTTCTCGGGT
+GCGACGACGGTTTTTCTGGGGGATCCGGTGGGATCCAAGACTTGCTCTGCGTCTTCTGGG
+CTGGAGGATGCCCAAACTGAATCGGCGAGGGTTTTTAGCTTCTCGACTTCTGCGTCCACC
+ATATCTACCTCGGGGACATTG
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/OutputHmmpfamTest.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/OutputHmmpfamTest.align Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,29 @@
+blumeria_Grouper_590_20:NoCat_1 91 108 DUF234 5 22 1.5 3.2 0
+blumeria_Grouper_590_20:NoCat_1 111 119 DUF1414 1 9 6.3 2.9 0
+blumeria_Grouper_590_20:NoCat_3 30 37 CPW_WPC 1 9 7.7 1.5 0
+blumeria_Grouper_590_20:NoCat_3 55 69 HECT 341 355 9.2 0.0 0
+blumeria_Grouper_590_20:NoCat_4 82 91 DUF46 173 182 0.11 6.4 0
+blumeria_Grouper_590_20:NoCat_5 121 125 POC4 276 280 6.3 -1.7 0
+blumeria_Grouper_4152_12:NoCat_1 27 38 Amino_oxidase 471 482 9.1 -0.6 0
+blumeria_Grouper_4152_12:NoCat_1 33 67 TrbL 231 285 8 0.8 0
+blumeria_Grouper_4152_12:NoCat_1 58 80 TNV_CP 167 189 7.9 0.1 0
+blumeria_Grouper_4152_12:NoCat_1 94 109 DGOK 283 298 1.3 0.5 0
+blumeria_Grouper_4152_12:NoCat_1 113 127 Peptidase_S29 1 15 1.1 4.7 0
+blumeria_Grouper_4152_12:NoCat_1 148 157 DUF1301 1 10 9.9 0.4 0
+blumeria_Grouper_4152_12:NoCat_2 21 26 Toxin_18 50 55 4.4 4.2 0
+blumeria_Grouper_4152_12:NoCat_2 126 137 ABC_transp_aux 276 287 5.2 1.1 0
+blumeria_Grouper_4152_12:NoCat_2 143 159 DUF1602 23 39 0.39 5.0 0
+blumeria_Grouper_4152_12:NoCat_3 102 122 zf-P11 1 20 1.8 3.6 0
+blumeria_Grouper_4152_12:NoCat_3 126 135 V-ATPase_G 1 10 5.3 2.4 0
+blumeria_Grouper_4152_12:NoCat_4 92 122 XhoI 172 202 0.014 9.9 0
+blumeria_Grouper_4152_12:NoCat_4 113 119 Endomucin 261 267 6 0.1 0
+blumeria_Grouper_4152_12:NoCat_5 2 19 DUF1798 35 52 3.3 3.0 0
+blumeria_Grouper_4152_12:NoCat_5 46 69 DUF881 214 237 1.6 4.0 0
+blumeria_Grouper_4152_12:NoCat_5 77 93 Jun 264 284 6.5 -0.7 0
+blumeria_Grouper_4152_12:NoCat_5 89 108 SLT 1 20 1.2 4.5 0
+blumeria_Grouper_4152_12:NoCat_5 93 115 DUF2346 63 85 2.8 3.2 0
+blumeria_Grouper_4152_12:NoCat_5 124 139 LBP_BPI_CETP 191 209 3.8 1.8 0
+blumeria_Grouper_4152_12:NoCat_6 50 62 DUF258 293 305 3.8 1.5 0
+blumeria_Grouper_4152_12:NoCat_6 85 90 SOCS_box 1 6 9.7 2.4 0
+blumeria_Grouper_4152_12:NoCat_6 95 115 DUF1289 36 56 5.3 2.8 0
+blumeria_Grouper_4152_12:NoCat_6 106 116 TRAP_alpha 317 327 4.1 0.1 0
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTest.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTest.align Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,29 @@
+blumeria_Grouper_590_20:NoCat 271 324 DUF234 5 22 1.5 3 0.000000
+blumeria_Grouper_590_20:NoCat 331 357 DUF1414 1 9 6.3 2 0.000000
+blumeria_Grouper_590_20:NoCat 90 113 CPW_WPC 1 9 7.7 1 0.000000
+blumeria_Grouper_590_20:NoCat 165 209 HECT 341 355 9.2 0 0.000000
+blumeria_Grouper_590_20:NoCat 119 148 DUF46 182 173 0.11 6 0.000000
+blumeria_Grouper_590_20:NoCat 16 30 POC4 280 276 6.3 -1 0.000000
+blumeria_Grouper_4152_12:NoCat 79 114 Amino_oxidase 471 482 9.1 0 0.000000
+blumeria_Grouper_4152_12:NoCat 97 201 TrbL 231 285 8 0 0.000000
+blumeria_Grouper_4152_12:NoCat 172 240 TNV_CP 167 189 7.9 0 0.000000
+blumeria_Grouper_4152_12:NoCat 280 327 DGOK 283 298 1.3 0 0.000000
+blumeria_Grouper_4152_12:NoCat 337 381 Peptidase_S29 1 15 1.1 4 0.000000
+blumeria_Grouper_4152_12:NoCat 442 471 DUF1301 1 10 9.9 0 0.000000
+blumeria_Grouper_4152_12:NoCat 62 79 Toxin_18 50 55 4.4 4 0.000000
+blumeria_Grouper_4152_12:NoCat 377 412 ABC_transp_aux 276 287 5.2 1 0.000000
+blumeria_Grouper_4152_12:NoCat 428 478 DUF1602 23 39 0.39 5 0.000000
+blumeria_Grouper_4152_12:NoCat 306 368 zf-P11 1 20 1.8 3 0.000000
+blumeria_Grouper_4152_12:NoCat 378 407 V-ATPase_G 1 10 5.3 2 0.000000
+blumeria_Grouper_4152_12:NoCat 136 228 XhoI 202 172 0.014 9 0.000000
+blumeria_Grouper_4152_12:NoCat 145 165 Endomucin 267 261 6 0 0.000000
+blumeria_Grouper_4152_12:NoCat 444 497 DUF1798 52 35 3.3 3 0.000000
+blumeria_Grouper_4152_12:NoCat 294 365 DUF881 237 214 1.6 4 0.000000
+blumeria_Grouper_4152_12:NoCat 222 272 Jun 284 264 6.5 0 0.000000
+blumeria_Grouper_4152_12:NoCat 177 236 SLT 20 1 1.2 4 0.000000
+blumeria_Grouper_4152_12:NoCat 156 224 DUF2346 85 63 2.8 3 0.000000
+blumeria_Grouper_4152_12:NoCat 84 131 LBP_BPI_CETP 209 191 3.8 1 0.000000
+blumeria_Grouper_4152_12:NoCat 314 352 DUF258 305 293 3.8 1 0.000000
+blumeria_Grouper_4152_12:NoCat 230 247 SOCS_box 6 1 9.7 2 0.000000
+blumeria_Grouper_4152_12:NoCat 155 217 DUF1289 56 36 5.3 2 0.000000
+blumeria_Grouper_4152_12:NoCat 152 184 TRAP_alpha 327 317 4.1 0 0.000000
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTestFiltered.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTestFiltered.align Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,25 @@
+blumeria_Grouper_590_20:NoCat 271 324 DUF234 5 22 1.5 3 0.000000
+blumeria_Grouper_590_20:NoCat 331 357 DUF1414 1 9 6.3 2 0.000000
+blumeria_Grouper_590_20:NoCat 90 113 CPW_WPC 1 9 7.7 1 0.000000
+blumeria_Grouper_590_20:NoCat 119 148 DUF46 182 173 0.11 6 0.000000
+blumeria_Grouper_4152_12:NoCat 97 201 TrbL 231 285 8 0 0.000000
+blumeria_Grouper_4152_12:NoCat 172 240 TNV_CP 167 189 7.9 0 0.000000
+blumeria_Grouper_4152_12:NoCat 280 327 DGOK 283 298 1.3 0 0.000000
+blumeria_Grouper_4152_12:NoCat 337 381 Peptidase_S29 1 15 1.1 4 0.000000
+blumeria_Grouper_4152_12:NoCat 442 471 DUF1301 1 10 9.9 0 0.000000
+blumeria_Grouper_4152_12:NoCat 62 79 Toxin_18 50 55 4.4 4 0.000000
+blumeria_Grouper_4152_12:NoCat 377 412 ABC_transp_aux 276 287 5.2 1 0.000000
+blumeria_Grouper_4152_12:NoCat 428 478 DUF1602 23 39 0.39 5 0.000000
+blumeria_Grouper_4152_12:NoCat 306 368 zf-P11 1 20 1.8 3 0.000000
+blumeria_Grouper_4152_12:NoCat 378 407 V-ATPase_G 1 10 5.3 2 0.000000
+blumeria_Grouper_4152_12:NoCat 136 228 XhoI 202 172 0.014 9 0.000000
+blumeria_Grouper_4152_12:NoCat 145 165 Endomucin 267 261 6 0 0.000000
+blumeria_Grouper_4152_12:NoCat 444 497 DUF1798 52 35 3.3 3 0.000000
+blumeria_Grouper_4152_12:NoCat 294 365 DUF881 237 214 1.6 4 0.000000
+blumeria_Grouper_4152_12:NoCat 177 236 SLT 20 1 1.2 4 0.000000
+blumeria_Grouper_4152_12:NoCat 156 224 DUF2346 85 63 2.8 3 0.000000
+blumeria_Grouper_4152_12:NoCat 84 131 LBP_BPI_CETP 209 191 3.8 1 0.000000
+blumeria_Grouper_4152_12:NoCat 314 352 DUF258 305 293 3.8 1 0.000000
+blumeria_Grouper_4152_12:NoCat 230 247 SOCS_box 6 1 9.7 2 0.000000
+blumeria_Grouper_4152_12:NoCat 155 217 DUF1289 56 36 5.3 2 0.000000
+blumeria_Grouper_4152_12:NoCat 152 184 TRAP_alpha 327 317 4.1 0 0.000000
b
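Comparing the two expected files shows what the score filter removes: the unfiltered PostPostProcessTest.align has 29 rows, the filtered version 25, and the four rows that disappear (HECT, POC4, Amino_oxidase, Jun) are exactly those whose original hmmpfam score in OutputHmmpfamTest.align is zero or negative, which suggests that filterOnAMinimalScore with a threshold of 0 keeps only strictly positive scores. A quick check of that reading, with values copied from the fixtures:

    # Scores taken from OutputHmmpfamTest.align above; the "removed" rows are the
    # ones absent from PostPostProcessTestFiltered.align.
    removed = {"HECT": 0.0, "POC4": -1.7, "Amino_oxidase": -0.6, "Jun": -0.7}
    kept_examples = {"DUF46": 6.4, "TrbL": 0.8, "TNV_CP": 0.1}
    assert all(score <= 0 for score in removed.values())
    assert all(score > 0 for score in kept_examples.values())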
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,186 @@\n+"""\n+Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.\n+"""\n+\n+import os\n+import ConfigParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.LoggerFactory import LoggerFactory\n+\n+LOG_DEPTH = "repet.tools"\n+\n+class RepbaseBLRnForClassifierStep1( object ):\n+    \n+    """\n+    Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.\n+    \n+    @param inFileName: name of the input fasta file\n+    @type inFileName: string\n+    \n+    @param launch_1: generic command at the beginning of a specific command\n+    @type launch_1: string\n+    \n+    @param launch_2: generic command at the end of a specific command\n+    @type launch_2: string\n+\n+    @return: all the commands to run the job\n+    @rtype: string\n+    \n+    @param cDir: current directory (where to retrieve the result files)\n+    @ype cDir: string\n+\n+    @param tmpDir: temporary directory (where the job will run)\n+    @type tmpDir: string\n+    \n+    @param configFileName: configuration file name\n+    @type configFileName: string\n+    \n+    @param logger: a logger Instance\n+    @type logger: logger\n+    \n+    @param verbose: verbose(0/1/2)\n+    @type verbose: int\n+    \n+    @param pL: program launcher\n+    @type pL: programLauncher Instance\n+    \n+    @param project: project name\n+    @type project: string\n+    \n+    """\n+\n+    def __init__(self, inFileName, launch_1, launch_2, cDir, tmpDir, configFileName, verbose, pL, project):\n+        """\n+        Constructor\n+        """\n+        self._inFileName = inFileName\n+        self._launch_1 = launch_1\n+        self._launch_2 = launch_2\n+        self._cDir = cDir\n+        self._tmpDir = tmpDir\n+        self._verbose = verbose\n+        self._pL = pL\n+        self._project = project\n+        self._fileUtils = FileUtils()\n+        self._config = ConfigParser.ConfigParser()\n+        self._configFileName = configFileName\n+        self._config.readfp( open(self._configFileName) )\n+        self._bank = self._config.get("detect_features","TE_nucl_bank")\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose)\n+\n+    def formatRepbase_ntIfNecessary( self ):\n+        """\n+        Format Repbase (make \'cut\' files).\n+        """\n+        if not os.path.exists( "%s_cut" % ( self._bank ) ):\n+            self._log.debug("prepare bank \'%s\'..." 
% ( self._bank ))\n+            prg = os.environ["REPET_PATH"] + "/bin/blaster"\n+            cmd = prg\n+            cmd += " -s %s" % ( self._bank )\n+            cmd += " -n blastn"\n+            if self._config.get("detect_features","wublast") == "yes":\n+                cmd += " -W"\n+            cmd += " -r"\n+            cmd += " -P"\n+            self._pL.launch( prg, cmd )\n+            os.system( "rm -f %s-blastn-*.param" % ( self._bank ) )\n+        \n+    def createCmdToLaunch( self ):\n+        cmd = self._launch_1 + os.environ["REPET_PATH"] + "/bin/blaster"\n+        cmd += " -q %s" % ( self._inFileName )\n+        cmd += " -s %s/%s" % ( self._cDir, self._bank )\n+        cmd += " -B %s_BLRn_%s" % ( self._inFileName, self._bank )\n+        cmd += " -n blastn"\n+        if self._config.get("detect_features","wublast") == "yes":\n+            cmd += " -W"\n+        cmd += " -r"\n+        cmd += " -v 1"\n+        cmd += self._launch_2\n+    \n+        cmd += "if not os.path.exists( \\"%s/%s_BLRn_%s.param\\" ):\\n" % ( self._cDir, self._inFileName, self._bank )\n+        cmd += "\\tos.system( \\"mv %s_BLRn_%s.param %s\\" )\\n" % ( self._inFileName, self._bank, self._cDir )\n+        cmd += "if os.path.exists( \\"%s_cut\\" ):\\n" % ( self._inFileName )\n+        cmd += "\\tos.system( \\"rm -f %s_cut*\\" )\\n" % ( self._inFileName )\n+        cmd += "if os.path.exists( \\"%s.Nstretch.map\\" ):\\n" % ( self._inFileName )\n+        cmd += "\\tos.remove( \\"%s.Nstretch.map\\" )\\n" % ( self._inFileName )'..b'%s.align" % ( self._inFileName, self._bank )\n+        cmd += " -q %s" % ( self._inFileName )\n+        cmd += " -s %s/%s" % ( self._cDir, self._bank )\n+        cmd += " -j"\n+        cmd += " -v 1"\n+        cmd += self._launch_2\n+    \n+        cmd += "if not os.path.exists( \\"%s/%s_BLRn_%s.align.clean_match.path\\" ):\\n" % ( self._cDir, self._inFileName, self._bank )\n+        cmd += "\\tos.system( \\"mv %s_BLRn_%s.align.clean_match.path %s\\" )\\n" % ( self._inFileName, self._bank, self._cDir )\n+        cmd += "if not os.path.exists( \\"%s/%s_BLRn_%s.align.clean_match.param\\" ):\\n" % ( self._cDir, self._inFileName, self._bank )\n+        cmd += "\\tos.system( \\"mv %s_BLRn_%s.align.clean_match.param %s\\" )\\n" % ( self._inFileName, self._bank, self._cDir )\n+        cmd += "if os.path.exists( \\"%s_BLRn_%s.align\\" ):\\n" % ( self._inFileName, self._bank )\n+        cmd += "\\tos.remove( \\"%s_BLRn_%s.align\\" )\\n" % ( self._inFileName, self._bank )\n+        cmd += "if os.path.exists( \\"%s_BLRn_%s.align.clean_match.fa\\" ):\\n" % ( self._inFileName, self._bank )\n+        cmd += "\\tos.remove( \\"%s_BLRn_%s.align.clean_match.fa\\" )\\n" % ( self._inFileName, self._bank )\n+        cmd += "if os.path.exists( \\"%s_BLRn_%s.align.clean_match.map\\" ):\\n" % ( self._inFileName, self._bank )\n+        cmd += "\\tos.remove( \\"%s_BLRn_%s.align.clean_match.map\\" )\\n" % ( self._inFileName, self._bank )\n+        cmd += "if os.path.exists( \\"%s_BLRn_%s.align.clean_match.tab\\" ):\\n" % ( self._inFileName, self._bank )\n+        cmd += "\\tos.remove( \\"%s_BLRn_%s.align.clean_match.tab\\" )\\n" % ( self._inFileName, self._bank )\n+    \n+        if self._tmpDir != self._cDir:\n+            cmd += "if os.path.exists( \\"%s\\" ):\\n" % ( self._bank )\n+            cmd += "\\tos.remove( \\"%s\\" )\\n" % ( self._bank )\n+            \n+        return cmd\n+    \n+    def collectRepbaseBLRn( self ):\n+        """\n+        Concatenate the outputs of blastn, adapt the ID and 
load the results into a table.\n+        """\n+        bankFull = self._bank\n+        bankPath, bank = os.path.split( bankFull )\n+        self._concatPathFile(bank)\n+        self._adaptIDInPathFile(bank)\n+        self._loadPathFileInTable(bank)    \n+        self._findAndRemoveUselessFiles(bank)\n+        \n+    def _concatPathFile(self, bank):\n+        FileUtils.catFilesByPattern("../batch_*.fa_BLRn_%s.align.clean_match.path" % bank,\n+                                        "%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank))\n+\n+    def _adaptIDInPathFile(self, bank):\n+        if os.path.exists(os.environ["REPET_PATH"] + "/bin/pathnum2id"):\n+            prg = os.environ["REPET_PATH"] + "/bin/pathnum2id"\n+            cmd = prg\n+            cmd += " -i %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)\n+            cmd += " -o %s_BLRn_%s.align.clean_match.path" % (self._project, bank)\n+            cmd += " -v %i" % (self._verbose - 1)\n+            self._pL.launch(prg, cmd)\n+        else:\n+            prg = os.environ["REPET_PATH"] + "/bin/pathnum2id.py"\n+            cmd = prg\n+            cmd += " -i %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)\n+            cmd += " -o %s_BLRn_%s.align.clean_match.path" % (self._project, bank)\n+            self._pL.launch(prg, cmd)\n+\n+    def _loadPathFileInTable(self, bank):\n+        prg = os.environ["REPET_PATH"] + "/bin/srptCreateTable.py"\n+        cmd = prg\n+        cmd += " -f %s_BLRn_%s.align.clean_match.path" % (self._project, bank)\n+        cmd += " -n %s_TE_BLRn_path" % (self._project)\n+        cmd += " -t path"\n+        cmd += " -c ../%s" % (self._configFileName)\n+        self._pL.launch(prg, cmd)\n+\n+    def _findAndRemoveUselessFiles(self, bank):\n+        prg = "find"\n+        cmd = prg\n+        cmd += " .. -name \\"batch_*.fa_BLRn_%s.*\\" -exec rm {} \\;" % (bank)\n+        self._pL.launch(prg, cmd)\n+        prg = "rm"\n+        cmd = prg\n+        cmd += " %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)\n+        self._pL.launch(prg, cmd)\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/blastnForClassifierStep1/tests/Test_RepbaseBLRnForClassifierStep1.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/blastnForClassifierStep1/tests/Test_RepbaseBLRnForClassifierStep1.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,343 @@\n+import unittest\n+import os\n+import shutil\n+import ConfigParser\n+import sys\n+from commons.pyRepetUnit.blastnForClassifierStep1.RepbaseBLRnForClassifierStep1 import RepbaseBLRnForClassifierStep1\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.Path import Path\n+import pyRepet.launcher.programLauncher\n+\n+if not os.environ.has_key( "REPET_PATH" ):\n+    print "*** Error: no environment variable REPET_PATH"\n+    sys.exit(1)\n+sys.path.append( os.environ["REPET_PATH"] )\n+\n+NB_EXPECTED_LINES_IN_PATH_FILE = 10\n+CURRENT_DIR = os.getcwd()\n+\n+\n+class Test_RepbaseBLRnForClassifierStep1( unittest.TestCase ):\n+    \n+    \n+    def setUp( self ):\n+        os.chdir(CURRENT_DIR)\n+        self._inFileName = "dummyFileName"\n+        self._launch_1 = "log = os.system( \\""\n+        self._launch_2 = "\\" )\\n"\n+        self._launch_2 += "if log != 0:\\n"\n+        self._launch_2 += "\\tsys.exit(1)\\n"\n+        self._cDir = "/home/user/dummy_cdir"\n+        self._tmpDir = "/home/user/dummy_tmpDir"\n+        self._configFileName = "configFile"\n+        self._repbaseName = "dummyRepbase_nt.fa"\n+        f = open(self._configFileName, "w")\n+        f.write("[repet_env]\\n")\n+        f.write("repet_host: %s\\n" % os.environ["REPET_HOST"])\n+        f.write("repet_user: %s\\n" % os.environ["REPET_USER"])\n+        f.write("repet_pw: %s\\n" % os.environ["REPET_PW"])\n+        f.write("repet_db: %s\\n" % os.environ["REPET_DB"])\n+        f.write("repet_port: %s\\n" % os.environ["REPET_PORT"])\n+        f.write("[detect_features]\\n")\n+        f.write("TE_BLRn: yes\\n")\n+        f.write("TE_nucl_bank: %s\\n" % self._repbaseName)\n+        f.write("wublast: yes\\n")\n+        f.close()\n+        self._verbose = 0\n+        self._config = ConfigParser.ConfigParser()\n+        self._config.readfp( open(self._configFileName) )\n+        self._pL = pyRepet.launcher.programLauncher.programLauncher()\n+        self._project = "dummyProject"\n+        self._repbaseBLRn = RepbaseBLRnForClassifierStep1(self._inFileName, self._launch_1, self._launch_2, self._cDir, self._tmpDir, self._configFileName, self._verbose, self._pL, self._project)\n+        self._expFileName = "expFile"\n+        self._bank = self._config.get("detect_features","TE_nucl_bank")\n+        self._db = DbFactory.createInstance()\n+        \n+        \n+    def tearDown( self ):\n+        self._db.dropTable("%s_TE_BLRn_path" % (self._project))\n+        os.chdir(CURRENT_DIR)\n+        self._repbaseBLRn = None\n+        os.remove(self._configFileName)\n+        if os.path.isfile("%s_BLRn_%s.align.clean_match.path" % (self._project, self._bank)):\n+            os.remove("%s_BLRn_%s.align.clean_match.path" % (self._project, self._bank))\n+        if os.path.isfile("%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, self._bank)):\n+            os.remove("%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, self._bank))\n+        if os.path.isfile("batch_1.fa_BLRn_%s.*" % (self._bank)):\n+            os.remove("batch_1.fa_BLRn_%s.*" % (self._bank))\n+        if os.path.isfile("batch_2.fa_BLRn_%s.*" % (self._bank)):\n+            os.remove("batch_2.fa_BLRn_%s.*" % (self._bank))\n+        if os.path.isfile("batch_1.fa_BLRn_%s.align.clean_match.path" % (self._bank)):\n+            os.remove("batch_1.fa_BLRn_%s.align.clean_match.path" % (self._bank))\n+        if 
os.path.isfile("batch_2.fa_BLRn_%s.align.clean_match.path" % (self._bank)):\n+            os.remove("batch_2.fa_BLRn_%s.align.clean_match.path" % (self._bank))\n+        if os.path.isfile(self._repbaseName):\n+            os.remove(self._repbaseName)\n+            os.remove(self._repbaseName + "_cut")\n+            os.remove(self._repbaseName + "_cut.xnd")\n+            os.remove(self._repbaseName + "_cut.xns")\n+            os.remove(self._repbaseName + "_cut.xnt")\n+            os.remove(self._repbaseName + ".Nstretch.map")\n+            o'..b'FromFilePathList = self._readPathResultsFromFileAndFillList()\n+        resultFromTablePathList = self._readPathResultsFromTableAndFillList() \n+        self.assertEquals(resultFromFilePathList, resultFromTablePathList)\n+        \n+        \n+    def test_findAndRemoveUselessFiles( self ):\n+        self._createFile("%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, self._bank))\n+        self._createFile("batch_1.fa_BLRn_%s.*" % (self._bank))\n+        self._createFile("batch_2.fa_BLRn_%s.*" % (self._bank))\n+        self._repbaseBLRn._findAndRemoveUselessFiles(self._bank)\n+        self.assertFalse(FileUtils.isRessourceExists("%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, self._bank)))\n+        self.assertFalse(FileUtils.isRessourceExists("batch_1.fa_BLRn_%s.*" % (self._bank)))\n+        self.assertFalse(FileUtils.isRessourceExists("batch_2.fa_BLRn_%s.*" % (self._bank)))\n+        \n+        \n+    def test_collectRepbaseBLRn( self ):\n+        os.chdir(CURRENT_DIR)\n+        if not FileUtils.isRessourceExists("TE_BLRn"):\n+            os.mkdir( "TE_BLRn" )\n+        self._createPathFiles(self._bank)\n+        os.chdir( "TE_BLRn" )\n+        self._repbaseBLRn.collectRepbaseBLRn()  \n+        self.assertTrue(FileUtils.isRessourceExists("%s_BLRn_%s.align.clean_match.path" % ( self._project, self._bank )))\n+        self.assertEquals(NB_EXPECTED_LINES_IN_PATH_FILE, FileUtils.getNbLinesInSingleFile("%s_BLRn_%s.align.clean_match.path" % ( self._project, self._bank )))\n+        self.assertTrue(self._db.doesTableExist("%s_TE_BLRn_path" % (self._project)))\n+        self._db.execute(\'select * from %s_TE_BLRn_path\' % (self._project))\n+        self.assertEquals(NB_EXPECTED_LINES_IN_PATH_FILE, len(self._db.fetchall()))\n+        self._db.dropTable("%s_TE_BLRn_path" % (self._project))\n+        \n+        \n+    def _createPathFiles( self, bankFull ):\n+        bank = os.path.split(bankFull)[1]\n+        fileName = "batch_1.fa_BLRn_" + bank + ".align.clean_match.path"\n+        f = open(fileName, "w")\n+        f.write("1\\tQueryName1\\t2\\t250\\tsubjectName1\\t5\\t255\\t4.1e-39\\t132\\t88.2\\n")\n+        f.write("2\\tQueryName1\\t255\\t550\\tsubjectName2\\t5\\t255\\t0.0002\\t32\\t78.2\\n")\n+        f.write("3\\tQueryName2\\t1\\t150\\tsubjectName1\\t250\\t400\\t5.1e-59\\t132\\t98\\n")\n+        f.write("4\\tQueryName3\\t2\\t250\\tsubjectName3\\t5\\t255\\t4.1e-39\\t132\\t88.2\\n")\n+        f.write("5\\tQueryName1\\t300\\t450\\tsubjectName1\\t300\\t450\\t4.1e-39\\t132\\t80.2\\n")\n+        f.close()\n+        fileName = "batch_2.fa_BLRn_" + bank + ".align.clean_match.path"\n+        f = open(fileName, "w")\n+        f.write("1\\tQueryName4\\t2\\t250\\tsubjectName1\\t5\\t255\\t4.1e-39\\t132\\t88.2\\n")\n+        f.write("2\\tQueryName4\\t255\\t550\\tsubjectName2\\t5\\t255\\t0.0002\\t32\\t78.2\\n")\n+        f.write("3\\tQueryName5\\t1\\t150\\tsubjectName1\\t250\\t400\\t5.1e-59\\t132\\t98\\n")\n+        
f.write("4\\tQueryName6\\t2\\t250\\tsubjectName3\\t5\\t255\\t4.1e-39\\t132\\t88.2\\n")\n+        f.write("5\\tQueryName7\\t300\\t450\\tsubjectName1\\t300\\t450\\t4.1e-39\\t132\\t80.2\\n")\n+        f.close()\n+        \n+        \n+    def _readPathResultsFromTableAndFillList( self ):\n+        tablePathAdaptatorInstance = TablePathAdaptator (self._db, "%s_TE_BLRn_path" % (self._project))\n+        pathList = tablePathAdaptatorInstance.getListOfAllPaths()\n+        return pathList\n+    \n+    \n+    def _readPathResultsFromFileAndFillList( self ):\n+        pathInstance = Path()\n+        pathList = []\n+        f = open( "%s_BLRn_%s.align.clean_match.path" % (self._project, self._bank) , "r")\n+        while pathInstance.read( f ):\n+            pathList.append(pathInstance)\n+            pathInstance = Path()\n+        f.close()\n+        return pathList\n+   \n+   \n+    def _createFile( self, nameFile ):\n+        f = open(nameFile, "w")\n+        f.close()\n+        \n+        \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_RepbaseBLRnForClassifierStep1 ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/AbstractClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/AbstractClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,606 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import getopt\n+import time\n+import glob\n+import sys\n+import os\n+\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.sql.RepetJob import RepetJob\n+from commons.core.sql.Job import Job\n+from commons.core.stat.Stat import Stat\n+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher\n+\n+GENERIC_IN_FILE = "zDUMMYz"\n+\n+\n+## Abstract class to launch a program in parallel on a cluster.\n+#\n+class AbstractClusterLauncher( object ):  #( IClusterLauncher )\n+     \n+    def __init__( self ):\n+        """\n+        Constructor.\n+        """\n+        self._inputDir = ""            # path to the directory with input files\n+        self._queueName = ""           # name of the queue on the cluster\n+        self._groupId = ""             # identifier of the group of jobs (groupid)\n+        self._inputFileSuffix = "fa"   # suffix of the input files (default=\'fa\')\n+        self._prgAcronym = ""          # acronym of the program to launch\n+        self._configFile = ""          # name of the configuration file (connect to MySQL)\n+        self._currentDir = os.getcwd() # path to the current directory\n+        self._tmpDir = ""              # path to the temporary directory\n+        self._jobTable = "jobs"        # name of the table recording the jobs\n+        self._catOutFiles = False      # concatenate output files of all jobs\n+        self._clean = False            # clean job file, job stdout, job table...\n+        self._verbose = 1              # verbosity level\n+        self.jobdb = None              # RepetJob instance\n+        self.job = Job()               # Job instance\n+        \n+        self._nbJobs = 0\n+        self._cmdLineGenericOptions = "hi:Q:g:S:a:C:d:j:Zcv:"\n+        self._cmdLineSpecificOptions = ""\n+        \n+        self._exeWrapper = 
"AbstractProgramLauncher.py"\n+        self._prgLauncher = None\n+        # list of instances derived from AbstractProgramLauncher()\n+        # If several program are launched successively in the same job,\n+        # \'lPrgLaunchers\' has to be filled before run().\n+        self.lPrgLaunchers = []\n+    \n+    def setProgramLauncherAttributeFromCmdLine(self, o, a=""):\n+        self.getProgramLauncherInstance().setASpecificAttributeFromCmdLine(o, a)\n+      \n+    def setClusterLauncherAttributeFromCmdLine(self, o, a=""):\n+        i'..b'lf.getTemporaryDirectory() == "":\n+            self.setTemporaryDirectory(self._currentDir)\n+    \n+    def checkGenericAttributes( self ):\n+        self.checkClusterLauncherAttributes()\n+        \n+    def checkProgramLauncherAttributes( self ):\n+        self.getProgramLauncherInstance().checkSpecificAttributes()\n+    \n+    def checkSpecificAttributes( self ):\n+        self.checkProgramLauncherAttributes()\n+        \n+    def start( self ):\n+            \n+        if self.lPrgLaunchers == []:\n+            self.setSingleProgramLauncher()\n+        for pL in self.lPrgLaunchers:\n+            if pL.getWrapperCommandLine() == "":\n+                string = "ERROR: wrapper command is empty !"\n+                print string\n+                sys.exit(1)\n+            if pL.getProgramCommandLine() == "":\n+                string = "ERROR: program command is empty !"\n+                print string\n+                sys.exit(1)\n+        self.checkProgramAvailability()\n+        \n+        try:\n+            self.checkProgramLauncherAttributes()\n+        except CheckerException, msg:\n+            print msg\n+            print self.getHelpAsString()\n+            sys.exit(1)\n+        \n+        if self.getVerbosityLevel() > 0:\n+            string = "START %s" % ( type(self).__name__ )\n+            print string\n+        self.job.tablename = self.getJobTableName()\n+        self.job.groupid = self.getGroupIdentifier()\n+        tokens = self.getQueueName().replace("\'","").split(" ")\n+        self.job.setQueue( tokens[0] )\n+        if len(tokens) > 1:\n+            lResources = tokens[1:]\n+            self.job.lResources = lResources\n+        if self.getVerbosityLevel() > 0:\n+            print "groupid: %s" % ( self.getGroupIdentifier() )\n+        self.jobdb = RepetJob( cfgFileName=self.getConfigFile() )\n+        if self.jobdb.hasUnfinishedJob( self.job.tablename, \\\n+                                          self.job.groupid ):\n+            self.jobdb.waitJobGroup( self.job.tablename, self.job.groupid )\n+            return\n+        self.jobdb.cleanJobGroup( self.job.tablename, self.job.groupid )\n+        sys.stdout.flush()\n+   \n+    def end( self ):\n+        if self.getClean():\n+            self.removeAllJobFiles()\n+            self.removeAllJobStdouts()\n+            self.removeAllJobStderrs()\n+            \n+        if self.getCatOutputFiles():\n+            self.catOutputFiles()\n+        \n+        self.jobdb.close()\n+        \n+        if self.getVerbosityLevel() > 0:\n+            string = "END %s" % ( type(self).__name__ )\n+            print string\n+        sys.stdout.flush()\n+        \n+    def run( self ):\n+        try:\n+            self.checkClusterLauncherAttributes()\n+        except CheckerException, msg:\n+            print msg\n+            print self.getHelpAsString()\n+            sys.exit(1)\n+            \n+        self.start()\n+        \n+        lInFiles = self.getInputFilesList()\n+        
self._nbJobs = len(lInFiles)\n+        \n+        if self._verbose > 0:\n+            string = "submitting " + str(self._nbJobs) +  " jobs... " + self.formatGroupidAndTime() \n+            print string; sys.stdout.flush()\n+        \n+        self.submitJob(lInFiles)\n+        \n+        if self._verbose > 0:        \n+            string = "waiting for jobs... " + self.formatGroupidAndTime() \n+            print string; sys.stdout.flush()\n+        \n+        self.jobdb.waitJobGroup( self.job.tablename, self.job.groupid )\n+        \n+        if self._verbose > 0:\n+            string = "all jobs completed ! " + self.formatGroupidAndTime() \n+            print string; sys.stdout.flush()\n+            statsExecutionTime = self.getStatsOfExecutionTime()\n+            print "execution time of all jobs (seconds): %f" % statsExecutionTime.getSum()\n+            print "execution time per job: %s" % statsExecutionTime.string()\n+            sys.stdout.flush()\n+            \n+        self.jobdb.cleanJobGroup( self.job.tablename, self.job.groupid )\n+        \n+        self.end()\n+            \n+    \n'
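For orientation, the run() method above reduces to: check the launcher attributes, open the job table, submit one wrapper job per input file, wait for the whole job group, then clean up in end(). The stand-alone sketch below only mirrors that submission loop; it prints what the real class records in the MySQL job table through RepetJob, and every name in it is illustrative.

    # Stand-alone sketch of the submission loop implemented by run(); input
    # files are selected by suffix from the input directory, as in
    # getInputFilesList(). The real launcher records each job in a 'jobs'
    # table via RepetJob and submits it to the cluster queue instead of printing.
    import glob
    import os

    def submit_all(input_dir, suffix="fa"):
        in_files = glob.glob(os.path.join(input_dir, "*.%s" % suffix))
        print("submitting %d jobs..." % len(in_files))
        for count, file_name in enumerate(in_files, start=1):
            job_name = "job%d_%s" % (count, os.path.basename(file_name))
            # here the launcher would build the wrapper command for file_name,
            # insert a row in the job table and submit it to the queue
            print("  %s -> %s" % (file_name, job_name))
        print("waiting for jobs...")  # waitJobGroup() blocks until every job finishes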
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/AbstractProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/AbstractProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+##@file
+# Abstract class to launch a program.
+
+import getopt
+import sys
+import os
+import pyRepet.launcher.AbstractProgramLauncher
+
+class AbstractProgramLauncher( pyRepet.launcher.AbstractProgramLauncher.AbstractProgramLauncher ):  #( IProgramLauncher )
+    
+    def getHelpAsString( self ):
+        """
+        Return the generic help as a string.
+        """
+        string = ""
+        string += "usage: %s.py [options]" % ( type(self).__name__ )
+        string += "\ngeneric options:"
+        string += "\n     -h: this help"
+        string += "\n     -i: name of the input file (format='%s')" % ( self.getFormatInputFile() )
+        string += "\n     -c: clean"
+        string += "\n     -v: verbosity level (default=0/1)"
+        return string
+    
+    def setAttributesFromCmdLine( self, o, a="" ):
+        """
+        Set a generic attribute from the command-line arguments.
+        """
+        if o == "-h":
+            print self.getHelpAsString()
+            sys.exit(0)
+        elif o == "-i":
+            self.setInputFile( a )
+        elif o == "-c":
+            self.setClean()
+        elif o == "-v":
+            self.setVerbosityLevel( a )
+    
+    def checkAttributesFromCmdLine( self ):
+        """
+        Parse the command-line arguments and set the corresponding attributes.
+        """
+        try:
+            opts, args = getopt.getopt( sys.argv[1:], "%s" % (self.getCmdLineOptions()) )
+        except getopt.GetoptError, err:
+            print str(err);
+            print self.getHelpAsString()
+            sys.exit(1)
+        for o, a in opts:
+            self.setAttributesFromCmdLine( o, a )
+    
+    def getCmdLineOptions(self):
+        return self._cmdLineGenericOptions
+            
+    def check( self ):
+        """
+        Check the generic attributes before running the program.
+        """
+        self._checkProgramName()
+        self.checkInput()
+
+    def checkInput(self):
+        if self.getInputFile() == "":
+            string = "ERROR: missing input file"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        
+        if not os.path.exists(self.getInputFile()):
+            string = "ERROR: input file '%s' doesn't exist" % (self.getInputFile())
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
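The wrapper above delegates option parsing to getopt, using the option string returned by getCmdLineOptions() and routing each flag through setAttributesFromCmdLine(). A self-contained sketch of the same pattern follows; the option string and the settings dictionary are illustrative stand-ins for the launcher's attributes, not part of this changeset.

    # Self-contained illustration of the getopt pattern used above; option
    # letters and keys are hypothetical.
    import getopt
    import sys

    def parse_options(argv, opt_string="hi:cv:"):
        settings = {"input": "", "clean": False, "verbosity": 0}
        try:
            opts, args = getopt.getopt(argv, opt_string)
        except getopt.GetoptError as err:
            print(str(err))
            sys.exit(1)
        for o, a in opts:
            if o == "-h":
                print("usage: prog [-h] [-i inFile] [-c] [-v level]")
                sys.exit(0)
            elif o == "-i":
                settings["input"] = a
            elif o == "-c":
                settings["clean"] = True
            elif o == "-v":
                settings["verbosity"] = int(a)
        return settings

    # example: parse_options(["-i", "batch_1.fa", "-v", "2"])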
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/IClusterLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/IClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,290 @@
+
+## Interface for AbstractClusterLauncher 
+#
+class IClusterLauncher(object):
+    
+    ## Constructor.
+    #
+    def __init__( self ):
+        pass
+    
+    ## Useful commands before running the program (check, open database connector...).
+    #
+    def start(self):
+        pass
+    
+    ## Useful commands after the program was run (clean, close database connector...).
+    #
+    def end(self):
+        pass
+    
+    ## Launch jobs in parallel on each file in the query directory.
+    #
+    def run(self):
+        pass
+    
+    ## Initialize the job (jobname, command, launcher).
+    #
+    def initializeJob(self, fileName, count):
+        pass
+    
+    ## Return all the job commands as a string.
+    #
+    def getJobCommandsAsString(self, fileName, jobName, minFreeGigaInTmpDir=1):
+        pass
+    
+    ## Return the generic help as a string.
+    #
+    def getGenericHelpAsString(self):
+        pass
+    
+    ## Check the generic attributes before running the program.
+    #
+    def checkGenericAttributes(self):
+        pass
+    
+    ## Check the specific attributes of each program launcher.
+    #
+    def checkSpecificAttributes(self):
+        pass
+    
+    ## Check that all required programs are in the user's PATH.
+    #
+    def checkProgramAvailability(self):
+        pass
+    
+    ## Return the command-line to launch in each job. Specified in each wrapper.
+    #
+    def getProgramCommandLineAsString(self):
+        pass
+    
+    ## Return the list of files to keep at the end of each job. Specified in each wrapper.
+    #
+    def getListFilesToKeep(self):
+        pass
+    
+    ## Return the list of files to remove at the end of each job. Specified in each wrapper.
+    #
+    def getListFilesToRemove(self):
+        pass
+    
+    ## Return the name of the job file as a string.
+    #
+    def getJobFileNameAsString(self, count):
+        pass
+    
+    ## Return the command to update the job status in the table.
+    #
+    def getCmdUpdateJobStatusAsString(self, newStatus):
+        pass
+    
+    ## Return the launching command as a string. Launch the wrapper, retrieve its exit status, update status if error.
+    #
+    def getCmdToLaunchWrapper(self, fileName, genericCmd, exeWrapper):
+        pass
+    
+    ## Return the commands to keep the output files.
+    #
+    def getCmdToKeepFiles( self, fileName, lFilesToKeep ):
+        pass
+    
+    ## Return the commands to remove the temporary files.
+    #
+    def getCmdToRemoveFiles( self, fileName, lFilesToRemove ):
+        pass
+    
+    ## Remove all job files.
+    #   
+    def removeAllJobFiles( self ):
+        pass
+            
+    ## Remove all job stdout.
+    # 
+    def removeAllJobStdouts( self ):
+        pass
+            
+    ## Remove all job stderr.
+    #
+    def removeAllJobStderrs( self ):
+        pass
+            
+    ## Process the output file if necessary.
+    #        
+    def processOutputFile( self, tmpFile, outFile ):
+        pass
+        
+    ## Concatenate output files from all jobs.
+    #    
+    def catOutputFiles( self ):
+        pass
+    
+    ## Return the specific help as a string.
+    #
+    def getSpecificHelpAsString( self ):
+        pass
+    
+    ## Return the help as a string.
+    #
+    def getHelpAsString( self ):
+        pass
+    
+    ## Set a generic attribute from the command-line arguments.
+    #
+    def setAGenericAttributeFromCmdLine( self, o, a="" ):
+        pass
+            
+    ## Set a specific attribute from the command-line arguments.
+    #       
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        pass
+        
+    ## Set the attributes from the command-line arguments.
+    #
+    def setAttributesFromCmdLine( self ):
+        pass
+            
+    ## 
+    #        
+    def setInputDirectory( self, arg ):
+        pass
+
+    ## 
+    #
+    def setQueueName( self, arg ):
+        pass
+    
+    ##
+    #    
+    def setGroupIdentifier( self, arg ):
+        pass
+    
+    ##
+    #
+    def setInputFileSuffix( self, arg ):
+        pass
+    
+    ##
+    #   
+    def setAcronym( self, arg ):
+        pass
+    
+    ##
+    #    
+    def setConfigFile( self, arg ):
+        pass
+    
+    ##
+    #        
+    def setCurrentDirectory( self ):
+        pass
+    
+    ##
+    #    
+    def setTemporaryDirectory( self, arg ):
+        pass
+    
+    ##
+    #
+    def setJobTableName( self, arg ):
+        pass
+    
+    ##
+    #
+    def setCatOutputFiles( self ):
+        pass
+    
+    ##
+    #
+    def setClean( self ):
+        pass
+    
+    ##
+    #
+    def setVerbosityLevel( self, arg ):
+        pass
+    
+    ##
+    #
+    def setExecutableWrapper( self, arg ):
+        pass
+    
+    ## Set the wrapper and program command-lines of the program launcher. Append the program launcher to 'self.lPrgLaunchers'.
+    #
+    def setSingleProgramLauncher( self ):
+        pass
+        
+    ##
+    #
+    def getInputDirectory( self ):
+        pass
+    
+    ##
+    #    
+    def getQueueName( self ):
+        pass
+    
+    ##
+    #    
+    def getGroupIdentifier( self ):
+        pass
+    
+    ##
+    #
+    def getInputFileSuffix( self ):
+        pass
+    
+    ##
+    #
+    def getAcronym( self ):
+        pass
+    
+    ##
+    #    
+    def getConfigFile( self ):
+        pass
+    
+    ##
+    #
+    def getCurrentDirectory( self ):
+        pass
+    
+    ##
+    #   
+    def getTemporaryDirectory( self ):
+        pass
+    
+    ##
+    # 
+    def getJobTableName( self ):
+        pass
+    
+    ##
+    #
+    def getCatOutputFiles( self ):
+        pass
+    
+    ##
+    #
+    def getClean( self ):
+        pass
+    
+    ##
+    #
+    def getVerbosityLevel( self ):
+        pass
+    
+    ##
+    #
+    def getWrapperName( self ):
+        pass
+    
+    ##
+    #
+    def getProgramName( self ):
+        pass
+    
+    ##
+    #
+    def getPatternToConcatenate( self ):
+        pass
+    
\ No newline at end of file
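IClusterLauncher above is purely documentary: every method body is pass and the behaviour is provided by AbstractClusterLauncher. If a runtime check that an object honours the interface were wanted, a minimal sketch could look like the following; the method subset is an illustrative choice, not part of this changeset.

    # Hedged sketch: check that an object exposes the main IClusterLauncher
    # entry points before using it (subset of methods chosen for illustration).
    REQUIRED_METHODS = ("start", "run", "end", "setAttributesFromCmdLine")

    def looks_like_cluster_launcher(obj):
        return all(callable(getattr(obj, name, None)) for name in REQUIRED_METHODS)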
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/blastx2GFFTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/blastx2GFFTestSuite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,26 @@
+import unittest
+import sys
+import Test_BlasterClusterLauncher
+import Test_BlasterProgramLauncher
+import Test_F_BlasterClusterLauncher
+import Test_F_BlasterComponent
+import Test_F_BlasterMatcher2GFF3
+import Test_F_BlasterMatcherComponent
+import Test_F_BlasterProgramLauncher
+
+def main():
+    
+        commonsTestSuite = unittest.TestSuite() 
+        commonsTestSuite.addTest( unittest.makeSuite( Test_BlasterClusterLauncher.Test_BlasterClusterLauncher, "test" ) )
+        commonsTestSuite.addTest( unittest.makeSuite( Test_BlasterProgramLauncher.Test_BlasterProgramLauncher, "test" ) )
+        commonsTestSuite.addTest( unittest.makeSuite( Test_F_BlasterClusterLauncher.Test_F_BlasterClusterLauncher, "test" ) )
+        commonsTestSuite.addTest( unittest.makeSuite( Test_F_BlasterComponent.Test_F_BlasterComponent, "test" ) )
+        commonsTestSuite.addTest( unittest.makeSuite( Test_F_BlasterMatcher2GFF3.Test_F_BlasterMatcher2GFF3, "test" ) )
+        commonsTestSuite.addTest( unittest.makeSuite( Test_F_BlasterMatcherComponent.Test_F_BlasterMatcherComponent, "test" ) )
+        commonsTestSuite.addTest( unittest.makeSuite( Test_F_BlasterProgramLauncher.Test_F_BlasterProgramLauncher, "test" ) )
+        runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+        runner.run( commonsTestSuite )
+
+      
+if __name__ == "__main__":
+    main()
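The suite above registers the blastx2GFF unit and functional test cases one by one with unittest.makeSuite. An equivalent sketch using a loader loop is shown below; the module list in the usage comment is shortened for illustration and assumes the Test_* modules are importable (for instance with REPET_PATH on PYTHONPATH).

    # Equivalent sketch: build the composite suite with a loader loop instead
    # of repeated makeSuite() calls.
    import unittest

    def build_suite(test_modules):
        loader = unittest.TestLoader()
        suite = unittest.TestSuite()
        for module in test_modules:
            # collect every test* method from the module, as makeSuite(..., "test") does
            suite.addTests(loader.loadTestsFromModule(module))
        return suite

    # usage sketch (module names as imported above):
    #   unittest.TextTestRunner(verbosity=2).run(
    #       build_suite([Test_BlasterClusterLauncher, Test_BlasterProgramLauncher]))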
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.path
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.path Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,5 @@
+1 lm_SuperContig_30_v2 35498 35914 mito_010 NADH dehydrogenase (ubiquinone) chain 5 307 445 1e-62 244 82.73
+1 lm_SuperContig_30_v2 37314 37823 mito_010 NADH dehydrogenase (ubiquinone) chain 5 494 663 8e-67 258 67.65
+161 lm_SuperContig_29_v2 193781 194212 1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 228 85 1e-40 84 30.56
+161 lm_SuperContig_29_v2 192832 193704 1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 522 229 1e-40 106 23.99
+174 lm_SuperContig_29_v2 78031 78588 xnc164_090 related to multidrug resistance protein 19 209 3e-21 101 30.89
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.tab Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,3 @@
+lm_SuperContig_30_v2 35498 37823 927 0.00598594 1.29469 mito_010 307 663 309 0.431564 8e-67 424 74.4336 1
+lm_SuperContig_29_v2 192832 194212 1305 0.00649448 2.13934 1nc550_030 522 85 438 0.718033 1e-40 99 26.1649 161
+lm_SuperContig_29_v2 78031 78588 558 0.00277695 0.980668 xnc164_090 19 209 191 0.335677 3e-21 101 30.89 174
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/exp_dummy.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/exp_dummy.gff Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,9 @@
+##gff-version 3
+lm_SuperContig_30_v2 blastx_ProtBank match 35498 37823 8e-67 + . ID=BlastBank_lm_SuperContig_30_v2_mito_010_m0001;Target=mito_010+307+663;Name=lm_SuperContig_30_v2_mito_010;target_pcover=43.15;target_pident=74.43;evalue=8e-67;target_description=NADH dehydrogenase (ubiquinone) chain 5;target_length=716;lib=BlastBank;program=blastx
+lm_SuperContig_30_v2 blastx_ProtBank match_part 35498 35914 1e-62 + 0 ID=BlastBank_lm_SuperContig_30_v2_mito_010_m0001mp0001;Target=mito_010+307+445;Parent=BlastBank_lm_SuperContig_30_v2_mito_010_m0001
+lm_SuperContig_30_v2 blastx_ProtBank match_part 37314 37823 8e-67 + 2 ID=BlastBank_lm_SuperContig_30_v2_mito_010_m0001mp0002;Target=mito_010+494+663;Parent=BlastBank_lm_SuperContig_30_v2_mito_010_m0001
+lm_SuperContig_29_v2 blastx_ProtBank match 192832 194212 1e-40 - . ID=BlastBank_lm_SuperContig_30_v2_1nc550_030_m0002;Target=1nc550_030+85+522;Name=lm_SuperContig_30_v2_1nc550_030;target_pcover=71.80;target_pident=26.16;evalue=1e-40;target_description=related to putative multidrug transporter Mfs1.1 (major facilitator family protein);target_length=610;lib=BlastBank;program=blastx
+lm_SuperContig_29_v2 blastx_ProtBank match_part 193781 194212 1e-40 - 0 ID=BlastBank_lm_SuperContig_30_v2_1nc550_030_m0002mp0002;Target=1nc550_030+85+228;Parent=BlastBank_lm_SuperContig_30_v2_1nc550_030_m0002
+lm_SuperContig_29_v2 blastx_ProtBank match_part 192832 193704 1e-40 - 2 ID=BlastBank_lm_SuperContig_30_v2_1nc550_030_m0002mp0001;Target=1nc550_030+229+522;Parent=BlastBank_lm_SuperContig_30_v2_1nc550_030_m0002
+lm_SuperContig_29_v2 blastx_ProtBank match 78031 78588 3e-21 + . ID=BlastBank_lm_SuperContig_30_v2_xnc164_090_m0003;Target=xnc164_090+19+209;Name=lm_SuperContig_30_v2_xnc164;target_pcover=33.56;target_pident=30.89;evalue=3e-21;target_description=related to multidrug resistance protein;target_length=569;lib=BlastBank;program=blastx
+lm_SuperContig_29_v2 blastx_ProtBank match_part 78031 78588 3e-21 + 1 ID=BlastBank_lm_SuperContig_30_v2_xnc164_090_m0003mp0001;Target=xnc164_090+19+209;Parent=BlastBank_lm_SuperContig_30_v2_xnc164_090_m0003
\ No newline at end of file
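exp_dummy.gff above encodes each chained blastx hit as a GFF3 "match" feature whose individual HSPs appear as "match_part" children linked through the Parent attribute. Assuming standard tab-separated GFF3 columns, a minimal stand-alone reader of that parent/child structure could be the following; nothing in it is part of the changeset.

    # Group match_part features under their parent match ID from a GFF3 file
    # shaped like exp_dummy.gff above.
    def group_match_parts(gff_file):
        children = {}
        with open(gff_file) as handle:
            for line in handle:
                if line.startswith("#") or not line.strip():
                    continue
                fields = line.rstrip("\n").split("\t")
                if len(fields) < 9 or fields[2] != "match_part":
                    continue
                attributes = dict(item.split("=", 1) for item in fields[8].split(";") if "=" in item)
                children.setdefault(attributes.get("Parent"), []).append(attributes.get("ID"))
        return children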
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,456 @@\n+lm_SuperContig_30_v2\t23817\t24089\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t1\t91\t7e-25\t119\t63.74\n+lm_SuperContig_30_v2\t27615\t28001\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t109\t237\t1e-53\t214\t77.52\n+lm_SuperContig_30_v2\t29503\t29673\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t254\t310\t3e-21\t107\t92.98\n+lm_SuperContig_30_v2\t35498\t35914\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t307\t445\t1e-62\t244\t82.73\n+lm_SuperContig_30_v2\t37314\t37823\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t494\t663\t8e-67\t258\t67.65\n+lm_SuperContig_30_v2\t23817\t24089\tmito_020 predicted ND5 intron 2 protein\t1\t91\t7e-25\t119\t63.74\n+lm_SuperContig_30_v2\t27615\t28001\tmito_020 predicted ND5 intron 2 protein\t109\t237\t1e-53\t214\t77.52\n+lm_SuperContig_30_v2\t28681\t29418\tmito_020 predicted ND5 intron 2 protein\t303\t540\t4e-37\t159\t41.2\n+lm_SuperContig_30_v2\t94306\t95085\tmito_020 predicted ND5 intron 2 protein\t544\t279\t2e-22\t110\t28.46\n+lm_SuperContig_30_v2\t18181\t19317\tmito_030 predicted ND5 intron 1 protein\t167\t528\t3e-57\t226\t37.76\n+lm_SuperContig_30_v2\t22437\t23375\tmito_030 predicted ND5 intron 1 protein\t219\t528\t1e-43\t181\t32.59\n+lm_SuperContig_30_v2\t23817\t24089\tmito_030 predicted ND5 intron 1 protein\t1\t91\t7e-25\t119\t63.74\n+lm_SuperContig_30_v2\t26190\t27509\tmito_030 predicted ND5 intron 1 protein\t84\t525\t0\t666\t76.52\n+lm_SuperContig_30_v2\t32527\t33390\tmito_030 predicted ND5 intron 1 protein\t236\t518\t6e-40\t169\t36.15\n+lm_SuperContig_30_v2\t35963\t37213\tmito_030 predicted ND5 intron 1 protein\t114\t525\t4e-62\t242\t38.08\n+lm_SuperContig_30_v2\t55780\t56187\tmito_030 predicted ND5 intron 1 protein\t492\t357\t1e-34\t96\t36.96\n+lm_SuperContig_30_v2\t56292\t56582\tmito_030 predicted ND5 intron 1 protein\t351\t255\t1e-34\t74\t38.78\n+lm_SuperContig_30_v2\t61805\t62656\tmito_030 predicted ND5 intron 1 protein\t528\t241\t3e-55\t219\t38.89\n+lm_SuperContig_30_v2\t63837\t64676\tmito_030 predicted ND5 intron 1 protein\t511\t232\t1e-35\t155\t35.21\n+lm_SuperContig_30_v2\t80861\t81706\tmito_030 predicted ND5 intron 1 protein\t525\t242\t5e-44\t182\t35.66\n+lm_SuperContig_30_v2\t82749\t83615\tmito_030 predicted ND5 intron 1 protein\t525\t236\t3e-47\t193\t35.27\n+lm_SuperContig_30_v2\t86928\t87770\tmito_030 predicted ND5 intron 1 protein\t525\t242\t3e-47\t193\t36.14\n+lm_SuperContig_30_v2\t90399\t91238\tmito_030 predicted ND5 intron 1 protein\t528\t249\t1e-37\t161\t32.52\n+lm_SuperContig_30_v2\t109092\t109925\tmito_030 predicted ND5 intron 1 protein\t525\t243\t2e-49\t200\t38.16\n+lm_SuperContig_30_v2\t115228\t116103\tmito_030 predicted ND5 intron 1 protein\t236\t528\t6e-48\t195\t35.93\n+lm_SuperContig_30_v2\t116645\t117925\tmito_030 predicted ND5 intron 1 protein\t121\t526\t6e-60\t235\t37.12\n+lm_SuperContig_30_v2\t117965\t119308\tmito_030 predicted ND5 intron 1 protein\t87\t525\t1e-63\t248\t35.79\n+lm_SuperContig_30_v2\t141039\t141308\tmito_030 predicted ND5 intron 1 protein\t250\t342\t8e-47\t58\t34.41\n+lm_SuperContig_30_v2\t141292\t141858\tmito_030 predicted ND5 intron 1 protein\t337\t525\t8e-47\t155\t42.11\n+lm_SuperContig_30_v2\t146223\t147503\tmito_030 predicted ND5 intron 1 protein\t114\t528\t2e-64\t250\t37.61\n+lm_SuperContig_30_v2\t149559\t149744\tmito_030 predicted ND5 intron 1 protein\t233\t296\t6e-40\t41\t35.94\n+lm_SuperContig_30_v2\t149749\t150444\tmito_030 predicted ND5 intron 1 protein\t299\t530\t6e-40\t149\t37.02\n+lm_SuperContig_30_v2\t150994\t152199\tmito_030 predicted ND5 
intron 1 protein\t121\t525\t4e-58\t229\t36.3\n+lm_SuperContig_30_v2\t21984\t22220\tmito_040 NADH dehydrogenase (ubiquinone) chain 4L\t1\t79\t3e-30\t136\t84.81\n+lm_SuperContig_30_v2\t18466\t19329\tmito_050 predicted ND4L intron protein\t145\t438\t6e-54\t215\t40.53\n+lm_SuperContig_30_v2\t21984\t22229\tmito_050 predicted ND4L intron protein\t1\t82\t8e-31\t138\t82.93\n+lm_SuperContig_30_v2\t22509\t23387\tmito_050 predicted ND4L intron protein\t145\t438\t7e-81\t305\t49\n+lm_SuperContig_30_v2\t26661\t27530\tmito_050 predicted ND4L intron protein\t145\t438\t1e-43\t181\t35.67\n+lm_SuperContig_30_v2\t32542\t33390\tmito_050 predicted ND4L intron protein\t145\t424\t6e-43\t179\t38.91\n+lm_SuperContig_30_v2\t36323\t37234\tmito_050 predicted ND4L intron protein\t145\t438\t4e-49\t199\t40.39\n+lm_SuperContig_30_v2\t'..b'cuolar Basic Amino acid transporter, Contig um_contig_1.17\t529\t218\t2e-42\t82\t20.7\n+lm_SuperContig_29_v2\t193787\t194374\tum00679 related to VBA1 - Vacuolar Basic Amino acid transporter, Contig um_contig_1.17\t211\t18\t2e-42\t114\t31\n+lm_SuperContig_29_v2\t78115\t78588\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t53\t210\t2e-53\t183\t55.06\n+lm_SuperContig_29_v2\t78662\t78814\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t211\t264\t2e-53\t47\t44.44\n+lm_SuperContig_29_v2\t192760\t193692\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t534\t211\t2e-104\t220\t36.92\n+lm_SuperContig_29_v2\t193766\t194239\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t210\t53\t2e-104\t183\t55.06\n+lm_SuperContig_29_v2\t78112\t78570\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t94\t246\t7e-23\t104\t35.95\n+lm_SuperContig_29_v2\t78659\t78802\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t253\t297\t7e-23\t24\t25\n+lm_SuperContig_29_v2\t192760\t193695\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t560\t253\t5e-40\t84\t19.43\n+lm_SuperContig_29_v2\t193784\t194242\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t246\t94\t5e-40\t104\t35.95\n+lm_SuperContig_29_v2\t78019\t78567\tum01964 related to multidrug resistance protein, Contig um_contig_1.72\t40\t220\t1e-24\t113\t33.7\n+lm_SuperContig_29_v2\t192901\t193695\tum01964 related to multidrug resistance protein, Contig um_contig_1.72\t490\t227\t7e-43\t85\t24.16\n+lm_SuperContig_29_v2\t193787\t194335\tum01964 related to multidrug resistance protein, Contig um_contig_1.72\t220\t40\t7e-43\t113\t33.7\n+lm_SuperContig_29_v2\t77890\t78591\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t38\t263\t1e-32\t115\t32.07\n+lm_SuperContig_29_v2\t78659\t78814\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t264\t313\t1e-32\t46\t36.54\n+lm_SuperContig_29_v2\t192760\t193695\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t575\t264\t1e-65\t158\t30.6\n+lm_SuperContig_29_v2\t193763\t194464\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t263\t38\t1e-65\t115\t32.07\n+lm_SuperContig_29_v2\t78136\t78576\tum02598 related to mfs1 - putative multidrug transporter, Contig um_contig_1.86\t133\t280\t1e-21\t91\t30.41\n+lm_SuperContig_29_v2\t78650\t78814\tum02598 related to mfs1 - putative multidrug transporter, Contig um_contig_1.86\t280\t331\t1e-21\t33\t29.09\n+lm_SuperContig_29_v2\t192754\t193704\tum02598 related to mfs1 - putative multidrug transporter, Contig 
um_contig_1.86\t598\t280\t3e-45\t114\t25.93\n+lm_SuperContig_29_v2\t193778\t194218\tum02598 related to mfs1 - putative multidrug transporter, Contig um_contig_1.86\t280\t133\t3e-45\t91\t30.41\n+lm_SuperContig_29_v2\t77938\t78585\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t27\t239\t2e-24\t93\t30.73\n+lm_SuperContig_29_v2\t78650\t78808\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t236\t285\t2e-24\t40\t33.96\n+lm_SuperContig_29_v2\t192757\t193704\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t554\t236\t1e-48\t124\t25.7\n+lm_SuperContig_29_v2\t193769\t194416\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t239\t27\t1e-48\t93\t30.73\n+lm_SuperContig_29_v2\t77938\t78576\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t13\t215\t4e-45\t167\t39.91\n+lm_SuperContig_29_v2\t78650\t78736\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t216\t244\t4e-45\t36\t55.17\n+lm_SuperContig_29_v2\t192826\t193704\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t511\t216\t2e-98\t216\t37.04\n+lm_SuperContig_29_v2\t193778\t194416\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t215\t13\t2e-98\t167\t39.91\n+lm_SuperContig_29_v2\t193006\t193692\tum05421 related to Multidrug resistance protein , Contig um_contig_1.192\t496\t272\t6e-41\t107\t27.07\n+lm_SuperContig_29_v2\t193778\t194107\tum05421 related to Multidrug resistance protein , Contig um_contig_1.192\t267\t158\t6e-41\t83\t35.45\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.map
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.map Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,341 @@\n+Afu3g02110.115\tlm_SuperContig_29_v2\t77989\t78369\n+Afu3g02110.116\tlm_SuperContig_29_v2\t194365\t193985\n+Afu3g01520.117\tlm_SuperContig_29_v2\t78109\t78817\n+Afu3g01520.118\tlm_SuperContig_29_v2\t194245\t192760\n+Afu3g02520.119\tlm_SuperContig_29_v2\t78010\t78808\n+Afu3g02520.120\tlm_SuperContig_29_v2\t194344\t192766\n+Afu3g02720.121\tlm_SuperContig_29_v2\t78109\t78817\n+Afu3g02720.122\tlm_SuperContig_29_v2\t194245\t192754\n+Afu6g03320.123\tlm_SuperContig_29_v2\t78061\t78814\n+Afu6g03320.124\tlm_SuperContig_29_v2\t194293\t192760\n+Afu6g02220.125\tlm_SuperContig_29_v2\t78007\t78817\n+Afu6g02220.126\tlm_SuperContig_29_v2\t194347\t193288\n+Afu8g00940.127\tlm_SuperContig_29_v2\t78133\t78817\n+Afu8g00940.128\tlm_SuperContig_29_v2\t194221\t192760\n+Afu4g02630.129\tlm_SuperContig_29_v2\t134343\t133669\n+Afu4g02630.130\tlm_SuperContig_29_v2\t168372\t167776\n+Afu4g02630.131\tlm_SuperContig_29_v2\t42080\t41484\n+Afu4g02630.132\tlm_SuperContig_29_v2\t102000\t101404\n+Afu4g02630.133\tlm_SuperContig_29_v2\t32272\t31597\n+Afu4g02630.134\tlm_SuperContig_29_v2\t17082\t16462\n+Afu4g02630.135\tlm_SuperContig_29_v2\t179264\t178590\n+Afu4g02630.136\tlm_SuperContig_29_v2\t11625\t10942\n+Afu4g02630.137\tlm_SuperContig_29_v2\t92562\t91888\n+Afu4g03920.138\tlm_SuperContig_29_v2\t78184\t78814\n+Afu4g03920.139\tlm_SuperContig_29_v2\t194170\t192964\n+Afu6g09110.140\tlm_SuperContig_29_v2\t78049\t78775\n+Afu6g09110.141\tlm_SuperContig_29_v2\t194305\t192904\n+Afu6g14640.142\tlm_SuperContig_29_v2\t78151\t78814\n+Afu6g14640.143\tlm_SuperContig_29_v2\t194203\t192760\n+Afu6g09710.144\tlm_SuperContig_29_v2\t78109\t78814\n+Afu6g09710.145\tlm_SuperContig_29_v2\t194245\t192772\n+Afu5g01540.146\tlm_SuperContig_29_v2\t78115\t78817\n+Afu5g01540.147\tlm_SuperContig_29_v2\t194239\t192754\n+Afu2g15140.148\tlm_SuperContig_29_v2\t194323\t192868\n+fg00095.149\tlm_SuperContig_29_v2\t194221\t193303\n+fg12051.150\tlm_SuperContig_29_v2\t194221\t192877\n+fg12074.151\tlm_SuperContig_29_v2\t77956\t78814\n+fg12074.152\tlm_SuperContig_29_v2\t194398\t192883\n+fg12335.153\tlm_SuperContig_29_v2\t78124\t78718\n+fg12335.154\tlm_SuperContig_29_v2\t194230\t192769\n+fg12717.155\tlm_SuperContig_29_v2\t194383\t193222\n+b13o8_150.156\tlm_SuperContig_29_v2\t194248\t193036\n+b18d24_100.157\tlm_SuperContig_29_v2\t78010\t78814\n+b18d24_100.158\tlm_SuperContig_29_v2\t194344\t192916\n+b5k2_230.159\tlm_SuperContig_29_v2\t78184\t78814\n+b5k2_230.160\tlm_SuperContig_29_v2\t194170\t192760\n+1nc550_030.161\tlm_SuperContig_29_v2\t194212\t192832\n+1nc570_120.162\tlm_SuperContig_29_v2\t77941\t78736\n+1nc570_120.163\tlm_SuperContig_29_v2\t194413\t192760\n+6nc360_090.164\tlm_SuperContig_29_v2\t194086\t192817\n+29e8_240.165\tlm_SuperContig_29_v2\t78112\t78817\n+29e8_240.166\tlm_SuperContig_29_v2\t194242\t192769\n+7nc450_010.167\tlm_SuperContig_29_v2\t194218\t192940\n+4nc446_010.168\tlm_SuperContig_29_v2\t194218\t192865\n+b11e5_230.169\tlm_SuperContig_29_v2\t194239\t192904\n+xnc126_050.170\tlm_SuperContig_29_v2\t77935\t78817\n+xnc126_050.171\tlm_SuperContig_29_v2\t194419\t192757\n+4nc135_070.172\tlm_SuperContig_29_v2\t78109\t78817\n+4nc135_070.173\tlm_SuperContig_29_v2\t194245\t192751\n+xnc164_090.174\tlm_SuperContig_29_v2\t78031\t78588\n+xnc164_090.175\tlm_SuperContig_29_v2\t194323\t192802\n+YBR293w.176\tlm_SuperContig_29_v2\t194107\t192916\n+YCL069w.177\tlm_SuperContig_29_v2\t193954\t192985\n+YDL037c.178\tlm_SuperContig_29_v2\t197422\t197024\n+YDL037c.179\tlm_SuperContig_29_v2\t197413\t197027\n+YDL037c.180\tlm_SuperContig_29_v2\t197437\t197039\n+Y
DL037c.181\tlm_SuperContig_29_v2\t197416\t197021\n+YGR109w-b.182\tlm_SuperContig_29_v2\t42272\t40143\n+YGR109w-b.183\tlm_SuperContig_29_v2\t17274\t16081\n+YGR109w-b.184\tlm_SuperContig_29_v2\t102192\t100993\n+YGR109w-b.185\tlm_SuperContig_29_v2\t168564\t167371\n+YGR109w-b.186\tlm_SuperContig_29_v2\t134535\t133543\n+YGR109w-b.187\tlm_SuperContig_29_v2\t92754\t91762\n+YGR109w-b.188\tlm_SuperContig_29_v2\t179477\t178464\n+YGR224w.189\tlm_SuperContig_29_v2\t78139\t78724\n+YGR224w.190\tlm_SuperContig_29_v2\t194215\t193630\n+YHL028w.191\tlm_SuperContig_29_v2\t197437\t197024\n+YHL028w.192\tlm_SuperContig_29_v2\t197413\t197024\n+YHL028w.193\tlm_SuperContig_29_v2\t197425\t197024\n+YHL028w.194\tlm_SuperContig_29_v2\t197434\t197024\n+YHL028w.195\tlm_SuperContig_29_v2\t1'..b'perContig_30_v2\t62650\t61793\n+mito_050.30\tlm_SuperContig_30_v2\t18466\t19329\n+mito_050.31\tlm_SuperContig_30_v2\t81703\t80849\n+mito_050.32\tlm_SuperContig_30_v2\t36323\t37234\n+mito_050.33\tlm_SuperContig_30_v2\t83594\t82737\n+mito_050.34\tlm_SuperContig_30_v2\t149583\t150450\n+mito_050.35\tlm_SuperContig_30_v2\t64748\t63768\n+mito_050.36\tlm_SuperContig_30_v2\t118442\t119320\n+mito_050.37\tlm_SuperContig_30_v2\t87767\t86916\n+mito_050.38\tlm_SuperContig_30_v2\t26661\t27530\n+mito_050.39\tlm_SuperContig_30_v2\t109925\t109071\n+mito_050.40\tlm_SuperContig_30_v2\t115249\t116115\n+mito_050.41\tlm_SuperContig_30_v2\t91259\t90387\n+mito_050.42\tlm_SuperContig_30_v2\t32542\t33390\n+mito_050.43\tlm_SuperContig_30_v2\t141021\t141879\n+mito_050.44\tlm_SuperContig_30_v2\t117050\t117943\n+mito_050.45\tlm_SuperContig_30_v2\t146610\t147515\n+mito_050.46\tlm_SuperContig_30_v2\t151234\t152220\n+mito_060.47\tlm_SuperContig_30_v2\t53696\t53343\n+mito_080.48\tlm_SuperContig_30_v2\t5079\t5582\n+mito_080.49\tlm_SuperContig_30_v2\t59799\t53703\n+mito_110.50\tlm_SuperContig_30_v2\t65165\t64707\n+mito_150.51\tlm_SuperContig_30_v2\t110387\t108494\n+mito_150.52\tlm_SuperContig_30_v2\t103711\t103508\n+mito_170.53\tlm_SuperContig_30_v2\t17614\t20481\n+mito_180.54\tlm_SuperContig_30_v2\t17614\t19341\n+mito_180.55\tlm_SuperContig_30_v2\t62653\t61781\n+mito_180.56\tlm_SuperContig_30_v2\t32542\t33390\n+mito_180.57\tlm_SuperContig_30_v2\t83615\t82716\n+mito_180.58\tlm_SuperContig_30_v2\t36329\t37252\n+mito_180.59\tlm_SuperContig_30_v2\t109979\t109059\n+mito_180.60\tlm_SuperContig_30_v2\t141030\t141897\n+mito_180.61\tlm_SuperContig_30_v2\t81706\t80825\n+mito_180.62\tlm_SuperContig_30_v2\t149580\t150465\n+mito_180.63\tlm_SuperContig_30_v2\t64718\t63756\n+mito_180.64\tlm_SuperContig_30_v2\t115189\t116127\n+mito_180.65\tlm_SuperContig_30_v2\t87770\t86895\n+mito_180.66\tlm_SuperContig_30_v2\t22509\t23405\n+mito_180.67\tlm_SuperContig_30_v2\t91256\t90384\n+mito_180.68\tlm_SuperContig_30_v2\t118439\t119344\n+mito_180.69\tlm_SuperContig_30_v2\t56585\t55774\n+mito_180.70\tlm_SuperContig_30_v2\t117050\t117955\n+mito_180.71\tlm_SuperContig_30_v2\t26640\t27542\n+mito_180.72\tlm_SuperContig_30_v2\t151309\t152232\n+mito_180.73\tlm_SuperContig_30_v2\t146610\t147527\n+mito_180.74\tlm_SuperContig_30_v2\t42209\t42879\n+mito_180.75\tlm_SuperContig_30_v2\t125743\t126138\n+mito_190.76\tlm_SuperContig_30_v2\t44197\t47305\n+mito_210.77\tlm_SuperContig_30_v2\t102604\t102101\n+mito_210.78\tlm_SuperContig_30_v2\t94202\t89139\n+mito_220.79\tlm_SuperContig_30_v2\t14626\t15192\n+mito_220.80\tlm_SuperContig_30_v2\t15860\t16315\n+mito_220.81\tlm_SuperContig_30_v2\t9733\t9936\n+mito_220.82\tlm_SuperContig_30_v2\t6405\t6605\n+mito_230.83\tlm_SuperContig_30_v2\t9733\t9936\n+mito_230.84\tlm_S
uperContig_30_v2\t6405\t6605\n+mito_240.85\tlm_SuperContig_30_v2\t9733\t9927\n+mito_240.86\tlm_SuperContig_30_v2\t6405\t6605\n+mito_155.87\tlm_SuperContig_30_v2\t18442\t19341\n+mito_155.88\tlm_SuperContig_30_v2\t110081\t109056\n+mito_155.89\tlm_SuperContig_30_v2\t36323\t37249\n+mito_155.90\tlm_SuperContig_30_v2\t62650\t61781\n+mito_155.91\tlm_SuperContig_30_v2\t141021\t141891\n+mito_155.92\tlm_SuperContig_30_v2\t81703\t80828\n+mito_155.93\tlm_SuperContig_30_v2\t149583\t150462\n+mito_155.94\tlm_SuperContig_30_v2\t83594\t82716\n+mito_155.95\tlm_SuperContig_30_v2\t116981\t117955\n+mito_155.96\tlm_SuperContig_30_v2\t87767\t86895\n+mito_155.97\tlm_SuperContig_30_v2\t146610\t147533\n+mito_155.98\tlm_SuperContig_30_v2\t64655\t63756\n+mito_155.99\tlm_SuperContig_30_v2\t151258\t152232\n+mito_155.100\tlm_SuperContig_30_v2\t91256\t90381\n+mito_155.101\tlm_SuperContig_30_v2\t32494\t33504\n+mito_155.102\tlm_SuperContig_30_v2\t115198\t116127\n+mito_155.103\tlm_SuperContig_30_v2\t26607\t27542\n+mito_155.104\tlm_SuperContig_30_v2\t118442\t119341\n+mito_155.105\tlm_SuperContig_30_v2\t22509\t23387\n+mito_155.106\tlm_SuperContig_30_v2\t125740\t126240\n+mito_195.107\tlm_SuperContig_30_v2\t44996\t45911\n+mito_195.108\tlm_SuperContig_30_v2\t46058\t46769\n+SPMIT.05.109\tlm_SuperContig_30_v2\t14662\t15189\n+SPMIT.05.110\tlm_SuperContig_30_v2\t15941\t16303\n+SPMIT.04.111\tlm_SuperContig_30_v2\t65204\t64752\n+SPMIT.03.112\tlm_SuperContig_30_v2\t95262\t94345\n+SPMIT.01.113\tlm_SuperContig_30_v2\t102580\t102098\n+SPMIT.01.114\tlm_SuperContig_30_v2\t94202\t93876\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.path
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.path Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,456 @@\n+1\tlm_SuperContig_30_v2\t35498\t35914\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t307\t445\t1e-62\t244\t82.73\n+1\tlm_SuperContig_30_v2\t37314\t37823\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t494\t663\t8e-67\t258\t67.65\n+2\tlm_SuperContig_30_v2\t27615\t28001\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t109\t237\t1e-53\t214\t77.52\n+2\tlm_SuperContig_30_v2\t29503\t29673\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t254\t310\t3e-21\t107\t92.98\n+3\tlm_SuperContig_30_v2\t23817\t24089\tmito_010 NADH dehydrogenase (ubiquinone) chain 5\t1\t91\t7e-25\t119\t63.74\n+4\tlm_SuperContig_30_v2\t27615\t28001\tmito_020 predicted ND5 intron 2 protein\t109\t237\t1e-53\t214\t77.52\n+4\tlm_SuperContig_30_v2\t28681\t29418\tmito_020 predicted ND5 intron 2 protein\t303\t540\t4e-37\t159\t41.2\n+5\tlm_SuperContig_30_v2\t94306\t95085\tmito_020 predicted ND5 intron 2 protein\t544\t279\t2e-22\t110\t28.46\n+6\tlm_SuperContig_30_v2\t23817\t24089\tmito_020 predicted ND5 intron 2 protein\t1\t91\t7e-25\t119\t63.74\n+7\tlm_SuperContig_30_v2\t23817\t24089\tmito_030 predicted ND5 intron 1 protein\t1\t91\t7e-25\t119\t63.74\n+7\tlm_SuperContig_30_v2\t26190\t27509\tmito_030 predicted ND5 intron 1 protein\t84\t525\t0\t666\t76.52\n+8\tlm_SuperContig_30_v2\t61805\t62656\tmito_030 predicted ND5 intron 1 protein\t528\t241\t3e-55\t219\t38.89\n+9\tlm_SuperContig_30_v2\t146223\t147503\tmito_030 predicted ND5 intron 1 protein\t114\t528\t2e-64\t250\t37.61\n+10\tlm_SuperContig_30_v2\t109092\t109925\tmito_030 predicted ND5 intron 1 protein\t525\t243\t2e-49\t200\t38.16\n+11\tlm_SuperContig_30_v2\t117965\t119308\tmito_030 predicted ND5 intron 1 protein\t87\t525\t1e-63\t248\t35.79\n+12\tlm_SuperContig_30_v2\t82749\t83615\tmito_030 predicted ND5 intron 1 protein\t525\t236\t3e-47\t193\t35.27\n+13\tlm_SuperContig_30_v2\t35963\t37213\tmito_030 predicted ND5 intron 1 protein\t114\t525\t4e-62\t242\t38.08\n+14\tlm_SuperContig_30_v2\t86928\t87770\tmito_030 predicted ND5 intron 1 protein\t525\t242\t3e-47\t193\t36.14\n+15\tlm_SuperContig_30_v2\t116645\t117925\tmito_030 predicted ND5 intron 1 protein\t121\t526\t6e-60\t235\t37.12\n+16\tlm_SuperContig_30_v2\t80861\t81706\tmito_030 predicted ND5 intron 1 protein\t525\t242\t5e-44\t182\t35.66\n+17\tlm_SuperContig_30_v2\t150994\t152199\tmito_030 predicted ND5 intron 1 protein\t121\t525\t4e-58\t229\t36.3\n+18\tlm_SuperContig_30_v2\t56292\t56582\tmito_030 predicted ND5 intron 1 protein\t351\t255\t1e-34\t74\t38.78\n+18\tlm_SuperContig_30_v2\t55780\t56187\tmito_030 predicted ND5 intron 1 protein\t492\t357\t1e-34\t96\t36.96\n+19\tlm_SuperContig_30_v2\t18181\t19317\tmito_030 predicted ND5 intron 1 protein\t167\t528\t3e-57\t226\t37.76\n+20\tlm_SuperContig_30_v2\t90399\t91238\tmito_030 predicted ND5 intron 1 protein\t528\t249\t1e-37\t161\t32.52\n+21\tlm_SuperContig_30_v2\t141039\t141308\tmito_030 predicted ND5 intron 1 protein\t250\t342\t8e-47\t58\t34.41\n+21\tlm_SuperContig_30_v2\t141292\t141858\tmito_030 predicted ND5 intron 1 protein\t337\t525\t8e-47\t155\t42.11\n+22\tlm_SuperContig_30_v2\t63837\t64676\tmito_030 predicted ND5 intron 1 protein\t511\t232\t1e-35\t155\t35.21\n+23\tlm_SuperContig_30_v2\t115228\t116103\tmito_030 predicted ND5 intron 1 protein\t236\t528\t6e-48\t195\t35.93\n+24\tlm_SuperContig_30_v2\t149559\t149744\tmito_030 predicted ND5 intron 1 protein\t233\t296\t6e-40\t41\t35.94\n+24\tlm_SuperContig_30_v2\t149749\t150444\tmito_030 predicted ND5 intron 1 protein\t299\t530\t6e-40\t149\t37.02\n+25\tlm_SuperContig_30_v2\t22437\t23375\tmito_030 predicted 
ND5 intron 1 protein\t219\t528\t1e-43\t181\t32.59\n+26\tlm_SuperContig_30_v2\t32527\t33390\tmito_030 predicted ND5 intron 1 protein\t236\t518\t6e-40\t169\t36.15\n+27\tlm_SuperContig_30_v2\t21984\t22220\tmito_040 NADH dehydrogenase (ubiquinone) chain 4L\t1\t79\t3e-30\t136\t84.81\n+28\tlm_SuperContig_30_v2\t21984\t22229\tmito_050 predicted ND4L intron protein\t1\t82\t8e-31\t138\t82.93\n+28\tlm_SuperContig_30_v2\t22509\t23387\tmito_050 predicted ND4L intron protein\t145\t438\t7e-81\t305\t49\n+29\tlm_SuperContig_30_v2\t61793\t62650\tmito_050 predicted ND4L intron protein\t438\t145\t3e-49\t200\t40.33\n+30\tlm_SuperContig_30_v2\t18466\t19329\tmito_050 predicted ND4L intron protein\t145\t438\t6e-54\t215\t40.53\n+31\tlm_SuperContig_30_v2\t80849\t81703\tmito_050 predicted ND4L intron protein\t435\t145\t1e-45\t188\t39.6\n+32\tlm_SuperCont'..b'm00679 related to VBA1 - Vacuolar Basic Amino acid transporter, Contig um_contig_1.17\t529\t218\t2e-42\t82\t20.7\n+327\tlm_SuperContig_29_v2\t78115\t78588\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t53\t210\t2e-53\t183\t55.06\n+327\tlm_SuperContig_29_v2\t78662\t78814\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t211\t264\t2e-53\t47\t44.44\n+328\tlm_SuperContig_29_v2\t193766\t194239\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t210\t53\t2e-104\t183\t55.06\n+328\tlm_SuperContig_29_v2\t192760\t193692\tum00842 probable aflatoxin efflux pump AFLT, Contig um_contig_1.27\t534\t211\t2e-104\t220\t36.92\n+329\tlm_SuperContig_29_v2\t78112\t78570\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t94\t246\t7e-23\t104\t35.95\n+329\tlm_SuperContig_29_v2\t78659\t78802\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t253\t297\t7e-23\t24\t25\n+330\tlm_SuperContig_29_v2\t193784\t194242\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t246\t94\t5e-40\t104\t35.95\n+330\tlm_SuperContig_29_v2\t192760\t193695\tum01882 related to multidrug resistance proteins, Contig um_contig_1.70\t560\t253\t5e-40\t84\t19.43\n+331\tlm_SuperContig_29_v2\t78019\t78567\tum01964 related to multidrug resistance protein, Contig um_contig_1.72\t40\t220\t1e-24\t113\t33.7\n+332\tlm_SuperContig_29_v2\t193787\t194335\tum01964 related to multidrug resistance protein, Contig um_contig_1.72\t220\t40\t7e-43\t113\t33.7\n+332\tlm_SuperContig_29_v2\t192901\t193695\tum01964 related to multidrug resistance protein, Contig um_contig_1.72\t490\t227\t7e-43\t85\t24.16\n+333\tlm_SuperContig_29_v2\t77890\t78591\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t38\t263\t1e-32\t115\t32.07\n+333\tlm_SuperContig_29_v2\t78659\t78814\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t264\t313\t1e-32\t46\t36.54\n+334\tlm_SuperContig_29_v2\t193763\t194464\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t263\t38\t1e-65\t115\t32.07\n+334\tlm_SuperContig_29_v2\t192760\t193695\tum02062 related to multidrug resistance proteins, Contig um_contig_1.75\t575\t264\t1e-65\t158\t30.6\n+335\tlm_SuperContig_29_v2\t78136\t78576\tum02598 related to mfs1 - putative multidrug transporter, Contig um_contig_1.86\t133\t280\t1e-21\t91\t30.41\n+335\tlm_SuperContig_29_v2\t78650\t78814\tum02598 related to mfs1 - putative multidrug transporter, Contig um_contig_1.86\t280\t331\t1e-21\t33\t29.09\n+336\tlm_SuperContig_29_v2\t193778\t194218\tum02598 related to mfs1 - putative multidrug transporter, Contig 
um_contig_1.86\t280\t133\t3e-45\t91\t30.41\n+336\tlm_SuperContig_29_v2\t192754\t193704\tum02598 related to mfs1 - putative multidrug transporter, Contig um_contig_1.86\t598\t280\t3e-45\t114\t25.93\n+337\tlm_SuperContig_29_v2\t77938\t78585\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t27\t239\t2e-24\t93\t30.73\n+337\tlm_SuperContig_29_v2\t78650\t78808\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t236\t285\t2e-24\t40\t33.96\n+338\tlm_SuperContig_29_v2\t193769\t194416\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t239\t27\t1e-48\t93\t30.73\n+338\tlm_SuperContig_29_v2\t192757\t193704\tum03115 related to Sge1 - drug resistance protein, Contig um_contig_1.105\t554\t236\t1e-48\t124\t25.7\n+339\tlm_SuperContig_29_v2\t77938\t78576\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t13\t215\t4e-45\t167\t39.91\n+339\tlm_SuperContig_29_v2\t78650\t78736\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t216\t244\t4e-45\t36\t55.17\n+340\tlm_SuperContig_29_v2\t193778\t194416\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t215\t13\t2e-98\t167\t39.91\n+340\tlm_SuperContig_29_v2\t192826\t193704\tum05414 probable aflatoxin efflux pump AFLT, Contig um_contig_1.192\t511\t216\t2e-98\t216\t37.04\n+341\tlm_SuperContig_29_v2\t193778\t194107\tum05421 related to Multidrug resistance protein , Contig um_contig_1.192\t267\t158\t6e-41\t83\t35.45\n+341\tlm_SuperContig_29_v2\t193006\t193692\tum05421 related to Multidrug resistance protein , Contig um_contig_1.192\t496\t272\t6e-41\t107\t27.07\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.tab Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,342 @@\n+query.name\tquery.start\tquery.end\tquery.length\tquery.length.%\tmatch.length.%\tsubject.name\tsubject.start\tsubject.end\tsubject.length\tsubject.length.%\tE.value\tScore\tIdentity\tpath\n+lm_SuperContig_30_v2\t35498\t37823\t927\t0.00598594\t1.29469\tmito_010\t307\t663\t309\t0.431564\t8e-67\t424\t74.4336\t1\n+lm_SuperContig_30_v2\t27615\t29673\t558\t0.00360318\t0.77933\tmito_010\t109\t310\t186\t0.259777\t1e-53\t244\t82.2577\t2\n+lm_SuperContig_30_v2\t23817\t24089\t273\t0.00176285\t0.381285\tmito_010\t1\t91\t91\t0.127095\t7e-25\t119\t63.74\t3\n+lm_SuperContig_30_v2\t27615\t29418\t1125\t0.00726449\t2.06044\tmito_020\t109\t540\t367\t0.672161\t1e-53\t329\t53.6941\t4\n+lm_SuperContig_30_v2\t94306\t95085\t780\t0.00503671\t1.42857\tmito_020\t544\t279\t266\t0.487179\t2e-22\t110\t28.46\t5\n+lm_SuperContig_30_v2\t23817\t24089\t273\t0.00176285\t0.5\tmito_020\t1\t91\t91\t0.166667\t7e-25\t119\t63.74\t6\n+lm_SuperContig_30_v2\t23817\t27509\t1593\t0.0102865\t2.98315\tmito_030\t1\t525\t525\t0.983146\t0\t680\t74.3298\t7\n+lm_SuperContig_30_v2\t61805\t62656\t852\t0.00550164\t1.59551\tmito_030\t528\t241\t288\t0.539326\t3e-55\t219\t38.89\t8\n+lm_SuperContig_30_v2\t146223\t147503\t1281\t0.00827183\t2.39888\tmito_030\t114\t528\t415\t0.777154\t2e-64\t250\t37.61\t9\n+lm_SuperContig_30_v2\t109092\t109925\t834\t0.00538541\t1.5618\tmito_030\t525\t243\t283\t0.529963\t2e-49\t200\t38.16\t10\n+lm_SuperContig_30_v2\t117965\t119308\t1344\t0.00867864\t2.51685\tmito_030\t87\t525\t439\t0.822097\t1e-63\t248\t35.79\t11\n+lm_SuperContig_30_v2\t82749\t83615\t867\t0.0055985\t1.6236\tmito_030\t525\t236\t290\t0.543071\t3e-47\t193\t35.27\t12\n+lm_SuperContig_30_v2\t35963\t37213\t1251\t0.00807811\t2.3427\tmito_030\t114\t525\t412\t0.771536\t4e-62\t242\t38.08\t13\n+lm_SuperContig_30_v2\t86928\t87770\t843\t0.00544352\t1.57865\tmito_030\t525\t242\t284\t0.531835\t3e-47\t193\t36.14\t14\n+lm_SuperContig_30_v2\t116645\t117925\t1281\t0.00827183\t2.39888\tmito_030\t121\t526\t406\t0.7603\t6e-60\t235\t37.12\t15\n+lm_SuperContig_30_v2\t80861\t81706\t846\t0.00546289\t1.58427\tmito_030\t525\t242\t284\t0.531835\t5e-44\t182\t35.66\t16\n+lm_SuperContig_30_v2\t150994\t152199\t1206\t0.00778753\t2.25843\tmito_030\t121\t525\t405\t0.758427\t4e-58\t229\t36.3\t17\n+lm_SuperContig_30_v2\t55780\t56582\t699\t0.00451367\t1.30899\tmito_030\t492\t255\t233\t0.43633\t1e-34\t118\t37.7177\t18\n+lm_SuperContig_30_v2\t18181\t19317\t1137\t0.00734197\t2.12921\tmito_030\t167\t528\t362\t0.677903\t3e-57\t226\t37.76\t19\n+lm_SuperContig_30_v2\t90399\t91238\t840\t0.00542415\t1.57303\tmito_030\t528\t249\t280\t0.524345\t1e-37\t161\t32.52\t20\n+lm_SuperContig_30_v2\t141039\t141858\t820\t0.005295\t1.53558\tmito_030\t250\t525\t276\t0.516854\t8e-47\t212\t39.6261\t21\n+lm_SuperContig_30_v2\t63837\t64676\t840\t0.00542415\t1.57303\tmito_030\t511\t232\t280\t0.524345\t1e-35\t155\t35.21\t22\n+lm_SuperContig_30_v2\t115228\t116103\t876\t0.00565661\t1.64045\tmito_030\t236\t528\t293\t0.548689\t6e-48\t195\t35.93\t23\n+lm_SuperContig_30_v2\t149559\t150444\t882\t0.00569536\t1.65169\tmito_030\t233\t530\t296\t0.554307\t6e-40\t189\t36.7922\t24\n+lm_SuperContig_30_v2\t22437\t23375\t939\t0.00606342\t1.75843\tmito_030\t219\t528\t310\t0.580524\t1e-43\t181\t32.59\t25\n+lm_SuperContig_30_v2\t32527\t33390\t864\t0.00557912\t1.61798\tmito_030\t236\t518\t283\t0.529963\t6e-40\t169\t36.15\t26\n+lm_SuperContig_30_v2\t21984\t22220\t237\t0.00153038\t2.63333\tmito_040\t1\t79\t79\t0.877778\t3e-30\t136\t84.81\t27\n+lm_SuperContig_30_v2\t21984\t23387\t1125\t0.00726449\t2.48344\tmito_050\t1\t438
\t376\t0.830022\t7e-81\t419\t56.4194\t28\n+lm_SuperContig_30_v2\t61793\t62650\t858\t0.00554038\t1.89404\tmito_050\t438\t145\t294\t0.649007\t3e-49\t200\t40.33\t29\n+lm_SuperContig_30_v2\t18466\t19329\t864\t0.00557912\t1.90728\tmito_050\t145\t438\t294\t0.649007\t6e-54\t215\t40.53\t30\n+lm_SuperContig_30_v2\t80849\t81703\t855\t0.00552101\t1.88742\tmito_050\t435\t145\t291\t0.642384\t1e-45\t188\t39.6\t31\n+lm_SuperContig_30_v2\t36323\t37234\t912\t0.00588908\t2.01325\tmito_050\t145\t438\t294\t0.649007\t4e-49\t199\t40.39\t32\n+lm_SuperContig_30_v2\t82737\t83594\t858\t0.00554038\t1.89404\tmito_050\t435\t145\t291\t0.642384\t1e-45\t188\t37.46\t33\n+lm_SuperContig_30_v2\t149583\t150450\t868\t0.00560495\t1.91611\tmito_050\t145\t438\t294\t0.649007\t3e-39\t188\t37.5948\t34\n+lm_SuperContig_30_v2\t63768\t64748\t981\t0.00633463\t2.16556\tmito_050\t438\t111\t328\t0.724062\t4e-45\t186\t37.09\t35\n+lm_SuperContig_30_v2\t118442\t119320\t879\t0.00567598\t1.9404\tmito_050\t145\t435\t291\t0.642384\t3e-44\t1'..b'AC13D1.01c\t831\t427\t405\t0.303826\t2e-39\t166\t28.15\t305\n+lm_SuperContig_29_v2\t91693\t92700\t1008\t0.00501642\t0.756189\tSPAC13D1.01c\t786\t452\t335\t0.251313\t1e-37\t161\t32.94\t306\n+lm_SuperContig_29_v2\t178395\t179504\t1110\t0.00552404\t0.832708\tSPAC13D1.01c\t786\t418\t369\t0.276819\t1e-37\t160\t30.91\t307\n+lm_SuperContig_29_v2\t10747\t11856\t1110\t0.00552404\t0.832708\tSPAC13D1.01c\t786\t418\t369\t0.276819\t1e-36\t157\t31.18\t308\n+lm_SuperContig_29_v2\t133341\t134754\t1380\t0.00686772\t1.03526\tSPBC9B6.02c\t831\t358\t461\t0.345836\t4e-46\t116\t29.6018\t309\n+lm_SuperContig_29_v2\t100993\t102531\t1539\t0.007659\t1.15454\tSPBC9B6.02c\t831\t312\t520\t0.390098\t7e-44\t182\t27.67\t310\n+lm_SuperContig_29_v2\t41079\t42287\t1209\t0.00601672\t0.906977\tSPBC9B6.02c\t831\t427\t405\t0.303826\t7e-42\t175\t28.64\t311\n+lm_SuperContig_29_v2\t167371\t168579\t1209\t0.00601672\t0.906977\tSPBC9B6.02c\t831\t427\t405\t0.303826\t2e-42\t174\t29.14\t312\n+lm_SuperContig_29_v2\t16081\t17289\t1209\t0.00601672\t0.906977\tSPBC9B6.02c\t831\t427\t405\t0.303826\t4e-39\t165\t27.9\t313\n+lm_SuperContig_29_v2\t91693\t92700\t1008\t0.00501642\t0.756189\tSPBC9B6.02c\t786\t452\t335\t0.251313\t3e-37\t159\t32.64\t314\n+lm_SuperContig_29_v2\t178395\t179504\t1110\t0.00552404\t0.832708\tSPBC9B6.02c\t786\t418\t369\t0.276819\t3e-37\t158\t30.65\t315\n+lm_SuperContig_29_v2\t10747\t11856\t1110\t0.00552404\t0.832708\tSPBC9B6.02c\t786\t418\t369\t0.276819\t3e-36\t155\t30.91\t316\n+lm_SuperContig_29_v2\t133341\t134754\t1380\t0.00686772\t1.03526\tSPAC9.04\t831\t358\t461\t0.345836\t4e-46\t116\t29.6018\t317\n+lm_SuperContig_29_v2\t100993\t102531\t1539\t0.007659\t1.15454\tSPAC9.04\t831\t312\t520\t0.390098\t7e-44\t182\t27.67\t318\n+lm_SuperContig_29_v2\t41079\t42287\t1209\t0.00601672\t0.906977\tSPAC9.04\t831\t427\t405\t0.303826\t7e-42\t175\t28.64\t319\n+lm_SuperContig_29_v2\t167371\t168579\t1209\t0.00601672\t0.906977\tSPAC9.04\t831\t427\t405\t0.303826\t2e-42\t174\t29.14\t320\n+lm_SuperContig_29_v2\t16081\t17289\t1209\t0.00601672\t0.906977\tSPAC9.04\t831\t427\t405\t0.303826\t4e-39\t165\t27.9\t321\n+lm_SuperContig_29_v2\t91693\t92700\t1008\t0.00501642\t0.756189\tSPAC9.04\t786\t452\t335\t0.251313\t3e-37\t159\t32.64\t322\n+lm_SuperContig_29_v2\t178395\t179504\t1110\t0.00552404\t0.832708\tSPAC9.04\t786\t418\t369\t0.276819\t3e-37\t158\t30.65\t323\n+lm_SuperContig_29_v2\t10747\t11856\t1110\t0.00552404\t0.832708\tSPAC9.04\t786\t418\t369\t0.276819\t3e-36\t155\t30.91\t324\n+lm_SuperContig_29_v2\t77980\t78567\t588\t0.00292625\t1.01379\tum00679\t18\t211\t194\t
0.334483\t5e-25\t114\t31\t325\n+lm_SuperContig_29_v2\t192772\t194374\t1512\t0.00752463\t2.6069\tum00679\t529\t18\t506\t0.872414\t2e-42\t90\t24.7056\t326\n+lm_SuperContig_29_v2\t78115\t78814\t627\t0.00312033\t1.07179\tum00842\t53\t264\t212\t0.362393\t2e-53\t226\t52.4685\t327\n+lm_SuperContig_29_v2\t192760\t194239\t1407\t0.00700209\t2.40513\tum00842\t534\t53\t482\t0.823932\t2e-104\t305\t43.0311\t328\n+lm_SuperContig_29_v2\t78112\t78802\t603\t0.0030009\t1.005\tum01882\t94\t297\t198\t0.33\t7e-23\t123\t33.3351\t329\n+lm_SuperContig_29_v2\t192760\t194242\t1395\t0.00694237\t2.325\tum01882\t560\t94\t461\t0.768333\t5e-40\t91\t24.8656\t330\n+lm_SuperContig_29_v2\t78019\t78567\t549\t0.00273216\t0.90894\tum01964\t40\t220\t181\t0.299669\t1e-24\t113\t33.7\t331\n+lm_SuperContig_29_v2\t192901\t194335\t1344\t0.00668856\t2.22517\tum01964\t490\t40\t445\t0.736755\t7e-43\t104\t28.0569\t332\n+lm_SuperContig_29_v2\t77890\t78814\t858\t0.00426993\t1.33645\tum02062\t38\t313\t276\t0.429907\t1e-32\t158\t32.8827\t333\n+lm_SuperContig_29_v2\t192760\t194464\t1638\t0.00815169\t2.5514\tum02062\t575\t38\t538\t0.838006\t1e-65\t161\t31.23\t334\n+lm_SuperContig_29_v2\t78136\t78814\t606\t0.00301583\t0.795276\tum02598\t133\t331\t199\t0.261155\t1e-21\t120\t30.0506\t335\n+lm_SuperContig_29_v2\t192754\t194218\t1392\t0.00692744\t1.82677\tum02598\t598\t133\t466\t0.611549\t3e-45\t109\t27.3493\t336\n+lm_SuperContig_29_v2\t77938\t78808\t807\t0.00401612\t1.30161\tum03115\t27\t285\t259\t0.417742\t2e-24\t130\t31.3664\t337\n+lm_SuperContig_29_v2\t192757\t194416\t1596\t0.00794267\t2.57419\tum03115\t554\t27\t528\t0.851613\t1e-48\t108\t27.7423\t338\n+lm_SuperContig_29_v2\t77938\t78736\t726\t0.00361302\t1.24528\tum05414\t13\t244\t232\t0.397942\t4e-45\t199\t41.7387\t339\n+lm_SuperContig_29_v2\t192826\t194416\t1518\t0.00755449\t2.60377\tum05414\t511\t13\t499\t0.855918\t2e-98\t279\t38.2481\t340\n+lm_SuperContig_29_v2\t193006\t194107\t1017\t0.00506121\t1.44255\tum05421\t496\t158\t335\t0.475177\t6e-41\t118\t29.7892\t341\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/ConvMapChr2Chunk.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/ConvMapChr2Chunk.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,64 @@
+from copy import deepcopy
+from commons.core.sql.TableMapAdaptator import TableMapAdaptator
+from commons.core.coord.Map import Map
+import os
+
+class ConvMapChr2Chunk(object):
+
+    def __init__(self, db, table, chunk_table, outtable):
+        self._tablename = table
+        self._chunk_table = chunk_table
+        self._db = db
+        self._outtable = outtable
+        
+    def convert(self):
+        """
+        Convert a 'map' table format.
+        """
+        temp_file=str(os.getpid()) + ".on_chunk"
+        fout=open(temp_file,'w')
+
+        str_mask = "SELECT * FROM "+\
+            self._chunk_table + " WHERE chr='%s' AND ("+\
+            "(%d BETWEEN LEAST(start,end) AND GREATEST(start,end))"+\
+            " OR (%d BETWEEN LEAST(start,end) AND GREATEST(start,end))"+\
+            " OR (%d <= LEAST(start,end) AND %d >= GREATEST(start,end)));"
+                            
+        iTMA = TableMapAdaptator(self._db, self._tablename)
+        chr_list = iTMA.getSeqNameList()
+
+        for chr in chr_list:        
+            mlist = iTMA.getMapListFromChr(chr)  
+            for m in mlist:
+                sql_cmd = str_mask%(m.seqname,m.getMin(),m.getMax(),m.getMin(),m.getMax())
+                self._db.execute(sql_cmd)
+                res = self._db.fetchall()
+                for i in res:
+                    chunk = Map(i[0],i[1],int(i[2]),int(i[3]))
+                
+                    new_m = deepcopy(m)
+                    new_m.seqname = chunk.name
+
+                    if (m.start > chunk.start and m.start < chunk.end):
+                        new_m.start = m.start - chunk.start + 1 
+                    if (m.end > chunk.start and m.end < chunk.end):
+                        new_m.end = m.end - chunk.start + 1
+                                                   
+                    if m.isOnDirectStrand():
+                        if m.start <= chunk.start:
+                            new_m.start = 1
+                        if m.end >= chunk.end:
+                            new_m.end = chunk.end - chunk.start + 1
+                    else:
+                        if m.end <= chunk.start:
+                            new_m.end = 1
+                        if m.start >= chunk.end:
+                            new_m.start = chunk.end - chunk.start + 1
+                            
+                    new_m.write(fout)
+
+        fout.close()
+    
+        self._db.createTable(self._outtable, "map", temp_file)
+    
+        os.remove(temp_file)           
\ No newline at end of file
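
The converter above, and the Path and Set variants added next, share the same coordinate arithmetic: each chromosome-level record is matched against every chunk it overlaps (the LEAST/GREATEST clause in str_mask), then shifted into chunk-relative coordinates and clamped to the chunk borders while keeping its orientation. A minimal, database-free sketch of that shift, with a hypothetical helper name and plain integer arguments in place of the Map/Path/Set objects used by the committed code:

    # Illustrative sketch only; chr_to_chunk is not part of the changeset.
    def chr_to_chunk(start, end, chunk_start, chunk_end):
        """Shift a 1-based chromosome interval onto a chunk covering
        [chunk_start, chunk_end], clamping to the chunk borders and
        preserving the interval's orientation."""
        forward = start <= end
        lo, hi = (start, end) if forward else (end, start)
        new_lo = lo - chunk_start + 1 if lo > chunk_start else 1
        new_hi = hi - chunk_start + 1 if hi < chunk_end else chunk_end - chunk_start + 1
        return (new_lo, new_hi) if forward else (new_hi, new_lo)

    # Example: chr1:90-110 against chunk2 = chr1:90-200 gives (1, 21),
    # the value expected for reg1 on chunk2 in the unit tests further down.
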
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/ConvPathChr2Chunk.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/ConvPathChr2Chunk.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,68 @@
+from copy import deepcopy
+from commons.core.sql.TablePathAdaptator import TablePathAdaptator
+from commons.core.coord.PathUtils import PathUtils
+from commons.core.coord.Map import Map
+from commons.core.coord.Path import Path
+import os
+
+class ConvPathChr2Chunk(object):
+
+    def __init__(self, db, table, chunk_table, outtable):
+        self._tablename = table
+        self._chunk_table = chunk_table
+        self._db = db
+        self._outtable = outtable
+        
+    def convert(self):
+        """
+        Convert a 'path' table format.
+        """
+        temp_file = str(os.getpid()) + ".on_chunk"
+        fout = open(temp_file,'w')
+
+        str_mask = "SELECT * FROM "+\
+            self._chunk_table + " WHERE chr='%s' AND ("+\
+            "(%d BETWEEN LEAST(start,end) AND GREATEST(start,end))"+\
+            " OR (%d BETWEEN LEAST(start,end) AND GREATEST(start,end))"+\
+            " OR (%d <= LEAST(start,end) AND %d >= GREATEST(start,end)));"
+                            
+        iTPA = TablePathAdaptator(self._db, self._tablename)
+        path_num_list = iTPA.getIdList()
+
+        for path_num in path_num_list:        
+            slist = iTPA.getPathListFromId(path_num)  
+            for r in slist:
+                r_min, r_max = PathUtils.getQueryMinMaxFromPathList([r])   
+                sql_cmd = str_mask%(r.range_query.seqname,r_min,r_max,r_min,r_max)
+                self._db.execute(sql_cmd)
+                res = self._db.fetchall()
+                for i in res:
+                    chunk = Map(i[0],i[1],int(i[2]),int(i[3]))
+                
+                    new_r = Path()
+                    new_r = deepcopy(r)
+                    new_r.range_query.seqname = chunk.name
+
+                    if (r.range_query.start > chunk.start and r.range_query.start < chunk.end):
+                        new_r.range_query.start = r.range_query.start - chunk.start + 1 
+                    if (r.range_query.end > chunk.start and r.range_query.end < chunk.end):
+                        new_r.range_query.end = r.range_query.end - chunk.start + 1
+                                                   
+                    if r.range_query.isOnDirectStrand():
+                        if r.range_query.start <= chunk.start:
+                            new_r.range_query.start = 1
+                        if r.range_query.end >= chunk.end:
+                            new_r.range_query.end = chunk.end - chunk.start + 1
+                    else:
+                        if r.range_query.end <= chunk.start:
+                            new_r.range_query.end = 1
+                        if r.range_query.start >= chunk.end:
+                            new_r.range_query.start = chunk.end - chunk.start + 1
+                            
+                    new_r.write(fout)
+
+        fout.close()
+    
+        self._db.createTable(self._outtable, "path", temp_file)
+    
+        os.remove(temp_file)           
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/ConvSetChr2Chunk.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/ConvSetChr2Chunk.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,66 @@
+from copy import deepcopy
+from commons.core.sql.TableSetAdaptator import TableSetAdaptator
+from commons.core.coord.Map import Map
+from commons.core.coord.Set import Set
+import os
+
+class ConvSetChr2Chunk(object):
+
+    def __init__(self, db, table, chunk_table, outtable):
+        self._tablename = table
+        self._chunk_table = chunk_table
+        self._db = db
+        self._outtable = outtable
+        
+    def convert(self):
+        """
+        Convert a 'set' table format.
+        """
+        temp_file = str(os.getpid()) + ".on_chunk"
+        fout = open(temp_file,'w')
+
+        str_mask = "SELECT * FROM "+\
+            self._chunk_table + " WHERE chr='%s' AND ("+\
+            "(%d BETWEEN LEAST(start,end) AND GREATEST(start,end))"+\
+            " OR (%d BETWEEN LEAST(start,end) AND GREATEST(start,end))"+\
+            " OR (%d <= LEAST(start,end) AND %d >= GREATEST(start,end)));"
+                            
+        iTSA = TableSetAdaptator(self._db, self._tablename)
+        path_num_list = iTSA.getIdList()
+
+        for path_num in path_num_list:        
+            slist = iTSA.getSetListFromId(path_num)  
+            for r in slist:
+                sql_cmd = str_mask%(r.seqname,r.getMin(),r.getMax(),r.getMin(),r.getMax())
+                self._db.execute(sql_cmd)
+                res = self._db.fetchall()
+                for i in res:
+                    chunk = Map(i[0],i[1],int(i[2]),int(i[3]))
+                
+                    new_r = Set()
+                    new_r = deepcopy(r)
+                    new_r.seqname = chunk.name
+
+                    if (r.start > chunk.start and r.start < chunk.end):
+                        new_r.start = r.start - chunk.start + 1 
+                    if (r.end > chunk.start and r.end < chunk.end):
+                        new_r.end = r.end - chunk.start + 1
+                                                   
+                    if r.isOnDirectStrand():
+                        if r.start <= chunk.start:
+                            new_r.start = 1
+                        if r.end >= chunk.end:
+                            new_r.end = chunk.end - chunk.start + 1
+                    else:
+                        if r.end <= chunk.start:
+                            new_r.end = 1
+                        if r.start >= chunk.end:
+                            new_r.start = chunk.end - chunk.start + 1
+                            
+                    new_r.write(fout)
+
+        fout.close()
+    
+        self._db.createTable(self._outtable, "set", temp_file)
+    
+        os.remove(temp_file)           
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/PathChunkConnector.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/PathChunkConnector.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,134 @@
+from pyRepet.coord.Map import Map
+import commons.core.sql.TablePathAdaptator
+
+
+## Connect overlapping chunks into a single fragment
+#
+class PathChunkConnector( object):
+    
+    def __init__(self, mapFileName, db, table, verbose):
+       
+        self._verbose = verbose
+        self._chunk = self._getChunkDictFromMapFileForConnectPathChunks( mapFileName )
+        self._tablePathAdaptator = commons.core.sql.TablePathAdaptator.TablePathAdaptator( db, table )
+    
+    def run (self):
+        for num_chunk in xrange(1,len(self._chunk.keys())):
+            chunkName = "chunk"+str(num_chunk)
+            if self._verbose > 1:
+                print chunkName
+            next_chunkName="chunk"+str(num_chunk+1)
+    
+            if next_chunkName not in self._chunk.keys():
+                break
+            
+            start=self._chunk[chunkName][2]
+            end=self._chunk[next_chunkName][1]
+            
+            if self._chunk[chunkName][0] == self._chunk[next_chunkName][0]:
+                lpath=self._tablePathAdaptator.getPathListIncludedInQueryCoord(self._chunk[chunkName][0],start,end)
+            
+                if self._verbose > 1:
+                    print "----------"
+                
+                lpath.sort()
+                chg_path_id={}
+                pathnum_to_ins=[]
+                pathnum_to_del=[]
+                
+                self._createDirectAndReversePaths(lpath)
+                        
+                self._mergeDirectPaths(chg_path_id, pathnum_to_ins, pathnum_to_del)
+                
+                if self._verbose > 1:
+                    print "..........."
+     
+                self._mergeReversePaths(chg_path_id, pathnum_to_ins, pathnum_to_del)
+                
+                if self._verbose > 1:
+                    print "..........."
+                    print pathnum_to_del
+                    
+                self._tablePathAdaptator.deleteFromIdList(pathnum_to_del)
+                
+                if self._verbose > 1:
+                    print pathnum_to_ins
+                
+                self._tablePathAdaptator.deleteFromIdList(pathnum_to_ins)
+                
+                self._insertDirectPaths(chg_path_id, pathnum_to_ins)
+               
+                self._insertReversePaths(chg_path_id, pathnum_to_ins)
+    
+
+    def _createDirectAndReversePaths(self, lpath):
+        self._dpath = []
+        self._rpath = []
+        for i in lpath:
+            if i.range_query.isOnDirectStrand() and i.range_subject.isOnDirectStrand():
+                self._dpath.append(i)
+            else:
+                self._rpath.append(i)
+                
+    def _insertDirectPaths (self, chg_path_id, pathnum_to_ins):
+        self._insertPaths(chg_path_id, pathnum_to_ins, self._dpath)
+
+    def _insertReversePaths (self, chg_path_id, pathnum_to_ins):
+        self._insertPaths(chg_path_id, pathnum_to_ins, self._rpath)
+
+    def _insertPaths(self, chg_path_id, pathnum_to_ins, paths2Insert):
+        for i in paths2Insert:
+            if chg_path_id.has_key(i.id):
+                i.id = chg_path_id[i.id]
+            
+            if self._verbose > 1:
+                i.show()
+            
+            if i.id in pathnum_to_ins:
+                self._tablePathAdaptator.insert(i)
+                if self._verbose > 1:
+                    print "--> inserted!"
+            
+            if self._verbose > 1:
+                print "=========="
+    
+    def _mergeDirectPaths(self, chg_path_id, pathnum_to_ins, pathnum_to_del):
+        self._mergePaths(chg_path_id, pathnum_to_ins, pathnum_to_del, self._dpath)
+    
+    def _mergeReversePaths(self, chg_path_id, pathnum_to_ins, pathnum_to_del):
+        self._mergePaths(chg_path_id, pathnum_to_ins, pathnum_to_del, self._rpath)
+            
+    def _mergePaths(self, chg_path_id, pathnum_to_ins, pathnum_to_del, dpath):
+        x = 0
+        while x < len(dpath) - 1:
+            x = x + 1
+            if self._verbose > 1:
+                print "++++"
+                dpath[x - 1].show()
+                dpath[x].show()
+            
+            if dpath[x - 1].canMerge(dpath[x]):
+                chg_path_id[dpath[x].id] = dpath[x - 1].id
+                if dpath[x - 1].id not in pathnum_to_ins:
+                    pathnum_to_ins.append(dpath[x - 1].id)
+                
+                if dpath[x].id not in pathnum_to_del:
+                    pathnum_to_del.append(dpath[x].id)
+                
+                dpath[x - 1].merge(dpath[x])
+                del dpath[x]
+                x = x - 1
+                if self._verbose > 1:
+                    print "--> merged"
+                
+    def _getChunkDictFromMapFileForConnectPathChunks(self, mapFileName):
+        mapDict = {}
+        mapFile = open(mapFileName)
+        mapInstance = Map()
+        while True:
+            if not mapInstance.read(mapFile):
+                break
+            mapDict[mapInstance.name] = (mapInstance.seqname, mapInstance.start, mapInstance.end)
+            
+        mapFile.close()
+        return mapDict
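
PathChunkConnector walks consecutive chunk pairs, fetches the paths lying in the overlap region between chunkN and chunkN+1, separates direct- and reverse-strand records, merges neighbours that Path.canMerge accepts, and finally deletes the split rows and re-inserts the merged ones. The merge loop itself is a pairwise reduction over a sorted list; a strand-agnostic sketch of that pattern, using hypothetical names and plain [start, end] lists instead of Path objects:

    # Illustrative sketch only; Path.canMerge/Path.merge define the real criteria.
    def merge_neighbours(records, can_merge, merge):
        """Collapse a sorted list in place: whenever two consecutive records
        are mergeable, fold the second into the first and drop it."""
        i = 1
        while i < len(records):
            if can_merge(records[i - 1], records[i]):
                merge(records[i - 1], records[i])   # extend the surviving record
                del records[i]                      # drop the merged-away record
            else:
                i += 1
        return records

    def overlaps(a, b):
        return b[0] <= a[1]

    def extend(a, b):
        a[1] = max(a[1], b[1])

    spans = [[95535, 95570], [95545, 95576]]
    merge_neighbours(spans, overlaps, extend)
    # spans == [[95535, 95576]], mirroring testTwoQueryOverlapsOnPlusStrand below.
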
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/test/TestConvCoordWithOverlapps.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/test/TestConvCoordWithOverlapps.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,169 @@\n+import unittest\n+import os\n+import time\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbMySql import DbMySql\n+\n+\n+class Test_ConvCoordWithOverlaps( unittest.TestCase ):\n+    pass\n+\n+#    def setUp(self):\n+#        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+#        self._mapFile = "map_file.map"\n+#        _MockMapFile( self._mapFile )\n+#        self._resultFile = "dummyObsFile_%s" % ( self._uniqId )\n+#        self._refFileToCompare = "dummyExpFile_%s" % ( self._uniqId )\n+#        self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+#        configF = open(self._configFileName, "w" )\n+#        configF.write( "[repet_env]\\n" )\n+#        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+#        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+#        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+#        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+#        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+#        configF.close()\n+#        self._db = DbMySql( cfgFileName = self._configFileName )\n+#        \n+#    def tearDown(self):\n+#        os.remove( self._mapFile )\n+#        os.remove( self._resultFile )\n+#        os.remove( self._refFileToCompare )\n+#        self._db.close()\n+#        self._uniqId = None\n+#        \n+#        \n+#    def test_run_TwoHitsOverlapOnFirstAndSecondChunks( self ):\n+#        file2Process = "two_hits_overlap_on_first_and_second_chunk.align"\n+#        linesToProcess = [ "chunk1" + "\\t" + "95535" + "\\t" +  "95570" + "\\t" + "sbj2" + "\\t" + "125423" + "\\t" +  "125467" + "\\t" +  "7e-15" + "\\t" + "82" + "\\t" + "97.78" + "\\n", \n+#                           "chunk2" + "\\t" +  "5544" + "\\t" +    "5575" + "\\t" +   "sbj2" + "\\t" +   "125457"  + "\\t" + "133465" + "\\t" +  "2e-38" + "\\t" +   "83" + "\\t" +  "65" + "\\n" ]\n+#        _MockAlignFile( file2Process, linesToProcess )\n+#        \n+#        convCoord = ConvCoord( file2Process, "q", self._mapFile, self._resultFile, "", 0, self._db )\n+#        convCoord.run()\n+#        \n+#        refLine = ["dmel_chr4" + "\\t" + "95535" + "\\t" +  "95575" + "\\t" +   "sbj2" + "\\t" +   "125423" + "\\t" +  "133465" + "\\t" + "2e-38" + "\\t" +  "83" + "\\t" +  "97.78" + "\\n"]\n+#        _MockAlignFile( self._refFileToCompare, refLine )\n+#        \n+#        self.assertTrue( FileUtils.are2FilesIdentical( self._refFileToCompare, self._resultFile ) )\n+#        os.remove( file2Process )\n+#\n+#        \n+#    def test_run_ThreeHitsOverlapOnFirstAndSecondChunks( self ):\n+#        file2Process = "three_hits_overlap_on_first_and_second_chunk.align"\n+#        linesToProcess = [ "chunk1" + "\\t" + "95535" + "\\t" +  "95570" + "\\t" + "sbj2" + "\\t" + "125423" + "\\t" +  "125467" + "\\t" +  "7e-15" + "\\t" + "82" + "\\t" + "97.78" + "\\n", \n+#                           "chunk2" + "\\t" +  "5544" + "\\t" +    "5575" + "\\t" +   "sbj2" + "\\t" +   "125457"  + "\\t" + "133465" + "\\t" +  "2e-38" + "\\t" +   "83" + "\\t" +  "65" + "\\n"\n+#                           "chunk2" + "\\t" +  "5540" + "\\t" +  "5573" + "\\t" +  "sbj2" + "\\t" +  "125454" + "\\t"  +"125750" + "\\t" + "8e-58" + "\\t" +  "224" + "\\t" +  "86.43" + "\\n" ]\n+#        _MockAlignFile( file2Process, linesToProcess )\n+#        \n+#        convCoord = ConvCoord( file2Process, "q", self._mapFile, self._resultFile, "", 0, self._db 
)\n+#        convCoord.run()\n+#\n+#        refLine = ["dmel_chr4" + "\\t" + "95535" + "\\t" +  "95575" + "\\t" +   "sbj2" + "\\t" +   "125423" + "\\t" +  "133465" + "\\t" + "8e-58" + "\\t" +  "224" + "\\t" +  "97.78" + "\\n"]\n+#        _MockAlignFile( self._refFileToCompare, refLine )\n+#        \n+#        self.assertTrue( FileUtils.are2FilesIdentical( self._refFileToCompare, self._resultFile ) )\n+#        os.remove( file2Process )\n+#\n+#     \n+#    def test_run_HitsOnDifferentStrandsAndDifferentSubjects( self ):\n+#        file2Process = "hits_on_different_strands_and_differ'..b'vCoord.run()\n+#        \n+#        refLine = ["dmel_chr4"  + "\\t" + "175000"+ "\\t" +  "185000"+ "\\t" +  "sbj2"+ "\\t" + "125454"+ "\\t" + "133465"+ "\\t" + "8e-58"+ "\\t" + "224"+ "\\t" + "86.43"+"\\n"]\n+#        _MockAlignFile( self._refFileToCompare, refLine )\n+#        \n+#        self.assertTrue( FileUtils.are2FilesIdentical( self._refFileToCompare, self._resultFile ) )\n+#        os.remove( file2Process )\n+#\n+#        \n+#    def test_run_TwoHitsOnChunks_And_OnOverlap_OnReverseStrand_ForSubject( self ):\n+#        file2Process = "two_hits_on_chunks_and_on_overlap_on_reverse_strand_for_subject.align"                                   \n+#        linesToProcess = [ "chunk2" + "\\t" + "85000" + "\\t" + "95000" + "\\t" + "sbj2" + "\\t" + "133465"  + "\\t" + "125457" + "\\t" + "2e-38" + "\\t" + "83" + "\\t" + "65" + "\\n",\n+#                           "chunk3" + "\\t" + "1000"  + "\\t" + "5000" + "\\t" + "sbj2" + "\\t" +  "125750" + "\\t" + "125454" + "\\t" + "8e-58" + "\\t" + "224" + "\\t" + "86.43" + "\\n" ]                                                                                     \n+#        _MockAlignFile( file2Process, linesToProcess )\n+#        \n+#        convCoord = ConvCoord( file2Process, "q", self._mapFile, self._resultFile, "", 0, self._db )\n+#        convCoord.run()\n+#        \n+#        refLine = ["dmel_chr4"  + "\\t" + "175000"+ "\\t" +  "185000" + "\\t" +  "sbj2"+ "\\t" + "133465" + "\\t" + "125454" + "\\t" + "8e-58"+ "\\t" + "224"+ "\\t" + "86.43"+"\\n"]\n+#        _MockAlignFile( self._refFileToCompare, refLine )\n+#        \n+#        self.assertTrue( FileUtils.are2FilesIdentical( self._refFileToCompare, self._resultFile ) )        \n+#        os.remove( file2Process )\n+#        \n+#        \n+#    def test_run_ThreeHitsAllByAllWithTwoHitsWithinAChunkOverlap( self ):\n+#        file2Process = "dummyAlignFile_%s" % ( self._uniqId )\n+#        linesToProcess = [ "chunk1"+"\\t"+"192"+"\\t"+"197"+"\\t"+"chunk2"+"\\t"+"2"+"\\t"+"7"+"\\t"+"8e-58"+"\\t"+"11"+"\\t"+"97.8"+"\\n",\n+#                           "chunk1"+"\\t"+"192"+"\\t"+"197"+"\\t"+"chunk2"+"\\t"+"51"+"\\t"+"56"+"\\t"+"8e-58"+"\\t"+"11"+"\\t"+"97.8"+"\\n",\n+#                           "chunk2"+"\\t"+"2"+"\\t"+"7"+"\\t"+"chunk2"+"\\t"+"51"+"\\t"+"56"+"\\t"+"8e-58"+"\\t"+"11"+"\\t"+"97.8"+"\\n" ]\n+#        _MockAlignFile( file2Process, linesToProcess )\n+#        \n+#        mapFileName = "dummyMapFile_%s" % ( self._uniqId )\n+#        mapF = open( mapFileName, "w" )\n+#        mapF.write( "chunk1\\tdmel_chr4\\t1\\t200\\n" )\n+#        mapF.write( "chunk2\\tdmel_chr4\\t191\\t390\\n" )\n+#        mapF.close()\n+#        \n+#        convCoord = ConvCoord( file2Process, "qs", mapFileName, self._resultFile, "", 0, self._db )\n+#        convCoord.run()\n+#        \n+#        refLine = [ "dmel_chr4"+"\\t"+"192"+"\\t"+"197"+"\\t"+"dmel_chr4"+"\\t"+"241"+"\\t"+"246"+"\\t"+"8e-58"+"\\t"+"11"+"\\t"+"97.8"+"\\n" ]\n+#    
    _MockAlignFile( self._refFileToCompare, refLine )\n+#        \n+#        self.assertTrue( FileUtils.are2FilesIdentical( self._refFileToCompare, self._resultFile ) )\n+#        os.remove( file2Process )\n+#        os.remove( mapFileName )\n+#        \n+#        \n+#class _MockAlignFile:\n+#    \n+#    def __init__(self, fileName, lines):\n+#        alignF = open(fileName, "w");\n+#        for line in lines:\n+#            alignF.write(line)\n+#        alignF.close    \n+#\n+#\n+#class _MockMapFile:\n+#    \n+#    def __init__ (self, fileName):\n+#        mapF = open(fileName, "w")\n+#        line1 = "chunk1" + \'\\t\'   + "dmel_chr4" + \'\\t\' +   "1" + \'\\t\'+   "100000" + "\\n"\n+#        line2 = "chunk2" + \'\\t\'   + "dmel_chr4" + \'\\t\' +   "90001" + \'\\t\'+   "190000" + "\\n"\n+#        line3 = "chunk3" + \'\\t\'   + "dmel_chr4" + \'\\t\' +   "180001" + \'\\t\' + "280000" + "\\n"  \n+#        mapF.write(line1)\n+#        mapF.write(line2)\n+#        mapF.write(line3)\n+#        mapF.close\n+\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_ConvCoordWithOverlaps ) )\n+if __name__ == "__main__":\n+    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/test/Test_ConvMapChr2Chunk.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/test/Test_ConvMapChr2Chunk.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,109 @@
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.utils.FileUtils import FileUtils
+from commons.pyRepetUnit.convCoord.ConvMapChr2Chunk import ConvMapChr2Chunk
+import unittest
+import os
+
+class Test_ConvMapChr2Chunk(unittest.TestCase):
+
+    def setUp(self):
+        self._mapfilename="dummy.map"
+        self._maptablename="dummy_map"
+        
+        self._chunkmapfilename="chunk.map"
+        self._chunkmaptable="test_chunk_map"
+        
+        self._resfilename="dummy_res.map"
+        
+        map4convData="chunk1\tchr1\t1\t100\n"
+        map4convData+="chunk2\tchr1\t90\t200\n"
+        map4convData+="chunk3\tchr1\t190\t300\n"
+
+        map4conv=open(self._chunkmapfilename,"w")
+        map4conv.write(map4convData)
+        map4conv.close()
+        
+        self._db = DbFactory.createInstance()
+        self._db.createTable(self._chunkmaptable, "map", self._chunkmapfilename, True)
+        
+        map2convData="reg1\tchr1\t10\t80\n"
+        map2convData+="reg1\tchr1\t90\t110\n"
+        map2convData+="reg2\tchr1\t150\t160\n"
+        
+        map2convData+="reg3\tchr1\t70\t210\n"
+        map2convData+="reg4\tchr1\t85\t100\n"
+        map2convData+="reg5\tchr1\t85\t110\n"
+        
+        map2convData+="reg6\tchr1\t80\t10\n"
+        map2convData+="reg6\tchr1\t110\t90\n"
+        map2convData+="reg7\tchr1\t160\t150\n"
+        map2convData+="reg8\tchr1\t190\t300\n"
+        
+        map2convData+="reg9\tchr1\t210\t70\n"
+        map2convData+="reg10\tchr1\t100\t85\n"
+        map2convData+="reg11\tchr1\t110\t85\n"
+        map2convData+="reg12\tchr1\t300\t190\n"
+                        
+        map2conv=open(self._mapfilename,"w")
+        map2conv.write(map2convData)
+        map2conv.close()
+        
+        self._db.createTable(self._maptablename, "map", self._mapfilename, True)
+
+        mapResData="reg1\tchunk1\t10\t80\n"
+        mapResData+="reg1\tchunk1\t90\t100\n"
+        mapResData+="reg1\tchunk2\t1\t21\n"
+        mapResData+="reg2\tchunk2\t61\t71\n"
+
+        mapResData+="reg3\tchunk1\t70\t100\n"
+        mapResData+="reg3\tchunk2\t1\t111\n"
+        mapResData+="reg3\tchunk3\t1\t21\n"
+        
+        mapResData+="reg4\tchunk1\t85\t100\n"
+        mapResData+="reg4\tchunk2\t1\t11\n"
+        mapResData+="reg5\tchunk1\t85\t100\n"
+        mapResData+="reg5\tchunk2\t1\t21\n"
+        
+        mapResData+="reg6\tchunk1\t80\t10\n"
+        mapResData+="reg6\tchunk1\t100\t90\n"
+        mapResData+="reg6\tchunk2\t21\t1\n"
+        mapResData+="reg7\tchunk2\t71\t61\n"
+        mapResData+="reg8\tchunk2\t101\t111\n"
+        mapResData+="reg8\tchunk3\t1\t111\n"
+        
+        mapResData+="reg9\tchunk1\t100\t70\n"
+        mapResData+="reg9\tchunk2\t111\t1\n"
+        mapResData+="reg9\tchunk3\t21\t1\n"
+        
+        mapResData+="reg10\tchunk1\t100\t85\n"
+        mapResData+="reg10\tchunk2\t11\t1\n"
+        mapResData+="reg11\tchunk1\t100\t85\n"
+        mapResData+="reg11\tchunk2\t21\t1\n"
+        mapResData+="reg12\tchunk2\t111\t101\n"       
+        mapResData+="reg12\tchunk3\t111\t1\n"       
+                
+        mapRes=open(self._resfilename,"w")
+        mapRes.write(mapResData)
+        mapRes.close()
+        
+    def tearDown(self):
+        self._db.dropTable(self._chunkmaptable)
+        self._db.dropTable(self._maptablename)
+        self._db.close()
+        os.remove(self._resfilename)
+        os.remove(self._mapfilename)
+        os.remove(self._chunkmapfilename)
+
+    def test_convert(self):
+        conv = ConvMapChr2Chunk(self._db,self._maptablename, self._chunkmaptable, self._maptablename+"_onchk")
+        conv.convert()
+        obsFileName = "dummy_map.onchk"
+        self._db.exportDataToFile("dummy_map_onchk", obsFileName)
+        self._db.dropTable("dummy_map_onchk")
+        self.assertTrue(FileUtils.are2FilesIdentical(self._resfilename, obsFileName))
+        os.remove(obsFileName)
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_ConvMapChr2Chunk ) )
+if __name__ == '__main__':
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file
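
The expected file written in setUp above can be checked by hand with the chr_to_chunk sketch given after ConvMapChr2Chunk.py (an illustrative helper, not part of the changeset). For instance, reg3 spans chr1:70-210 and overlaps all three chunks of the fixture:

    for chunk_start, chunk_end in [(1, 100), (90, 200), (190, 300)]:
        print chr_to_chunk(70, 210, chunk_start, chunk_end)
    # (70, 100)  ->  reg3  chunk1  70   100
    # (1, 111)   ->  reg3  chunk2  1    111
    # (1, 21)    ->  reg3  chunk3  1    21
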
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/test/Test_ConvPathChr2Chunk.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/test/Test_ConvPathChr2Chunk.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,110 @@
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.sql.DbFactory import DbFactory
+from commons.pyRepetUnit.convCoord.ConvPathChr2Chunk import ConvPathChr2Chunk
+import unittest
+import os
+
+class Test_ConvPathChr2Chunk(unittest.TestCase):
+
+    def setUp(self):
+        self._pathfilename="dummy.path"
+        self._pathtablename="dummy_path"
+        
+        self._chunkmapfilename="chunk.map"
+        self._chunkmaptable="test_chunk_map"
+        
+        self._resfilename="dummy_res.path"
+        
+        map4convData="chunk1\tchr1\t1\t100\n"
+        map4convData+="chunk2\tchr1\t90\t200\n"
+        map4convData+="chunk3\tchr1\t190\t300\n"
+
+        map4conv=open(self._chunkmapfilename,"w")
+        map4conv.write(map4convData)
+        map4conv.close()
+        
+        self._db = DbFactory.createInstance()
+        self._db.createTable(self._chunkmaptable, "map", self._chunkmapfilename, True)
+        
+        path2convData="1\tchr1\t10\t80\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="1\tchr1\t90\t110\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="2\tchr1\t150\t160\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        path2convData+="3\tchr1\t70\t210\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="4\tchr1\t85\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="5\tchr1\t85\t110\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        path2convData+="6\tchr1\t10\t80\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="6\tchr1\t90\t110\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="7\tchr1\t150\t160\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="8\tchr1\t190\t300\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        path2convData+="9\tchr1\t70\t210\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="10\tchr1\t85\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="11\tchr1\t85\t110\tsub1\t10\t50\t0.01\t1000\t99\n"
+        path2convData+="12\tchr1\t190\t300\tsub1\t10\t50\t0.01\t1000\t99\n"
+                        
+        path2conv=open(self._pathfilename,"w")
+        path2conv.write(path2convData)
+        path2conv.close()
+        
+        self._db.createTable(self._pathtablename, "path", self._pathfilename)
+
+        pathResData="1\tchunk1\t10\t80\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="1\tchunk1\t90\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="1\tchunk2\t1\t21\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="2\tchunk2\t61\t71\tsub1\t10\t50\t0.01\t1000\t99\n"
+
+        pathResData+="3\tchunk1\t70\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="3\tchunk2\t1\t111\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="3\tchunk3\t1\t21\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        pathResData+="4\tchunk1\t85\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="4\tchunk2\t1\t11\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="5\tchunk1\t85\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="5\tchunk2\t1\t21\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        pathResData+="6\tchunk1\t10\t80\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="6\tchunk1\t90\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="6\tchunk2\t1\t21\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="7\tchunk2\t61\t71\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="8\tchunk2\t101\t111\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="8\tchunk3\t1\t111\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        pathResData+="9\tchunk1\t70\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="9\tchunk2\t1\t111\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="9\tchunk3\t1\t21\tsub1\t10\t50\t0.01\t1000\t99\n"
+        
+        pathResData+="10\tchunk1\t85\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="10\tchunk2\t1\t11\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="11\tchunk1\t85\t100\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="11\tchunk2\t1\t21\tsub1\t10\t50\t0.01\t1000\t99\n"
+        pathResData+="12\tchunk2\t101\t111\tsub1\t10\t50\t0.01\t1000\t99\n"       
+        pathResData+="12\tchunk3\t1\t111\tsub1\t10\t50\t0.01\t1000\t99\n"       
+                
+        pathRes=open(self._resfilename,"w")
+        pathRes.write(pathResData)
+        pathRes.close()
+        
+    def tearDown(self):
+        self._db.dropTable(self._chunkmaptable)
+        self._db.dropTable(self._pathtablename)
+        self._db.close()
+        os.remove(self._resfilename)
+        os.remove(self._pathfilename)
+        os.remove(self._chunkmapfilename)
+
+    def test_convert(self):
+        conv=ConvPathChr2Chunk(self._db,self._pathtablename, self._chunkmaptable, self._pathtablename+"_onchk")
+        conv.convert()
+        expPath = self._resfilename
+        obsPath = "dummy_path.onchk"
+        self._db.exportDataToFile("dummy_path_onchk", obsPath)
+        self._db.dropTable("dummy_path_onchk")
+        self.assertTrue(FileUtils.are2FilesIdentical(expPath, obsPath))
+        os.remove(obsPath)
+
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_ConvPathChr2Chunk ) )
+if __name__ == '__main__':
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/test/Test_ConvSetChr2Chunk.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/test/Test_ConvSetChr2Chunk.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,109 @@
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.utils.FileUtils import FileUtils
+from commons.pyRepetUnit.convCoord.ConvSetChr2Chunk import ConvSetChr2Chunk
+import unittest
+import os
+
+class Test_ConvSetChr2Chunk(unittest.TestCase):
+
+    def setUp(self):
+        self._setfilename="dummy.set"
+        self._settablename="dummy_set"
+        
+        self._chunkmapfilename="chunk.map"
+        self._chunkmaptable="test_chunk_map"
+        
+        self._resfilename="dummy_res.set"
+        
+        map4convData="chunk1\tchr1\t1\t100\n"
+        map4convData+="chunk2\tchr1\t90\t200\n"
+        map4convData+="chunk3\tchr1\t190\t300\n"
+
+        map4conv=open(self._chunkmapfilename,"w")
+        map4conv.write(map4convData)
+        map4conv.close()
+        
+        self._db = DbFactory.createInstance()
+        self._db.createTable(self._chunkmaptable, "map", self._chunkmapfilename, True)
+        
+        set2convData="1\treg1\tchr1\t10\t80\n"
+        set2convData+="1\treg1\tchr1\t90\t110\n"
+        set2convData+="2\treg2\tchr1\t150\t160\n"
+        
+        set2convData+="3\treg3\tchr1\t70\t210\n"
+        set2convData+="4\treg4\tchr1\t85\t100\n"
+        set2convData+="5\treg5\tchr1\t85\t110\n"
+        
+        set2convData+="6\treg6\tchr1\t80\t10\n"
+        set2convData+="6\treg6\tchr1\t110\t90\n"
+        set2convData+="7\treg7\tchr1\t160\t150\n"
+        set2convData+="8\treg8\tchr1\t190\t300\n"
+        
+        set2convData+="9\treg9\tchr1\t210\t70\n"
+        set2convData+="10\treg10\tchr1\t100\t85\n"
+        set2convData+="11\treg11\tchr1\t110\t85\n"
+        set2convData+="12\treg12\tchr1\t300\t190\n"
+                        
+        set2conv=open(self._setfilename,"w")
+        set2conv.write(set2convData)
+        set2conv.close()
+        
+        self._db.createTable(self._settablename, "set", self._setfilename, True)
+
+        setResData="1\treg1\tchunk1\t10\t80\n"
+        setResData+="1\treg1\tchunk1\t90\t100\n"
+        setResData+="1\treg1\tchunk2\t1\t21\n"
+        setResData+="2\treg2\tchunk2\t61\t71\n"
+
+        setResData+="3\treg3\tchunk1\t70\t100\n"
+        setResData+="3\treg3\tchunk2\t1\t111\n"
+        setResData+="3\treg3\tchunk3\t1\t21\n"
+        
+        setResData+="4\treg4\tchunk1\t85\t100\n"
+        setResData+="4\treg4\tchunk2\t1\t11\n"
+        setResData+="5\treg5\tchunk1\t85\t100\n"
+        setResData+="5\treg5\tchunk2\t1\t21\n"
+        
+        setResData+="6\treg6\tchunk1\t80\t10\n"
+        setResData+="6\treg6\tchunk1\t100\t90\n"
+        setResData+="6\treg6\tchunk2\t21\t1\n"
+        setResData+="7\treg7\tchunk2\t71\t61\n"
+        setResData+="8\treg8\tchunk2\t101\t111\n"
+        setResData+="8\treg8\tchunk3\t1\t111\n"
+        
+        setResData+="9\treg9\tchunk1\t100\t70\n"
+        setResData+="9\treg9\tchunk2\t111\t1\n"
+        setResData+="9\treg9\tchunk3\t21\t1\n"
+        
+        setResData+="10\treg10\tchunk1\t100\t85\n"
+        setResData+="10\treg10\tchunk2\t11\t1\n"
+        setResData+="11\treg11\tchunk1\t100\t85\n"
+        setResData+="11\treg11\tchunk2\t21\t1\n"
+        setResData+="12\treg12\tchunk2\t111\t101\n"       
+        setResData+="12\treg12\tchunk3\t111\t1\n"       
+                
+        setRes=open(self._resfilename,"w")
+        setRes.write(setResData)
+        setRes.close()
+        
+    def tearDown(self):
+        self._db.dropTable(self._chunkmaptable)
+        self._db.dropTable(self._settablename)
+        self._db.close()
+        os.remove(self._resfilename)
+        os.remove(self._setfilename)
+        os.remove(self._chunkmapfilename)
+
+    def test_convert(self):
+        conv = ConvSetChr2Chunk(self._db,self._settablename, self._chunkmaptable, self._settablename+"_onchk")
+        conv.convert()
+        obsFileName = "dummy_set.onchk"
+        self._db.exportDataToFile("dummy_set_onchk", obsFileName)
+        self._db.dropTable("dummy_set_onchk")
+        self.assertTrue(FileUtils.are2FilesIdentical(self._resfilename, obsFileName))
+        os.remove(obsFileName)
+
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_ConvSetChr2Chunk ) )
+if __name__ == '__main__':
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/test/Test_PathChunkConnector.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/test/Test_PathChunkConnector.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,121 @@
+import unittest
+import time
+import os
+from commons.core.sql.TablePathAdaptator import TablePathAdaptator
+from commons.pyRepetUnit.convCoord.PathChunkConnector import PathChunkConnector
+from commons.core.sql.DbMySql import DbMySql
+
+
+class Test_PathChunkConnector( unittest.TestCase ):
+    
+    def setUp(self):
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
+        self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )
+        configF = open(self._configFileName, "w" )
+        configF.write( "[repet_env]\n" )
+        configF.write( "repet_host: %s\n" % ( os.environ["REPET_HOST"] ) )
+        configF.write( "repet_user: %s\n" % ( os.environ["REPET_USER"] ) )
+        configF.write( "repet_pw: %s\n" % ( os.environ["REPET_PW"] ) )
+        configF.write( "repet_db: %s\n" % ( os.environ["REPET_DB"] ) )
+        configF.write( "repet_port: %s\n" % ( os.environ["REPET_PORT"] ) )
+        configF.close()
+        self._db = DbMySql( cfgFileName=self._configFileName )
+        self._table = "dummyPathTable_%s" % ( self._uniqId )
+        self._tpA = TablePathAdaptator( self._db, self._table )
+        self._mapDict = {'chunk1': ('dmel_chr4', 1, 100000),'chunk2': ('dmel_chr4', 90001, 190000),'chunk3': ('dmel_chr4', 180001, 280000), }
+        self._mapFileName = "map_file.map"
+        
+    def tearDown(self):
+        self._db.close()
+        
+    def testTwoQueryOverlapsOnPlusStrand (self):
+        lines = [
+                 "1\tdmel_chr4\t95535\t95570\tsbj2\t125423\t125467\t7e-15\t82\t97.78\n",
+                 "2\tdmel_chr4\t95545\t95576\tsbj2\t125457\t133465\t2e-38\t83\t65\n"
+                ]
+        
+        expectedList =[("dmel_chr4", 95535, 95576, "sbj2", 125423, 133465, 2e-38, 83,97.78)]
+        self._templateTest(lines, expectedList)
+        
+    def testTwoQueryOverlapsOnReverseStrand (self):
+        lines = [
+                 "1\tdmel_chr4\t95535\t95570\tsbj2\t125467\t125423\t7e-15\t82\t97.78\n",
+                 "2\tdmel_chr4\t95545\t95576\tsbj2\t133465\t125457\t2e-38\t83\t65\n"
+                ]
+
+        expectedList =[("dmel_chr4", 95535, 95576, "sbj2", 133465, 125423, 2e-38, 83,97.78)]
+        self._templateTest(lines, expectedList)
+        
+    def testTwoQueryOverlapsOnDifferentStrands (self):
+        lines = [                   
+                 "1\tdmel_chr4\t95535\t95570\tsbj2\t125423\t125467\t7e-15\t82\t97.78\n",
+                 "2\tdmel_chr4\t95545\t95576\tsbj2\t133465\t125457\t2e-38\t83\t65\n"
+                ]
+        expectedList =[
+                       ("dmel_chr4", 95535, 95570, "sbj2", 125423, 125467, 7e-15, 82,97.78),
+                       ("dmel_chr4", 95545, 95576, "sbj2", 133465, 125457, 2e-38, 83,65)
+                       ]
+        self._templateTest(lines, expectedList)
+        
+    def _templateTest(self, datas2TestList, expectedList ):
+        pathFileName = "dummyPathFile_%s" % ( self._uniqId )
+        _MockPathFile(pathFileName, datas2TestList)
+        
+
+        self._db.createTable( self._table, "path", pathFileName )
+
+        _MockMapFile(self._mapFileName)
+        chunkConnector = PathChunkConnector(self._mapFileName, self._db, self._table, 0)
+        chunkConnector.run()
+       
+        sql_cmd = 'select * from %s' % (self._table)
+        self._db.execute(sql_cmd)
+        res = self._db.fetchall()
+        
+        for i in xrange(len(expectedList)):
+            resultTuple = res[i]
+            expectedTuple = expectedList[i]
+            self._assertExpectedTupleEqualsObsTuple(expectedTuple, resultTuple)
+        
+        self._db.dropTable( self._table )
+        os.remove(pathFileName)
+        os.remove(self._mapFileName)
+        
+    def _assertExpectedTupleEqualsObsTuple(self, expectedTuple, resultTuple):
+        self.assertEquals(expectedTuple[0], resultTuple[1])
+        self.assertEquals(expectedTuple[1], resultTuple[2])
+        self.assertEquals(expectedTuple[2], resultTuple[3])
+        self.assertEquals(expectedTuple[3], resultTuple[4])
+        self.assertEquals(expectedTuple[4], resultTuple[5])
+        self.assertEquals(expectedTuple[5], resultTuple[6])
+        self.assertEquals(expectedTuple[6], resultTuple[7])
+        self.assertEquals(expectedTuple[7], resultTuple[8])
+        self.assertEquals(expectedTuple[8], resultTuple[9])
+        
+
+class _MockPathFile:
+    
+    def __init__(self, fileName, lines):
+        path = open(fileName, "w");
+        for line in lines:
+            path.write(line)
+        path.close()
+        
+        
+class _MockMapFile:
+    
+    def __init__ (self, fileName):
+        map = open(fileName, "w")
+        line1 = "chunk1" + '\t'   + "dmel_chr4" + '\t' +   "1" + '\t'+   "100000" + "\n"
+        line2 = "chunk2" + '\t'   + "dmel_chr4" + '\t' +   "90001" + '\t'+   "190000" + "\n"
+        line3 = "chunk3" + '\t'   + "dmel_chr4" + '\t' +   "180001" + '\t' + "280000" + "\n"  
+        map.write(line1)
+        map.write(line2)
+        map.write(line3)
+        map.close()
+        
+        
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_PathChunkConnector ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/convCoord/test/convCoordTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/convCoord/test/convCoordTestSuite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18 @@
+import unittest
+import sys
+import TestConvCoordWithOverlapps
+import Test_ConvMapChr2Chunk
+import Test_ConvSetChr2Chunk
+import Test_ConvPathChr2Chunk
+
+def main():
+    commonsTestSuite = unittest.TestSuite() 
+    commonsTestSuite.addTest(unittest.makeSuite(TestConvCoordWithOverlapps.Test_ConvCoordWithOverlaps,'test'))
+    commonsTestSuite.addTest(unittest.makeSuite(Test_ConvMapChr2Chunk.Test_ConvMapChr2Chunk,'test'))
+    commonsTestSuite.addTest(unittest.makeSuite(Test_ConvSetChr2Chunk.Test_ConvSetChr2Chunk,'test'))
+    commonsTestSuite.addTest(unittest.makeSuite(Test_ConvPathChr2Chunk.Test_ConvPathChr2Chunk,'test'))
+    runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+    runner.run(commonsTestSuite)
+    
+if __name__ == "__main__":
+    main()
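
Like the other database-backed tests in this changeset, the suites above need a reachable MySQL instance: Test_PathChunkConnector builds its configuration from the REPET_HOST, REPET_USER, REPET_PW, REPET_DB and REPET_PORT environment variables, and DbFactory.createInstance() is expected to pick up the same settings. With those variables exported and the repository root on PYTHONPATH, the aggregate suite can be launched directly (illustrative invocation):

    cd commons/pyRepetUnit/convCoord/test
    python convCoordTestSuite.py
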
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/api-objects.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/api-objects.txt Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+commons.Checker commons.Checker-module.html
+commons.Checker.DEFAULT_LOGGER_NAME commons.Checker-module.html#DEFAULT_LOGGER_NAME
+commons.Checker.LOG_FILE commons.Checker-module.html#LOG_FILE
+commons.IComponentWrapper commons.IComponentWrapper-module.html
+commons.IDataProcessor commons.IDataProcessor-module.html
+commons.Checker.Checker commons.Checker.Checker-class.html
+commons.Checker.Checker.getLogger commons.Checker.Checker-class.html#getLogger
+commons.Checker.Checker.__init__ commons.Checker.Checker-class.html#__init__
+commons.Checker.Checker.setLogger commons.Checker.Checker-class.html#setLogger
+commons.Checker.Checker._handle commons.Checker.Checker-class.html#_handle
+commons.Checker.IChecker.check commons.Checker.IChecker-class.html#check
+commons.Checker.CheckerException commons.Checker.CheckerException-class.html
+commons.Checker.CheckerException.msg commons.Checker.CheckerException-class.html#msg
+commons.Checker.CheckerException.__init__ commons.Checker.CheckerException-class.html#__init__
+commons.Checker.ConfigChecker commons.Checker.ConfigChecker-class.html
+commons.Checker.ConfigChecker.optionsDict commons.Checker.ConfigChecker-class.html#optionsDict
+commons.Checker.ConfigChecker.check commons.Checker.ConfigChecker-class.html#check
+commons.Checker.ConfigChecker.__init__ commons.Checker.ConfigChecker-class.html#__init__
+commons.Checker.ConfigChecker.sectionName commons.Checker.ConfigChecker-class.html#sectionName
+commons.Checker.ConfigException commons.Checker.ConfigException-class.html
+commons.Checker.ConfigException.messages commons.Checker.ConfigException-class.html#messages
+commons.Checker.ConfigException.msg commons.Checker.ConfigException-class.html#msg
+commons.Checker.ConfigException.__init__ commons.Checker.ConfigException-class.html#__init__
+commons.Checker.IChecker commons.Checker.IChecker-class.html
+commons.Checker.IChecker.check commons.Checker.IChecker-class.html#check
+commons.Checker._Logger commons.Checker._Logger-class.html
+commons.Checker._Logger.getLogger commons.Checker._Logger-class.html#getLogger
+commons.Checker._Logger._Logger__single commons.Checker._Logger-class.html#_Logger__single
+commons.Checker._Logger.__init__ commons.Checker._Logger-class.html#__init__
+commons.IComponentWrapper.IComponentWrapper commons.IComponentWrapper.IComponentWrapper-class.html
+commons.IComponentWrapper.IComponentWrapper.loadConfig commons.IComponentWrapper.IComponentWrapper-class.html#loadConfig
+commons.IComponentWrapper.IComponentWrapper.run commons.IComponentWrapper.IComponentWrapper-class.html#run
+commons.IComponentWrapper.IComponentWrapper.getComponent commons.IComponentWrapper.IComponentWrapper-class.html#getComponent
+commons.IComponentWrapper.IComponentWrapper.clean commons.IComponentWrapper.IComponentWrapper-class.html#clean
+commons.IDataProcessor.IDataProcessor commons.IDataProcessor.IDataProcessor-class.html
+commons.IDataProcessor.IDataProcessor.run commons.IDataProcessor.IDataProcessor-class.html#run
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/class-tree.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/class-tree.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>Class Hierarchy</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th bgcolor="#70b0f0" class="navbar-select"
+          >&nbsp;&nbsp;&nbsp;Trees&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">&nbsp;</td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="class-tree.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<center><b>
+ [ <a href="module-tree.html">Module Hierarchy</a>
+ | <a href="class-tree.html">Class Hierarchy</a> ]
+</b></center><br />
+<h1 class="epydoc">Class Hierarchy</h1>
+<ul class="nomargin-top">
+    <li> <strong class="uidlink"><a href="commons.Checker.IChecker-class.html">commons.Checker.IChecker</a></strong>:
+      <em class="summary">Interface emulation for a checker</em>
+    <ul>
+    <li> <strong class="uidlink"><a href="commons.Checker.Checker-class.html">commons.Checker.Checker</a></strong>:
+      <em class="summary">A concrete checker implementation with logging.</em>
+    </li>
+    <li> <strong class="uidlink"><a href="commons.Checker.ConfigChecker-class.html">commons.Checker.ConfigChecker</a></strong>:
+      <em class="summary">A checker for config file.</em>
+    </li>
+    </ul>
+    </li>
+    <li> <strong class="uidlink"><a href="commons.IComponentWrapper.IComponentWrapper-class.html">commons.IComponentWrapper.IComponentWrapper</a></strong>:
+      <em class="summary">An interface wrapper for all pipeline components</em>
+    </li>
+    <li> <strong class="uidlink"><a href="commons.IDataProcessor.IDataProcessor-class.html">commons.IDataProcessor.IDataProcessor</a></strong>:
+      <em class="summary">An interface for all pipeline data processors</em>
+    </li>
+    <li> <strong class="uidlink"><a href="commons.Checker._Logger-class.html" onclick="show_private();">commons.Checker._Logger</a></strong>
+    </li>
+    <li> <strong class="uidlink">object</strong>:
+      <em class="summary">The most base type</em>
+    <ul>
+    <li> <strong class="uidlink">exceptions.BaseException</strong>:
+      <em class="summary">Common base class for all exceptions</em>
+    <ul>
+    <li> <strong class="uidlink">exceptions.Exception</strong>:
+      <em class="summary">Common base class for all non-exit exceptions.</em>
+    <ul>
+    <li> <strong class="uidlink"><a href="commons.Checker.CheckerException-class.html">commons.Checker.CheckerException</a></strong>:
+      <em class="summary">Exception raised during check.</em>
+    </li>
+    <li> <strong class="uidlink"><a href="commons.Checker.ConfigException-class.html">commons.Checker.ConfigException</a></strong>:
+      <em class="summary">An exception raised by the check method of class ConfigChecker</em>
+    </li>
+    </ul>
+    </li>
+    </ul>
+    </li>
+    </ul>
+    </li>
+</ul>
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th bgcolor="#70b0f0" class="navbar-select"
+          >&nbsp;&nbsp;&nbsp;Trees&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker-module.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker-module.html Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,240 @@\n+<?xml version="1.0" encoding="ascii"?>\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n+          "DTD/xhtml1-transitional.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n+<head>\n+  <title>commons.Checker</title>\n+  <link rel="stylesheet" href="epydoc.css" type="text/css" />\n+  <script type="text/javascript" src="epydoc.js"></script>\n+</head>\n+\n+<body bgcolor="white" text="black" link="blue" vlink="#204080"\n+      alink="#204080">\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table width="100%" cellpadding="0" cellspacing="0">\n+  <tr valign="top">\n+    <td width="100%">\n+      <span class="breadcrumbs">\n+        Package&nbsp;commons ::\n+        Module&nbsp;Checker\n+      </span>\n+    </td>\n+    <td>\n+      <table cellpadding="0" cellspacing="0">\n+        <!-- hide/show private -->\n+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"\n+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>\n+        <tr><td align="right"><span class="options"\n+            >[<a href="frames.html" target="_top">frames</a\n+            >]&nbsp;|&nbsp;<a href="commons.Checker-module.html"\n+            target="_top">no&nbsp;frames</a>]</span></td></tr>\n+      </table>\n+    </td>\n+  </tr>\n+</table>\n+<!-- ==================== MODULE DESCRIPTION ==================== -->\n+<h1 class="epydoc">Module Checker</h1><p class="nomargin-top"><span class="codelink"><a href="commons.Checker-pysrc.html">source&nbsp;code</a></span></p>\n+<p>Created on 9 avr. 
2009</p>\n+\n+<hr />\n+<div class="fields">      <p><strong>Author:</strong>\n+        oinizan\n+      </p>\n+</div><!-- ==================== CLASSES ==================== -->\n+<a name="section-Classes"></a>\n+<table class="summary" border="1" cellpadding="3"\n+       cellspacing="0" width="100%" bgcolor="white">\n+<tr bgcolor="#70b0f0" class="table-header">\n+  <td colspan="2" class="table-header">\n+    <table border="0" cellpadding="0" cellspacing="0" width="100%">\n+      <tr valign="top">\n+        <td align="left"><span class="table-header">Classes</span></td>\n+        <td align="right" valign="top"\n+         ><span class="options">[<a href="#section-Classes"\n+         class="privatelink" onclick="toggle_private();"\n+         >hide private</a>]</span></td>\n+      </tr>\n+    </table>\n+  </td>\n+</tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+        <a href="commons.Checker.Checker-class.html" class="summary-name">Checker</a><br />\n+      A concrete checker implementation with a logging.\n+    </td>\n+  </tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+        <a href="commons.Checker.CheckerException-class.html" class="summary-name">CheckerException</a><br />\n+      Exception raised during check.\n+    </td>\n+  </tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+        <a href="commons.Checker.ConfigChecker-class.html" class="summary-name">ConfigChecker</a><br />\n+      A checker for config file.\n+    </td>\n+  </tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summ'..b'<span class="table-header">Variables</span></td>\n+        <td align="right" valign="top"\n+         ><span class="options">[<a href="#section-Variables"\n+         class="privatelink" onclick="toggle_private();"\n+         >hide private</a>]</span></td>\n+      </tr>\n+    </table>\n+  </td>\n+</tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+        <a name="DEFAULT_LOGGER_NAME"></a><span class="summary-name">DEFAULT_LOGGER_NAME</span> = <code title="\'log02\'"><code class="variable-quote">\'</code><code class="variable-string">log02</code><code class="variable-quote">\'</code></code>\n+    </td>\n+  </tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+        <a href="commons.Checker-module.html#LOG_FILE" class="summary-name">LOG_FILE</a> = <code title="\'/home/oinizan/workspace/repet_pipe/pyRepetUnit/blaster/check/longconf\\\n+.ini\'"><code class="variable-quote">\'</code><code class="variable-string">/home/oinizan/workspace/repet_pipe/pyRepetUnit/bla</code><code class="variable-ellipsis">...</code></code>\n+    </td>\n+  </tr>\n+</table>\n+<!-- ==================== VARIABLES DETAILS ==================== -->\n+<a name="section-VariablesDetails"></a>\n+<table class="details" border="1" cellpadding="3"\n+       cellspacing="0" width="100%" bgcolor="white">\n+<tr bgcolor="#70b0f0" class="table-header">\n+  <td colspan="2" class="table-header">\n+    <table border="0" cellpadding="0" cellspacing="0" 
width="100%">\n+      <tr valign="top">\n+        <td align="left"><span class="table-header">Variables Details</span></td>\n+        <td align="right" valign="top"\n+         ><span class="options">[<a href="#section-VariablesDetails"\n+         class="privatelink" onclick="toggle_private();"\n+         >hide private</a>]</span></td>\n+      </tr>\n+    </table>\n+  </td>\n+</tr>\n+</table>\n+<a name="LOG_FILE"></a>\n+<div>\n+<table class="details" border="1" cellpadding="3"\n+       cellspacing="0" width="100%" bgcolor="white">\n+<tr><td>\n+  <h3 class="epydoc">LOG_FILE</h3>\n+  \n+  <dl class="fields">\n+  </dl>\n+  <dl class="fields">\n+    <dt>Value:</dt>\n+      <dd><table><tr><td><pre class="variable">\n+<code class="variable-quote">\'</code><code class="variable-string">/home/oinizan/workspace/repet_pipe/pyRepetUnit/blaster/check/longconf</code><span class="variable-linewrap"><img src="crarr.png" alt="\\" /></span>\n+<code class="variable-string">.ini</code><code class="variable-quote">\'</code>\n+</pre></td></tr></table>\n+</dd>\n+  </dl>\n+</td></tr></table>\n+</div>\n+<br />\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table border="0" cellpadding="0" cellspacing="0" width="100%%">\n+  <tr>\n+    <td align="left" class="footer">\n+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009\n+    </td>\n+    <td align="right" class="footer">\n+      <a target="mainFrame" href="http://epydoc.sourceforge.net"\n+        >http://epydoc.sourceforge.net</a>\n+    </td>\n+  </tr>\n+</table>\n+\n+<script type="text/javascript">\n+  <!--\n+  // Private objects are initially displayed (because if\n+  // javascript is turned off then we want them to be\n+  // visible); but by default, we want to hide them.  So hide\n+  // them unless we have a cookie that says to show them.\n+  checkCookie();\n+  // -->\n+</script>\n+</body>\n+</html>\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker-pysrc.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker-pysrc.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,259 @@
[259-line hunk, truncated in the dump: Epydoc 3.0.1 source listing for module commons.Checker. The visible portions show the module docstring ("Created on 9 Apr. 2009", "@author: oinizan"), imports of logging.config and ConfigParser (NoSectionError, NoOptionError), the constant LOG_FILE = "/home/oinizan/workspace/repet_pipe/pyRepetUnit/blaster/check/longconf.ini", and the tail of class _Logger, whose __init__ falls back to logging.getLogger(DEFAULT_LOGGER_NAME) when no logger name is given, records the instance in _Logger.__single, and whose getLogger() returns the wrapped logger. The page ends with the standard Epydoc navigation bar and footer.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker.Checker-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker.Checker-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,244 @@
[244-line hunk, truncated in the dump: Epydoc 3.0.1 page for class commons.Checker.Checker, a subclass of IChecker. Description: "A concrete checker implementation with a logging. Logger instance is a singleton of logging module." Documented instance methods: __init__(self); setLogger(self, logger), "set (change) default logger", whose logger parameter is a Logger instance; check is inherited from IChecker. Standard Epydoc navigation bar and footer.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker.CheckerException-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker.CheckerException-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,270 @@
[270-line hunk, truncated in the dump: Epydoc 3.0.1 page for class commons.Checker.CheckerException, which derives from exceptions.Exception (via exceptions.BaseException and object). Description: "Exception raised during check." Documented constructor: __init__(self, msg=''); the remaining methods and the args and message properties are inherited from the built-in exception classes. Standard Epydoc navigation bar and footer.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker.ConfigChecker-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker.ConfigChecker-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,244 @@
[244-line hunk, truncated in the dump: Epydoc 3.0.1 page for class commons.Checker.ConfigChecker, a subclass of IChecker. Description: "A checker for config file." Documented members: __init__(self, sectionName, optionsDict); check(self, configFile), which overrides IChecker.check and raises a CheckerException if an error occurs; instance variables optionsDict (a dict whose keys are the options to check, with empty strings as values) and sectionName (the section to check in the config file). Standard Epydoc navigation bar and footer.]
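A hedged usage sketch for the ConfigChecker described above: the keys of optionsDict name the options that must be present in the given section, and check() raises a CheckerException when the file does not comply. The section name, option names and file name below are invented for the example; only the signatures come from the documentation, and the import assumes the repository's commons package is on the path.

    from commons.Checker import ConfigChecker, CheckerException

    # Options that must exist in the hypothetical [blaster] section;
    # values stay empty strings, as the documentation describes.
    required = {"genome": "", "db_host": "", "db_port": ""}

    checker = ConfigChecker("blaster", required)
    try:
        checker.check("pipeline.cfg")  # hypothetical config file
    except CheckerException as exc:
        print("invalid configuration: %s" % exc)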
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker.ConfigException-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker.ConfigException-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,280 @@
[280-line hunk, truncated in the dump: Epydoc 3.0.1 page for class commons.Checker.ConfigException, which derives from exceptions.Exception (via exceptions.BaseException and object). Description: an exception raised by the check method of class ConfigChecker. Documented constructor: __init__(self, msg, messages=[]); the args and message properties are inherited from exceptions.BaseException. Standard Epydoc navigation bar and footer.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker.IChecker-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker.IChecker-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,187 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>commons.Checker.IChecker</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">
+      <span class="breadcrumbs">
+        Package&nbsp;commons ::
+        <a href="commons.Checker-module.html">Module&nbsp;Checker</a> ::
+        Class&nbsp;IChecker
+      </span>
+    </td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="commons.Checker.IChecker-class.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<!-- ==================== CLASS DESCRIPTION ==================== -->
+<h1 class="epydoc">Class IChecker</h1><p class="nomargin-top"><span class="codelink"><a href="commons.Checker-pysrc.html#IChecker">source&nbsp;code</a></span></p>
+<dl><dt>Known Subclasses:</dt>
+<dd>
+      <ul class="subclass-list">
+<li><a href="commons.Checker.Checker-class.html">Checker</a></li><li>, <a href="commons.Checker.ConfigChecker-class.html">ConfigChecker</a></li>  </ul>
+</dd></dl>
+
+<hr />
+<p>Interface emulation for a checker</p>
+
+<!-- ==================== INSTANCE METHODS ==================== -->
+<a name="section-InstanceMethods"></a>
+<table class="summary" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Instance Methods</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-InstanceMethods"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+<tr>
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+      <table width="100%" cellpadding="0" cellspacing="0" border="0">
+        <tr>
+          <td><span class="summary-sig"><a href="commons.Checker.IChecker-class.html#check" class="summary-sig-name">check</a>(<span class="summary-sig-arg">self</span>,
+        <span class="summary-sig-arg">arg</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string"></code><code class="variable-quote">'</code></span>)</span><br />
+      perform check, raise a CheckerException if error occured</td>
+          <td align="right" valign="top">
+            <span class="codelink"><a href="commons.Checker-pysrc.html#IChecker.check">source&nbsp;code</a></span>
+            
+          </td>
+        </tr>
+      </table>
+      
+    </td>
+  </tr>
+</table>
+<!-- ==================== METHOD DETAILS ==================== -->
+<a name="section-MethodDetails"></a>
+<table class="details" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Method Details</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-MethodDetails"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+</table>
+<a name="check"></a>
+<div>
+<table class="details" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr><td>
+  <table width="100%" cellpadding="0" cellspacing="0" border="0">
+  <tr valign="top"><td>
+  <h3 class="epydoc"><span class="sig"><span class="sig-name">check</span>(<span class="sig-arg">self</span>,
+        <span class="sig-arg">arg</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string"></code><code class="variable-quote">'</code></span>)</span>
+  </h3>
+  </td><td align="right" valign="top"
+    ><span class="codelink"><a href="commons.Checker-pysrc.html#IChecker.check">source&nbsp;code</a></span>&nbsp;
+    </td>
+  </tr></table>
+  
+  <p>perform check, raise a CheckerException if error occured</p>
+  <dl class="fields">
+    <dt>Parameters:</dt>
+    <dd><ul class="nomargin-top">
+        <li><strong class="pname"><code>arg</code></strong> (choose the appropriate type) - a collecting parameter: put here all you need to perform check</li>
+    </ul></dd>
+  </dl>
+</td></tr></table>
+</div>
+<br />
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
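IChecker only emulates an interface, so new checkers are written by subclassing it and overriding check(self, arg=''). The FASTA example below is invented to illustrate that contract; only the method signature and the use of CheckerException to signal failure come from the documentation above, and the import assumes the repository's commons package is importable.

    from commons.Checker import IChecker, CheckerException

    class FastaChecker(IChecker):
        """Hypothetical checker: fail unless the given file starts like FASTA."""

        def check(self, arg=''):
            # arg is the "collecting parameter" from the interface; here it is a file path.
            handle = open(arg)
            try:
                first_line = handle.readline()
            finally:
                handle.close()
            if not first_line.startswith(">"):
                raise CheckerException("%s does not look like a FASTA file" % arg)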
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.Checker._Logger-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.Checker._Logger-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,175 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>commons.Checker._Logger</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">
+      <span class="breadcrumbs">
+        Package&nbsp;commons ::
+        <a href="commons.Checker-module.html">Module&nbsp;Checker</a> ::
+        Class&nbsp;_Logger
+      </span>
+    </td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="commons.Checker._Logger-class.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<!-- ==================== CLASS DESCRIPTION ==================== -->
+<h1 class="epydoc">Class _Logger</h1><p class="nomargin-top"><span class="codelink"><a href="commons.Checker-pysrc.html#_Logger">source&nbsp;code</a></span></p>
+<!-- ==================== INSTANCE METHODS ==================== -->
+<a name="section-InstanceMethods"></a>
+<table class="summary" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Instance Methods</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-InstanceMethods"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+<tr>
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+      <table width="100%" cellpadding="0" cellspacing="0" border="0">
+        <tr>
+          <td><span class="summary-sig"><a name="__init__"></a><span class="summary-sig-name">__init__</span>(<span class="summary-sig-arg">self</span>,
+        <span class="summary-sig-arg">loggerName</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string"></code><code class="variable-quote">'</code></span>)</span></td>
+          <td align="right" valign="top">
+            <span class="codelink"><a href="commons.Checker-pysrc.html#_Logger.__init__">source&nbsp;code</a></span>
+            
+          </td>
+        </tr>
+      </table>
+      
+    </td>
+  </tr>
+<tr>
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+      <table width="100%" cellpadding="0" cellspacing="0" border="0">
+        <tr>
+          <td><span class="summary-sig"><a name="getLogger"></a><span class="summary-sig-name">getLogger</span>(<span class="summary-sig-arg">self</span>)</span></td>
+          <td align="right" valign="top">
+            <span class="codelink"><a href="commons.Checker-pysrc.html#_Logger.getLogger">source&nbsp;code</a></span>
+            
+          </td>
+        </tr>
+      </table>
+      
+    </td>
+  </tr>
+</table>
+<!-- ==================== CLASS VARIABLES ==================== -->
+<a name="section-ClassVariables"></a>
+<table class="summary" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Class Variables</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-ClassVariables"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+<tr class="private">
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+        <a name="_Logger__single"></a><span class="summary-name">_Logger__single</span> = <code title="None">None</code>
+    </td>
+  </tr>
+</table>
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
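The _Logger page above exposes only __init__(loggerName=''), getLogger() and the private class variable _Logger__single = None; the name-mangled __single attribute is where a single shared instance is usually kept. The following sketch is an assumption about that shape, not the actual commons.Checker source (which is not reproduced here):

# Assumed shape only: wrapping the standard logging module and the
# getInstance() helper are illustrative, not taken from the repository.
import logging

class _Logger(object):
    __single = None                      # rendered as _Logger__single above

    def __init__(self, loggerName=''):
        self._logger = logging.getLogger(loggerName)

    def getLogger(self):
        # Return the wrapped logging.Logger instance.
        return self._logger

    @classmethod
    def getInstance(cls, loggerName=''):
        # Hypothetical helper: reuse the shared instance if one already exists.
        if cls.__single is None:
            cls.__single = cls(loggerName)
        return cls.__single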
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.IComponentWrapper-module.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.IComponentWrapper-module.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>commons.IComponentWrapper</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">
+      <span class="breadcrumbs">
+        Package&nbsp;commons ::
+        Module&nbsp;IComponentWrapper
+      </span>
+    </td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="commons.IComponentWrapper-module.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<!-- ==================== MODULE DESCRIPTION ==================== -->
+<h1 class="epydoc">Module IComponentWrapper</h1><p class="nomargin-top"><span class="codelink"><a href="commons.IComponentWrapper-pysrc.html">source&nbsp;code</a></span></p>
+<!-- ==================== CLASSES ==================== -->
+<a name="section-Classes"></a>
+<table class="summary" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Classes</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-Classes"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+<tr>
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+        <a href="commons.IComponentWrapper.IComponentWrapper-class.html" class="summary-name">IComponentWrapper</a><br />
+      An interface wrapper for all pipeline components
+    </td>
+  </tr>
+</table>
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.IComponentWrapper-pysrc.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.IComponentWrapper-pysrc.html Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,136 @@\n+<?xml version="1.0" encoding="ascii"?>\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n+          "DTD/xhtml1-transitional.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n+<head>\n+  <title>commons.IComponentWrapper</title>\n+  <link rel="stylesheet" href="epydoc.css" type="text/css" />\n+  <script type="text/javascript" src="epydoc.js"></script>\n+</head>\n+\n+<body bgcolor="white" text="black" link="blue" vlink="#204080"\n+      alink="#204080">\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table width="100%" cellpadding="0" cellspacing="0">\n+  <tr valign="top">\n+    <td width="100%">\n+      <span class="breadcrumbs">\n+        Package&nbsp;commons ::\n+        Module&nbsp;IComponentWrapper\n+      </span>\n+    </td>\n+    <td>\n+      <table cellpadding="0" cellspacing="0">\n+        <!-- hide/show private -->\n+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"\n+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>\n+        <tr><td align="right"><span class="options"\n+            >[<a href="frames.html" target="_top">frames</a\n+            >]&nbsp;|&nbsp;<a href="commons.IComponentWrapper-pysrc.html"\n+            target="_top">no&nbsp;frames</a>]</span></td></tr>\n+      </table>\n+    </td>\n+  </tr>\n+</table>\n+<h1 class="epydoc">Source Code for <a href="commons.IComponentWrapper-module.html">Module commons.IComponentWrapper</a></h1>\n+<pre class="py-src">\n+<a name="L1"></a><tt class="py-lineno"> 1</tt>  <tt class="py-line"> </tt>\n+<a name="IComponentWrapper"></a><div id="IComponentWrapper-def"><a name="L2"></a><tt class="py-lineno"> 2</tt> <a class="py-toggle" href="#" id="IComponentWrapper-toggle" onclick="return toggle(\'IComponentWrapper\');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="commons.IComponentWrapper.IComponentWrapper-class.html">IComponentWrapper</a><tt class="py-op">:</tt> </tt>\n+</div><div id="IComponentWrapper-collapsed" style="display:none;" pad="++" indent="++++"></div><div id="IComponentWrapper-expanded"><a name="L3"></a><tt class="py-lineno"> 3</tt>  <tt class="py-line">    <tt class="py-docstring">"""</tt> </tt>\n+<a name="L4"></a><tt class="py-lineno"> 4</tt>  <tt class="py-line"><tt class="py-docstring">     A interface wrapper for all pipelines component</tt> </tt>\n+<a name="L5"></a><tt class="py-lineno"> 5</tt>  <tt class="py-line"><tt class="py-docstring">    """</tt> </tt>\n+<a name="L6"></a><tt class="py-lineno"> 6</tt>  <tt class="py-line">     </tt>\n+<a name="IComponentWrapper.run"></a><div id="IComponentWrapper.run-def"><a name="L7"></a><tt class="py-lineno"> 7</tt> <a class="py-toggle" href="#" id="IComponentWrapper.run-toggle" onclick="return toggle(\'IComponentWrapper.run\');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" 
href="commons.IComponentWrapper.IComponentWrapper-class.html#run">run</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>\n+</div><div id="IComponentWrapper.run-collapsed" style="display:none;" pad="++" indent="++++++++"></div><div id="IComponentWrapper.run-expanded"><a name="L8"></a><tt class="py-lineno"> 8</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>\n+<a name="L9"></a><tt class="py-lineno"> 9</tt>  <tt class="py-line">'..b'ss="py-line">        <tt class="py-keyword">pass</tt> </tt>\n+</div><a name="L21"></a><tt class="py-lineno">21</tt>  <tt class="py-line"> </tt>\n+<a name="IComponentWrapper.clean"></a><div id="IComponentWrapper.clean-def"><a name="L22"></a><tt class="py-lineno">22</tt> <a class="py-toggle" href="#" id="IComponentWrapper.clean-toggle" onclick="return toggle(\'IComponentWrapper.clean\');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="commons.IComponentWrapper.IComponentWrapper-class.html#clean">clean</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>\n+</div><div id="IComponentWrapper.clean-collapsed" style="display:none;" pad="++" indent="++++++++"></div><div id="IComponentWrapper.clean-expanded"><a name="L23"></a><tt class="py-lineno">23</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>\n+<a name="L24"></a><tt class="py-lineno">24</tt>  <tt class="py-line"><tt class="py-docstring">        clean files generated by run method</tt> </tt>\n+<a name="L25"></a><tt class="py-lineno">25</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>\n+<a name="L26"></a><tt class="py-lineno">26</tt>  <tt class="py-line">        <tt class="py-keyword">pass</tt> </tt>\n+</div><a name="L27"></a><tt class="py-lineno">27</tt>  <tt class="py-line">     </tt>\n+<a name="IComponentWrapper.getComponent"></a><div id="IComponentWrapper.getComponent-def"><a name="L28"></a><tt class="py-lineno">28</tt> <a class="py-toggle" href="#" id="IComponentWrapper.getComponent-toggle" onclick="return toggle(\'IComponentWrapper.getComponent\');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="commons.IComponentWrapper.IComponentWrapper-class.html#getComponent">getComponent</a> <tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>\n+</div><div id="IComponentWrapper.getComponent-collapsed" style="display:none;" pad="++" indent="++++++++"></div><div id="IComponentWrapper.getComponent-expanded"><a name="L29"></a><tt class="py-lineno">29</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>\n+<a name="L30"></a><tt class="py-lineno">30</tt>  <tt class="py-line"><tt class="py-docstring">        return component wrapped</tt> </tt>\n+<a name="L31"></a><tt class="py-lineno">31</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>\n+</div></div><a name="L32"></a><tt class="py-lineno">32</tt>  <tt class="py-line"> </tt><script type="text/javascript">\n+<!--\n+expandto(location.href);\n+// -->\n+</script>\n+</pre>\n+<br />\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- 
Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table border="0" cellpadding="0" cellspacing="0" width="100%%">\n+  <tr>\n+    <td align="left" class="footer">\n+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009\n+    </td>\n+    <td align="right" class="footer">\n+      <a target="mainFrame" href="http://epydoc.sourceforge.net"\n+        >http://epydoc.sourceforge.net</a>\n+    </td>\n+  </tr>\n+</table>\n+\n+<script type="text/javascript">\n+  <!--\n+  // Private objects are initially displayed (because if\n+  // javascript is turned off then we want them to be\n+  // visible); but by default, we want to hide them.  So hide\n+  // them unless we have a cookie that says to show them.\n+  checkCookie();\n+  // -->\n+</script>\n+</body>\n+</html>\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.IComponentWrapper.IComponentWrapper-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.IComponentWrapper.IComponentWrapper-class.html Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,231 @@\n+<?xml version="1.0" encoding="ascii"?>\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n+          "DTD/xhtml1-transitional.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n+<head>\n+  <title>commons.IComponentWrapper.IComponentWrapper</title>\n+  <link rel="stylesheet" href="epydoc.css" type="text/css" />\n+  <script type="text/javascript" src="epydoc.js"></script>\n+</head>\n+\n+<body bgcolor="white" text="black" link="blue" vlink="#204080"\n+      alink="#204080">\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table width="100%" cellpadding="0" cellspacing="0">\n+  <tr valign="top">\n+    <td width="100%">\n+      <span class="breadcrumbs">\n+        Package&nbsp;commons ::\n+        <a href="commons.IComponentWrapper-module.html">Module&nbsp;IComponentWrapper</a> ::\n+        Class&nbsp;IComponentWrapper\n+      </span>\n+    </td>\n+    <td>\n+      <table cellpadding="0" cellspacing="0">\n+        <!-- hide/show private -->\n+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"\n+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>\n+        <tr><td align="right"><span class="options"\n+            >[<a href="frames.html" target="_top">frames</a\n+            >]&nbsp;|&nbsp;<a href="commons.IComponentWrapper.IComponentWrapper-class.html"\n+            target="_top">no&nbsp;frames</a>]</span></td></tr>\n+      </table>\n+    </td>\n+  </tr>\n+</table>\n+<!-- ==================== CLASS DESCRIPTION ==================== -->\n+<h1 class="epydoc">Class IComponentWrapper</h1><p class="nomargin-top"><span class="codelink"><a href="commons.IComponentWrapper-pysrc.html#IComponentWrapper">source&nbsp;code</a></span></p>\n+<p>A interface wrapper for all pipelines component</p>\n+\n+<!-- ==================== INSTANCE METHODS ==================== -->\n+<a name="section-InstanceMethods"></a>\n+<table class="summary" border="1" cellpadding="3"\n+       cellspacing="0" width="100%" bgcolor="white">\n+<tr bgcolor="#70b0f0" class="table-header">\n+  <td colspan="2" class="table-header">\n+    <table border="0" cellpadding="0" cellspacing="0" width="100%">\n+      <tr valign="top">\n+        <td align="left"><span class="table-header">Instance Methods</span></td>\n+        <td align="right" valign="top"\n+         ><span class="options">[<a href="#section-InstanceMethods"\n+         class="privatelink" onclick="toggle_private();"\n+         >hide private</a>]</span></td>\n+      </tr>\n+    </table>\n+  </td>\n+</tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+      <table width="100%" cellpadding="0" cellspacing="0" border="0">\n+        <tr>\n+          <td><span class="summary-sig"><a name="clean"></a><span class="summary-sig-name">clean</span>(<span 
class="summary-sig-arg">self</span>)</span><br />\n+      clean files generated by run method</td>\n+          <td align="right" valign="top">\n+            <span class="codelink"><a href="commons.IComponentWrapper-pysrc.html#IComponentWrapper.clean">source&nbsp;code</a></span>\n+            \n+          </td>\n+        </tr>\n+      </table>\n+      \n+    </td>\n+  </tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+      <table width="100%"'..b'</td>\n+          <td align="right" valign="top">\n+            <span class="codelink"><a href="commons.IComponentWrapper-pysrc.html#IComponentWrapper.loadConfig">source&nbsp;code</a></span>\n+            \n+          </td>\n+        </tr>\n+      </table>\n+      \n+    </td>\n+  </tr>\n+<tr>\n+    <td width="15%" align="right" valign="top" class="summary">\n+      <span class="summary-type">&nbsp;</span>\n+    </td><td class="summary">\n+      <table width="100%" cellpadding="0" cellspacing="0" border="0">\n+        <tr>\n+          <td><span class="summary-sig"><a name="run"></a><span class="summary-sig-name">run</span>(<span class="summary-sig-arg">self</span>)</span><br />\n+      run the component</td>\n+          <td align="right" valign="top">\n+            <span class="codelink"><a href="commons.IComponentWrapper-pysrc.html#IComponentWrapper.run">source&nbsp;code</a></span>\n+            \n+          </td>\n+        </tr>\n+      </table>\n+      \n+    </td>\n+  </tr>\n+</table>\n+<!-- ==================== METHOD DETAILS ==================== -->\n+<a name="section-MethodDetails"></a>\n+<table class="details" border="1" cellpadding="3"\n+       cellspacing="0" width="100%" bgcolor="white">\n+<tr bgcolor="#70b0f0" class="table-header">\n+  <td colspan="2" class="table-header">\n+    <table border="0" cellpadding="0" cellspacing="0" width="100%">\n+      <tr valign="top">\n+        <td align="left"><span class="table-header">Method Details</span></td>\n+        <td align="right" valign="top"\n+         ><span class="options">[<a href="#section-MethodDetails"\n+         class="privatelink" onclick="toggle_private();"\n+         >hide private</a>]</span></td>\n+      </tr>\n+    </table>\n+  </td>\n+</tr>\n+</table>\n+<a name="loadConfig"></a>\n+<div>\n+<table class="details" border="1" cellpadding="3"\n+       cellspacing="0" width="100%" bgcolor="white">\n+<tr><td>\n+  <table width="100%" cellpadding="0" cellspacing="0" border="0">\n+  <tr valign="top"><td>\n+  <h3 class="epydoc"><span class="sig"><span class="sig-name">loadConfig</span>(<span class="sig-arg">self</span>,\n+        <span class="sig-arg">configParserInstance</span>)</span>\n+  </h3>\n+  </td><td align="right" valign="top"\n+    ><span class="codelink"><a href="commons.IComponentWrapper-pysrc.html#IComponentWrapper.loadConfig">source&nbsp;code</a></span>&nbsp;\n+    </td>\n+  </tr></table>\n+  \n+  <p>load component config</p>\n+  <dl class="fields">\n+    <dt>Parameters:</dt>\n+    <dd><ul class="nomargin-top">\n+        <li><strong class="pname"><code>configParserInstance</code></strong> (class ConfigParser) - config parser where config file is loaded</li>\n+    </ul></dd>\n+  </dl>\n+</td></tr></table>\n+</div>\n+<br />\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      
<th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table border="0" cellpadding="0" cellspacing="0" width="100%%">\n+  <tr>\n+    <td align="left" class="footer">\n+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009\n+    </td>\n+    <td align="right" class="footer">\n+      <a target="mainFrame" href="http://epydoc.sourceforge.net"\n+        >http://epydoc.sourceforge.net</a>\n+    </td>\n+  </tr>\n+</table>\n+\n+<script type="text/javascript">\n+  <!--\n+  // Private objects are initially displayed (because if\n+  // javascript is turned off then we want them to be\n+  // visible); but by default, we want to hide them.  So hide\n+  // them unless we have a cookie that says to show them.\n+  checkCookie();\n+  // -->\n+</script>\n+</body>\n+</html>\n'
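Taken together, the IComponentWrapper pages describe four methods: loadConfig(configParserInstance) to read the component configuration from a ConfigParser, run() to launch the wrapped tool, getComponent() to return the wrapped component, and clean() to remove files produced by run(). A hedged sketch of a concrete wrapper following that contract; the word-count step, the config section and the option names are invented for the example:

# Only the IComponentWrapper base class and its four method names come from the
# documentation above; everything else here is an illustrative assumption.
import os
from commons.IComponentWrapper import IComponentWrapper

class WordCountWrapper(IComponentWrapper):

    def loadConfig(self, configParserInstance):
        # 'configParserInstance' is a ConfigParser with the pipeline config loaded.
        self._inFileName = configParserInstance.get("word_count", "input_file")
        self._outFileName = configParserInstance.get("word_count", "output_file")

    def run(self):
        # Run the component: here, count the words of the input file.
        with open(self._inFileName) as inFile:
            self._count = len(inFile.read().split())
        with open(self._outFileName, "w") as outFile:
            outFile.write("%d\n" % self._count)

    def getComponent(self):
        # Return the component wrapped (the count, in this toy example).
        return self._count

    def clean(self):
        # Clean files generated by the run method.
        if os.path.exists(self._outFileName):
            os.remove(self._outFileName)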
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.IDataProcessor-module.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.IDataProcessor-module.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>commons.IDataProcessor</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">
+      <span class="breadcrumbs">
+        Package&nbsp;commons ::
+        Module&nbsp;IDataProcessor
+      </span>
+    </td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="commons.IDataProcessor-module.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<!-- ==================== MODULE DESCRIPTION ==================== -->
+<h1 class="epydoc">Module IDataProcessor</h1><p class="nomargin-top"><span class="codelink"><a href="commons.IDataProcessor-pysrc.html">source&nbsp;code</a></span></p>
+<!-- ==================== CLASSES ==================== -->
+<a name="section-Classes"></a>
+<table class="summary" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Classes</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-Classes"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+<tr>
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+        <a href="commons.IDataProcessor.IDataProcessor-class.html" class="summary-name">IDataProcessor</a><br />
+      An interface for all pipeline data processors
+    </td>
+  </tr>
+</table>
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.IDataProcessor-pysrc.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.IDataProcessor-pysrc.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>commons.IDataProcessor</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">
+      <span class="breadcrumbs">
+        Package&nbsp;commons ::
+        Module&nbsp;IDataProcessor
+      </span>
+    </td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="commons.IDataProcessor-pysrc.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<h1 class="epydoc">Source Code for <a href="commons.IDataProcessor-module.html">Module commons.IDataProcessor</a></h1>
+<pre class="py-src">
+<a name="IDataProcessor"></a><div id="IDataProcessor-def"><a name="L1"></a><tt class="py-lineno"> 1</tt> <a class="py-toggle" href="#" id="IDataProcessor-toggle" onclick="return toggle('IDataProcessor');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="commons.IDataProcessor.IDataProcessor-class.html">IDataProcessor</a><tt class="py-op">:</tt> </tt>
+</div><div id="IDataProcessor-collapsed" style="display:none;" pad="++" indent="++++"></div><div id="IDataProcessor-expanded"><a name="L2"></a><tt class="py-lineno"> 2</tt>  <tt class="py-line">    <tt class="py-docstring">"""</tt> </tt>
+<a name="L3"></a><tt class="py-lineno"> 3</tt>  <tt class="py-line"><tt class="py-docstring">    A interface for all pipelines data processor</tt> </tt>
+<a name="L4"></a><tt class="py-lineno"> 4</tt>  <tt class="py-line"><tt class="py-docstring">    """</tt>     </tt>
+<a name="IDataProcessor.run"></a><div id="IDataProcessor.run-def"><a name="L5"></a><tt class="py-lineno"> 5</tt> <a class="py-toggle" href="#" id="IDataProcessor.run-toggle" onclick="return toggle('IDataProcessor.run');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="commons.IDataProcessor.IDataProcessor-class.html#run">run</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
+</div><div id="IDataProcessor.run-collapsed" style="display:none;" pad="++" indent="++++++++"></div><div id="IDataProcessor.run-expanded"><a name="L6"></a><tt class="py-lineno"> 6</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
+<a name="L7"></a><tt class="py-lineno"> 7</tt>  <tt class="py-line"><tt class="py-docstring">        run: process the data</tt> </tt>
+<a name="L8"></a><tt class="py-lineno"> 8</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
+<a name="L9"></a><tt class="py-lineno"> 9</tt>  <tt class="py-line">        <tt class="py-keyword">pass</tt> </tt>
+</div></div><a name="L10"></a><tt class="py-lineno">10</tt>  <tt class="py-line"> </tt><script type="text/javascript">
+<!--
+expandto(location.href);
+// -->
+</script>
+</pre>
+<br />
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
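The rendered source above is the whole interface: IDataProcessor declares a single run() method ("run: process the data"). A toy implementation of that contract; the GC-content computation is invented purely for illustration:

# Only the IDataProcessor base class and run() come from the page above;
# the GC-content example is an assumption made for illustration.
from commons.IDataProcessor import IDataProcessor

class GCContentProcessor(IDataProcessor):

    def __init__(self, sequence):
        self._sequence = sequence.upper()
        self.gcPercent = None

    def run(self):
        # run: process the data (the only method the interface requires)
        if not self._sequence:
            self.gcPercent = 0.0
            return
        gc = sum(1 for nt in self._sequence if nt in "GC")
        self.gcPercent = 100.0 * gc / len(self._sequence)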
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/commons.IDataProcessor.IDataProcessor-class.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/commons.IDataProcessor.IDataProcessor-class.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>commons.IDataProcessor.IDataProcessor</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">
+      <span class="breadcrumbs">
+        Package&nbsp;commons ::
+        <a href="commons.IDataProcessor-module.html">Module&nbsp;IDataProcessor</a> ::
+        Class&nbsp;IDataProcessor
+      </span>
+    </td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="commons.IDataProcessor.IDataProcessor-class.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<!-- ==================== CLASS DESCRIPTION ==================== -->
+<h1 class="epydoc">Class IDataProcessor</h1><p class="nomargin-top"><span class="codelink"><a href="commons.IDataProcessor-pysrc.html#IDataProcessor">source&nbsp;code</a></span></p>
+<p>An interface for all pipeline data processors</p>
+
+<!-- ==================== INSTANCE METHODS ==================== -->
+<a name="section-InstanceMethods"></a>
+<table class="summary" border="1" cellpadding="3"
+       cellspacing="0" width="100%" bgcolor="white">
+<tr bgcolor="#70b0f0" class="table-header">
+  <td colspan="2" class="table-header">
+    <table border="0" cellpadding="0" cellspacing="0" width="100%">
+      <tr valign="top">
+        <td align="left"><span class="table-header">Instance Methods</span></td>
+        <td align="right" valign="top"
+         ><span class="options">[<a href="#section-InstanceMethods"
+         class="privatelink" onclick="toggle_private();"
+         >hide private</a>]</span></td>
+      </tr>
+    </table>
+  </td>
+</tr>
+<tr>
+    <td width="15%" align="right" valign="top" class="summary">
+      <span class="summary-type">&nbsp;</span>
+    </td><td class="summary">
+      <table width="100%" cellpadding="0" cellspacing="0" border="0">
+        <tr>
+          <td><span class="summary-sig"><a name="run"></a><span class="summary-sig-name">run</span>(<span class="summary-sig-arg">self</span>)</span><br />
+      run: process the data</td>
+          <td align="right" valign="top">
+            <span class="codelink"><a href="commons.IDataProcessor-pysrc.html#IDataProcessor.run">source&nbsp;code</a></span>
+            
+          </td>
+        </tr>
+      </table>
+      
+    </td>
+  </tr>
+</table>
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/crarr.png
Binary file commons/pyRepetUnit/doc/crarr.png has changed
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/epydoc.css
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/epydoc.css Tue Apr 30 14:33:21 2013 -0400
b"@@ -0,0 +1,322 @@\n+\n+\n+/* Epydoc CSS Stylesheet\n+ *\n+ * This stylesheet can be used to customize the appearance of epydoc's\n+ * HTML output.\n+ *\n+ */\n+\n+/* Default Colors & Styles\n+ *   - Set the default foreground & background color with 'body'; and \n+ *     link colors with 'a:link' and 'a:visited'.\n+ *   - Use bold for decision list terms.\n+ *   - The heading styles defined here are used for headings *within*\n+ *     docstring descriptions.  All headings used by epydoc itself use\n+ *     either class='epydoc' or class='toc' (CSS styles for both\n+ *     defined below).\n+ */\n+body                        { background: #ffffff; color: #000000; }\n+p                           { margin-top: 0.5em; margin-bottom: 0.5em; }\n+a:link                      { color: #0000ff; }\n+a:visited                   { color: #204080; }\n+dt                          { font-weight: bold; }\n+h1                          { font-size: +140%; font-style: italic;\n+                              font-weight: bold; }\n+h2                          { font-size: +125%; font-style: italic;\n+                              font-weight: bold; }\n+h3                          { font-size: +110%; font-style: italic;\n+                              font-weight: normal; }\n+code                        { font-size: 100%; }\n+/* N.B.: class, not pseudoclass */\n+a.link                      { font-family: monospace; }\n+ \n+/* Page Header & Footer\n+ *   - The standard page header consists of a navigation bar (with\n+ *     pointers to standard pages such as 'home' and 'trees'); a\n+ *     breadcrumbs list, which can be used to navigate to containing\n+ *     classes or modules; options links, to show/hide private\n+ *     variables and to show/hide frames; and a page title (using\n+ *     <h1>).  The page title may be followed by a link to the\n+ *     corresponding source code (using 'span.codelink').\n+ *   - The footer consists of a navigation bar, a timestamp, and a\n+ *     pointer to epydoc's homepage.\n+ */ \n+h1.epydoc                   { margin: 0; font-size: +140%; font-weight: bold; }\n+h2.epydoc                   { font-size: +130%; font-weight: bold; }\n+h3.epydoc                   { font-size: +115%; font-weight: bold;\n+                              margin-top: 0.2em; }\n+td h3.epydoc                { font-size: +115%; font-weight: bold;\n+                              margin-bottom: 0; }\n+table.navbar                { background: #a0c0ff; color: #000000;\n+                              border: 2px groove #c0d0d0; }\n+table.navbar table          { color: #000000; }\n+th.navbar-select            { background: #70b0ff;\n+                              color: #000000; } \n+table.navbar a              { text-decoration: none; }  \n+table.navbar a:link         { color: #0000ff; }\n+table.navbar a:visited      { color: #204080; }\n+span.breadcrumbs            { font-size: 85%; font-weight: bold; }\n+span.options                { font-size: 70%; }\n+span.codelink               { font-size: 85%; }\n+td.footer                   { font-size: 85%; }\n+\n+/* Table Headers\n+ *   - Each summary table and details section begins with a 'header'\n+ *     row.  
This row contains a section title (marked by\n+ *     'span.table-header') as well as a show/hide private link\n+ *     (marked by 'span.options', defined above).\n+ *   - Summary tables that contain user-defined groups mark those\n+ *     groups using 'group header' rows.\n+ */\n+td.table-header             { background: #70b0ff; color: #000000;\n+                              border: 1px solid #608090; }\n+td.table-header table       { color: #000000; }\n+td.table-header table a:link      { color: #0000ff; }\n+td.table-header table a:visited   { color: #204080; }\n+span.table-header           { font-size: 120%; font-weight: bold; }\n+th.group-header             { background: #c0e0f8; color: #000000;\n+                              text-align: left; font-style: italic; \n+                              font-size: 115%; \n+                              border: 1px solid #608"..b"border-left: 2px solid #000000; \n+                              margin-left: .2em; padding-left: .4em; }\n+.py-lineno                  { font-style: italic; font-size: 90%;\n+                              padding-left: .5em; }\n+a.py-toggle                 { text-decoration: none; }\n+div.py-highlight-hdr        { border-top: 2px solid #000000;\n+                              border-bottom: 2px solid #000000;\n+                              background: #d8e8e8; }\n+div.py-highlight            { border-bottom: 2px solid #000000;\n+                              background: #d0e0e0; }\n+.py-prompt                  { color: #005050; font-weight: bold;}\n+.py-more                    { color: #005050; font-weight: bold;}\n+.py-string                  { color: #006030; }\n+.py-comment                 { color: #003060; }\n+.py-keyword                 { color: #600000; }\n+.py-output                  { color: #404040; }\n+.py-name                    { color: #000050; }\n+.py-name:link               { color: #000050 !important; }\n+.py-name:visited            { color: #000050 !important; }\n+.py-number                  { color: #005000; }\n+.py-defname                 { color: #000060; font-weight: bold; }\n+.py-def-name                { color: #000060; font-weight: bold; }\n+.py-base-class              { color: #000060; }\n+.py-param                   { color: #000060; }\n+.py-docstring               { color: #006030; }\n+.py-decorator               { color: #804020; }\n+/* Use this if you don't want links to names underlined: */\n+/*a.py-name                   { text-decoration: none; }*/\n+\n+/* Graphs & Diagrams\n+ *   - These CSS styles are used for graphs & diagrams generated using\n+ *     Graphviz dot.  'img.graph-without-title' is used for bare\n+ *     diagrams (to remove the border created by making the image\n+ *     clickable).\n+ */\n+img.graph-without-title     { border: none; }\n+img.graph-with-title        { border: 1px solid #000000; }\n+span.graph-title            { font-weight: bold; }\n+span.graph-caption          { }\n+\n+/* General-purpose classes\n+ *   - 'p.indent-wrapped-lines' defines a paragraph whose first line\n+ *     is not indented, but whose subsequent lines are.\n+ *   - The 'nomargin-top' class is used to remove the top margin (e.g.\n+ *     from lists).  
The 'nomargin' class is used to remove both the\n+ *     top and bottom margin (but not the left or right margin --\n+ *     for lists, that would cause the bullets to disappear.)\n+ */\n+p.indent-wrapped-lines      { padding: 0 0 0 7em; text-indent: -7em; \n+                              margin: 0; }\n+.nomargin-top               { margin-top: 0; }\n+.nomargin                   { margin-top: 0; margin-bottom: 0; }\n+\n+/* HTML Log */\n+div.log-block               { padding: 0; margin: .5em 0 .5em 0;\n+                              background: #e8f0f8; color: #000000;\n+                              border: 1px solid #000000; }\n+div.log-error               { padding: .1em .3em .1em .3em; margin: 4px;\n+                              background: #ffb0b0; color: #000000;\n+                              border: 1px solid #000000; }\n+div.log-warning             { padding: .1em .3em .1em .3em; margin: 4px;\n+                              background: #ffffb0; color: #000000;\n+                              border: 1px solid #000000; }\n+div.log-info               { padding: .1em .3em .1em .3em; margin: 4px;\n+                              background: #b0ffb0; color: #000000;\n+                              border: 1px solid #000000; }\n+h2.log-hdr                  { background: #70b0ff; color: #000000;\n+                              margin: 0; padding: 0em 0.5em 0em 0.5em;\n+                              border-bottom: 1px solid #000000; font-size: 110%; }\n+p.log                       { font-weight: bold; margin: .5em 0 .5em 0; }\n+tr.opt-changed              { color: #000000; font-weight: bold; }\n+tr.opt-default              { color: #606060; }\n+pre.log                     { margin: 0; padding: 0; padding-left: 1em; }\n"
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/epydoc.js
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/epydoc.js Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,293 @@\n+function toggle_private() {\n+        // Search for any private/public links on this page.  Store\n+        // their old text in "cmd," so we will know what action to\n+        // take; and change their text to the opposite action.\n+        var cmd = "?";\n+        var elts = document.getElementsByTagName("a");\n+        for(var i=0; i<elts.length; i++) {\n+          if (elts[i].className == "privatelink") {\n+            cmd = elts[i].innerHTML;\n+            elts[i].innerHTML = ((cmd && cmd.substr(0,4)=="show")?\n+                                    "hide&nbsp;private":"show&nbsp;private");\n+          }\n+        }\n+        // Update all DIVs containing private objects.\n+        var elts = document.getElementsByTagName("div");\n+        for(var i=0; i<elts.length; i++) {\n+          if (elts[i].className == "private") {\n+            elts[i].style.display = ((cmd && cmd.substr(0,4)=="hide")?"none":"block");\n+          }\n+          else if (elts[i].className == "public") {\n+            elts[i].style.display = ((cmd && cmd.substr(0,4)=="hide")?"block":"none");\n+          }\n+        }\n+        // Update all table rows containing private objects.  Note, we\n+        // use "" instead of "block" becaue IE & firefox disagree on what\n+        // this should be (block vs table-row), and "" just gives the\n+        // default for both browsers.\n+        var elts = document.getElementsByTagName("tr");\n+        for(var i=0; i<elts.length; i++) {\n+          if (elts[i].className == "private") {\n+            elts[i].style.display = ((cmd && cmd.substr(0,4)=="hide")?"none":"");\n+          }\n+        }\n+        // Update all list items containing private objects.\n+        var elts = document.getElementsByTagName("li");\n+        for(var i=0; i<elts.length; i++) {\n+          if (elts[i].className == "private") {\n+            elts[i].style.display = ((cmd && cmd.substr(0,4)=="hide")?\n+                                        "none":"");\n+          }\n+        }\n+        // Update all list items containing private objects.\n+        var elts = document.getElementsByTagName("ul");\n+        for(var i=0; i<elts.length; i++) {\n+          if (elts[i].className == "private") {\n+            elts[i].style.display = ((cmd && cmd.substr(0,4)=="hide")?"none":"block");\n+          }\n+        }\n+        // Set a cookie to remember the current option.\n+        document.cookie = "EpydocPrivate="+cmd;\n+      }\n+function show_private() {\n+        var elts = document.getElementsByTagName("a");\n+        for(var i=0; i<elts.length; i++) {\n+          if (elts[i].className == "privatelink") {\n+            cmd = elts[i].innerHTML;\n+            if (cmd && cmd.substr(0,4)=="show")\n+                toggle_private();\n+          }\n+        }\n+      }\n+function getCookie(name) {\n+        var dc = document.cookie;\n+        var prefix = name + "=";\n+        var begin = dc.indexOf("; " + prefix);\n+        if (begin == -1) {\n+          begin = dc.indexOf(prefix);\n+          if (begin != 0) return null;\n+        } else\n+        { begin += 2; }\n+        var end = document.cookie.indexOf(";", begin);\n+        if (end == -1)\n+        { end = dc.length; }\n+        return unescape(dc.substring(begin + prefix.length, end));\n+      }\n+function setFrame(url1, url2) {\n+          parent.frames[1].location.href = url1;\n+          parent.frames[2].location.href = url2;\n+      }\n+function checkCookie() {\n+        var cmd=getCookie("EpydocPrivate");\n+        if (cmd && 
cmd.substr(0,4)!="show" && location.href.indexOf("#_") < 0)\n+            toggle_private();\n+      }\n+function toggleCallGraph(id) {\n+        var elt = document.getElementById(id);\n+        if (elt.style.display == "none")\n+            elt.style.display = "block";\n+        else\n+            elt.style.display = "none";\n+      }\n+function expand(id) {\n+  var elt = document.getElementById(id+"-expanded");\n+  if (elt) elt.style.display = "block";\n+  var elt = document.getElementById(id+"-expanded-linenums");\n+  if (elt) elt.style'..b'd);\n+      highlight(id);\n+    }\n+  }\n+}\n+\n+function kill_doclink(id) {\n+  var parent = document.getElementById(id);\n+  parent.removeChild(parent.childNodes.item(0));\n+}\n+function auto_kill_doclink(ev) {\n+  if (!ev) var ev = window.event;\n+  if (!this.contains(ev.toElement)) {\n+    var parent = document.getElementById(this.parentID);\n+    parent.removeChild(parent.childNodes.item(0));\n+  }\n+}\n+\n+function doclink(id, name, targets_id) {\n+  var elt = document.getElementById(id);\n+\n+  // If we already opened the box, then destroy it.\n+  // (This case should never occur, but leave it in just in case.)\n+  if (elt.childNodes.length > 1) {\n+    elt.removeChild(elt.childNodes.item(0));\n+  }\n+  else {\n+    // The outer box: relative + inline positioning.\n+    var box1 = document.createElement("div");\n+    box1.style.position = "relative";\n+    box1.style.display = "inline";\n+    box1.style.top = 0;\n+    box1.style.left = 0;\n+  \n+    // A shadow for fun\n+    var shadow = document.createElement("div");\n+    shadow.style.position = "absolute";\n+    shadow.style.left = "-1.3em";\n+    shadow.style.top = "-1.3em";\n+    shadow.style.background = "#404040";\n+    \n+    // The inner box: absolute positioning.\n+    var box2 = document.createElement("div");\n+    box2.style.position = "relative";\n+    box2.style.border = "1px solid #a0a0a0";\n+    box2.style.left = "-.2em";\n+    box2.style.top = "-.2em";\n+    box2.style.background = "white";\n+    box2.style.padding = ".3em .4em .3em .4em";\n+    box2.style.fontStyle = "normal";\n+    box2.onmouseout=auto_kill_doclink;\n+    box2.parentID = id;\n+\n+    // Get the targets\n+    var targets_elt = document.getElementById(targets_id);\n+    var targets = targets_elt.getAttribute("targets");\n+    var links = "";\n+    target_list = targets.split(",");\n+    for (var i=0; i<target_list.length; i++) {\n+        var target = target_list[i].split("=");\n+        links += "<li><a href=\'" + target[1] + \n+               "\' style=\'text-decoration:none\'>" +\n+               target[0] + "</a></li>";\n+    }\n+  \n+    // Put it all together.\n+    elt.insertBefore(box1, elt.childNodes.item(0));\n+    //box1.appendChild(box2);\n+    box1.appendChild(shadow);\n+    shadow.appendChild(box2);\n+    box2.innerHTML =\n+        "Which <b>"+name+"</b> do you want to see documentation for?" 
+\n+        "<ul style=\'margin-bottom: 0;\'>" +\n+        links + \n+        "<li><a href=\'#\' style=\'text-decoration:none\' " +\n+        "onclick=\'kill_doclink(\\""+id+"\\");return false;\'>"+\n+        "<i>None of the above</i></a></li></ul>";\n+  }\n+  return false;\n+}\n+\n+function get_anchor() {\n+          var href = location.href;\n+          var start = href.indexOf("#")+1;\n+          if ((start != 0) && (start != href.length))\n+              return href.substring(start, href.length);\n+      }\n+function redirect_url(dottedName) {\n+          // Scan through each element of the "pages" list, and check\n+          // if "name" matches with any of them.\n+          for (var i=0; i<pages.length; i++) {\n+\n+              // Each page has the form "<pagename>-m" or "<pagename>-c";\n+              // extract the <pagename> portion & compare it to dottedName.\n+              var pagename = pages[i].substring(0, pages[i].length-2);\n+              if (pagename == dottedName.substring(0,pagename.length)) {\n+\n+                  // We\'ve found a page that matches `dottedName`;\n+                  // construct its URL, using leftover `dottedName`\n+                  // content to form an anchor.\n+                  var pagetype = pages[i].charAt(pages[i].length-1);\n+                  var url = pagename + ((pagetype=="m")?"-module.html":\n+                                                        "-class.html");\n+                  if (dottedName.length > pagename.length)\n+                      url += "#" + dottedName.substring(pagename.length+1,\n+                                                        dottedName.length);\n+                  return url;\n+              }\n+          }\n+      }\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/frames.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/frames.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+          "DTD/xhtml1-frameset.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title> API Documentation </title>
+</head>
+<frameset cols="20%,80%">
+  <frameset rows="30%,70%">
+    <frame src="toc.html" name="moduleListFrame"
+           id="moduleListFrame" />
+    <frame src="toc-everything.html" name="moduleFrame"
+           id="moduleFrame" />
+  </frameset>
+  <frame src="module-tree.html" name="mainFrame" id="mainFrame" />
+</frameset>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/help.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/help.html Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,262 @@\n+<?xml version="1.0" encoding="ascii"?>\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n+          "DTD/xhtml1-transitional.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n+<head>\n+  <title>Help</title>\n+  <link rel="stylesheet" href="epydoc.css" type="text/css" />\n+  <script type="text/javascript" src="epydoc.js"></script>\n+</head>\n+\n+<body bgcolor="white" text="black" link="blue" vlink="#204080"\n+      alink="#204080">\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th bgcolor="#70b0f0" class="navbar-select"\n+          >&nbsp;&nbsp;&nbsp;Help&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table width="100%" cellpadding="0" cellspacing="0">\n+  <tr valign="top">\n+    <td width="100%">&nbsp;</td>\n+    <td>\n+      <table cellpadding="0" cellspacing="0">\n+        <!-- hide/show private -->\n+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"\n+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>\n+        <tr><td align="right"><span class="options"\n+            >[<a href="frames.html" target="_top">frames</a\n+            >]&nbsp;|&nbsp;<a href="help.html"\n+            target="_top">no&nbsp;frames</a>]</span></td></tr>\n+      </table>\n+    </td>\n+  </tr>\n+</table>\n+\n+<h1 class="epydoc"> API Documentation </h1>\n+\n+<p> This document contains the API (Application Programming Interface)\n+documentation for this project.  Documentation for the Python\n+objects defined by the project is divided into separate pages for each\n+package, module, and class.  The API documentation also includes two\n+pages containing information about the project as a whole: a trees\n+page, and an index page.  </p>\n+\n+<h2> Object Documentation </h2>\n+\n+  <p>Each <strong>Package Documentation</strong> page contains: </p>\n+  <ul>\n+    <li> A description of the package. </li>\n+    <li> A list of the modules and sub-packages contained by the\n+    package.  </li>\n+    <li> A summary of the classes defined by the package. </li>\n+    <li> A summary of the functions defined by the package. </li>\n+    <li> A summary of the variables defined by the package. </li>\n+    <li> A detailed description of each function defined by the\n+    package. </li>\n+    <li> A detailed description of each variable defined by the\n+    package. </li>\n+  </ul>\n+  \n+  <p>Each <strong>Module Documentation</strong> page contains:</p>\n+  <ul>\n+    <li> A description of the module. </li>\n+    <li> A summary of the classes defined by the module. </li>\n+    <li> A summary of the functions defined by the module. </li>\n+    <li> A summary of the variables defined by the module. </li>\n+    <li> A detailed description of each function defined by the\n+    module. </li>\n+    <li> A detailed description of each variable defined by the\n+    module. </li>\n+  </ul>\n+  \n+  <p>Each <strong>Class Documentation</strong> page contains: </p>\n+  <ul>\n+    <li> A class inheritance diagram. 
</li>\n+    <li> A list of known subclasses. </li>\n+    <li> A description of the class. </li>\n+    <li> A summary of the methods defined by the class. </li>\n+    <li> A summary of the instance variables defined by the class. </li>\n+    <li> A summary of the class (static) variables defined by the\n+    class. </li> \n+    <li> A detailed description of each method defined by the\n+    class. </li>\n+    <li> A detailed description of each instance variable defined by the\n+    class. </li> \n+    <li> A detailed description of each class (static'..b'ong>" buttons below the top\n+navigation bar can be used to control whether the table of contents is\n+displayed or not. </p>\n+\n+<h2> The Navigation Bar </h2>\n+\n+<p> A navigation bar is located at the top and bottom of every page.\n+It indicates what type of page you are currently viewing, and allows\n+you to go to related pages.  The following table describes the labels\n+on the navigation bar.  Note that not some labels (such as\n+[Parent]) are not displayed on all pages. </p>\n+\n+<table class="summary" border="1" cellspacing="0" cellpadding="3" width="100%">\n+<tr class="summary">\n+  <th>Label</th>\n+  <th>Highlighted when...</th>\n+  <th>Links to...</th>\n+</tr>\n+  <tr><td valign="top"><strong>[Parent]</strong></td>\n+      <td valign="top"><em>(never highlighted)</em></td>\n+      <td valign="top"> the parent of the current package </td></tr>\n+  <tr><td valign="top"><strong>[Package]</strong></td>\n+      <td valign="top">viewing a package</td>\n+      <td valign="top">the package containing the current object\n+      </td></tr>\n+  <tr><td valign="top"><strong>[Module]</strong></td>\n+      <td valign="top">viewing a module</td>\n+      <td valign="top">the module containing the current object\n+      </td></tr> \n+  <tr><td valign="top"><strong>[Class]</strong></td>\n+      <td valign="top">viewing a class </td>\n+      <td valign="top">the class containing the current object</td></tr>\n+  <tr><td valign="top"><strong>[Trees]</strong></td>\n+      <td valign="top">viewing the trees page</td>\n+      <td valign="top"> the trees page </td></tr>\n+  <tr><td valign="top"><strong>[Index]</strong></td>\n+      <td valign="top">viewing the index page</td>\n+      <td valign="top"> the index page </td></tr>\n+  <tr><td valign="top"><strong>[Help]</strong></td>\n+      <td valign="top">viewing the help page</td>\n+      <td valign="top"> the help page </td></tr>\n+</table>\n+\n+<p> The "<strong>show private</strong>" and "<strong>hide private</strong>" buttons below\n+the top navigation bar can be used to control whether documentation\n+for private objects is displayed.  Private objects are usually defined\n+as objects whose (short) names begin with a single underscore, but do\n+not end with an underscore.  For example, "<code>_x</code>",\n+"<code>__pprint</code>", and "<code>epydoc.epytext._tokenize</code>"\n+are private objects; but "<code>re.sub</code>",\n+"<code>__init__</code>", and "<code>type_</code>" are not.  However,\n+if a module defines the "<code>__all__</code>" variable, then its\n+contents are used to decide which objects are private. </p>\n+\n+<p> A timestamp below the bottom navigation bar indicates when each\n+page was last updated. 
</p>\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th bgcolor="#70b0f0" class="navbar-select"\n+          >&nbsp;&nbsp;&nbsp;Help&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table border="0" cellpadding="0" cellspacing="0" width="100%%">\n+  <tr>\n+    <td align="left" class="footer">\n+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009\n+    </td>\n+    <td align="right" class="footer">\n+      <a target="mainFrame" href="http://epydoc.sourceforge.net"\n+        >http://epydoc.sourceforge.net</a>\n+    </td>\n+  </tr>\n+</table>\n+\n+<script type="text/javascript">\n+  <!--\n+  // Private objects are initially displayed (because if\n+  // javascript is turned off then we want them to be\n+  // visible); but by default, we want to hide them.  So hide\n+  // them unless we have a cookie that says to show them.\n+  checkCookie();\n+  // -->\n+</script>\n+</body>\n+</html>\n'
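
The help page added above spells out epydoc's private-name rule: a name is treated as private when its short form starts with an underscore but does not end with one, unless the defining module's __all__ list says otherwise. A minimal Python sketch of that rule, for illustration only (it is not part of this changeset):

    def is_private(dotted_name, module_all=None):
        """Rough rendering of the private-name rule described in help.html.

        If the defining module declares __all__ (passed as module_all),
        membership there decides; otherwise the last name component is
        private when it starts with an underscore but does not end with
        one, so "_x" and "__pprint" are private while "__init__" and
        "type_" are not.
        """
        short = dotted_name.split(".")[-1]
        if module_all is not None:
            return short not in module_all
        return short.startswith("_") and not short.endswith("_")
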
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/identifier-index.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/identifier-index.html Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,267 @@\n+<?xml version="1.0" encoding="ascii"?>\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n+          "DTD/xhtml1-transitional.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n+<head>\n+  <title>Identifier Index</title>\n+  <link rel="stylesheet" href="epydoc.css" type="text/css" />\n+  <script type="text/javascript" src="epydoc.js"></script>\n+</head>\n+\n+<body bgcolor="white" text="black" link="blue" vlink="#204080"\n+      alink="#204080">\n+<!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th bgcolor="#70b0f0" class="navbar-select"\n+          >&nbsp;&nbsp;&nbsp;Indices&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table width="100%" cellpadding="0" cellspacing="0">\n+  <tr valign="top">\n+    <td width="100%">&nbsp;</td>\n+    <td>\n+      <table cellpadding="0" cellspacing="0">\n+        <!-- hide/show private -->\n+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"\n+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>\n+        <tr><td align="right"><span class="options"\n+            >[<a href="frames.html" target="_top">frames</a\n+            >]&nbsp;|&nbsp;<a href="identifier-index.html"\n+            target="_top">no&nbsp;frames</a>]</span></td></tr>\n+      </table>\n+    </td>\n+  </tr>\n+</table>\n+<table border="0" width="100%">\n+<tr valign="bottom"><td>\n+<h1 class="epydoc">Identifier Index</h1>\n+</td><td>\n+[\n+  A\n+  B\n+ <a href="#C">C</a>\n+ <a href="#D">D</a>\n+  E\n+  F\n+ <a href="#G">G</a>\n+  H\n+ <a href="#I">I</a>\n+  J\n+  K\n+ <a href="#L">L</a>\n+  M\n+  N\n+  O\n+  P\n+  Q\n+ <a href="#R">R</a>\n+ <a href="#S">S</a>\n+  T\n+  U\n+  V\n+  W\n+  X\n+  Y\n+  Z\n+ <a href="#_">_</a>\n+]\n+</td></table>\n+<table border="0" width="100%">\n+<tr valign="top"><td valign="top" width="1%"><h2 class="epydoc"><a name="C">C</a></h2></td>\n+<td valign="top">\n+<table class="link-index" width="100%" border="1">\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker.ConfigChecker-class.html#check">check()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.ConfigChecker-class.html">ConfigChecker</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker.Checker-class.html">Checker</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker-module.html">commons.Checker</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker.ConfigChecker-class.html">ConfigChecker</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker-module.html">commons.Checker</a>)</span></td>\n+</tr>\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker.IChecker-class.html#check">check()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.IChecker-class.html">IChecker</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker.CheckerException-class.html">CheckerException</a><br />\n+<span class="index-where">(in&nbsp;<a 
href="commons.Checker-module.html">commons.Checker</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker.ConfigException-class.html">ConfigException</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker-module.html">commons.Checker</a>)</span></td>\n+</tr>\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker-module.html">Checker</a><br />\n+<span class="index-where">(in&nbsp;commons)</span></td>\n+<td width="33%" class="link-index"><a href="commons.IComponentWrapper.IComponentWrapper-class.html#clean">clean()<'..b'\n+<table class="link-index" width="100%" border="1">\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker.Checker-class.html#setLogger">setLogger()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.Checker-class.html">Checker</a>)</span></td>\n+<td width="33%" class="link-index">&nbsp;</td>\n+<td width="33%" class="link-index">&nbsp;</td>\n+</tr>\n+<tr><td class="link-index">&nbsp;</td><td class="link-index">&nbsp;</td><td class="link-index">&nbsp;</td></tr>\n+</table>\n+</td></tr>\n+<tr valign="top"><td valign="top" width="1%"><h2 class="epydoc"><a name="_">_</a></h2></td>\n+<td valign="top">\n+<table class="link-index" width="100%" border="1">\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker.Checker-class.html#__init__">__init__()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.Checker-class.html">Checker</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker.ConfigException-class.html#__init__">__init__()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.ConfigException-class.html">ConfigException</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker._Logger-class.html">_Logger</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker-module.html">commons.Checker</a>)</span></td>\n+</tr>\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker.CheckerException-class.html#__init__">__init__()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.CheckerException-class.html">CheckerException</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker._Logger-class.html#__init__">__init__()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker._Logger-class.html" onclick="show_private();">_Logger</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker._Logger-class.html#_Logger__single">_Logger__single</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker._Logger-class.html" onclick="show_private();">_Logger</a>)</span></td>\n+</tr>\n+<tr>\n+<td width="33%" class="link-index"><a href="commons.Checker.ConfigChecker-class.html#__init__">__init__()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.ConfigChecker-class.html">ConfigChecker</a>)</span></td>\n+<td width="33%" class="link-index"><a href="commons.Checker.Checker-class.html#_handle">_handle()</a><br />\n+<span class="index-where">(in&nbsp;<a href="commons.Checker.Checker-class.html">Checker</a>)</span></td>\n+<td width="33%" class="link-index">&nbsp;</td>\n+</tr>\n+</table>\n+</td></tr>\n+</table>\n+<br /><br /><!-- ==================== NAVIGATION BAR ==================== -->\n+<table class="navbar" border="0" width="100%" cellpadding="0"\n+       bgcolor="#a0c0ff" cellspacing="0">\n+  <tr valign="middle">\n+\n+  <!-- Tree link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        
href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Index link -->\n+      <th bgcolor="#70b0f0" class="navbar-select"\n+          >&nbsp;&nbsp;&nbsp;Indices&nbsp;&nbsp;&nbsp;</th>\n+\n+  <!-- Help link -->\n+      <th>&nbsp;&nbsp;&nbsp;<a\n+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>\n+\n+      <th class="navbar" width="100%"></th>\n+  </tr>\n+</table>\n+<table border="0" cellpadding="0" cellspacing="0" width="100%%">\n+  <tr>\n+    <td align="left" class="footer">\n+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009\n+    </td>\n+    <td align="right" class="footer">\n+      <a target="mainFrame" href="http://epydoc.sourceforge.net"\n+        >http://epydoc.sourceforge.net</a>\n+    </td>\n+  </tr>\n+</table>\n+\n+<script type="text/javascript">\n+  <!--\n+  // Private objects are initially displayed (because if\n+  // javascript is turned off then we want them to be\n+  // visible); but by default, we want to hide them.  So hide\n+  // them unless we have a cookie that says to show them.\n+  checkCookie();\n+  // -->\n+</script>\n+</body>\n+</html>\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/index.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/index.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+          "DTD/xhtml1-frameset.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title> API Documentation </title>
+</head>
+<frameset cols="20%,80%">
+  <frameset rows="30%,70%">
+    <frame src="toc.html" name="moduleListFrame"
+           id="moduleListFrame" />
+    <frame src="toc-everything.html" name="moduleFrame"
+           id="moduleFrame" />
+  </frameset>
+  <frame src="module-tree.html" name="mainFrame" id="mainFrame" />
+</frameset>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/module-tree.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/module-tree.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>Module Hierarchy</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th bgcolor="#70b0f0" class="navbar-select"
+          >&nbsp;&nbsp;&nbsp;Trees&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table width="100%" cellpadding="0" cellspacing="0">
+  <tr valign="top">
+    <td width="100%">&nbsp;</td>
+    <td>
+      <table cellpadding="0" cellspacing="0">
+        <!-- hide/show private -->
+        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
+        <tr><td align="right"><span class="options"
+            >[<a href="frames.html" target="_top">frames</a
+            >]&nbsp;|&nbsp;<a href="module-tree.html"
+            target="_top">no&nbsp;frames</a>]</span></td></tr>
+      </table>
+    </td>
+  </tr>
+</table>
+<center><b>
+ [ <a href="module-tree.html">Module Hierarchy</a>
+ | <a href="class-tree.html">Class Hierarchy</a> ]
+</b></center><br />
+<h1 class="epydoc">Module Hierarchy</h1>
+<ul class="nomargin-top">
+    <li> <strong class="uidlink"><a href="commons.Checker-module.html">commons.Checker</a></strong>: <em class="summary">Created on 9 avr.</em>    </li>
+    <li> <strong class="uidlink"><a href="commons.IComponentWrapper-module.html">commons.IComponentWrapper</a></strong>    </li>
+    <li> <strong class="uidlink"><a href="commons.IDataProcessor-module.html">commons.IDataProcessor</a></strong>    </li>
+</ul>
+<!-- ==================== NAVIGATION BAR ==================== -->
+<table class="navbar" border="0" width="100%" cellpadding="0"
+       bgcolor="#a0c0ff" cellspacing="0">
+  <tr valign="middle">
+
+  <!-- Tree link -->
+      <th bgcolor="#70b0f0" class="navbar-select"
+          >&nbsp;&nbsp;&nbsp;Trees&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Index link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
+
+  <!-- Help link -->
+      <th>&nbsp;&nbsp;&nbsp;<a
+        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
+
+      <th class="navbar" width="100%"></th>
+  </tr>
+</table>
+<table border="0" cellpadding="0" cellspacing="0" width="100%%">
+  <tr>
+    <td align="left" class="footer">
+    Generated by Epydoc 3.0.1 on Fri Apr 10 16:39:01 2009
+    </td>
+    <td align="right" class="footer">
+      <a target="mainFrame" href="http://epydoc.sourceforge.net"
+        >http://epydoc.sourceforge.net</a>
+    </td>
+  </tr>
+</table>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/redirect.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/redirect.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,38 @@
+<html><head><title>Epydoc Redirect Page</title>
+<meta http-equiv="cache-control" content="no-cache" />
+<meta http-equiv="expires" content="0" />
+<meta http-equiv="pragma" content="no-cache" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+<body>
+<script type="text/javascript">
+<!--
+var pages = ["commons.IComponentWrapper.IComponentWrapper-c", "commons.IDataProcessor.IDataProcessor-c", "commons.Checker.CheckerException-c", "commons.Checker.ConfigException-c", "commons.Checker.ConfigChecker-c", "commons.IComponentWrapper-m", "commons.Checker.IChecker-c", "commons.Checker.Checker-c", "commons.Checker._Logger-c", "commons.IDataProcessor-m", "commons.Checker-m"];
+var dottedName = get_anchor();
+if (dottedName) {
+    var target = redirect_url(dottedName);
+    if (target) window.location.replace(target);
+}
+// -->
+</script>
+
+<h3>Epydoc Auto-redirect page</h3>
+
+<p>When javascript is enabled, this page will redirect URLs of
+the form <tt>redirect.html#<i>dotted.name</i></tt> to the
+documentation for the object with the given fully-qualified
+dotted name.</p>
+<p><a id="message"> &nbsp; </a></p>
+
+<script type="text/javascript">
+<!--
+if (dottedName) {
+    var msg = document.getElementById("message");
+    msg.innerHTML = "No documentation found for <tt>"+
+                    dottedName+"</tt>";
+}
+// -->
+</script>
+
+</body>
+</html>
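
redirect.html resolves URLs of the form redirect.html#dotted.name through the pages list above and the redirect_url() helper defined in epydoc.js: every entry names a page plus a "-m" (module) or "-c" (class) suffix, the first entry whose page name is a prefix of the requested dotted name wins, and any leftover components become a fragment anchor. A Python rendering of that lookup, for illustration only (the shipped logic is the JavaScript in epydoc.js):

    def redirect_url(dotted_name, pages):
        # Each entry is "<pagename>-m" (module page) or "<pagename>-c" (class page).
        for entry in pages:
            pagename, pagetype = entry[:-2], entry[-1]
            if dotted_name[:len(pagename)] == pagename:
                url = pagename + ("-module.html" if pagetype == "m" else "-class.html")
                if len(dotted_name) > len(pagename):
                    # Remaining components point at an anchor inside the page.
                    url += "#" + dotted_name[len(pagename) + 1:]
                return url
        return None

    # e.g. redirect_url("commons.Checker.Checker.check", pages)
    #      -> "commons.Checker.Checker-class.html#check"
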
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/toc-commons.Checker-module.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/toc-commons.Checker-module.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>Checker</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<h1 class="toc">Module Checker</h1>
+<hr />
+  <h2 class="toc">Classes</h2>
+    <a target="mainFrame" href="commons.Checker.Checker-class.html"
+     >Checker</a><br />    <a target="mainFrame" href="commons.Checker.CheckerException-class.html"
+     >CheckerException</a><br />    <a target="mainFrame" href="commons.Checker.ConfigChecker-class.html"
+     >ConfigChecker</a><br />    <a target="mainFrame" href="commons.Checker.ConfigException-class.html"
+     >ConfigException</a><br />    <a target="mainFrame" href="commons.Checker.IChecker-class.html"
+     >IChecker</a><br />  <div class="private">
+    <a target="mainFrame" href="commons.Checker._Logger-class.html"
+     >_Logger</a><br />  </div>
+  <h2 class="toc">Variables</h2>
+    <a target="mainFrame" href="commons.Checker-module.html#DEFAULT_LOGGER_NAME"
+     >DEFAULT_LOGGER_NAME</a><br />    <a target="mainFrame" href="commons.Checker-module.html#LOG_FILE"
+     >LOG_FILE</a><br /><hr />
+<span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/toc-commons.IComponentWrapper-module.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/toc-commons.IComponentWrapper-module.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>IComponentWrapper</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<h1 class="toc">Module IComponentWrapper</h1>
+<hr />
+  <h2 class="toc">Classes</h2>
+    <a target="mainFrame" href="commons.IComponentWrapper.IComponentWrapper-class.html"
+     >IComponentWrapper</a><br /><hr />
+<span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/toc-commons.IDataProcessor-module.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/toc-commons.IDataProcessor-module.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>IDataProcessor</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<h1 class="toc">Module IDataProcessor</h1>
+<hr />
+  <h2 class="toc">Classes</h2>
+    <a target="mainFrame" href="commons.IDataProcessor.IDataProcessor-class.html"
+     >IDataProcessor</a><br /><hr />
+<span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/toc-everything.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/toc-everything.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>Everything</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<h1 class="toc">Everything</h1>
+<hr />
+  <h2 class="toc">All Classes</h2>
+    <a target="mainFrame" href="commons.Checker.Checker-class.html"
+     >commons.Checker.Checker</a><br />    <a target="mainFrame" href="commons.Checker.CheckerException-class.html"
+     >commons.Checker.CheckerException</a><br />    <a target="mainFrame" href="commons.Checker.ConfigChecker-class.html"
+     >commons.Checker.ConfigChecker</a><br />    <a target="mainFrame" href="commons.Checker.ConfigException-class.html"
+     >commons.Checker.ConfigException</a><br />    <a target="mainFrame" href="commons.Checker.IChecker-class.html"
+     >commons.Checker.IChecker</a><br />  <div class="private">
+    <a target="mainFrame" href="commons.Checker._Logger-class.html"
+     >commons.Checker._Logger</a><br />  </div>
+    <a target="mainFrame" href="commons.IComponentWrapper.IComponentWrapper-class.html"
+     >commons.IComponentWrapper.IComponentWrapper</a><br />    <a target="mainFrame" href="commons.IDataProcessor.IDataProcessor-class.html"
+     >commons.IDataProcessor.IDataProcessor</a><br />  <h2 class="toc">All Variables</h2>
+    <a target="mainFrame" href="commons.Checker-module.html#DEFAULT_LOGGER_NAME"
+     >commons.Checker.DEFAULT_LOGGER_NAME</a><br />    <a target="mainFrame" href="commons.Checker-module.html#LOG_FILE"
+     >commons.Checker.LOG_FILE</a><br /><hr />
+<span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/doc/toc.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/doc/toc.html Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="ascii"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+  <title>Table of Contents</title>
+  <link rel="stylesheet" href="epydoc.css" type="text/css" />
+  <script type="text/javascript" src="epydoc.js"></script>
+</head>
+
+<body bgcolor="white" text="black" link="blue" vlink="#204080"
+      alink="#204080">
+<h1 class="toc">Table&nbsp;of&nbsp;Contents</h1>
+<hr />
+  <a target="moduleFrame" href="toc-everything.html">Everything</a>
+  <br />
+  <h2 class="toc">Modules</h2>
+    <a target="moduleFrame" href="toc-commons.Checker-module.html"
+     onclick="setFrame('toc-commons.Checker-module.html','commons.Checker-module.html');"     >commons.Checker</a><br />    <a target="moduleFrame" href="toc-commons.IComponentWrapper-module.html"
+     onclick="setFrame('toc-commons.IComponentWrapper-module.html','commons.IComponentWrapper-module.html');"     >commons.IComponentWrapper</a><br />    <a target="moduleFrame" href="toc-commons.IDataProcessor-module.html"
+     onclick="setFrame('toc-commons.IDataProcessor-module.html','commons.IDataProcessor-module.html');"     >commons.IDataProcessor</a><br /><hr />
+  <span class="options">[<a href="javascript:void(0);" class="privatelink"
+    onclick="toggle_private();">hide&nbsp;private</a>]</span>
+
+<script type="text/javascript">
+  <!--
+  // Private objects are initially displayed (because if
+  // javascript is turned off then we want them to be
+  // visible); but by default, we want to hide them.  So hide
+  // them unless we have a cookie that says to show them.
+  checkCookie();
+  // -->
+</script>
+</body>
+</html>
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/TranslateInAllFramesAndReplaceStopByX.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/TranslateInAllFramesAndReplaceStopByX.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,26 @@
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.seq.BioseqUtils import BioseqUtils
+
+class TranslateInAllFramesAndReplaceStopByX(object):
+
+    def __init__(self):
+        self._inputFile = "" 
+        self._outputFile =  ""
+    
+    def setInputFile(self, input):
+        self._inputFile = input
+
+    def setOutputFile(self, output):
+        self._outputFile = output    
+        
+    def run(self):
+        """
+        Read a FASTA file of nucleotide sequences, translate every sequence in all six frames, and write the result (with stop codons replaced by 'X') to the output file.
+        """
+        if not FileUtils.isRessourceExists(self._inputFile):
+            raise Exception("Warning your input file %s does not exist!\n" % self._inputFile)
+        bioseqList = BioseqUtils.extractBioseqListFromFastaFile(self._inputFile)
+        bioseqListInAllFrames = BioseqUtils.translateBioseqListInAllFrames(bioseqList)
+        bioseqListTranslatedAndStopsReplace = BioseqUtils.replaceStopCodonsByXInBioseqList(bioseqListInAllFrames)
+        BioseqUtils.writeBioseqListIntoFastaFile(bioseqListTranslatedAndStopsReplace, self._outputFile)   
+                
\ No newline at end of file
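
The functional tests added below drive this class through its three-call interface; a minimal usage sketch (file names are placeholders, not files from this changeset):

    from commons.pyRepetUnit.fastaTranslation.allFrames.TranslateInAllFramesAndReplaceStopByX import TranslateInAllFramesAndReplaceStopByX

    translator = TranslateInAllFramesAndReplaceStopByX()
    translator.setInputFile("consensus_nt.fa")    # nucleotide FASTA to translate
    translator.setOutputFile("consensus_aa.fa")   # receives all six frames, stop codons written as 'X'
    translator.run()
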
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,23 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_TranslateAfastaFileInAllFrameAndReplaceStopsByX(unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFile = "./datas/ConsensusTestFile_nt.fsa"
+        self._outputFile = "./datas/PreprocessOutputFile"
+        self._prg = "translateAfastaFileInAllFrameAndReplaceStopsByX_script.py"
+        
+        
+    def testAcceptanceTranslateAfastaFileInAllFrameAndReplaceStopsByX_script(self):
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile )
+        cmd += " -o %s" % ( self._outputFile )
+        os.system( cmd )        
+        self.assertTrue(FileUtils.are2FilesIdentical( self._outputFile, "./datas/ConsensusTestFile_aaWithoutStop.fsa"))
+        os.system( "rm " + self._outputFile )
+               
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateInAllFramesAndReplaceStopByX.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateInAllFramesAndReplaceStopByX.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,24 @@
+import os
+import unittest
+from commons.pyRepetUnit.fastaTranslation.allFrames.TranslateInAllFramesAndReplaceStopByX import TranslateInAllFramesAndReplaceStopByX
+from commons.core.seq.Bioseq import Bioseq
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_TranslateInAllFramesAndReplaceStopByX(unittest.TestCase):
+    
+    def setUp(self):
+        self.bioseq = Bioseq()
+        self.preProcess = TranslateInAllFramesAndReplaceStopByX()
+        self._inputFile = "./datas/test_input_nt.fa" 
+        self._outputFile =  "./datas/PreProcessResult.fa" 
+        
+        
+    def testAcceptanceTranslateAfastaFileInAllFrame(self):
+        self.preProcess.setInputFile(self._inputFile)
+        self.preProcess.setOutputFile(self._outputFile)
+        self.preProcess.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._outputFile, "./datas/test_input_aa.fa"))
+        os.system("rm " + self._outputFile)
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,80 @@
+import os
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_TranslateAfastaFileInAllFrameAndReplaceStopsByX(unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFile = "dummyInputFile"
+        self._outputFile = "dummyOutputFile"
+        self._prg = "translateAfastaFileInAllFrameAndReplaceStopsByX_script.py"
+        
+        
+    def testTranslateAfastaFileInAllFrameAndReplaceStopsByX_script(self):
+        f = open(self._inputFile, "w")
+        f.write(">header1 description\n")
+        f.write("TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTC\n")
+        f.write("CGACTAATCAACAATATAATGCGAGTAGAGCTTGA\n")
+        f.write(">header2\n")
+        f.write("TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTA\n")
+        f.write("CGACTAATCAACAATATAATGCGAGTAGAGCTTGA")
+        f.close()
+        # expected fasta translated file
+        f = open("expectedTranslated.fa", "w")
+        f.write(">header1_1 description\n")
+        f.write("CGFXLISLXSQXFHVGVSWLRLINNIMRVEL\n")
+        f.write(">header1_2 description\n")
+        f.write("VASSXSVYDHNDFTXVSRGSDXSTIXCEXSL\n")
+        f.write(">header1_3 description\n")
+        f.write("WLLVDQFMITMISRRCLVAPTNQQYNASRAX\n")
+        f.write(">header1_4 description\n")
+        f.write("SSSTRIILLISRSHETPTXNHCDHKLINXKP\n")
+        f.write(">header1_5 description\n")
+        f.write("QALLALYCXLVGATRHLREIIVIINXSTRSH\n")
+        f.write(">header1_6 description\n")
+        f.write("KLYSHYIVDXSEPRDTYVKSLXSXTDQLEAT\n")
+        f.write(">header2_1\n")
+        f.write("CGFXLISLXSQXFHVGVSWLRLINNIMRVEL\n")
+        f.write(">header2_2\n")
+        f.write("VASSXSVYDHNDFTXVSRGYDXSTIXCEXSL\n")
+        f.write(">header2_3\n")
+        f.write("WLLVDQFMITMISRRCLVATTNQQYNASRAX\n")
+        f.write(">header2_4\n")
+        f.write("SSSTRIILLISRSHETPTXNHCDHKLINXKP\n")
+        f.write(">header2_5\n")
+        f.write("QALLALYCXLVVATRHLREIIVIINXSTRSH\n")
+        f.write(">header2_6\n")
+        f.write("KLYSHYIVDXSXPRDTYVKSLXSXTDQLEAT\n")
+        f.close()
+         
+        #normal launch
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile )
+        cmd += " -o %s" % ( self._outputFile )
+        os.system( cmd )        
+        self.assertTrue(FileUtils.are2FilesIdentical( self._outputFile, "expectedTranslated.fa"))
+        os.system( "rm " + self._outputFile )
+        
+        #verbose option
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile )
+        cmd += " -o %s" % ( self._outputFile )
+        cmd += " -v 1"
+        os.system( cmd )        
+        self.assertTrue(FileUtils.are2FilesIdentical( self._outputFile, "expectedTranslated.fa"))
+        os.system( "rm " + self._outputFile )
+        
+        #clean option
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile )
+        cmd += " -o %s" % ( self._outputFile )
+        cmd += " -c"
+        os.system( cmd )        
+        self.assertTrue( FileUtils.are2FilesIdentical( self._outputFile, "expectedTranslated.fa" ) )
+        self.assertFalse( FileUtils.isRessourceExists( self._inputFile ) )
+        os.system( "rm " + self._outputFile)
+        os.system("rm expectedTranslated.fa")
+        
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
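
The expected FASTA in the test above illustrates the behaviour under test: each input sequence yields six records suffixed _1 to _6, one per reading frame, with stop codons written as 'X'. A self-contained sketch of that transformation, for illustration only (the repository's BioseqUtils is the actual implementation, and details such as frame order and the handling of ambiguous bases may differ):

    BASES = "TCAG"
    # Standard genetic code, codons ordered T, C, A, G at each position.
    AMINO = ("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRR"
             "IIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG")
    CODON_TABLE = {a + b + c: AMINO[16 * i + 4 * j + k]
                   for i, a in enumerate(BASES)
                   for j, b in enumerate(BASES)
                   for k, c in enumerate(BASES)}

    def translate_frame(nt, frame):
        # Stop codons and unknown codons (e.g. containing 'N') both become 'X'.
        seq = nt[frame:]
        return "".join(CODON_TABLE.get(seq[i:i + 3], "X").replace("*", "X")
                       for i in range(0, len(seq) - 2, 3))

    def six_frame_translation(nt):
        nt = nt.upper()
        rc = nt.translate(str.maketrans("ACGT", "TGCA"))[::-1]
        return ([translate_frame(nt, f) for f in range(3)] +
                [translate_frame(rc, f) for f in range(3)])
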
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateInAllFramesAndReplaceStopByX.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateInAllFramesAndReplaceStopByX.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,65 @@
+import os
+import unittest
+from commons.pyRepetUnit.fastaTranslation.allFrames.TranslateInAllFramesAndReplaceStopByX import TranslateInAllFramesAndReplaceStopByX
+from commons.core.seq.Bioseq import Bioseq
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_TranslateInAllFramesAndReplaceStopByX(unittest.TestCase):
+    
+    def setUp(self):
+        self.bioseq = Bioseq()
+        self.preProcess = TranslateInAllFramesAndReplaceStopByX()
+        self._inputFile = "./datas/dummy.fa" 
+        self._outputFile =  "./datas/dummyoutput.fa" 
+        
+        
+    def testRun(self):
+        self.preProcess.setInputFile(self._inputFile)
+        self.preProcess.setOutputFile(self._outputFile)
+        # real fasta file
+        f = open(self._inputFile, "w")
+        f.write(">header1 description\n")
+        f.write("TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTC\n")
+        f.write("CGACTAATCAACAATATAATGCGAGTAGAGCTTGA\n")
+        f.write(">header2\n")
+        f.write("TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTA\n")
+        f.write("CGACTAATCAACAATATAATGCGAGTAGAGCTTGA")
+        f.close()
+        # expected fasta translated file
+        f = open("./datas/expectedTranslated.fa", "w")
+        f.write(">header1_1 description\n")
+        f.write("CGFXLISLXSQXFHVGVSWLRLINNIMRVEL\n")
+        f.write(">header1_2 description\n")
+        f.write("VASSXSVYDHNDFTXVSRGSDXSTIXCEXSL\n")
+        f.write(">header1_3 description\n")
+        f.write("WLLVDQFMITMISRRCLVAPTNQQYNASRAX\n")
+        f.write(">header1_4 description\n")
+        f.write("SSSTRIILLISRSHETPTXNHCDHKLINXKP\n")
+        f.write(">header1_5 description\n")
+        f.write("QALLALYCXLVGATRHLREIIVIINXSTRSH\n")
+        f.write(">header1_6 description\n")
+        f.write("KLYSHYIVDXSEPRDTYVKSLXSXTDQLEAT\n")
+        f.write(">header2_1\n")
+        f.write("CGFXLISLXSQXFHVGVSWLRLINNIMRVEL\n")
+        f.write(">header2_2\n")
+        f.write("VASSXSVYDHNDFTXVSRGYDXSTIXCEXSL\n")
+        f.write(">header2_3\n")
+        f.write("WLLVDQFMITMISRRCLVATTNQQYNASRAX\n")
+        f.write(">header2_4\n")
+        f.write("SSSTRIILLISRSHETPTXNHCDHKLINXKP\n")
+        f.write(">header2_5\n")
+        f.write("QALLALYCXLVVATRHLREIIVIINXSTRSH\n")
+        f.write(">header2_6\n")
+        f.write("KLYSHYIVDXSXPRDTYVKSLXSXTDQLEAT\n")
+        f.close()
+        self.preProcess.run()
+        self.assertTrue(FileUtils.are2FilesIdentical("./datas/expectedTranslated.fa", self._outputFile))
+        os.system("rm " + self._inputFile)
+        os.system("rm " + self._outputFile)
+        os.system("rm ./datas/expectedTranslated.fa")
+        # empty fasta file
+        #self.preProcess.translateAfastaFileInAllFrame()
+        #self.assertFalse(FileUtils.isRessourceExists(self._outputFile))
+
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_aaWithoutStop.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_aaWithoutStop.fsa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,48 @@
+>blumeria_Grouper_590_20:NoCat_1
+FRSYXNVXPTTXFVDLLVEGSLCNNEGXKDVNSTTLLNTDTRLLLIPIDSPCGISYXQQX
+TTLTQGIRNLRSPNTSLSGLLIIEPSRIDNVYPNIEIIESSTPKPLINNCHXSSPDLSLY
+SIISTLKNIC
+>blumeria_Grouper_590_20:NoCat_2
+FDXIEMYNPLLSSWTCWXREAYAIMKDKRMSIRLLFXTQIPDSFXSQXIARAGSPINSSK
+QRXHRVYAISVRQTPVCRDYXLSSLVGSTMCIPTXKXXKAQHPNPSSITAINHHLTYLCT
+PXFQHXRIFV
+>blumeria_Grouper_590_20:NoCat_3
+SIXLKCITHYLVRGLVGRGKLMQXXRIKGCQFDYSSKHRYQTPFNPNRXPVRDLLLTAVN
+NANTGYTQSPFAKHQSVGTTNYRAXXDRQCVSQHRNNRKLNTQTPHQXLPLIITXLISVL
+HNFNTXEYL
+>blumeria_Grouper_590_20:NoCat_4
+YKYSXVLKLWSTEISQVMINGSYXXGVWVLSFLLFLCWDTHCRSYXARXLVVPTDWCLAN
+GDCVYPVLALFTAVNRRSRTGYLLGLKGVWYLCLEEXSNXHPFILHYCISFPLPTSPRTK
+XWVIHFNXIE
+>blumeria_Grouper_590_20:NoCat_5
+TNILXCXNYGVQRXVRXXLMAVIDEGFGCXAFYYFYVGIHIVDPTRLDNXXSRQTGVWRT
+EIAYTLCXRCLLLLIGDPARAIYWDXKESGICVXKSSRIDILLSFIIAXASLYQQVHELS
+SGLYISIXSK
+>blumeria_Grouper_590_20:NoCat_6
+QIFXSVEIMEYRDKSGDDXWQLLMRGLGVELSIISMLGYTLSILLGSIISSPDRLVFGER
+RLRIPCVSVVYCCXXEIPHGLSIGIKRSLVSVFRRVVELTSFYPSLLHKLPSTNKSTNXV
+VGYTFQXDR
+>blumeria_Grouper_4152_12:NoCat_1
+GPAATNCAIAARKXTLMESTKFLSSPVESGXRAASTLLIAVEHIVNCAAFCLALRSSASI
+AASNSGGCVFFLPAASSAGGCGASGGFSAPAALVDGVSALFAGLTSAGRGSAGTVMRFSG
+ATTVFLGDPVGSKTCSASSGLEDAQTESARVFSFSTSASTISTSGTL
+>blumeria_Grouper_4152_12:NoCat_2
+DRPPRIARLLLASRLXWSLQNFCLHRWRAVEELPRHCXXPSSILXIARLFVLPCAPPLRS
+LQAILEGVYFFCLLLLVLEAAGPVEGFRLLLPXWMVFQPFSRASLLQVAVVLEPXCASRV
+RRRFFWGIRWDPRLALRLLGWRMPKLNRRGFLASRLLRPPYLPRGH
+>blumeria_Grouper_4152_12:NoCat_3
+TGRHELRDCCSQVDFDGVYKIFVFTGGERLKSCLDTANSRRAYCELRGFLSCPALLRFDR
+CKQFWRVCIFSACCFXCWRLRGQWRVFGSCCLSGWCFSPFRGPHFCRSRXCWNRDALLGC
+DDGFSGGSGGIQDLLCVFWAGGCPNXIGEGFXLLDFCVHHIYLGDI
+>blumeria_Grouper_4152_12:NoCat_4
+QCPRGRYGGRRSREAKNPRRFSLGILQPRRRRASLGSHRIPQKNRRRTREAHHGSSTTAT
+CRSEAREKGXNTIHXGSRSRKPSTGPAASSTRSSRQKKYTPSRIACSDRSGGAQGKTKSR
+AIHNMLDGYXQCRGSSSTALHRXRQKFCRLHQSLLASSNRAIRGGRS
+>blumeria_Grouper_4152_12:NoCat_5
+NVPEVDMVDAEVEKLKTLADSVWASSSPEDAEQVLDPTGSPRKTVVAPEKRITVPALPRP
+AEVRPAKRAETPSTKAAGAENPPLAPQPPALEAAGRKNTHPPELLAAIEAEERRARQKAA
+QFTICSTAISSVEAALQPLSTGEDKNFVDSIKVYLRAAIAQFVAAG
+>blumeria_Grouper_4152_12:NoCat_6
+MSPRXIWWTQKSRSXKPSPIQFGHPPAQKTQSKSWIPPDPPEKPSSHPRSASRFQHYRDL
+QKXGPRKGLKHHPLRQQEPKTLHWPRSLQHXKQQAEKIHTLQNCLQRSKRRSAGQDKKPR
+NSQYARRLLAVSRQLFNRSPPVKTKILXTPSKSTCEQQSRNSWRPV
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_nt.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_nt.fsa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,18 @@
+>blumeria_Grouper_590_20:NoCat
+TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA
+AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT
+ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA
+ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA
+CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC
+TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC
+TCCATAATTTCAACACTNAAGAATATTTGTA
+>blumeria_Grouper_4152_12:NoCat
+GGACCGGCCGCCACGAATTGCGCGATTGCTGCTCGCAAGTAGACTTTGATGGAGTCTACA
+AAATTTTTGTCTTCACCGGTGGAGAGCGGTTGAAGAGCTGCCTCGACACTGCTAATAGCC
+GTCGAGCATATTGTGAATTGCGCGGCTTTTTGTCTTGCCCTGCGCTCCTCCGCTTCGATC
+GCTGCAAGCAATTCTGGAGGGTGTGTATTTTTTCTGCCTGCTGCTTCTAGTGCTGGAGGC
+TGCGGGGCCAGTGGAGGGTTTTCGGCTCCTGCTGCCTTAGTGGATGGTGTTTCAGCCCTT
+TTCGCGGGCCTCACTTCTGCAGGTCGCGGTAGTGCTGGAACCGTGATGCGCTTCTCGGGT
+GCGACGACGGTTTTTCTGGGGGATCCGGTGGGATCCAAGACTTGCTCTGCGTCTTCTGGG
+CTGGAGGATGCCCAAACTGAATCGGCGAGGGTTTTTAGCTTCTCGACTTCTGCGTCCACC
+ATATCTACCTCGGGGACATTG
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_aa.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_aa.fa Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,258 @@\n+>blumeria_Grouper_28830_3_1\n+CGFXLISLXSQXFHVGVSWLRLINNIMRVELETNKTXIXFYNTGSPTYLNIPPHHVSQST\n+AHNLHGRVSINWDPQTIPLLKTSCPQLIXITSXLEILLWYHSNXILAEFLSVFLSCSSIR\n+RLPYHRLRTGCPSRTYAGNRFVPLYLLPNLYPPGDIKEXVLLLXQAATAHGPSXXGSILC\n+LRWRTRSTQSPGAXVRISAFSSWRXARILSLVAFILLTXERAIGAGCFLPSSSAQRRSSD\n+SVASRVYTGXEFSSSGGXVPKSKKXGLYPLGETLRLKFITASERAPASVILGESRRTLTS\n+MAFRTPFACSTLPFPLGLAGVVWTTCSCWLAXNRCASMPPKQPALSLIICFGFPKFWIIY\n+VSRGTVLAKSALGTKRDSSQPVAYSIALSKTWGPTXVXSIPHRVRGGAGTIGVKLPSNGS\n+GGFMSXQDLHFRTASXAARISSGCQNLRPNSRIVLPIDAWPWPSXTFTAVDTRSSYSKIS\n+SRVAAPDGPGASPTCCDGFFLMRCNLELYIAKLSRTQVAAIFRGSASPKVRNSVMHCRSM\n+RLSSCGCFISSISSSLSAPLWILEFSLSSVATFPSPSSLERNRTSQGRERKSARTFAFPR\n+RXIILILKCSIASKKRTILTGCSALVXIVRRDSWSVITVTSPPSTQXRQCCSDSKIPSNS\n+LSCAEXRCWVSFSFLEKXAMGCSTERSELLEHRMYGWSMTAPTNFWEASVSNMTGSLQSN\n+FFTTGDVVSASLILFHDXIDWSVRMTGQFLGPEDWSVSGPASWEKFLMRQRRXPAAFTNP\n+LISEVVVGIGLSLIAETFSGSAAMPCLEIWKPRKIPSETNSLHFEALRRRPXEANLEKIS\n+INRECNSAIESAQTATSSRSPATNGRNGSRTSSIKHWKVAGALHRPKGITSHSXTPKGVT\n+NAVFSEESLCIRICQNPFSRSRNAXKLCPAARXISSXAFGRGWACFFVILLSSLXSTQIR\n+QTVWLVWVSVFFWCXXQRCSPWSLTFAYELSRSCVAQESPRMACCCAGAISPRRGVLNWV\n+DNYPVSMRVFLSWESFIPFGGRDLSKSXRGIXLXYCREKLXCGMKGGLGAIXPLEYLKXX\n+RLFGLGEYXLXDXELVGQLSXEVAVCHLINIVAPVHGIKFVQVSHLLGMCHLGQLLETLX\n+IPESRMIQGDSSKETVQLKLHPXKLLGQGHQCLTFXKXXMYYKKSNIXRKTVKHNMLKSR\n+SQKKLKIGHRENELFVQKTFCVACHILPVEVVLLGVAQICPFLRPQSRYSGCPIHKFLHF\n+AFDIEWLQFRHRAAHLLFGLQGLFALDPNLLAQQGKXKXQALMNDRVVLQVLHIFYFSLI\n+GKFSXIYRXNATRCIXRLVHIPFVLLIPPALFSSTGSLSPSGLSFRNXSIRNACRELGMI\n+YLFLQCLVTQRRLAHTTLDQLWMRQMLEAFRLLTLRSELVHWTRNVXIHHPQVSTLXYRV\n+XLEPLDPQAPRWXVWGDYWKXWSRYVEVRVEYAASVYLXSEALPXHPKVGXRKLSGVVAQ\n+IVIESRRFLQLFVQMLSKATRVHHCRTVFXGYSLQLLPNRGVDXXLFFAPRXNLHPFPLQ\n+FXXPKQNMEAYVESSMIACGIIYSLCSHQQSAISIVVGIPRVFFYLPLLKGRADFQLRNA\n+KRNLGSTALFFALVQXHSMVRXLTGGINXXSHLCFAHHCLGWALFHRIAPRXRVFSXAET\n+RLAWLRVARRCLLKSQIAPPGMVFYFQMALLPKVERGLNFSKKFLFPRLHFYLLVWKLSR\n+SLQTFVXLEEKFVIGSLQTFDRFSWRDLLSLPLQLHXLRKNFRWLWFARQCDPSSSFPTE\n+HYSGRPXXLNRWXNAXKIWRRLKLGXPRIVRFSQQKYIILWLLVDQFMITMISRRCLVAP\n+TNQQYNASRAXDXXNVNFILXYRTANLFKYSPAPREPINSTQFARSGFHQLGST\n+>blumeria_Grouper_28830_3_2\n+VASSXSVYDHNDFTXVSRGSDXSTIXCEXSLRLIKREFXSIIQDRQLIXIFPRTTXANQQ\n+RTICTVGFPSIGIHKLSPFSKLAVHSLFKLQVNXKFYYGITQTKSLLNFXVSFYPAPAYG\n+GYHTIDYELGVLHVHMPGIGLCRCTCCLICIHPVILRSRYYYYDKLLRPMARHSRAPSCV\n+FAGVLAVPSLRGRKYVXVPFLPGGKPVFYPXLLSYYXRRSVLLVRGVSFRVLRPRGAPLI\n+LWLHGYILDKNFPPPGAECRRVRSKGYTRWVRLFDXNSLPRVRERQLVXFXENPGELXPV\n+WLSERHLPVRLCHFLLAXLVWYGQPAVVGXLEIVVLLCLRSSQHCHXXSALAFQSSGXYT\n+XAEAQCXRSLHWELSGIPASLLRTQLHXARLGVLLEYDLYPTVSEEVLVLXELSFPVTGL\n+EVSXADRTCIFGQLREPRGSVRGAKTYALIPAXYCRLTHGLGPHEHLRPSILDPHIPRFL\n+VGLRRQMDQVHRLLVATGSFXXDAIWNCTSQNYLGPKWQLYLGEVPLLRLEIRXCTAGRC\n+GXVRVGALFPPSQVLFLHHFGFXSFRCRQXLHSLPLLPXREIEHHRDGRENQREHLLFRD\n+GRXSXYXNALXHLKNGPFXRAARLWSEXFVEIHGQLSPLRRRLLPSDANVVVTVKSQVIP\n+SLVLNNAAGYRLVFLRSKLWDAALNDRNCWSIACTVGVXLHPRTFGKRQFPIXREVSNQI\n+FLQLVMLXALVXYFSMIESIGPSEXQGNFXVQRIGPXVALLVGRNFXXDNVGNQLHLQIL\n+LFPRXLLAXACLXSLKLFRGLLQCHVXKYGNQGRFPLKQIACTLKHXGGDHRKRIXKKXV\n+STVSAILPLSLPRQLHRPGALRQMDGMALELLPXNTGKXLVRCIGRKAXLAIHRHRKESQ\n+MLFSLKSLCAYVSAKIRFPGRGMHRSYVLLRAEFHLEHLVGDGRVSSXSCXALYSQHKYA\n+KRFGXSGLASFFGVNNRGAAHGPLHLRMNXAGHVLHKKVPGWLVVAPGLYRLGGEFXIGL\n+TITPFLCGCFCLGRVLSHLAGEIYRKVEGEFNFDIVGKNCNAEXRVGWVPFDHWNIXSSE\n+DFSVWESINYRIKNWLVNXVEKLQFAIXXTSXPQFMEXSLSKCHICWECAIWDNSXRHCE\n+FLSPEXFKVIRAKKLCSXNCILRNCWARATNAXHFRNNRCIIRSQIFRERLXNITCXRAV\n+VKKNXKLDIGKMSYSCKKHSVWLAISFPXRLSFWGWLKFALSYGHKVGIRAVLFINFFIS\n+HSIXNGFNFVIELLIFYLAFRAYLLLIQIFWLNKESRSDRLXXTIVWCYRFYIFFIFHXL\n+VSFPEFIAETRRGVFSVWCIFPLFFXFHRRCFHQPAVFRPVACLSAIEAFVTLVVNWVXF\n+ICFCNVLXRKDVLP
IRPWISYGCVKCWKLFAFXRXDRNWCIGLGMFEYIILKCPPCNTAC\n+DLNHSIRKLRAGEFGEIIGNDGPGTWKXESNMLPQYISSLRPYHDIRRXAKESFPESSRK\n+LXLSLVVSCSYSCKCCQRRPEYIIVEQYFKDTVYSSCPIEVLTDDCSSRRDEIFIHFHCN\n+SDSPSRIWKHMLNHQXLHVVXYIHYVLISKVQFPLWWEYRACFSICLFXRAVRIFSFGTR\n+KEIWEVPLYFSLLFSNIQWFVNXLAVXINSPIFVLLIIAXVGRCFIELRRGKGSFLRLKH\n+DWPGCGSRGAACXKVKXLLPVWFFIFRWPCFRKLKGDXIFRRSFCFHVFISTYXFGNXVV\n+HCRHLFDXKRNLXLGLFKPLIDSLGEIFCHCLCSFISFGKIFVGFGLPGNVILHQAFQLS\n+IIQADHDDXIGGEMRKKFGDVLNWDDLELXGSLNKNIXFCGFXLISLXSQXFHVGVSWLR\n+QINNIMRVELETNKTXISFYNT'..b'YATHQLLSSVLWKKFXSHSVHLSQGSWTMXLSGQTQWQNCTHGXYLFFLDS\n+LPMVSSSMLQSAGYLFQRESSLVSIFLNMALQQTPKKFQRSETGLCQQLPRKXEDLXMQL\n+VTYVVSLKISPNXQGHSRTNPLDLKIALSFXRTNRFNHGKVSNXRLQHHQLXKNLIGDFP\n+SYWKLTLPKSSWVQSYSNRTCDAPTIPIVQCCIPXLTSQENXTIPSSVIQHKRGNYLGFY\n+CHYNIGVTGXKAATXRXXLTMNLYELFRPKPSSPSEWSVFXMLXSILISGLSTVAEKQMF\n+SLIFSPVPVMFYFSLGKKGKGMXLLTTTKTLESKVVQKENLRWRKXSTHTNSTASTCSAS\n+PNFXPXERHFPXIXLPLGSEIILRCTIPNCISSKRTRRNKXAMHLVHLAPQPYXKSWNMR\n+IXYRRPXMFMRAKAMRQSAILCGNXGVGFGTPNXSSRLTKLSENASPVSSXNLQTRYWEA\n+XLLXYQHLLXHGGVXIILKXDPKSCSMQLSTQQAGWNPAXFPMQTSLTLCLCSRILSRTL\n+ESQSRLSVTMLAASEAXKHNDFKLANNCRLSIPHQLGQEEMAKSNRQMAFXKPYWSEFSW\n+ILLKLHXLALSHSRXXISIEESHPAGIALTSYSSALSPRRRKILIQYIPVKPQNQRSASG\n+PKNSEGNTPHQXHAPTLVVXKQLGIKYGLTSRKKRHLYVLTPPETGYCEYASEDTRWSPT\n+MTGHGPXQLVIIIIPTPXYHRVDTNXATSTTAQTYSRHMYVKDTQFVVYGMVAAVCWSRI\n+ERHSKIQQGFSLSDTIIKFLINLXFKXAVDSXFXEGGXFVDPNXWKPDRANCALLIGSRG\n+AGEYLNKLAILYYRXKFTFYXSQALLALYCXLVGATRHLREIIVIINXSTRSH\n+>blumeria_Grouper_14770_3_1\n+GXTDTVSLVXSCGRKNXLETSVLRCMVLSKTELIRVRLRVRISYQLRFLTSIYQSPWIIS\n+YDPCRCKSIYVQIYEPEIXSXIHVCRXXIRETFNLHLIHNHLMECXCEINANVRXTFREE\n+CADDNDIGRXIXXPNXPDLQMSYPWHSRIYNISTPLLTLHRGXIHKXKTVSIFIVISHLM\n+LVWXTRQWLYXYQKLKCINRCALVSVSLLYIYLRDFXLKEHVIVXASRLQFFPLXNDKRC\n+RYVXKHHLVLDCEQVQWQPYCQSRLLLGCCLQGIGEVRAKSRYIVWMRALLPCTQLRRLN\n+>blumeria_Grouper_14770_3_2\n+VRPIQXVSYEVVEGRIDWKQVYXDVWFXVKLSXXEXDCVCAYLINXGFXPVFINLHGSXV\n+MIPVDVNLYTYRYMSQKYKAKYMCVGNRSEKPLTCIXYIIIXWNASVKXMPMYVKLSVRN\n+VLMTMIXEGEFDNLISRTYKXVILGIPGYTIFQHPSSLCIEDRYISEKLLVYLLSFLIXC\n+WYGRHASGCTSTKNXSAXIGVPWSRXAFCTYICEIFSXRNMXXCKRAVFNSFPCKMISDV\n+DMFRSIIXFWIVSKFNGSRIVSRDCCWDAVSKESVKSAQNPDILFGCERYCHVLSFAGXM\n+>blumeria_Grouper_14770_3_3\n+LDRYSKSRMKLWKEELTGNKCIEMYGSEXNXADKSEIACAHILSTEVFDQYLSISMDHKL\n+XSLXMXIYIRTDIXARNIKLNTCVSVIDPRNLXPAFDTXSSDGMLVXNKCQCTLNFPXGM\n+CXXQXYRKVNLITXLAGLTNELSLAFQDIQYFNTPPHFASRIDTXVKNCXYIYCHFSSNA\n+GMVDTPVVVLVPKTEVHKSVCLGLGKPFVHISARFLAEGTCNSVSEPSSILSLVKXXAMS\n+ICLEASFSFGLXASSMAAVLSVEIAAGMLSPRNRXSPRKIQIYCLDASAIAMYSASQVE\n+>blumeria_Grouper_14770_3_4\n+HSTCEAEYMAIALASKQYIWILRGLHRFLGDSIPAAISTDNTAAIELAHNPKLNDASKHI\n+DIAYHFTRERIEDGSLTLLHVPSAKNLADICTKGLPRPRHTDLCTSVFGTSTTTGVSTIP\n+ALDEKXQXIYXQFFTYVSILDAKXGGVLKYCISWNAKDNSFVSPANXVIKFTFLYHCHQH\n+IPHGKFNVHWHLFHTSIPSDDYVSNAGXRFLGSITDTHVFSFIFLAHISVRIXIYIYRDH\n+NLXSMEIDKYWSKTSVDKICAHAISLLSAQFYSEPYISIHLFPVNSSFHNFIRDLLYRSN\n+>blumeria_Grouper_14770_3_5\n+IQPAKLSTWQXRSHPNNISGFCADFTDSLETASQQQSRLTIRLPLNLLTIQNXMMLLNIS\n+TSLIILQGKELKTARLHYYMFLQLKISQIYVQKAYRDQGTPIYALQFLVLVQPLACLPYQ\n+HXMRNDNKYTNSFSLMYLSSMQSEEGCXNIVYPGMPRITHLXVRLIRLSNSPSYIIVIST\n+FLTESLTYIGIYFTLAFHQMIMYQMQVKGFSDLLPTHMYLALYFWLIYLYVYRFTSTGII\n+TYDPWRLINTGQKPQLIRYAHTQSHSYQLSFTQNHTSQYTCFQSILPSTTSYETYCIGLT\n+>blumeria_Grouper_14770_3_6\n+FNLRSXVHGNSARIQTIYLDFARTSPIPWRQHPSSNLDXQYGCHXTCSQSKTKXCFXTYR\n+HRLSFYKGKNXRRLAYTITCSFSXKSRRYMYKRLTETKAHRFMHFSFWYXYNHWRVYHTS\n+IRXEMTINILTVFHLCIYPRCKVRRGVEILYILECQGXLICKSGXLGYQIHLPISLSSAH\n+SSRKVXRTLAFISHXHSIRXLCIKCRLKVSRIYYRHTCIXLYISGSYICTYIDLHLQGSX\n+LMIHGDXXILVKNLSXXDMRTRNLTLISSVLLRTIHLNTLVSSQFFLPQLHTRLTVSVX\n+>blumeria_Grouper_1717_12_1\n+ISSILPXRASK
SXSNSFRSLSSVTGSNSLQNPMMKRSACAERVWNILPCSARNLVATIXG\n+DGVCWCGELRXLASXIGSTNCCVGCCDGGCGPLRTISTSXRVEHXVGRGGQEPASAEDWG\n+GMSEXGLWQCEIVDGIRRAXR\n+>blumeria_Grouper_1717_12_2\n+FPVSCPEGPVSPDRIVFEVSRQXLVPTACRTQXXSALLVRSASGISCPVRRGIWSRRFEA\n+TEFVGAVSFDDSPRRLALQIVVWVVVMAVVARXGPFRRLDASNIELGGGAKSQPLRRTGA\n+AXVSEVCGSVRSXMVYGALEG\n+>blumeria_Grouper_1717_12_3\n+FQYLALKGQXVLIEXFSKSLVSDWFQQLAEPNDEALCLCGARLEYPALFGEEFGRDDLRR\n+RSLLVRXASMTRLVDWLYKLLCGLLXWRLWPVKDHFDVLTRRTLSWEGGPRASLCGGLGR\n+HEXVRFVAVXDRRWYTARLK\n+>blumeria_Grouper_1717_12_4\n+PFKRAVYHLRSHTATNLTHSCRPSPPQRLALGPPSQLNVRRVKTSKWSLTGHNRHHNNPH\n+NNLXSQSTRRVIEAHRTNKLRRLKSSRPNSSPNRAGYSRRAPHKQSASSLGSASCWNQSL\n+TRDFENYSIRTYWPFRARYWK\n+>blumeria_Grouper_1717_12_5\n+PSSAPYTIYDLTLPQTSLTHAAPVLRRGWLLAPPPNSMFDASRRRNGPXRATTAITTTHT\n+TICRANLRGESSKLTAPTNSVASNRRDQIPRRTGQDIPDALRTSRALHHWVLQAVGTSHX\n+RETSKTIRSGLTGPSGQDTGN\n+>blumeria_Grouper_1717_12_6\n+LQARRIPSTISHCHKPHSLMPPQSSAEAGSWPPLPTQCSTRQDVEMVLNGPQPPSQQPTQ\n+QFVEPIYEASHRSSPHQQTPSPQIVATKFLAEQGRIFQTRSAQAERFIIGFCKLLEPVTD\n+ERLRKLFDQDLLALQGKILE\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_nt.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_nt.fa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,124 @@
+>blumeria_Grouper_28830_3
+TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTC
+CGACTAATCAACAATATAATGCGAGTAGAGCTTGAGACTAATAAAACGTGAATTTNATTC
+TATAATACAGGATCGCCAACTTATTTAAATATTCCCCCGCACCACGTGAGCCAATCAACA
+GCGCACAATTTGCACGGTCGGGTTTCCATCAATTGGGATCCACAAACTATCCCCCTTCTC
+AAAACTAGCTGTCCACAGCTTATTTAAATTACAAGTTAATTAGAAATTTTATTATGGTAT
+CACTCAAACTAAATCCTTGCTGAATTTTTGAGTGTCTTTCTATCCTGCTCCAGCATACGG
+CGGCTACCATACCATAGACTACGAACTGGGTGTCCTTCACGTACATATGCCGGGAATAGG
+TTTGTGCCGTTGTACTTGTTGCCTAATTTGTATCCACCCGGTGATATTAAGGAGTAGGTA
+TTATTATTATGACAAGCTGCTACGGCCCATGGCCCGTCATAGTAGGGCTCCATCTTGTGT
+CTTCGCTGGCGTACTCGCAGTACCCAGTCTCCGGGGGCGTAAGTACGTATAAGTGCCTTT
+TCTTCCTGGAGGTAAGCCCGTATTTTATCCCTAGTTGCTTTCATACTACTAACGTAGGAG
+CGTGCTATTGGTGCGGGGTGTTTCCTTCCGAGTTCTTCGGCCCAGAGGCGCTCCTCTGAT
+TCTGTGGCTTCACGGGTATATACTGGATAAGAATTTTCCTCCTCCGGGGGCTGAGTGCCG
+AAGAGTAAGAAGTAAGGGCTATACCCGCTGGGTGAGACTCTTCGATTGAAATTCATTACC
+GCGAGTGAGAGAGCGCCAGCTAGTGTAATTTTAGGAGAATCCAGGAGAACTCTGACCAGT
+ATGGCTTTCAGAACGCCATTTGCCTGTTCGACTTTGCCATTTCCTCTTGGCCTAGCTGGT
+GTGGTATGGACAACCTGCAGTTGTTGGCTAGCTTGAAATCGTTGTGCTTCTATGCCTCCG
+AAGCAGCCAGCATTGTCACTGATAATCTGCTTTGGCTTTCCAAAGTTCTGGATAATATAC
+GTGAGCAGAGGCACAGTGTTAGCGAAGTCTGCATTGGGAACTAAGCGGGATTCCAGCCAG
+CCTGTTGCGTACTCAATTGCATTGAGCAAGACTTGGGGTCCTACTTGAGTATGATCTATA
+CCCCACCGTGTCAGAGGAGGTGCTGGTACTATAGGAGTTAAGCTTCCCAGTAACGGGTCT
+GGAGGTTTCATGAGCTGACAGGACTTGCATTTTCGGACAGCTTCGTGAGCCGCGAGGATC
+AGTTCGGGGTGCCAAAACCTACGCCCTAATTCCCGCATAGTATTGCCGATTGACGCATGG
+CCTTGGCCCTCATGAACATTTACGGCCGTCGATACTAGATCCTCATATTCCAAGATTTCT
+AGTAGGGTTGCGGCGCCAGATGGACCAGGTGCATCGCCTACTTGTTGCGACGGGTTCTTT
+TTGATGAGATGCAATTTGGAATTGTACATCGCAAAATTATCTCGGACCCAAGTGGCAGCT
+ATATTTAGGGGAAGTGCCTCTCCTAAGGTTAGAAATTCGGTGATGCACTGCAGGTCGATG
+CGGTTGAGTTCGTGTGGGTGCTTTATTTCCTCCATCTCAAGTTCTCTTTCTGCACCACTT
+TGGATTCTAGAGTTTTCGTTGTCGTCAGTAGCTACATTCCCTTCCCCTTCTTCCCTAGAG
+AGAAATAGAACATCACAGGGACGGGAGAGAAAATCAGCGAGAACATTTGCTTTTCCGCGA
+CGGTAGATAATCCTGATATTAAAATGCTCTATAGCATCTAAAAAACGGACCATTCTGACG
+GGCTGCTCGGCTTTGGTCTGAATAGTTCGTAGAGATTCATGGTCAGTTATCACCGTTACG
+TCGCCGCCTTCTACCCAGTGACGCCAATGTTGTAGTGACAGTAAAATCCCAAGTAATTCC
+CTCTCTTGTGCTGAATAACGCTGCTGGGTATCGTTTAGTTTTCTTGAGAAGTAAGCTATG
+GGATGCAGCACTGAACGATCGGAATTGTTGGAGCATCGCATGTACGGTTGGAGTATGACT
+GCACCCACGAACTTTTGGGAAGCGTCAGTTTCCAATATGACGGGAAGTCTCCAATCAAAT
+TTTTTTACAACTGGTGATGTTGTAAGCGCTAGTTTGATACTTTTCCATGATTGAATCGAT
+TGGTCCGTCAGAATGACAGGGCAATTTTTAGGTCCAGAGGATTGGTCCGTGAGTGGCCCT
+GCTAGTTGGGAGAAATTTTTAATGAGACAACGTAGGTAACCAGCTGCATTTACAAATCCT
+CTTATTTCCGAGGTAGTTGTTGGCATAGGCCTGTCTCTGATCGCTGAAACTTTTTCGGGG
+TCTGCTGCAATGCCATGTTTAGAAATATGGAAACCAAGGAAGATTCCCTCTGAAACAAAT
+AGCCTGCACTTTGAAGCATTGAGGAGGAGACCATAGGAAGCGAATCTAGAAAAAATAAGT
+ATCAACCGTGAGTGCAATTCTGCCATTGAGTCTGCCCAGACAGCTACATCGTCCAGGAGC
+CCTGCGACAAATGGACGGAATGGCTCTAGAACTTCTTCCATAAAACACTGGAAAGTAGCT
+GGTGCGTTGCATAGGCCGAAAGGCATAACTAGCCATTCATAGACACCGAAAGGAGTCACA
+AATGCTGTTTTCTCTGAAGAGTCTTTGTGCATACGTATCTGCCAAAATCCGTTTTCCAGG
+TCGAGGAATGCATAGAAGTTATGTCCTGCTGCGCGCTGAATTTCATCTTGAGCATTTGGT
+AGGGGATGGGCGTGTTTCTTCGTAATCTTGTTAAGCTCTCTATAGTCAACACAAATACGC
+CAAACGGTTTGGTTAGTCTGGGTTAGCGTCTTTTTTTGGTGTTAATAACAGAGGTGCAGC
+CCATGGTCCCTTACATTTGCGTATGAATTGAGCCGGTCATGTGTTGCACAAGAAAGTCCC
+CGGATGGCTTGTTGTTGCGCCGGGGCTATATCGCCTCGGAGGGGAGTTTTAAATTGGGTT
+GACAATTACCCCGTTTCTATGCGGGTGTTTCTGTCTTGGGAGAGTTTTATCCCATTTGGC
+GGGAGAGATTTATCGAAAAGTTGAAGGGGAATTTAACTTTGATATTGTAGGGAAAAACTG
+TAATGCGGAATGAAGGGTGGGCTGGGTGCCATTTGACCATTGGAATATTTGAAGTAGTGA
+AGACTTTTCGGTTTGGGAGAGTATTAATTATAGGATTAAGAATTGGTTGGTCAATTAAGT
+TGAGAAGTTGCAGTTTGCCATTTGATAAACATCGTAGCCCCAGTTCATGGAATAAAGTTT
+GTCCAAGTGTCACATCTGCTGGGAATGTGCCATCTGGGACAACTCCTAGAGACACTGTGA
+ATTCCTGAGTCCAGAATGATTCAAGGTGATTCGAGCAAAGAAACTGTGCAGTTAAAATTG
+CATCCTTAGAAATTGTTGGGCCAGGGCCACCAATGCCTGACATTTTAGAAATAATAGATG
+TATTATAAGAAGTCAAATATTTAGAGAAAGACTGTGAAACATAACATGCTGAAGAGCCGT
+AGTCAAAAAAAACTGAAAATTGGACATAGGGAAAATGAGTTATTCGTGCAAAAAACATTC
+TGTGTGGCTTGCCATATCCTTCCCGTAGAGGTTGTCCTTTTGGGGGTGGCTCAAATTTGC
+CCTTTCCTACGGCCACAAAGTAGGTATTCGGGCTGTCCTATTCATAAATTTCTTCATTTC
+GCATTCGATATAGAATGGCTTCAATTTCGTCATCGAGCTGCTCATCTTCTATTTGGCCTT
+CAGGGCTTATTTGCTCTTGATCCAAATCTTCTGGCTCAACAAGGAAAGTAGAAGTGACAG
+GCTTTGATGAACGATCGTGTGGTGTTACAGGTTTTACATATTTTTTATTTTTCATTGATT
+GGTAAGTTTTCCTGAATTTATCGCTGAAACGCGACGAGGTGTATTTAGCGTTTGGTGCAT
+ATTCCCTTTGTTCTTTTGATTCCACCGGCGCTGTTTTCATCAACCGGCAGTCTTTCGCCC
+AGTGGCCTGTCTTTCCGCAATTGAAGCATTCGTAACGCTTGTCGTGAATTGGGTATGATT
+TATCTGTTTCTGCAATGTCTTGTGACGCAAAGACGTCTTGCCCATACGACCTTGGATCAG
+CTATGGATGCGTCAAATGCTGGAAGCTTTTCGCCTTTTGACGCTGAGATCGGAATTGGTG
+CATTGGACTCGGAATGTTTGAATACATCATCCTCAAGTGTCCACCTTGTAATACCGCGTG
+TGACTTGAACCACTCGATCCGCAAGCTCCGCGCTGGTGAGTTTGGGGAGATTATTGGAAA
+TGATGGTCCAGGTACGTGGAAGTGAGAGTCGAATATGCTGCCTCAGTATATCTCTAGTCT
+GAGGCCCTACCATGACATCCGAAGGTAGGCTAAAGAAAGCTTTCCGGAGTCGTCGCGCAA
+ATTGTAATTGAGTCTCGTCGTTTCTTGCAGTTATTCGTGCAAATGTTGTCAAAGGCGACC
+CGAGTACATCATTGTCGAACAGTATTTTAAGGATACAGTCTACAGCTCCTGCCCAATCGA
+GGTGTTGACTGATGATTGTTCTTCGCGCCGAGATGAAATCTTCATCCATTTCCATTGCAA
+TTCTGATAGCCCAAGCAGAATATGGAAGCATATGTTGAATCATCAATGATTGCATGTGGT
+ATAATATATTCATTATGTTCTCATCAGCAAAGTGCAATTTCCATTGTGGTGGGAATACCG
+CGCGTGTTTTTCTATTTGCCTCTTTTAAAGGGCCGTGCGGATTTTCAGCTTCGGAACGCG
+AAAAGAAATCTGGGAAGTACCGCTTTATTTTTCGCTCTTGTTCAGTAACATTCAATGGTT
+CGTTAACTGACTGGCGGTATAAATTAATAGTCCCATCTTTGTTTTGCTCATCATTGCTTA
+GGTTGGGCGTTGTTTCATAGAATTGCGCCGAGGTAAAGGGTCTTTTCTTAGGCTGAAACA
+CGATTGGCCTGGTTGCGGGTCGCGAGGCGCTGCCTGCTGAAAAGTCAAATAGCTCCTCCC
+GGTATGGTTTTTTATTTTCAGATGGCCCTGCTTCCGAAAGTTGAAAGGGGACTGAATTTT
+TCGAAGAAGTTTTTGTTTCCACGTCTTCATTTCTACTTACTAGTTTGGAAATTAAGTCGT
+TCATTGCAGACATTTGTTTGACTAGAAGAGAAATTTGTGATTGGGTCTCTTCAAACCTTT
+GATCGATTCTCTTGGAGAGATCTTTTGTCACTGCCTTTGCAGCTTCATTAGCTTCGGAAA
+AATTTTCGTTGGCTTTGGTTTGCCCGGCAATGTGATCCTTCATCAAGCTTTCCAACTGAG
+CATTATTCAGGCCGACCATGATGATTGAATAGGTGGTGAAATGCGTAAAAAATTTGGAGA
+CGTCTTAAATTGGGATGACCTAGAATTGTGAGGTTCTCTCAACAAAAATATATAATTTTG
+TGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCG
+ACAAATCAACAATATAATGCGAGTAGAGCTTGAGACTAATAAAACGTGAATTTCATTCTA
+TAATACAGGACCGCCAACTTATTTAAATATTCCCCCGCACCACGTGAGCCAATCAACAGC
+ACACAATTTGCACGGTCGGGTTTCCATCAATTGGGATCCACA
+>blumeria_Grouper_14770_3
+GGTTAGACCGATACAGTAAGTCTCGTATGAAGTTGTGGAAGGAAGAATTGACTGGAAACA
+AGTGTATTGAGATGTATGGTTCTGAGTAAAACTGAGCTGATAAGAGTGAGATTGCGTGTG
+CGCATATCTTATCAACTGAGGTTTTTGACCAGTATTTATCAATCTCCATGGATCATAAGT
+TATGATCCCTGTAGATGTAAATCTATATACGTACAGATATATGAGCCAGAAATATAAAGC
+TAAATACATGTGTGTCGGTAATAGATCCGAGAAACCTTTAACCTGCATTTGATACATAAT
+CATCTGATGGAATGCTAGTGTGAAATAAATGCCAATGTACGTTAAACTTTCCGTGAGGAA
+TGTGCTGATGACAATGATATAGGAAGGTGAATTTGATAACCTAATTAGCCGGACTTACAA
+ATGAGTTATCCTTGGCATTCCAGGATATACAATATTTCAACACCCCTCCTCACTTTGCAT
+CGAGGATAGATACATAAGTGAAAAACTGTTAGTATATTTATTGTCATTTCTCATCTAATG
+CTGGTATGGTAGACACGCCAGTGGTTGTACTAGTACCAAAAACTGAAGTGCATAAATCGG
+TGTGCCTTGGTCTCGGTAAGCCTTTTGTACATATATCTGCGAGATTTTTAGCTGAAGGAA
+CATGTAATAGTGTAAGCGAGCCGTCTTCAATTCTTTCCCTTGTAAAATGATAAGCGATGT
+CGATATGTTTAGAAGCATCATTTAGTTTTGGATTGTGAGCAAGTTCAATGGCAGCCGTAT
+TGTCAGTCGAGATTGCTGCTGGGATGCTGTCTCCAAGGAATCGGTGAAGTCCGCGCAAAA
+TCCAGATATATTGTTTGGATGCGAGCGCTATTGCCATGTACTCAGCTTCGCAGGTTGAAT
+G
+>blumeria_Grouper_1717_12
+ATTTCCAGTATCTTGCCCTGAAGGGCCAGTAAGTCCTGATCGAATAGTTTTCGAAGTCTC
+TCGTCAGTGACTGGTTCCAACAGCTTGCAGAACCCAATGATGAAGCGCTCTGCTTGTGCG
+GAGCGCGTCTGGAATATCCTGCCCTGTTCGGCGAGGAATTTGGTCGCGACGATTTGAGGC
+GACGGAGTTTGTTGGTGCGGTGAGCTTCGATGACTCGCCTCGTAGATTGGCTCTACAAAT
+TGTTGTGTGGGTTGTTGTGATGGCGGTTGTGGCCCGTTAAGGACCATTTCGACGTCTTGA
+CGCGTCGAACATTGAGTTGGGAGGGGGGGCCAAGAGCCAGCCTCTGCGGAGGACTGGGGC
+GGCATGAGTGAGTGAGGTTTGTGGCAGTGTGAGATCGTAGATGGTATACGGCGCGCTTGA
+AGGG
+
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/fastaTranslation/allFrames/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/fastaTranslation/allFrames/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+from commons.pyRepetUnit.fastaTranslation.allFrames.TranslateInAllFramesAndReplaceStopByX import TranslateInAllFramesAndReplaceStopByX
+from commons.core.utils.FileUtils import FileUtils
+import os
+import sys
+import getopt
+
+#------------------------------------------------------------------------------
+
+def help():
+
+    """
+    Give the command-line parameters.
+    """
+
+    print ""
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the nucleotidic input file (format='fasta')"
+    print "     -o: name of the output file (default=inFileName+'_aa')"
+    print "     -v: verbose (default=0/1/2)"
+    print "     -c: clean"
+    print ""
+
+#------------------------------------------------------------------------------
+
+def main():
+
+    inFileName = ""
+    outFileName = ""
+    verbose = 0
+    clean = False
+
+    try:
+        opts = getopt.getopt(sys.argv[1:],"hi:o:v:c")[0]
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+        elif o == "-c":
+            clean = True
+            
+    if inFileName == "":
+        print "*** Error: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "beginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    if outFileName == "":
+        outFileName = "%s_aa" % ( inFileName )
+    
+    iTIAFARSBX = TranslateInAllFramesAndReplaceStopByX()
+    iTIAFARSBX.setInputFile( inFileName )
+    iTIAFARSBX.setOutputFile( outFileName )
+    iTIAFARSBX.run( )
+    
+    if clean == True:
+        os.remove( inFileName )
+
+    if verbose > 0:
+        if FileUtils.isRessourceExists( outFileName ) and not(FileUtils.isEmpty( outFileName )):
+            print "%s finished successfully" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+        else:
+            print "warning %s execution failed" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+
+    return 0
+
+#------------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
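For orientation, a minimal invocation of the script above would look like the following (the file names are placeholders; the option letters come from its getopt string and help() text):

    python translateAfastaFileInAllFrameAndReplaceStopsByX_script.py -i repeats_nt.fa -o repeats_aa -v 1

This translates repeats_nt.fa in all reading frames, replaces stop codons by X and writes the result to repeats_aa (the default output name would be repeats_nt.fa_aa); adding -c removes the input file afterwards.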
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/HmmpfamClusterComponent.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/HmmpfamClusterComponent.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,91 @@
+import os
+from pyRepet.launcher.Launcher import HmmpfamLauncher
+
+
+class HmmpfamClusterComponent( object ):
+    
+    def __init__(self):
+        self._component = None 
+        self._profilDatabank = ""
+        self._resultDir = ""
+        self._queryDir = ""
+        self._tmpDir = ""
+        self._jobDb = None
+        self._SGEQueue = ""
+        self._SGEGroupId = "" 
+        
+    def loadConfig( self, config ):
+        self._profilDatabank = config.get( "profil_search","ProfilDatabank" )
+        self._queryDir = config.get( "hmmer_config","query_dir" )
+        self._tmpDir = config.get( "hmmer_config", "tmp_dir" )
+        self._resultDir = config.get( "hmmer_config", "result_dir" )    
+        self._SGEQueue = config.get( "sge_config", "queue" )
+        self._SGEGroupId = config.get( "sge_config", "groupid" )
+        
+    def run(self):
+        params = {"param":"--informat FASTA -E 10", 
+                  "profilDB": self._profilDatabank, 
+                  "outputDir": self._resultDir,
+                  "query": self._queryDir,
+                  "job_table":"jobs", 
+                  "queue" : self._SGEQueue, 
+                  "groupid" : self._SGEGroupId,
+                  "tmpDir": self._tmpDir,
+                  "cDir" : "."}
+        self._component = HmmpfamLauncher(self._jobDb, params)
+        self._component.run()
+        
+    def clean (self): 
+        dirToClean = self._tmpDir
+        os.system("rm  " + dirToClean + "/*")
+        dirToClean = self._resultDir
+        os.system("rm  " + dirToClean + "/*")
+        
+    def setProfilDatabank (self, profilDatabank):
+        self._profilDatabank = profilDatabank    
+
+    def getProfilDatabank(self):
+        return self._profilDatabank    
+
+    def setQueryDir(self, input):
+        self._queryDir = input
+         
+    def getQueryDir( self ):
+        return self._queryDir
+
+    def setSubjectDir(self, input):
+        self._subjectDir = input
+         
+    def getSubjectDir( self ):
+        return self._subjectDir
+    
+    def setSGEQueue(self, queue):
+        self._SGEQueue = queue
+        
+    def getSGEQueue( self ):
+        return self._SGEQueue
+        
+    def setSGEGrouId(self, id):
+        self._SGEGroupId = id    
+    
+    def getSGEGroupId( self ):
+        return self._SGEGroupId
+    
+    def setTmpDir(self, tmpDir):
+        self._tmpDir = tmpDir
+
+    def getTmpDir( self ):
+        return self._tmpDir
+    
+    def setResultDir(self, wkDir):
+        self._resultDir = wkDir
+        
+    def getResultDir(self):
+        return self._resultDir
+    
+    def setJobDb (self, jobDb):
+        self._jobDb = jobDb
+        
+    def getJobDb (self):
+        return self._jobDb
+     
\ No newline at end of file
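A rough sketch of how HmmpfamClusterComponent is driven (the config file name and the job database object are assumptions; the section and option names are the ones read in loadConfig above):

    import ConfigParser
    config = ConfigParser.ConfigParser()
    config.readfp(open("hmmpfam_cluster.cfg"))  # must define [profil_search], [hmmer_config] and [sge_config]
    component = HmmpfamClusterComponent()
    component.loadConfig(config)
    component.setJobDb(jobDb)  # jobDb: the job-table database object handed to HmmpfamLauncher
    component.run()            # launches hmmpfam on the cluster via HmmpfamLauncher
    component.clean()          # empties tmp_dir and result_dir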
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/LaunchPreProcessHmmpfamPostProcessNotInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/LaunchPreProcessHmmpfamPostProcessNotInParallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,122 @@
+import os
+from pyRepet.launcher.programLauncher import *
+from pyRepet.util.file.FileUtils import *
+import commons.pyRepetUnit.fastaTranslation.allFrames.TranslateInAllFramesAndReplaceStopByX
+import commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing
+import ConfigParser 
+
+class LaunchPreProcessHmmpfamPostProcessNotInParallel:
+
+    def __init__( self ): 
+          
+        #for global analysis
+        self._configFile = ""
+        self._inputFile = ""
+        self._outputFile = ""
+        self.fileUtils = FileUtils()
+        self._programLauncherInstance = programLauncher()
+        #specific to PreProcess
+        self.bioseq = Bioseq()
+        self.preProcess = commons.pyRepetUnit.fastaTranslation.allFrames.TranslateInAllFramesAndReplaceStopByX.TranslateInAllFramesAndReplaceStopByX()        
+        self._outputFilePreprocess =  "" 
+        #specific to PostProcess
+        self.hmmpfamOutputProcess = commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing.HmmpfamOutputProcessing()       
+
+    def _cleanTemporaryFiles(self, outputFilePreProcess, outputFileHmmpfam):        
+        os.remove( outputFilePreProcess )       
+        os.remove( outputFileHmmpfam )       
+        
+    def _launchPostProcess(self, outputFileHmmpfam):
+        return self.hmmpfamOutputProcess.readHmmOutputsAndWriteAlignFile(outputFileHmmpfam, self._outputFile)
+
+
+    def _launchHmmpfam(self, outputFilePreprocess):
+        config = ConfigParser.ConfigParser()
+        config.readfp(open(self._configFile))
+        self._programLauncherInstance.reset(outputFilePreprocess)
+        outputFileHmmpfam = outputFilePreprocess + ".hmmpfamOut"
+        self._programLauncherInstance.setOutputFileName(outputFileHmmpfam)
+        self._programLauncherInstance.launchHmmpfam(evalFilter=config.get("profil_search", "Evalue"), inputFormat=config.get("profil_search", "InputFormat"), profilDatabank=config.get("profil_search", "ProfilDatabank"))
+        return outputFileHmmpfam
+
+
+    def _launchHmmpfamPreProcess(self):
+        self.preProcess.setInputFile(self._inputFile)
+        outputFilePreprocess = self._inputFile + ".translated"
+        self.preProcess.setOutputFile(outputFilePreprocess)
+        self.preProcess.run()
+        return outputFilePreprocess
+          
+    def setInputFile( self, input ):
+        self._inputFile = input
+
+    def setOutputFile( self, output ):
+        self._outputFile = output
+            
+    def setConfigFile ( self, configFile ):
+        self._configFile = configFile
+    
+    def checkInputFile(self):
+        return self._checkFileExistsAndNotEmpty( self._inputFile )
+    
+    def checkConfigFile(self):
+        return self._checkFileExistsAndNotEmpty( self._configFile )
+    
+    def _checkFileExistsAndNotEmpty(self, fileName):
+        fileUtils = FileUtils() 
+        if fileUtils.isRessourceExists(fileName) and not fileUtils.isFileEmpty(fileName):
+            return 1
+        return 0
+    
+    def run(self):
+        
+        if not self.checkInputFile():
+            print "Warning: input file " + self._inputFile + " is missing or empty\n"
+            return 0
+        if not self.checkConfigFile():
+            print "Warning: config file " + self._configFile + " is missing or empty\n"
+            return 0
+        outputFilePreProcess = self._launchHmmpfamPreProcess()    
+        outputFileHmmpfam = self._launchHmmpfam(outputFilePreProcess)
+        self._launchPostProcess(outputFileHmmpfam)
+        self._cleanTemporaryFiles(outputFilePreProcess, outputFileHmmpfam)
+        
+        
+if __name__ == "__main__":                 
+    main() 
+          
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if __name__ == "__main__":                 
+    main() 
\ No newline at end of file
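A minimal, non-parallel driver for the class above (file names are placeholders; the config file must provide the [profil_search] options Evalue, InputFormat and ProfilDatabank read in _launchHmmpfam):

    launcher = LaunchPreProcessHmmpfamPostProcessNotInParallel()
    launcher.setInputFile("consensus_nt.fa")
    launcher.setOutputFile("consensus_vs_profiles.align")
    launcher.setConfigFile("hmmpfam.cfg")
    launcher.run()  # translate to protein, run hmmpfam, convert the report to .align, remove intermediates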
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/check/OldDetectFeatureConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/check/OldDetectFeatureConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,57 @@
+'''
+Created on 14 Apr. 2009
+
+@author: oinizan
+'''
+import re
+from commons.core.checker.IChecker import IChecker
+from commons.core.checker.OldConfigChecker import ConfigChecker
+from commons.core.checker.ConfigException import ConfigException
+from commons.core.checker.CheckerException import CheckerException
+
+class DetectFeatureConfigChecker (IChecker):
+    '''
+    Checker for the [detect_features] configuration section: verifies the te_hmmer and te_blrn
+    options and, when te_hmmer is set to yes, the hmmer-specific options te_hmm_profiles and
+    te_hmmer_evalue.
+    '''
+    def __init__(self):
+        '''
+        Constructor
+        '''
+        self._dict = {"te_hmmer" : "", 
+                      "te_blrn" : ""                                  
+        }
+        self._specifiqueDict = {"te_hmmer_evalue" : "",
+                                "te_hmm_profiles" : ""                
+        }
+
+    def _appendMessageToConfigExceptionMessage(self, messages):
+        appendedMessages = []
+        for msg in messages:
+            if (re.match("\[detect_features\] - No option 'te_hmm_profiles' in section: 'detect_features'", msg)):
+                appendedMessages.append(msg + " whereas te_hmmer is set")
+            
+            if (re.match("\[detect_features\] - No option 'te_hmmer_evalue' in section: 'detect_features'", msg)):
+                appendedMessages.append(msg + " whereas te_hmmer is set - Default value will be set")
+
+        return appendedMessages
+
+        
+    def check(self, configFile):
+        chk = ConfigChecker("detect_features", self._dict)
+        try :
+            chk.check(configFile)
+        except ConfigException, e :
+            raise e
+
+        if self._dict["te_hmmer"] == "yes":                  
+            isErrorOccured = False
+            try:
+                chk = ConfigChecker("detect_features", self._specifiqueDict)
+                chk.check(configFile)                
+            except ConfigException, e:
+                appendedMessages = self._appendMessageToConfigExceptionMessage(e.messages) 
+                raise ConfigException("", appendedMessages)    
+                        
+        
+if __name__ == "__main__":                 
+    pass
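For reference, a [detect_features] section that satisfies the checker above with hmmer switched on looks like this (values are placeholders; only the option names come from the two dictionaries in the class):

    [detect_features]
    te_hmmer: yes
    te_blrn: no
    te_hmm_profiles: profiles.hmm
    te_hmmer_evalue: 10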
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/check/test/Test_OldDetectFeaturesConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/check/test/Test_OldDetectFeaturesConfigChecker.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,138 @@
+import unittest
+import re
+import os
+from commons.pyRepetUnit.hmmer.check.OldDetectFeatureConfigChecker import DetectFeatureConfigChecker
+from commons.core.checker.ConfigException import ConfigException
+
+class Test_DetectFeaturesConfigChecker(unittest.TestCase):
+
+    def setUp(self):
+        self._detectFeatureConfigChecker = DetectFeatureConfigChecker()
+
+    def testWithDefaultLogger (self):
+        lineFound = False
+        try:
+            self._detectFeatureConfigChecker.check("dummyConfig")
+        except ConfigException, e:
+            for msg in e.messages:
+                if (re.match("CONFIG FILE not found.*", msg)):
+                    lineFound = True
+        self.assertTrue(lineFound)
+        
+    def testWithNoConfigFile(self):
+        lineFound = False
+        try:
+            self._detectFeatureConfigChecker.check("dummyConfig")
+        except ConfigException, e:
+            for msg in e.messages:
+                if (re.match("CONFIG FILE not found.*", msg)):
+                    lineFound = True
+        self.assertTrue(lineFound)
+
+    def testWithNoSectionInConfigFile(self):
+        config = open("config.cfg", "w");
+        config.close()
+        lineFound = False
+        try:
+            self._detectFeatureConfigChecker.check("config.cfg")
+        except ConfigException, e:
+            for msg in e.messages:
+                if (re.match("\[detect_features\] section not found.*", msg)):
+                    lineFound = True
+        self.assertTrue(lineFound)
+        os.remove("config.cfg")
+        
+    def testMissingOptionsInConfig (self):
+        dict = {}
+        MockConfigFile("config.cfg", dict)
+        hmmProfilsFound = False
+        TE_BLRnFound = False
+        try :
+            self._detectFeatureConfigChecker.check("config.cfg")
+        except ConfigException, e:
+            for msg in e.messages:
+                if (re.match("\[detect_features\] - No option 'te_hmmer' in section: 'detect_features'", msg)):
+                    hmmProfilsFound = True
+                if (re.match("\[detect_features\] - No option 'te_blrn' in section: 'detect_features'", msg)):
+                    TE_BLRnFound = True
+        self.assertTrue(hmmProfilsFound)
+        self.assertTrue(TE_BLRnFound)
+        os.remove("config.cfg")
+   
+    def testOptionsIfHmmProfilsSetAtYESInConfig (self):
+        
+        dict = {
+                "TE_HMMER" : "yes", 
+                "TE_BLRn" : "no"                
+        }
+        
+        profilDatabankFound = False
+        evalueFound = False
+        MockConfigFile("config.cfg", dict)
+        try :
+            self._detectFeatureConfigChecker.check("config.cfg")
+        except ConfigException, e:
+            for msg in e.messages:
+                print msg
+                if (re.match("\[detect_features\] - No option 'te_hmm_profiles' in section: 'detect_features' whereas te_hmmer is set", msg)):
+                    profilDatabankFound = True   
+                if (re.match("\[detect_features\] - No option 'te_hmmer_evalue' in section: 'detect_features' whereas te_hmmer is set - Default value will be set", msg)):
+                    evalueFound = True
+        self.assertTrue(profilDatabankFound)
+        self.assertTrue(evalueFound)
+        os.remove("config.cfg")
+        
+            
+    def testOptionsIfHmmProfilsSetAtNOInConfig (self):
+        profilDatabankFound = False
+        inputFormatFound = False
+        evalueFound = False
+        dict = {
+                "TE_HMMER" : "no"                
+        }
+        MockConfigFile("config.cfg", dict)
+        try :
+            self._detectFeatureConfigChecker.check("config.cfg")
+        except ConfigException, e:
+            for msg in e.messages:
+                if (re.match(".+INFO \[detect_features\] - No option 'te_hmm_profiles' in section: 'detect_features' whereas te_hmmer is set", msg)):
+                    profilDatabankFound = True   
+                if (re.match(".+INFO \[detect_features\] - No option 'te_hmmer_evalue' in section: 'detect_features' whereas te_hmmer is set - Default value will be set", msg)):
+                    evalueFound = True
+        self.assertFalse(profilDatabankFound)
+        self.assertFalse(inputFormatFound)
+        self.assertFalse(evalueFound)
+        os.remove("config.cfg")
+        
+    def testTE_BLRnAndTE_hmmerAtNoInConfig (self):
+        
+        dict = {
+                "TE_HMMER" : "no", 
+                "TE_BLRn" : "no"                
+        }
+        
+        exceptionNotRaised = True
+        MockConfigFile("config.cfg", dict)
+        try :
+            self._detectFeatureConfigChecker.check("config.cfg")
+        except ConfigException:
+            exceptionNotRaised = False
+        
+        self.assertTrue(exceptionNotRaised)
+        
+        os.remove("config.cfg")
+        
+class MockConfigFile:
+    
+    def __init__ (self, fileName, optionsDict):
+        
+        self._fileName = fileName
+        config = open(fileName, "w");
+        config.write("[detect_features]\n")
+        for key in optionsDict.keys():
+            config.write(key + ":" + optionsDict[key] + "\n")
+        config.close()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/check/test/detectFeatureConfigCheckerTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/check/test/detectFeatureConfigCheckerTestSuite.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,22 @@
+'''
+Created on 29 May 2009
+
+@author: oinizan
+'''
+import unittest
+import sys
+import TestDetectFeaturesConfigChecker
+
+
+
+
+def main():
+
+        commonsTestSuite = unittest.TestSuite() 
+        commonsTestSuite.addTest(unittest.makeSuite(TestDetectFeaturesConfigChecker.TestDetectFeaturesConfigChecker,'test'))
+        runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+        runner.run(commonsTestSuite)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/HmmOutput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/HmmOutput.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,30 @@
+## List of Hmmpfam or Hmmscan outputs (each output is itself a list of fields).
+#
+class HmmOutput( object ):
+   
+    
+    def __init__( self ):
+        self._hmmpfamOutput = []
+    
+    ## append an output to the list of outputs
+    #    
+    # @param list the output record (a list of fields) to append
+    #
+    def append( self, list ):
+        self._hmmpfamOutput.append(list)
+    
+    ## return the number of outputs in the list
+    def len (self):
+        return len(self._hmmpfamOutput)
+    
+    ## return the output stored at position index in the list of outputs
+    # 
+    # @param index index of the output to return
+    # 
+    def get(self, index):
+        return self._hmmpfamOutput[index]
+    
+    ## return the list of output
+    def getList(self):
+        return self._hmmpfamOutput
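The container's interface in a nutshell (the record shown mirrors one hmmpfam domain hit from the test data in this changeset: query name, query start/end, profile name, profile start/end, E-value, score):

    output = HmmOutput()
    output.append(["7LES_DROME", "437", "522", "fn3", "1", "84", "8.8e-15", "48.3"])
    output.len()      # 1
    output.get(0)     # the record appended above
    output.getList()  # the whole list of records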
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/HmmOutputProcessing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/HmmOutputProcessing.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,39 @@
+import os
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
+
+## Common processing of hmmscan and hmmpfam outputs; subclasses provide readHmmOutput()
+#
+class HmmOutputProcessing (object):
+   
+    ## write an align file from a HmmOutput object
+    #
+    # @param pfamOutput HmmOutput object, i.e. a list of output records holding the required data
+    # @param fout handle of the align file to write
+    #        
+    def writeHmmOutputToAlignFile( self, pfamOutput, fout ):    
+        for item in pfamOutput.getList():
+            for i in item:
+                fout.write(i + "\t")
+            fout.write("0\n")
+    
+    ## read an output file from a hmm profile search program and write the corresponding .align file
+    #
+    # @param inputFile name of the hmm search output file to parse
+    # @param outputFile name of the .align file to write
+    #
+    def readHmmOutputsAndWriteAlignFile( self, inputFile, outputFile ):
+        if not os.path.exists(inputFile):
+            print "Warning: input file " + inputFile + " does not exist!\n"
+            return
+        if outputFile == "":
+            print "Warning: an output file name must be specified!\n"
+            return
+        file2parse = open( inputFile )
+        pfamOutput = HmmOutput()
+        fout = open(outputFile, "w")
+        while pfamOutput != None:      
+            pfamOutput = self.readHmmOutput(file2parse)             
+            if pfamOutput != None:  
+                self.writeHmmOutputToAlignFile(pfamOutput, fout)
+        fout.close()
+        file2parse.close() 
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
+import re
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing
+
+## Concrete implementation of the hmmpfam-specific output parsing
+#
+class HmmpfamOutputProcessing (HmmOutputProcessing):
+    
+    ## read one hmmpfam query report and return an array with the results needed to build a .align file
+    #    
+    # @param hmmerOutputFile handle of the file generated by hmmpfam
+    #
+    def readHmmOutput( self, hmmerOutputFile ):
+        #Tested with HMMER 2.3.2 on Linux (Debian) and on Unix (Solaris)
+        line = hmmerOutputFile.readline()
+        tabResult = None
+        aRecup = 0        
+        if (line == ""):            
+            tabResult = None            
+            return tabResult
+        tabResult = HmmOutput()
+        while not re.match("Alignments of top-scoring domains:.*", line):
+            line = hmmerOutputFile.readline()                   
+            m = re.match("Query sequence:\s*(.*)", line)
+            if m:                    
+                seqName = m.group(1)
+            # warning: do not rely on end-of-line characters, they differ between operating systems
+            if re.match("--------\s+-------\s+-----\s+-----\s+-----\s+-----\s+-----\s+-------.*", line):
+                aRecup = 1                
+            m = re.match("(\S+)\s+\d+\/\d+\s+(\d+)\s+(\d+)\s+.+\s+(\d+)\s+(\d+)\s+.+\s+(.+\.\d)\s+(.+)", line)
+            if m and aRecup == 1:                
+                tabResult.append([seqName, m.group(2), m.group(3), m.group(1), m.group(4), m.group(5), m.group(7), m.group(6)]) 
+            if line == "":            
+                tabResult = None            
+                return tabResult
+        return tabResult
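Combined with readHmmOutputsAndWriteAlignFile inherited from HmmOutputProcessing, turning a raw hmmpfam report into a .align file is a two-liner (paths are hypothetical); each output line carries query name, query start/end, profile name, profile start/end, E-value, score and a trailing 0:

    processor = HmmpfamOutputProcessing()
    processor.readHmmOutputsAndWriteAlignFile("sample.hmmpfamOut", "sample.align")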
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/HmmscanOutputProcessing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/HmmscanOutputProcessing.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+import re
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing
+    
+## Concrete implementation of the hmmscan-specific output parsing
+#
+class HmmscanOutputProcessing (HmmOutputProcessing):
+    
+    ## read a tabular per-domain hmmscan output from a file and return an array with the results needed to build a .align file
+    #    
+    # @param hmmerOutputFile handle of the file generated by hmmscan
+    #
+    def readHmmOutput( self, hmmerOutputFile ):
+        #Tested with HMMER 3 on Linux
+        line = hmmerOutputFile.readline()
+        tabResult = None
+        if (line == ""):            
+            tabResult = None
+            return tabResult
+        tabResult = HmmOutput()
+        
+        while line != "":
+            line = hmmerOutputFile.readline()
+            if not(re.match("^#.*$", line)) and line != "":            
+                lLines = re.split("\s+", line)
+                seqName = lLines[3]
+                profilName = lLines[0]
+                iValue = lLines[12]
+                score = lLines[13]
+                queryCoordStart = lLines[17]
+                queryCoordEnd = lLines[18]
+                subjectCoordStart = lLines[15]
+                subjectCoordEnd = lLines[16]
+                tabResult.append([seqName, queryCoordStart, queryCoordEnd, profilName, subjectCoordStart, subjectCoordEnd, iValue, score]) 
+        return tabResult
+
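As a reading aid, the whitespace-split indices used above map onto the per-domain tabular hmmscan columns: lLines[3] is the query name, lLines[0] the profile name, lLines[12] the independent E-value, lLines[13] the domain score, lLines[15]/[16] the hmm (profile) coordinates and lLines[17]/[18] the alignment coordinates on the query. Parsing the tabular test file added below therefore yields records such as the one asserted in TestHmmscanOutputProcessing:

    f = open("hmmscanOutputTab.txt")
    records = HmmscanOutputProcessing().readHmmOutput(f)
    f.close()
    records.get(0)  # ["Polinton-1_DY:classII:Polinton_4", "2742", "2766", "rve", "50", "78", "2.4e+03", "-3.3"]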
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmOutputProcessing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmOutputProcessing.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+import os
+import unittest
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing import HmmpfamOutputProcessing
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmscanOutputProcessing import HmmscanOutputProcessing
+from commons.core.utils.FileUtils import FileUtils
+
+class TestHmmOutputProcessing(unittest.TestCase):
+    
+    def setUp(self):
+        self.hmmOutput = HmmOutputProcessing()
+        self._inputFile = "./datas/test_hmmpfam_output" 
+        self._outputFile =  "./datas/dummyoutput.align" 
+        self._inputFileScan2 = "./datas/hmmscanOutput"
+        self._outputFileScan2 = "./datas/hmmscanOutput.align"       
+        
+    def test_writeHmmOutputToAlignFile(self):
+        output = open("./datas/output.align", "w")
+        output.write("SAR:satellite:satellite_5\t3\t9\tDUF751\t5\t11\t5.4\t5.4\t0\n")
+        output.write("SAR:satellite:satellite_5\t17\t23\tDUF751\t5\t11\t5.1\t5.5\t0\n")
+        output.write("SAR:satellite:satellite_5\t3\t21\tClenterotox\t173\t191\t0.66\t7.4\t0\n")
+        output.close()
+        pfamOutput = HmmOutput()
+        pfamOutput.append(["SAR:satellite:satellite_5", "3", "9", "DUF751", "5", "11", "5.4", "5.4"])
+        pfamOutput.append(["SAR:satellite:satellite_5", "17", "23", "DUF751", "5", "11", "5.1", "5.5"])
+        pfamOutput.append(["SAR:satellite:satellite_5", "3", "21", "Clenterotox", "173", "191", "0.66", "7.4"])
+        fout = open("./datas/outputGen.align", "w")
+        self.hmmOutput.writeHmmOutputToAlignFile(pfamOutput, fout)
+        fout.close()
+        self.assertTrue(FileUtils.are2FilesIdentical("./datas/output.align", "./datas/outputGen.align"))
+        os.remove("./datas/output.align")
+        os.remove("./datas/outputGen.align")
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmpfamOutputProcessing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmpfamOutputProcessing.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,58 @@
+import os
+import unittest
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmpfamOutputProcessing import HmmpfamOutputProcessing
+from commons.core.utils.FileUtils import FileUtils
+
+class TestHmmpfamOutputProcessing(unittest.TestCase):
+    
+    def setUp(self):
+        self.fileUtils = FileUtils()
+        self.hmmpfamOutputProcess = HmmpfamOutputProcessing()
+        self._inputFile = "./datas/Outputhmmpfam" 
+        self._inputFile2 = "./datas/test_hmmpfam_output"
+        self._outputFile = "./datas/dummyAlignFile.align"
+        
+    def testReadHmmOutput(self):
+        file = open( self._inputFile )
+        pfamOutput = self.hmmpfamOutputProcess.readHmmOutput(file)
+        self.assertEqual(["7LES_DROME", "437", "522", "fn3", "1", "84", "8.8e-15", "48.3"], pfamOutput.get(0))
+        self.assertEqual(["7LES_DROME", "825", "914", "fn3", "1", "84", "2.2e-06", "13.4"], pfamOutput.get(1))
+        self.assertEqual(["7LES_DROME", "1292", "1389", "fn3", "1", "84", "1.2e-06", "15.9"], pfamOutput.get(2))
+        self.assertEqual(["7LES_DROME", "1799", "1891", "fn3", "1", "84", "2.3e-19", "63.5"], pfamOutput.get(3))
+        self.assertEqual(["7LES_DROME", "1899", "1978", "fn3", "1", "84", "1.5e-06", "15.2"], pfamOutput.get(4))
+        self.assertEqual(["7LES_DROME", "1993", "2107", "fn3", "1", "84", "4.3e-07", "20.3"], pfamOutput.get(5))
+        self.assertEqual(["7LES_DROME", "2209", "2483", "pkinase", "1", "294", "6e-95", "314.6"], pfamOutput.get(6))
+        self.assertEqual(["7LES_DROME", "2223", "2284", "rrm", "1", "77", "0.72", "-40.4"], pfamOutput.get(7))
+        file.close()
+        
+    def test_readHmmOutput_empty_file(self):
+        file = open( "./datas/OutputhmmpfamEmpty", "w" )
+        file.close()
+        file = open( "./datas/OutputhmmpfamEmpty")
+        pfamOutput = self.hmmpfamOutputProcess.readHmmOutput(file)
+        file.close()
+        self.assertEqual(None, pfamOutput)
+        os.system("rm ./datas/OutputhmmpfamEmpty")
+        
+    def testParseHmmpfamOutput2Align(self):
+        self.hmmpfamOutputProcess.readHmmOutputsAndWriteAlignFile(self._inputFile2, self._outputFile)
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertEqual(18, FileUtils.getNbLinesInSingleFile(self._outputFile))
+        os.system("rm " + self._outputFile)
+        
+    def test_parseHmmpfamOutput2Align_empty_file(self):
+        file = open("./datas/dummy_hmmpfam_output", "w")
+        file.close()
+        self._inputFile = "./datas/dummy_hmmpfam_output"
+        self.hmmpfamOutputProcess.readHmmOutputsAndWriteAlignFile( self._inputFile, self._outputFile )
+        self.assertTrue(FileUtils.isEmpty(self._outputFile))
+        os.system("rm " + self._inputFile)
+        os.system("rm " + self._outputFile)
+        
+    def test_parseHmmpfamOutput2Align_file_not_exists(self):
+        self._inputFile = "./datas/dummy_hmmpfam_output"
+        self.hmmpfamOutputProcess.readHmmOutputsAndWriteAlignFile(self._inputFile, self._outputFile )
+        self.assertFalse(FileUtils.isRessourceExists(self._outputFile))
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmscanOutputProcessing.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmscanOutputProcessing.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,58 @@
+import os
+import unittest
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmscanOutputProcessing import HmmscanOutputProcessing
+from commons.core.utils.FileUtils import FileUtils
+
+class TestHmmscanOutputProcessing(unittest.TestCase):
+    
+    def setUp(self):
+        self.hmmscanOutputProcess = HmmscanOutputProcessing()
+        self._inputFile = "./datas/hmmscanOutputTab.txt"
+        self._inputFileScan2 = "./datas/hmmscanOutput" 
+        self._outputFileScan2 = "./datas/hmmscanOutput.align"
+        self._outputFile = "./datas/dummyFile.align"
+        
+    def tearDown(self):
+        if os.path.exists(self._outputFile):
+            os.remove(self._outputFile)
+        
+    def testReadHmmerscanOutput(self):
+        file = open( self._inputFile )
+        pfamOutput = self.hmmscanOutputProcess.readHmmOutput(file)
+        self.assertEqual(["Polinton-1_DY:classII:Polinton_4", "2742", "2766", "rve", "50", "78", "2.4e+03", "-3.3"], pfamOutput.get(0))
+        self.assertEqual(["Polinton-1_DY:classII:Polinton_4", "2951", "3101", "rve", "4", "168", "6.7e-30", "103.4"], pfamOutput.get(1))
+        self.assertEqual(["Polinton-1_DY:classII:Polinton_4", "3198", "3228", "Chromo", "4", "51", "8.5e-06", "24.7"], pfamOutput.get(2))
+        self.assertEqual(["Polinton-1_DY:classII:Polinton_4", "2117", "2125", "GARS_A", "196", "204", "11", "2.5"], pfamOutput.get(3))
+        file.close()
+        
+    def test_readHmmerscanOutput_empty_file(self):
+        file = open( "./datas/OutputhmmscanEmpty", "w" )
+        file.close()
+        file = open( "./datas/OutputhmmscanEmpty")
+        pfamOutput = self.hmmscanOutputProcess.readHmmOutput(file)
+        file.close()
+        self.assertEqual(None, pfamOutput)
+        os.system("rm ./datas/OutputhmmscanEmpty")
+        
+    def testParseHmmscanOutput2Align(self):
+        self.hmmscanOutputProcess.readHmmOutputsAndWriteAlignFile(self._inputFileScan2, self._outputFile)
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._outputFileScan2, self._outputFile))
+        os.system("rm " + self._outputFile)
+        
+    def test_parseHmmscanOutput2Align_empty_file(self):
+        file = open("./datas/dummy_hmmpfam_output", "w")
+        file.close()
+        self._inputFile = "./datas/dummy_hmmpfam_output"
+        self.hmmscanOutputProcess.readHmmOutputsAndWriteAlignFile( self._inputFile, self._outputFile )
+        self.assertTrue(FileUtils.isEmpty(self._outputFile))
+        os.system("rm " + self._inputFile)
+        os.system("rm " + self._outputFile)
+        
+    def test_parseHmmscanOutput2Align_file_not_exists(self):
+        self._inputFile = "./datas/dummy_hmmpfam_output"
+        self.hmmscanOutputProcess.readHmmOutputsAndWriteAlignFile(self._inputFile, self._outputFile )
+        self.assertFalse(FileUtils.isRessourceExists(self._outputFile))
+        
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/Test_HmmOutput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/Test_HmmOutput.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,15 @@
+import os
+import unittest
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
+
+class Test_HmmOutput(unittest.TestCase):
+    
+    def testAppend(self):
+        hmmpfamOutput = HmmOutput()
+        self.assertEquals(0, hmmpfamOutput.len())
+        list = ["a","b"]
+        hmmpfamOutput.append(list)
+        self.assertEquals(1, hmmpfamOutput.len())
+        list2 = ["c","d"]
+        hmmpfamOutput.append(list2)
+        self.assertEquals(2, hmmpfamOutput.len())
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/Outputhmmpfam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/Outputhmmpfam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,148 @@
+hmmpfam - search one or more sequences against HMM database
+HMMER 2.3.2 (Oct 2003)
+Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
+Freely distributed under the GNU General Public License (GPL)
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                 myhmms
+Sequence file:            7LES_DROME
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+Query sequence: 7LES_DROME
+Accession:      P13368
+Description:    SEVENLESS PROTEIN (EC 2.7.1.112).
+
+Scores for sequence family classification (score includes all domains):
+Model    Description                                    Score    E-value  N 
+-------- -----------                                    -----    ------- ---
+pkinase  Protein kinase domain                          314.6      6e-95   1
+fn3      Fibronectin type III domain                    176.6      2e-53   6
+rrm                                                     -40.4       0.72   1
+
+Parsed for domains:
+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+-------- ------- ----- -----    ----- -----      -----  -------
+fn3        1/6     437   522 ..     1    84 []    48.3  8.8e-15
+fn3        2/6     825   914 ..     1    84 []    13.4  2.2e-06
+fn3        3/6    1292  1389 ..     1    84 []    15.9  1.2e-06
+fn3        4/6    1799  1891 ..     1    84 []    63.5  2.3e-19
+fn3        5/6    1899  1978 ..     1    84 []    15.2  1.5e-06
+fn3        6/6    1993  2107 ..     1    84 []    20.3  4.3e-07
+pkinase    1/1    2209  2483 ..     1   294 []   314.6    6e-95
+rrm        1/1    2223  2284 ..     1    77 []   -40.4     0.72
+
+Alignments of top-scoring domains:
+fn3: domain 1 of 6, from 437 to 522: score 48.3, E = 8.8e-15
+                CS    C CCCCEEEEEECCTTCCEEEEECCC CCCCCCCEEEEE.ECCCCCC
+                   *->P.saPtnltvtdvtstsltlsWsppt.gngpitgYevtyRqpkngge
+                      P saP   + +++ ++ l ++W p +  ngpi+gY++++ +++ g+ 
+  7LES_DROME   437    PiSAPVIEHLMGLDDSHLAVHWHPGRfTNGPIEGYRLRL-SSSEGNA 482  
+
+                CS CCCCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   
+                   wneltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*
+                   + e+ vp    sy+++ L++gt+Y++ +  +n +G+Gp     
+  7LES_DROME   483 TSEQLVPAGRGSYIFSQLQAGTNYTLALSMINKQGeGPVA    522  
+
+fn3: domain 2 of 6, from 825 to 914: score 13.4, E = 2.2e-06
+                CS    CCCCCEEEEEECCTTCCEEEEECCC       CCCCCCCEEEEE.EC
+                   *->PsaPtnltvtdvtstsltlsWsppt.......gngpitgYevtyRqp
+                       ++P  l++   ++  + +sW+ p++++ ++ + +   +Ye+++  +
+  7LES_DROME   825    GGKPHSLKALL-GAQAAKISWKEPErnpyqsaDAARSWSYELEV-LD 869  
+
+                CS CCCCCCCCCE EECCCCCECECCEEEEECCCCEEEEEECCC  CCCC   
+                   knggewnelt.vpgtttsytltgLkPgteYevrVqAvnggG..GpeS<-*
+                   + +++ ++++++ g+   + l+ L+P+  Y++rV+A+n +G++G+++   
+  7LES_DROME   870 VASQSAFSIRnIRGPI--FGLQRLQPDNLYQLRVRAINVDGepGEWT    914  
+
+fn3: domain 3 of 6, from 1292 to 1389: score 15.9, E = 1.2e-06
+                CS    CCCCCEEEE      EEC  CTTCCEEEEECCC    CCCCCCCEEE
+                   *->PsaPtnltv......tdv..tstsltlsWsppt....gngpitgYev
+                      Ps+P++l+v  ++  t++++   s++l+W++p+++++       Y +
+  7LES_DROME  1292    PSQPRRLRVfverlaTALqeANVSAVLRWDAPEqgqeAPMQALEYHI 1338 
+
+                CS EE.ECCCCCCCCCCEEECCCC CECECCEEEEECCCCEEEEEECCC    
+                   tyRqpknggewneltvpgttt.sytltgLkPgteYevrVqAvnggG....
+                   ++ +   g e +e    +++  ++ ++ L+P+ +Y+++V A+ ++++   
+  7LES_DROME  1339 SC-W--VGSELHEELRLNQSAlEARVEHLQPDQTYHFQVEARVAATgaaa 1385 
+
+                CS CCCC   
+                   GpeS<-*
+                   G++S   
+  7LES_DROME  1386 GAAS    1389 
+
+fn3: domain 4 of 6, from 1799 to 1891: score 63.5, E = 2.3e-19
+                CS    CCCCCEEEEEECCTTCCEEEEECCC CCCCCCCEEEEE.ECCCCCC 
+                   *->PsaPtnltvtdvtstsltlsWsppt.gngpitgYevtyRqpkngge.
+                      Ps+P+n++v+ +++ +l +sW pp++ +++  +Y++++ q++ +ge+
+  7LES_DROME  1799    PSPPRNFSVRVLSPRELEVSWLPPEqLRSESVYYTLHW-QQELDGEn 1844 
+
+                CS        CCCCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   
+                   .......wneltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*
+                    +++++ + + +   t+ ++ ltg kPg+ Y+++VqA+ + +++  S   
+  7LES_DROME  1845 vqdrrewEAHERRLETAGTHRLTGIKPGSGYSLWVQAHATPTkSNSS    1891 
+
+fn3: domain 5 of 6, from 1899 to 1978: score 15.2, E = 1.5e-06
+                CS    CCCCCEEEEEECCTTCCEEEEECCCCCCCCCCEEEEE.ECCCCCCCC
+                   *->PsaPtnltvtdvtstsltlsWspptgngpitgYevtyRqpknggewn
+                        +   l+  +++++sl+l+W       p+ + ++++R   ++ e  
+  7LES_DROME  1899    FAELPELQLLELGPYSLSLTWAGT--PDPLGSLQLECR---SSAEQL 1940 
+
+                CS CCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   
+                   eltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*
+                   +++v g+ t ++++ L+P t+Y+ r+    ++++G++    
+  7LES_DROME  1941 RRNVAGNHTKMVVEPLQPRTRYQCRLLLGYAATpGAPL    1978 
+
+fn3: domain 6 of 6, from 1993 to 2107: score 20.3, E = 4.3e-07
+                CS    CCCCCEEEEEECCTTCCEEEEECCC CCCCCCCEEEEE.ECCCCCC 
+                   *->PsaPtnltvtdvtstsltlsWsppt.gngpitgYevtyRqpkngge.
+                      Ps+P+ ++ + + +  ++++W++++++++pi  Y+++   ++++  +
+  7LES_DROME  1993    PSQPGKPQLEHIAEEVFRVTWTAARgNGAPIALYNLEA-LQARSDIr 2038 
+
+                CS                            CCCCEEECCCC CECECCEEEEE
+                   ...........................wneltvpgttt.sytltgLkPgt
+                   +++++++++++++ ++ +  +++   ++++l+  +tt  s++++ L   +
+  7LES_DROME  2039 rrrrrrrrnsggsleqlpwaeepvvveDQWLDFCNTTElSCIVKSLHSSR 2088 
+
+                CS CCCCEEEEEE CCC CCCC   
+                   eYevrVqAvn.ggG.GpeS<-*
+                      +rV+A++ ++G Gp+S   
+  7LES_DROME  2089 LLLFRVRARSlEHGwGPYS    2107 
+
+pkinase: domain 1 of 1, from 2209 to 2483: score 314.6, E = 6e-95
+                   *->yelleklGeGsfGkVykakhkd...ktgkiVAvKilkkekesikekr
+                      ++ll+ lG+G+fG+Vy++++k+++++  ++VA+K l+k+++++ e  
+  7LES_DROME  2209    LKLLRFLGSGAFGEVYEGQLKTedsEEPQRVAIKSLRKGASEFAE-- 2253 
+
+                   flrEiqilkrLsHpNIvrligvfedtddhlylvmEymegGdLfdylrrng
+                   +l E+q++ +++H+NIvrl g++  + +++ l+mE+me GdL++ylr+ +
+  7LES_DROME  2254 LLQEAQLMSNFKHENIVRLVGICF-DTESISLIMEHMEAGDLLSYLRAAR 2302 
+
+                   ..........gplsekeakkialQilrGleYLHsngivHRDLKpeNILld
+                    +++++++++  ls  e++ ++ ++++G +YL+++++vHRDL+ +N+L++
+  7LES_DROME  2303 atstqepqptAGLSLSELLAMCIDVANGCSYLEDMHFVHRDLACRNCLVT 2352 
+
+                   en......dgtvKiaDFGLArlle..sssklttfvGTpwYmmAPEvileg
+                   e +++++++ tvKi+DFGLAr++++++++++ + +  p+++m+PE  l +
+  7LES_DROME  2353 EStgstdrRRTVKIGDFGLARDIYksDYYRKEGEGLLPVRWMSPES-LVD 2401 
+
+                   rgysskvDvWSlGviLyElltggplfpgadlpaftggdevdqliifvlkl
+                     +++++DvW++Gv+++E+lt g                         ++
+  7LES_DROME  2402 GLFTTQSDVWAFGVLCWEILTLG-------------------------QQ 2426 
+
+                   PfsdelpktridpleelfriikrpglrlplpsncSeelkdLlkkcLnkDP
+                   P+         ++ +e+++++k+ g+rl +p+ c e l++Ll  c++ DP
+  7LES_DROME  2427 PYAA-------RNNFEVLAHVKE-GGRLQQPPMCTEKLYSLLLLCWRTDP 2468 
+
+                   skRpGsatakeilnhpwf<-*
+                   ++Rp   +++ + n +     
+  7LES_DROME  2469 WERP---SFRRCYNTLHA    2483 
+
+rrm: domain 1 of 1, from 2223 to 2284: score -40.4, E = 0.72
+                   *->lfVgNL.......ppdvteedLkdlFskfGpi.vsikivkDhkektk
+                      ++ g L+++++++p+ v  + L++  s+f +   +++++ +      
+  7LES_DROME  2223    VYEGQLktedseePQRVAIKSLRKGASEFAELlQEAQLMSN------ 2263 
+
+                   etgkskGfaFVeFeseedAekAlealnGkelggrklrv<-*
+                               F+     e+ ++ l G+ ++   +     
+  7LES_DROME  2264 ------------FKH----ENIVR-LVGICFDTESISL    2284 
+
+//
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,13 @@
+#                                                                                        --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord
+# target name        accession   tlen query name                       accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target
+#------------------- ---------- -----             -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
+DNA_pol_B_2          PF03175.5    531 Polinton-1_DY:classII:Polinton_1 -           4927   3.7e-06   24.2  16.0   1   3   1.4e-09   3.7e-06   24.2  11.1    13   252   992  1214   987  1218 0.76 DNA polymerase type B, organellar and viral
+DNA_pol_B_2          PF03175.5    531 Polinton-1_DY:classII:Polinton_1 -           4927   3.7e-06   24.2  16.0   2   3      0.11   2.9e+02   -2.0   0.2   393   433  1367  1397  1364  1398 0.85 DNA polymerase type B, organellar and viral
+DNA_pol_B_2          PF03175.5    531 Polinton-1_DY:classII:Polinton_1 -           4927   3.7e-06   24.2  16.0   3   3      0.33   8.5e+02   -3.5   0.2   175   193  2832  2850  2827  2852 0.85 DNA polymerase type B, organellar and viral
+ASFV_p27             PF06556.3    131 Polinton-1_DY:classII:Polinton_1 -           4927       2.4    5.8   1.6   1   1    0.0023         6    4.5   1.1    59    74  2633  2648  2626  2652 0.90 IAP-like protein p27 C-terminus
+Ribosomal_S4         PF00163.11   110 Polinton-1_DY:classII:Polinton_1 -           4927       5.5    5.7   1.1   1   1    0.0068        18    4.1   0.7    39    72  3008  3041  3002  3043 0.92 Ribosomal protein S4/S9 N-terminal domain
+Homo_sperm_syn       PF06408.3    487 Polinton-1_DY:classII:Polinton_1 -           4927       6.3    2.4   0.3   1   1    0.0034       8.8    2.0   0.2    49    68  3966  3985  3960  3987 0.88 Homospermidine synthase
+rve                  PF00665.18   169 Polinton-1_DY:classII:Polinton_4 -           4927   5.4e-30  103.7   1.8   1   2       0.7   2.4e+03   -3.3   0.0    50    78  2742  2766  2739  2766 0.80 Integrase core domain
+rve                  PF00665.18   169 Polinton-1_DY:classII:Polinton_4 -           4927   5.4e-30  103.7   1.8   2   2   1.9e-33   6.7e-30  103.4   0.7     4   168  2951  3101  2947  3102 0.92 Integrase core domain
+Chromo               PF00385.16    69 Polinton-1_DY:classII:Polinton_4 -           4927   3.3e-06   26.0   0.0   1   1   2.5e-09   8.5e-06   24.7   0.0     4    51  3198  3228  3195  3239 0.75 'chromo' (CHRromatin Organisation MOdifier) domain
+GARS_A               PF01071.11   208 Polinton-1_DY:classII:Polinton_4 -           4927       7.6    3.0   0.7   1   1    0.0031        11    2.5   0.5   196   204  2117  2125  2117  2125 0.95 Phosphoribosylglycinamide synthetase, ATP-grasp (A) domain
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput.align Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,10 @@
+Polinton-1_DY:classII:Polinton_1 992 1214 DNA_pol_B_2 13 252 3.7e-06 24.2 0
+Polinton-1_DY:classII:Polinton_1 1367 1397 DNA_pol_B_2 393 433 2.9e+02 -2.0 0
+Polinton-1_DY:classII:Polinton_1 2832 2850 DNA_pol_B_2 175 193 8.5e+02 -3.5 0
+Polinton-1_DY:classII:Polinton_1 2633 2648 ASFV_p27 59 74 6 4.5 0
+Polinton-1_DY:classII:Polinton_1 3008 3041 Ribosomal_S4 39 72 18 4.1 0
+Polinton-1_DY:classII:Polinton_1 3966 3985 Homo_sperm_syn 49 68 8.8 2.0 0
+Polinton-1_DY:classII:Polinton_4 2742 2766 rve 50 78 2.4e+03 -3.3 0
+Polinton-1_DY:classII:Polinton_4 2951 3101 rve 4 168 6.7e-30 103.4 0
+Polinton-1_DY:classII:Polinton_4 3198 3228 Chromo 4 51 8.5e-06 24.7 0
+Polinton-1_DY:classII:Polinton_4 2117 2125 GARS_A 196 204 11 2.5 0
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutputTab.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutputTab.txt Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,7 @@
+#                                                                                        --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord
+# target name        accession   tlen query name                       accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target
+#------------------- ---------- -----             -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
+rve                  PF00665.18   169 Polinton-1_DY:classII:Polinton_4 -           4927   5.4e-30  103.7   1.8   1   2       0.7   2.4e+03   -3.3   0.0    50    78  2742  2766  2739  2766 0.80 Integrase core domain
+rve                  PF00665.18   169 Polinton-1_DY:classII:Polinton_4 -           4927   5.4e-30  103.7   1.8   2   2   1.9e-33   6.7e-30  103.4   0.7     4   168  2951  3101  2947  3102 0.92 Integrase core domain
+Chromo               PF00385.16    69 Polinton-1_DY:classII:Polinton_4 -           4927   3.3e-06   26.0   0.0   1   1   2.5e-09   8.5e-06   24.7   0.0     4    51  3198  3228  3195  3239 0.75 'chromo' (CHRromatin Organisation MOdifier) domain
+GARS_A               PF01071.11   208 Polinton-1_DY:classII:Polinton_4 -           4927       7.6    3.0   0.7   1   1    0.0031        11    2.5   0.5   196   204  2117  2125  2117  2125 0.95 Phosphoribosylglycinamide synthetase, ATP-grasp (A) domain
\ No newline at end of file
b
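Note: the two test fixtures above pair an hmmscan --domtblout table (hmmscanOutputTab.txt) with the nine-column records expected from it (hmmscanOutput.align). A minimal sketch of that column correspondence follows; it is illustrative only and is not the REPET converter (HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py additionally maps amino-acid coordinates back to nucleotides and filters on score, which this sketch omits).

# Hedged sketch, not the REPET converter: map one hmmscan --domtblout record
# onto the nine whitespace-separated columns of hmmscanOutput.align above
# (query, ali-from, ali-to, target, hmm-from, hmm-to, i-Evalue, domain score,
# identity).  Joined here with single spaces; the real file may use tabs.
def domtblout_to_align(line):
    if line.startswith("#") or not line.strip():
        return None
    f = line.split()
    query, target = f[3], f[0]
    i_evalue, dom_score = f[12], f[13]
    hmm_from, hmm_to = f[15], f[16]
    ali_from, ali_to = f[17], f[18]
    # identity is not defined for profile hits, hence the trailing 0
    return " ".join([query, ali_from, ali_to, target,
                     hmm_from, hmm_to, i_evalue, dom_score, "0"])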
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/test_hmmpfam_output
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/test_hmmpfam_output Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,321 @@\n+hmmpfam - search one or more sequences against HMM database\n+HMMER 2.3.2 (Oct 2003)\n+Copyright (C) 1992-2003 HHMI/Washington University School of Medicine\n+Freely distributed under the GNU General Public License (GPL)\n+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n+HMM file:                 myhmms\n+Sequence file:            test.fa\n+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n+\n+Query sequence: pkinase_full\n+Accession:      [none]\n+Description:    Dario rerio\n+\n+Scores for sequence family classification (score includes all domains):\n+Model    Description                                    Score    E-value  N \n+-------- -----------                                    -----    ------- ---\n+pkinase  Protein kinase domain                          266.6    3.3e-80   1\n+pkinase  Protein kinase domain                          265.4    7.6e-80   1\n+rrm                                                     -42.5        2.5   1\n+rrm                                                      -8.7        3.7   1\n+fn3      Fibronectin type III domain                    -45.8        3.7   1\n+fn3      Fibronectin type III domain                     -9.0        3.8   1\n+\n+Parsed for domains:\n+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value\n+-------- ------- ----- -----    ----- -----      -----  -------\n+rrm        1/1     169   180 ..    66    77 .]    -8.7      3.7\n+fn3        1/1     197   200 ..    81    84 .]    -9.0      3.8\n+rrm        1/1     256   319 ..     1    77 []   -42.5      2.5\n+pkinase    1/1     258   540 ..     1   294 []   266.6  3.3e-80\n+pkinase    1/1     258   537 ..     1   289 [.   265.4  7.6e-80\n+fn3        1/1     434   506 ..     1    84 []   -45.8      3.7\n+\n+Alignments of top-scoring domains:\n+rrm: domain 1 of 1, from 169 to 180: score -8.7, E = 3.7\n+                   *->nGkelggrklrv<-*\n+                      n++    ++l v   \n+  pkinase_fu   169    NKQDPHQHRLSV    180  \n+\n+fn3: domain 1 of 1, from 197 to 200: score -9.0, E = 3.8\n+                CS    CCCC   \n+                   *->GpeS<-*\n+                      G eS   \n+  pkinase_fu   197    GGES    200  \n+\n+rrm: domain 1 of 1, from 256 to 319: score -42.5, E = 2.5\n+                   *->lfVgNL..ppdvteedLkdlFsk.fGpivsikivkDhkektketgks\n+                         +N++  +d+ e    + F+ +        +++           +\n+  pkinase_fu   256    ---NNIeyVRDIGEGAFGRVFQArAPGL----LPTE----------P 285  \n+\n+                   kGfaFVeFeseedAekAlealnG.kelggrklrv<-*\n+                     +  V++ +ee+ ++  ++++ +  l++   +    \n+  pkinase_fu   286 FTMVAVKMLKEEASTDMQNDFQReAALMSEFDHP    319  \n+\n+pkinase: domain 1 of 1, from 258 to 540: score 266.6, E = 3.3e-80\n+                   *->yelleklGeGsfGkVykakhkd....ktgkiVAvKilkkekesikek\n+                      +e ++ +GeG+fG+V++a+     +++    VAvK+lk e+ s++  \n+  pkinase_fu   258    IEYVRDIGEGAFGRVFQARAPGllptEPFTMVAVKMLK-EEASTDMQ 303  \n+\n+                   .rflrEiqilkrLsHpNIvrligvfedtddhlylvmEymegGdLfdylrr\n+                   ++f+rE++++ +++HpNIvrl+gv+   +++++l +Eym  GdL+++lrr\n+  pkinase_fu   304 nDFQREAALMSEFDHPNIVRLLGVCA-VGKPMCLMFEYMAYGDLNEFLRR 352  \n+\n+                   ng........................gplsekeakkialQilrGleYLHs\n+                   +  +++++ ++++ ++++  +++++  pls +e ++i +Q++ G++YL +\n+  pkinase_fu   353 RCatqqpslsrdtltssslvseperyPPLSCQEQLSISKQVAAGMAYLSE 402  \n+\n+                   ngivHRDLKpeNILldendgtvKiaDFGLArlle..sssklttfvGTpwY\n+                   +++vHRDL+++N+L+ en 
 +vKiaDFGL+r ++  +++k++ +   p +\n+  pkinase_fu   403 RKFVHRDLATRNCLVAEN-LVVKIADFGLSRNIYaaDYYKASENDAIPIR 451  \n+\n+                   mmAPEvilegrgysskvDvWSlGviLyElltggplfpgadlpaftggdev\n+                   +m+PE  ++++ y+s++DvW++Gv+L+E++++g                 \n+  pkinase_fu   452 WMPPES-IFYNRYTSESDVWAYGVVLWEIFSYG----------------- 483  \n+\n+                   dqliifvlklPfsdelpktridpleelfriikrpglrlplpsncSeelkd\n+                           ++P+++       +  ee+ + +++ g  l +p+nc+ el+ \n+  pkinase_fu   484 --------MQPYYG-------'..b'III domain                    -39.7        1.2   1\n+pkinase  Protein kinase domain                           -9.2        1.8   1\n+fn3      Fibronectin type III domain                     -7.5        1.9   1\n+\n+Parsed for domains:\n+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value\n+-------- ------- ----- -----    ----- -----      -----  -------\n+fn3        1/1     143   224 ..     1    84 []   -39.7      1.2\n+pkinase    1/1     233   241 ..     1     9 [.    -9.2      1.8\n+rrm        1/1     440   456 ..     1    17 [.    -6.1     0.92\n+rrm        1/1     669   731 ..     1    77 []   -32.5     0.26\n+pkinase    1/1     906  1127 ..     1   294 []  -171.6     0.19\n+fn3        1/1     923   934 ..     1    12 [.    -7.5      1.9\n+\n+Alignments of top-scoring domains:\n+fn3: domain 1 of 1, from 143 to 224: score -39.7, E = 1.2\n+                CS    C CCCCEEEEEECCTTCCEEEEECCCCCCCCCCEEEEE.ECCCCCCC\n+                   *->P.saPtnltvtdvtstsltlsWspptgngpitgYevtyRqpknggew\n+                       + ++t+l++ dv    ++ sWs+ +g+ + tg  v  R + ng  +\n+      S13421   143    IdLEVTGLSCVDVAN--IQESWSKVSGDLKTTGSVVFQR-MINGH-P 185  \n+\n+                CS CCCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   \n+                   neltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*\n+                     +++ ++ + + l  L +    +  V  v ++ +G      \n+      S13421   186 EYQQLFRQFRDVDLDKLGESNSFVAHVFRVVAAFdGIIH    224  \n+\n+pkinase: domain 1 of 1, from 233 to 241: score -9.2, E = 1.8\n+                   *->yelleklGe<-*\n+                        +l+klGe   \n+      S13421   233    VSTLKKLGE    241  \n+\n+rrm: domain 1 of 1, from 440 to 456: score -6.1, E = 0.92\n+                   *->lfVgNLppdvteedLkd<-*\n+                      + ++ L++   + d+ d   \n+      S13421   440    VIISFLNEGLRQADIVD    456  \n+\n+rrm: domain 1 of 1, from 669 to 731: score -32.5, E = 0.26\n+                   *->lfVgNL.ppdvteedLkdlFskfGpivsikivkDhkektketgkskG\n+                        V  L+++d+    +   +  f  i   k++ D            +\n+      S13421   669    --VEELpSTDAFHYHISLVMNRFSSI--GKVIDD------------N 699  \n+\n+                   faFVeFeseedAekAlealnGkelggr.klrv<-*\n+                   + FV   ++ + e+ ++ l  k+++   +l++   \n+      S13421   700 VSFVYLLKKLGREHIKRGLSRKQFDQFvELYI    731  \n+\n+pkinase: domain 1 of 1, from 906 to 1127: score -171.6, E = 0.19\n+                   *->yelleklGeGsfGkVykakhkd........ktgkiVAvKilkkekes\n+                      ++l         G++ ++ +  ++ ++ ++ + ++VAv +     ++\n+      S13421   906    FKLMH-------GVIEEGLLQLerinpitgLSAREVAVVKQ---TWN 942  \n+\n+                   ikekrflrEiqilkrLsHpNIvrligvfedtddhlylvmEymegGdLfdy\n+                   + +            L+      ++  fe   + +  v++       +d \n+      S13421   943 LVK----------PDLMGVGMRIFKSLFE-AFPAYQAVFPKFSD-VPLDK 980  \n+\n+                   lrrng....gplsekeakkialQilrGleYLHsngivHRDLKpeNILlde\n+                   l +    +++ +s    ++   Q+                       lde\n+      S13421   981 LEDTPavgkHSISVTTKLDELIQT-----------------------LDE 1007 \n+\n+     
              ndgtvKiaDFGLArlle........sssklttfvGTpwYmmAPEvi...l\n+                   +  ++ +    LAr+l +++   + + +++++f+  +++      ++++l\n+      S13421  1008 P-ANLAL----LARQLGedhivlrvNKPMFKSFGKVLVRL-----LendL 1047 \n+\n+                   egrgysskvDvW..SlGviLyElltggplfpgadlpaftggdevdqliif\n+                    +r  s ++  W++++ vi+ ++  g                        \n+      S13421  1048 GQRFSSFASRSWhkAYDVIVEYIEEG------------------------ 1073 \n+\n+                   vlklPfsdelpktridpleelfriikrpglrlplpsncSeelkdLlkkcL\n+                    l++ +++       dp   + +  k   l  + ++++ ++l+ L +k++\n+      S13421  1074 -LQQSYKQ-------DPVTGITDAEKA--LVQESWDLLKPDLLGLGRKIF 1113 \n+\n+                   nkDPskRpGsatakeilnhpwf<-*\n+                   +k  +k p      +i     f   \n+      S13421  1114 TKVFTKHP----DYQI----LF    1127 \n+\n+fn3: domain 1 of 1, from 923 to 934: score -7.5, E = 1.9\n+                CS    CCCCCEEEEEEC   \n+                   *->PsaPtnltvtdv<-*\n+                        + t+l++++v   \n+      S13421   923    INPITGLSAREV    934  \n+\n+//\n+\n'
b
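Note: test_hmmpfam_output above is a classic HMMER 2.3.2 hmmpfam report; the part the parsers in this changeset care about is the per-query "Parsed for domains" table. A rough, stand-alone sketch of extracting that table follows; the layout is inferred from this file only, and the code is not the HmmpfamOutput2align implementation.

# Hedged sketch: collect (model, seq-from, seq-to, hmm-from, hmm-to, score,
# E-value) tuples from every "Parsed for domains" table in an hmmpfam report.
def parsed_domains(report_path):
    rows, in_table = [], False
    for line in open(report_path):
        if line.startswith("Parsed for domains:"):
            in_table = True
            continue
        if not in_table:
            continue
        if line.startswith("Model") or line.startswith("--------"):
            continue
        if not line.strip() or line.startswith("Alignments"):
            in_table = False
            continue
        f = line.split()
        if len(f) < 10:
            continue
        rows.append((f[0], int(f[2]), int(f[3]), int(f[5]), int(f[6]),
                     float(f[8]), float(f[9])))
    return rows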
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/hmmOutput/tests/hmmOutputTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/tests/hmmOutputTestSuite.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,21 @@
+import unittest
+import sys
+import TestHmmOutputProcessing
+import Test_HmmOutput
+import TestHmmpfamOutputProcessing
+import TestHmmscanOutputProcessing
+
+
+def main():
+
+    hmmOutputTestSuite = unittest.TestSuite()
+    hmmOutputTestSuite.addTest(unittest.makeSuite(TestHmmOutputProcessing.TestHmmOutputProcessing,'test'))
+    hmmOutputTestSuite.addTest(unittest.makeSuite(Test_HmmOutput.Test_HmmOutput,'test'))
+    hmmOutputTestSuite.addTest(unittest.makeSuite(TestHmmpfamOutputProcessing.TestHmmpfamOutputProcessing,'test'))
+    hmmOutputTestSuite.addTest(unittest.makeSuite(TestHmmscanOutputProcessing.TestHmmscanOutputProcessing,'test'))
+    runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+    runner.run(hmmOutputTestSuite)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,103 @@
+import subprocess
+import os
+import sys
+
+## Prepare a profiles databank and build the command to search its profiles against a nucleotide databank
+#
+class ProfilesSearch(object):
+
+    ## Launch the command (hmmpress) to prepare the profiles bank
+    #
+    # @param launch_1 string corresponding to pre command
+    # @param launch_2 string corresponding to post command
+    # @param config configParser object instance
+    # @param cDir string current directory
+    # @param verbose int (default = 0)
+    #
+    def prepareProfilesBank(self, launch_1, launch_2, config, cDir, verbose = 0):
+        bank = self._getBankBaseName(config)
+        prg = "hmmpress"
+        if verbose > 0:
+            print "prepare bank '%s'..." % ( bank ); sys.stdout.flush()
+        cmd = ""
+        cmd += prg + " -f "
+        cmd += "%s/%s " % ( cDir, bank )
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        if process.returncode != 0:
+            raise Exception("ERROR when launching '%s'" % cmd)
+    
+    ## Build the command to detect HMM profiles in a nucleotide sequence file
+    #
+    # @param inFileName string name of input file
+    # @param launch_1 string corresponding to pre command
+    # @param launch_2 string corresponding to post command
+    # @param cDir string current directory
+    # @param tmpDir string temporary directory
+    # @param config configParser object instance
+    # @return cmd string command to launch
+    #        
+    def detectHmmProfiles(self, inFileName, launch_1, launch_2, cDir, tmpDir, config):
+        bank = self._getBankBaseName(config)
+        evalueMax = config.get("detect_features","TE_HMMER_evalue")
+        
+        cmd = ""
+        
+        cmd += launch_1
+        cmd += os.environ["REPET_PATH"] + "/bin/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py"
+        cmd += " -i %s" % ( inFileName )
+        cmd += " -o %s_translated" % ( inFileName )
+        cmd += launch_2
+        
+        cmd += launch_1
+        cmd += "hmmscan "
+        cmd += " -o %s_tr.hmmScanOut" % ( inFileName )
+        cmd += " --domtblout %s_tr.hmmScanOutTab" % ( inFileName )
+        cmd += " --noali -E " + evalueMax
+        cmd += " --cpu 1 "        
+        cmd += "%s/%s" % ( cDir, bank ) + " " + "%s_translated" % ( inFileName )
+        cmd += launch_2
+        
+        cmd += "if os.path.exists( \"%s_translated\" ):\n" % ( inFileName )
+        cmd += "\tos.remove( \"%s_translated\" )\n" % ( inFileName )
+        
+        cmd += launch_1
+        cmd += os.environ["REPET_PATH"] + "/bin/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py"
+        cmd += " -i %s_tr.hmmScanOutTab" % ( inFileName )
+        cmd += " -o %s_profiles_%s.align" % ( inFileName, bank )
+        cmd += " -T %s" % ( inFileName )
+        cmd += " -p hmmscan"
+        cmd += " -c"
+        cmd += launch_2
+        
+        cmd += launch_1
+        cmd += os.environ["REPET_PATH"] + "/bin/matcher"
+        cmd += " -m %s_profiles_%s.align" % ( inFileName, bank )
+        cmd += " -j"
+        cmd += " -E 10"
+        cmd += " -L 0"
+        cmd += " -v 1"
+        cmd += launch_2
+        
+        cmd += "if not os.path.exists( \"%s/%s_profiles_%s.align.clean_match.path\" ):\n" % ( cDir, inFileName, bank )
+        cmd += "\tos.system( \"mv %s_profiles_%s.align.clean_match.path %s\" )\n" % ( inFileName, bank, cDir )
+        cmd += "if not os.path.exists( \"%s/%s_profiles_%s.align.clean_match.param\" ):\n" % ( cDir, inFileName, bank )
+        cmd += "\tos.system( \"mv %s_profiles_%s.align.clean_match.param %s\" )\n" % ( inFileName, bank, cDir )
+        cmd += "if os.path.exists( \"%s_profiles_%s.align\" ):\n" % ( inFileName, bank )
+        cmd += "\tos.remove( \"%s_profiles_%s.align\" )\n" % ( inFileName, bank )
+        cmd += "if os.path.exists( \"%s_profiles_%s.align.clean_match.map\" ):\n" % ( inFileName, bank )
+        cmd += "\tos.remove( \"%s_profiles_%s.align.clean_match.map\" )\n" % ( inFileName, bank )
+        cmd += "if os.path.exists( \"%s_hmmScanOut\" ):\n" % ( inFileName )
+        cmd += "\tos.remove( \"%s_hmmScanOut\" )\n" % ( inFileName )
+        
+        if tmpDir != cDir:
+            cmd += "if os.path.exists( \"%s\" ):\n" % ( bank )
+            cmd += "\tos.remove( \"%s\" )\n" % ( bank )
+            
+        return cmd
+    
+    def _getBankBaseName(self, config):
+        profilsHmmBank = config.get("detect_features", "TE_HMM_profiles")
+        bank = os.path.basename(profilsHmmBank)
+        return bank
+
b
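Note: a hedged usage sketch for the ProfilesSearch class added above. The section and option names mirror those the class reads; the bank path, input FASTA name and the empty pre/post launcher strings are placeholders, REPET_PATH must point at a REPET installation, and the returned string is a job script consumed by the REPET launchers rather than something to exec directly.

import ConfigParser
from commons.pyRepetUnit.hmmer.profilsSearchInTEClassifier.ProfilesSearch import ProfilesSearch

config = ConfigParser.ConfigParser()
config.add_section("detect_features")
config.set("detect_features", "TE_HMM_profiles", "./datas/hmmbank_test")  # placeholder bank path
config.set("detect_features", "TE_HMMER_evalue", "10")

ps = ProfilesSearch()
# runs 'hmmpress -f ./hmmbank_test'; the bank file must exist in the current directory
ps.prepareProfilesBank("", "", config, ".")
# builds (but does not run) the translate / hmmscan / align / matcher pipeline
cmd = ps.detectHmmProfiles("consensus.fa", "", "", ".", "/tmp", config)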
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/Test_ProfilesSearch.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/Test_ProfilesSearch.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,106 @@
+import unittest
+import os
+import ConfigParser
+import shutil
+from commons.pyRepetUnit.hmmer.profilsSearchInTEClassifier.ProfilesSearch import ProfilesSearch
+
+class Test_ProfilesSearch(unittest.TestCase):
+
+
+    def setUp(self):
+        self._section = 'detect_features'
+        self._option1 = 'TE_HMM_profiles'
+        self._option2 = 'TE_HMMER_evalue'
+        self._config = ConfigParser.ConfigParser()
+        self._config.add_section(self._section)
+        self._config.set(self._section, self._option1, './datas/hmmbank_test')        
+        self._config.set(self._section, self._option2, '10')
+        self._iProfilesSearch = ProfilesSearch()
+        self._launch1 = 'First Command\n'
+        self._launch2 = '\nSecond Command\n'
+        self._cDir = '.'
+        self._profileBank = self._config.get(self._section, self._option1)
+        self._bank = os.path.basename(self._profileBank)
+        shutil.copy(self._profileBank, self._cDir + "/" + self._bank)
+
+    def tearDown(self):
+        os.remove(self._cDir + "/" + self._bank)
+
+    def test_prepareProfilesBank(self):
+        hmmpressLaunched = False
+        self._iProfilesSearch.prepareProfilesBank( self._launch1, self._launch2, self._config, self._cDir )
+        if ( os.path.exists(self._cDir + "/" + self._bank + ".h3m") \
+            and os.path.exists(self._cDir + "/" + self._bank + ".h3i") \
+            and os.path.exists(self._cDir + "/" + self._bank + ".h3f") \
+            and os.path.exists(self._cDir + "/" + self._bank + ".h3p")) :
+            hmmpressLaunched = True
+            os.remove(self._cDir + "/" + self._bank + ".h3m")
+            os.remove(self._cDir + "/" + self._bank + ".h3i")
+            os.remove(self._cDir + "/" + self._bank + ".h3f")
+            os.remove(self._cDir + "/" + self._bank + ".h3p")
+        self.assertTrue(hmmpressLaunched)
+
+    def test_detectHmmProfiles(self):
+        inputFileName = ' FileName'
+        dirTemp = 'Temporary directory'
+        eValueMax = self._config.get(self._section, self._option2)
+        
+        expCommand = self._launch1 
+        expCommand += os.environ["REPET_PATH"] + "/bin/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py"
+        expCommand += " -i %s" % ( inputFileName ) + " -o %s_translated" % ( inputFileName )
+        expCommand += self._launch2
+        
+        expCommand += self._launch1
+        expCommand += "hmmscan "
+        expCommand += " -o %s_tr.hmmScanOut" % ( inputFileName )
+        expCommand += " --domtblout %s_tr.hmmScanOutTab" % ( inputFileName )
+        expCommand += " --noali "
+        expCommand += "-E " + eValueMax
+        expCommand += " --cpu 1" 
+        expCommand += " " + self._cDir + "/" + self._bank + " " + "%s_translated" % ( inputFileName )
+        
+        expCommand += self._launch2
+        
+        expCommand += "if os.path.exists( \"%s_translated\" ):\n" % ( inputFileName )
+        expCommand += "\tos.remove( \"%s_translated\" )\n" % ( inputFileName )
+
+        expCommand += self._launch1
+        expCommand += os.environ["REPET_PATH"] + "/bin/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py"
+        expCommand += " -i %s_tr.hmmScanOutTab" % ( inputFileName )
+        expCommand += " -o %s_profiles_%s.align" % ( inputFileName, self._bank )
+        expCommand += " -T %s" % ( inputFileName )
+        expCommand += " -p hmmscan"
+        expCommand += " -c"
+        expCommand += self._launch2
+
+        expCommand += self._launch1
+        expCommand += os.environ["REPET_PATH"] + "/bin/matcher"
+        expCommand += " -m %s_profiles_%s.align" % ( inputFileName, self._bank )
+        expCommand += " -j"
+        expCommand += " -E 10"
+        expCommand += " -L 0"
+        expCommand += " -v 1"
+        expCommand += self._launch2
+
+        expCommand += "if not os.path.exists( \"%s/%s_profiles_%s.align.clean_match.path\" ):\n" % ( self._cDir, inputFileName, self._bank )
+        expCommand += "\tos.system( \"mv %s_profiles_%s.align.clean_match.path %s\" )\n" % ( inputFileName, self._bank, self._cDir )
+        expCommand += "if not os.path.exists( \"%s/%s_profiles_%s.align.clean_match.param\" ):\n" % ( self._cDir, inputFileName, self._bank )
+        expCommand += "\tos.system( \"mv %s_profiles_%s.align.clean_match.param %s\" )\n" % ( inputFileName, self._bank, self._cDir )
+        expCommand += "if os.path.exists( \"%s_profiles_%s.align\" ):\n" % ( inputFileName, self._bank )
+        expCommand += "\tos.remove( \"%s_profiles_%s.align\" )\n" % ( inputFileName, self._bank )
+        expCommand += "if os.path.exists( \"%s_profiles_%s.align.clean_match.map\" ):\n" % ( inputFileName, self._bank )
+        expCommand += "\tos.remove( \"%s_profiles_%s.align.clean_match.map\" )\n" % ( inputFileName, self._bank )
+        expCommand += "if os.path.exists( \"%s_hmmScanOut\" ):\n" % ( inputFileName )
+        expCommand += "\tos.remove( \"%s_hmmScanOut\" )\n" % ( inputFileName )
+
+        if dirTemp != self._cDir:
+            expCommand += "if os.path.exists( \"%s\" ):\n" % ( self._bank )
+            expCommand += "\tos.remove( \"%s\" )\n" % ( self._bank )
+        
+        obsCommand = self._iProfilesSearch.detectHmmProfiles( inputFileName, self._launch1, self._launch2, self._cDir, dirTemp, self._config )
+        self.assertEquals(expCommand, obsCommand)
+        
+
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/ConsensusFile_test.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/ConsensusFile_test.fa Tue Apr 30 14:33:21 2013 -0400
b
b'@@ -0,0 +1,141 @@\n+>blumeria_Grouper_590_20:NoCat\n+TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n+AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n+ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n+ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n+CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n+TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n+TCCATAATTTCAACACTNAAGAATATTTGTA\n+>blumeria_Grouper_4152_12:NoCat\n+GGACCGGCCGCCACGAATTGCGCGATTGCTGCTCGCAAGTAGACTTTGATGGAGTCTACA\n+AAATTTTTGTCTTCACCGGTGGAGAGCGGTTGAAGAGCTGCCTCGACACTGCTAATAGCC\n+GTCGAGCATATTGTGAATTGCGCGGCTTTTTGTCTTGCCCTGCGCTCCTCCGCTTCGATC\n+GCTGCAAGCAATTCTGGAGGGTGTGTATTTTTTCTGCCTGCTGCTTCTAGTGCTGGAGGC\n+TGCGGGGCCAGTGGAGGGTTTTCGGCTCCTGCTGCCTTAGTGGATGGTGTTTCAGCCCTT\n+TTCGCGGGCCTCACTTCTGCAGGTCGCGGTAGTGCTGGAACCGTGATGCGCTTCTCGGGT\n+GCGACGACGGTTTTTCTGGGGGATCCGGTGGGATCCAAGACTTGCTCTGCGTCTTCTGGG\n+CTGGAGGATGCCCAAACTGAATCGGCGAGGGTTTTTAGCTTCTCGACTTCTGCGTCCACC\n+ATATCTACCTCGGGGACATTG\n+>blumeria_Grouper_28830_3\n+TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTC\n+CGACTAATCAACAATATAATGCGAGTAGAGCTTGAGACTAATAAAACGTGAATTTNATTC\n+TATAATACAGGATCGCCAACTTATTTAAATATTCCCCCGCACCACGTGAGCCAATCAACA\n+GCGCACAATTTGCACGGTCGGGTTTCCATCAATTGGGATCCACAAACTATCCCCCTTCTC\n+AAAACTAGCTGTCCACAGCTTATTTAAATTACAAGTTAATTAGAAATTTTATTATGGTAT\n+CACTCAAACTAAATCCTTGCTGAATTTTTGAGTGTCTTTCTATCCTGCTCCAGCATACGG\n+CGGCTACCATACCATAGACTACGAACTGGGTGTCCTTCACGTACATATGCCGGGAATAGG\n+TTTGTGCCGTTGTACTTGTTGCCTAATTTGTATCCACCCGGTGATATTAAGGAGTAGGTA\n+TTATTATTATGACAAGCTGCTACGGCCCATGGCCCGTCATAGTAGGGCTCCATCTTGTGT\n+CTTCGCTGGCGTACTCGCAGTACCCAGTCTCCGGGGGCGTAAGTACGTATAAGTGCCTTT\n+TCTTCCTGGAGGTAAGCCCGTATTTTATCCCTAGTTGCTTTCATACTACTAACGTAGGAG\n+CGTGCTATTGGTGCGGGGTGTTTCCTTCCGAGTTCTTCGGCCCAGAGGCGCTCCTCTGAT\n+TCTGTGGCTTCACGGGTATATACTGGATAAGAATTTTCCTCCTCCGGGGGCTGAGTGCCG\n+AAGAGTAAGAAGTAAGGGCTATACCCGCTGGGTGAGACTCTTCGATTGAAATTCATTACC\n+GCGAGTGAGAGAGCGCCAGCTAGTGTAATTTTAGGAGAATCCAGGAGAACTCTGACCAGT\n+ATGGCTTTCAGAACGCCATTTGCCTGTTCGACTTTGCCATTTCCTCTTGGCCTAGCTGGT\n+GTGGTATGGACAACCTGCAGTTGTTGGCTAGCTTGAAATCGTTGTGCTTCTATGCCTCCG\n+AAGCAGCCAGCATTGTCACTGATAATCTGCTTTGGCTTTCCAAAGTTCTGGATAATATAC\n+GTGAGCAGAGGCACAGTGTTAGCGAAGTCTGCATTGGGAACTAAGCGGGATTCCAGCCAG\n+CCTGTTGCGTACTCAATTGCATTGAGCAAGACTTGGGGTCCTACTTGAGTATGATCTATA\n+CCCCACCGTGTCAGAGGAGGTGCTGGTACTATAGGAGTTAAGCTTCCCAGTAACGGGTCT\n+GGAGGTTTCATGAGCTGACAGGACTTGCATTTTCGGACAGCTTCGTGAGCCGCGAGGATC\n+AGTTCGGGGTGCCAAAACCTACGCCCTAATTCCCGCATAGTATTGCCGATTGACGCATGG\n+CCTTGGCCCTCATGAACATTTACGGCCGTCGATACTAGATCCTCATATTCCAAGATTTCT\n+AGTAGGGTTGCGGCGCCAGATGGACCAGGTGCATCGCCTACTTGTTGCGACGGGTTCTTT\n+TTGATGAGATGCAATTTGGAATTGTACATCGCAAAATTATCTCGGACCCAAGTGGCAGCT\n+ATATTTAGGGGAAGTGCCTCTCCTAAGGTTAGAAATTCGGTGATGCACTGCAGGTCGATG\n+CGGTTGAGTTCGTGTGGGTGCTTTATTTCCTCCATCTCAAGTTCTCTTTCTGCACCACTT\n+TGGATTCTAGAGTTTTCGTTGTCGTCAGTAGCTACATTCCCTTCCCCTTCTTCCCTAGAG\n+AGAAATAGAACATCACAGGGACGGGAGAGAAAATCAGCGAGAACATTTGCTTTTCCGCGA\n+CGGTAGATAATCCTGATATTAAAATGCTCTATAGCATCTAAAAAACGGACCATTCTGACG\n+GGCTGCTCGGCTTTGGTCTGAATAGTTCGTAGAGATTCATGGTCAGTTATCACCGTTACG\n+TCGCCGCCTTCTACCCAGTGACGCCAATGTTGTAGTGACAGTAAAATCCCAAGTAATTCC\n+CTCTCTTGTGCTGAATAACGCTGCTGGGTATCGTTTAGTTTTCTTGAGAAGTAAGCTATG\n+GGATGCAGCACTGAACGATCGGAATTGTTGGAGCATCGCATGTACGGTTGGAGTATGACT\n+GCACCCACGAACTTTTGGGAAGCGTCAGTTTCCAATATGACGGGAAGTCTCCAATCAAAT\n+TTTTTTACAACTGGTGATGTTGTAAGCGCTAGTTTGATACTTTTCCATGATTGAATCGAT\n+TGGTCCGTCAGAATGACAGGGCAATTTTTAGGTCCAGAGGATTGGTCCGTGAGTGGCCCT\n+GCTAGTTGGGAGAAATTTTTAATGAGACAACGTAGGTAACCAGCTGCATTTACAAATCCT\n+CTTATTTCCGAGGTAGTTGTTGGCATAGGCCTGTCTCTGAT
CGCTGAAACTTTTTCGGGG\n+TCTGCTGCAATGCCATGTTTAGAAATATGGAAACCAAGGAAGATTCCCTCTGAAACAAAT\n+AGCCTGCACTTTGAAGCATTGAGGAGGAGACCATAGGAAGCGAATCTAGAAAAAATAAGT\n+ATCAACCGTGAGTGCAATTCTGCCATTGAGTCTGCCCAGACAGCTACATCGTCCAGGAGC\n+CCTGCGACAAATGGACGGAATGGCTCTAGAACTTCTTCCATAAAACACTGGAAAGTAGCT\n+GGTGCGTTGCATAGGCCGAAAGGCATAACTAGCCATTCATAGACACCGAAAGGAGTCACA\n+AATGCTGTTTTCTCTGAAGAGTCTTTGTGCATACGTATCTGCCAAAATCCGTTTTCCAGG\n+TCGAGGAATGCATAGAAGTTATGTCCTGCTGCGCGCTGAATTTCATCTTGAGCATTTGGT\n+AGGGGATGGGCGTGTTTCTTCGTAATCTTGTTAAGCTCTCTATAGTCAAC'..b'TGGGAGAGTATTAATTATAGGATTAAGAATTGGTTGGTCAATTAAGT\n+TGAGAAGTTGCAGTTTGCCATTTGATAAACATCGTAGCCCCAGTTCATGGAATAAAGTTT\n+GTCCAAGTGTCACATCTGCTGGGAATGTGCCATCTGGGACAACTCCTAGAGACACTGTGA\n+ATTCCTGAGTCCAGAATGATTCAAGGTGATTCGAGCAAAGAAACTGTGCAGTTAAAATTG\n+CATCCTTAGAAATTGTTGGGCCAGGGCCACCAATGCCTGACATTTTAGAAATAATAGATG\n+TATTATAAGAAGTCAAATATTTAGAGAAAGACTGTGAAACATAACATGCTGAAGAGCCGT\n+AGTCAAAAAAAACTGAAAATTGGACATAGGGAAAATGAGTTATTCGTGCAAAAAACATTC\n+TGTGTGGCTTGCCATATCCTTCCCGTAGAGGTTGTCCTTTTGGGGGTGGCTCAAATTTGC\n+CCTTTCCTACGGCCACAAAGTAGGTATTCGGGCTGTCCTATTCATAAATTTCTTCATTTC\n+GCATTCGATATAGAATGGCTTCAATTTCGTCATCGAGCTGCTCATCTTCTATTTGGCCTT\n+CAGGGCTTATTTGCTCTTGATCCAAATCTTCTGGCTCAACAAGGAAAGTAGAAGTGACAG\n+GCTTTGATGAACGATCGTGTGGTGTTACAGGTTTTACATATTTTTTATTTTTCATTGATT\n+GGTAAGTTTTCCTGAATTTATCGCTGAAACGCGACGAGGTGTATTTAGCGTTTGGTGCAT\n+ATTCCCTTTGTTCTTTTGATTCCACCGGCGCTGTTTTCATCAACCGGCAGTCTTTCGCCC\n+AGTGGCCTGTCTTTCCGCAATTGAAGCATTCGTAACGCTTGTCGTGAATTGGGTATGATT\n+TATCTGTTTCTGCAATGTCTTGTGACGCAAAGACGTCTTGCCCATACGACCTTGGATCAG\n+CTATGGATGCGTCAAATGCTGGAAGCTTTTCGCCTTTTGACGCTGAGATCGGAATTGGTG\n+CATTGGACTCGGAATGTTTGAATACATCATCCTCAAGTGTCCACCTTGTAATACCGCGTG\n+TGACTTGAACCACTCGATCCGCAAGCTCCGCGCTGGTGAGTTTGGGGAGATTATTGGAAA\n+TGATGGTCCAGGTACGTGGAAGTGAGAGTCGAATATGCTGCCTCAGTATATCTCTAGTCT\n+GAGGCCCTACCATGACATCCGAAGGTAGGCTAAAGAAAGCTTTCCGGAGTCGTCGCGCAA\n+ATTGTAATTGAGTCTCGTCGTTTCTTGCAGTTATTCGTGCAAATGTTGTCAAAGGCGACC\n+CGAGTACATCATTGTCGAACAGTATTTTAAGGATACAGTCTACAGCTCCTGCCCAATCGA\n+GGTGTTGACTGATGATTGTTCTTCGCGCCGAGATGAAATCTTCATCCATTTCCATTGCAA\n+TTCTGATAGCCCAAGCAGAATATGGAAGCATATGTTGAATCATCAATGATTGCATGTGGT\n+ATAATATATTCATTATGTTCTCATCAGCAAAGTGCAATTTCCATTGTGGTGGGAATACCG\n+CGCGTGTTTTTCTATTTGCCTCTTTTAAAGGGCCGTGCGGATTTTCAGCTTCGGAACGCG\n+AAAAGAAATCTGGGAAGTACCGCTTTATTTTTCGCTCTTGTTCAGTAACATTCAATGGTT\n+CGTTAACTGACTGGCGGTATAAATTAATAGTCCCATCTTTGTTTTGCTCATCATTGCTTA\n+GGTTGGGCGTTGTTTCATAGAATTGCGCCGAGGTAAAGGGTCTTTTCTTAGGCTGAAACA\n+CGATTGGCCTGGTTGCGGGTCGCGAGGCGCTGCCTGCTGAAAAGTCAAATAGCTCCTCCC\n+GGTATGGTTTTTTATTTTCAGATGGCCCTGCTTCCGAAAGTTGAAAGGGGACTGAATTTT\n+TCGAAGAAGTTTTTGTTTCCACGTCTTCATTTCTACTTACTAGTTTGGAAATTAAGTCGT\n+TCATTGCAGACATTTGTTTGACTAGAAGAGAAATTTGTGATTGGGTCTCTTCAAACCTTT\n+GATCGATTCTCTTGGAGAGATCTTTTGTCACTGCCTTTGCAGCTTCATTAGCTTCGGAAA\n+AATTTTCGTTGGCTTTGGTTTGCCCGGCAATGTGATCCTTCATCAAGCTTTCCAACTGAG\n+CATTATTCAGGCCGACCATGATGATTGAATAGGTGGTGAAATGCGTAAAAAATTTGGAGA\n+CGTCTTAAATTGGGATGACCTAGAATTGTGAGGTTCTCTCAACAAAAATATATAATTTTG\n+TGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCG\n+ACAAATCAACAATATAATGCGAGTAGAGCTTGAGACTAATAAAACGTGAATTTCATTCTA\n+TAATACAGGACCGCCAACTTATTTAAATATTCCCCCGCACCACGTGAGCCAATCAACAGC\n+ACACAATTTGCACGGTCGGGTTTCCATCAATTGGGATCCACA\n+>blumeria_Grouper_14770_3\n+GGTTAGACCGATACAGTAAGTCTCGTATGAAGTTGTGGAAGGAAGAATTGACTGGAAACA\n+AGTGTATTGAGATGTATGGTTCTGAGTAAAACTGAGCTGATAAGAGTGAGATTGCGTGTG\n+CGCATATCTTATCAACTGAGGTTTTTGACCAGTATTTATCAATCTCCATGGATCATAAGT\n+TATGATCCCTGTAGATGTAAATCTATATACGTACAGATATATGAGCCAGAAATATAAAGC\n+TAAATACATGTGTGTCGGTAATAGATCCGAGAAACCTTTAACCTGCATTTGATACATAAT\n+CATCTGATGGAATGCTAGTGTGAAATAAATGCCAATGTACGTTAAACTTTCCGTGAGGAA\n+TGTGCTGATGACAATG
ATATAGGAAGGTGAATTTGATAACCTAATTAGCCGGACTTACAA\n+ATGAGTTATCCTTGGCATTCCAGGATATACAATATTTCAACACCCCTCCTCACTTTGCAT\n+CGAGGATAGATACATAAGTGAAAAACTGTTAGTATATTTATTGTCATTTCTCATCTAATG\n+CTGGTATGGTAGACACGCCAGTGGTTGTACTAGTACCAAAAACTGAAGTGCATAAATCGG\n+TGTGCCTTGGTCTCGGTAAGCCTTTTGTACATATATCTGCGAGATTTTTAGCTGAAGGAA\n+CATGTAATAGTGTAAGCGAGCCGTCTTCAATTCTTTCCCTTGTAAAATGATAAGCGATGT\n+CGATATGTTTAGAAGCATCATTTAGTTTTGGATTGTGAGCAAGTTCAATGGCAGCCGTAT\n+TGTCAGTCGAGATTGCTGCTGGGATGCTGTCTCCAAGGAATCGGTGAAGTCCGCGCAAAA\n+TCCAGATATATTGTTTGGATGCGAGCGCTATTGCCATGTACTCAGCTTCGCAGGTTGAAT\n+G\n+>blumeria_Grouper_1717_12\n+ATTTCCAGTATCTTGCCCTGAAGGGCCAGTAAGTCCTGATCGAATAGTTTTCGAAGTCTC\n+TCGTCAGTGACTGGTTCCAACAGCTTGCAGAACCCAATGATGAAGCGCTCTGCTTGTGCG\n+GAGCGCGTCTGGAATATCCTGCCCTGTTCGGCGAGGAATTTGGTCGCGACGATTTGAGGC\n+GACGGAGTTTGTTGGTGCGGTGAGCTTCGATGACTCGCCTCGTAGATTGGCTCTACAAAT\n+TGTTGTGTGGGTTGTTGTGATGGCGGTTGTGGCCCGTTAAGGACCATTTCGACGTCTTGA\n+CGCGTCGAACATTGAGTTGGGAGGGGGGGCCAAGAGCCAGCCTCTGCGGAGGACTGGGGC\n+GGCATGAGTGAGTGAGGTTTGTGGCAGTGTGAGATCGTAGATGGTATACGGCGCGCTTGA\n+AGGG\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_aa.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_aa.fa Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,67 @@
+>GYPSY11_AG1p:classI:LTR_retrotransposon
+AATTTNVFKKVPYTCNFYGRSCINMQKLLEKIEILDRTYDQVRQLNKCYRLCALTTLRNN
+TKELYDEIQELLRKHESSIKDEILTTLVKKSRHLYYEINKCIKIHFERHPDSLNTTLSEN
+QFDITIETKSDKMADIIELIKITTSLISKYDGNEKDLKGVVSNLNVLKKIVKPENRETII
+ELVLGRLTGKARIVVGEAPKSIEDIVNKLQDRCSIKVTPEIVVSKMDNTKQTGTIEDFGS
+IIEKLTQQLEEAYIAEEITPEVARKKATKSGISALSYGLKDGETKIIMRSSKFETLHEAI
+EQAVKLELEDRTKKGKNEQTKILYSNATRNNRGYGNNYQGRNNYNRFTNNNNNRYQTQNP
+PRFPPARYGHNNNRNNNNYRNNNFNNTRNQHANRQNNSNRNQYVQSNQRNNSNLQNNRAP
+IHNTVTAEEQNNFLGQPQASENTQY
+>GYPSY10_AG2p:classI:LTR_retrotransposon
+AIRQPKFFRATRATKSSALHSGGDPEPTGGPALHKPIRATYTINVQKSNFIKTRLGLADS
+ICNLFVDSGSDISIIKGNKVRPTQTYKPKDIVDIISVGEGTITTHGSTITDVIVEGKKIQ
+QLFHIVPDNFKIPADGILGRDFFMDHRCIINYDTWIFSVKHDGEFLETPIEDTINDKTLI
+PPRCEVIRKLDKLKELDTDAVVCAEQLQEDVLVGNCIVNKNYPFIKIINTSNKAKLVNIS
+HIKTIPLNEFEIVKTNNHKNENRLAIIKELIRKENISEDTDTSFEQLLLSYNDIFHLPND
+HLTTNNFYKQDIKLEDKRPVYIPNYKQNHSQGPEIKKQIEKMLQDDVIEHSVSHYNSPIL
+LVPKKSSDEKKWRLVVDFRQLNKKLLPDKFPLPRIDSILDQLGRAKFFSTLDLMSGFHQI
+PLEESSKKYTAFSSTDGHYQFKRLPFGLNISPNSFQRMMTIAMTGLTPECAFVYVDDIVV
+VGASENHHLKNLEKVFDRLRHYNLKLNPEKSCFFKKEVTYLGHKITDKGILPDDSKYDSI
+KNYPIPQNADDVRRYVAFCNYYRKFIPNFALKAKPLNSLLKKNTKFEWTQECQEAFEYLK
+NTLISPQVLQYPDFSKPFILTTDASTMACGAVLAQEHGGKDMPICFASRTFTKGEANKAI
+IEKELAAIHWAIMHFKHYLYGKKFTVKTDHRPIVYLFGMKNPSSKLTRMRLDLEEFDFTV
+EFVKGKQNVVADALSRIKITSDEIKSINVITKSMSKPVTSDNVLGNTSESDQLKMFHALA
+YDEVKDLPKLESSVKKNEDTIELIGKILNKRKSKELLSVRDIHLKTDIGLQEPLLVKDFQ
+RRKEKSAIVQFIKNIEKKLVMKSITQLAISETDEIFKEVHPNELKQIANNHLKNIQILIY
+TKPQKITNEKTINDILDKVHNTPTGGHIGQYKMYKKIRREYSWNKMKKTIKEFLDKCLTC
+KLNKHQTKTAEPFVKTDTPNTPFEAVSIDTVGPFQKTNNNNRYAVTIQCNLTKHVTVIAI
+PNKEANTVARAVIEKIMLIYGTNIKKFRTDMGTEYKNEIFKNISEILKIEHKFSTPYHPQ
+TIGALERNHRCLNEYLRIFTNEHKDDWDDWINYYSFAYNTTPNLDHGYTPFELVFGRNER
+ITPNVKDTYSPLYNYDDYSKEFKYRLKIAHNRTRKHIEQVKFKLLKEQQNINQVNFEIGD
+QIALTNENRTKLDPVYKGPYKVKEINGPNMIIENTEGVVQKIHKNRAIKL
+>ORF1p_TCN4#2:classI:LTR_retrotransposon
+ALLSHQKIFFDTITPIEDQSKPRLRQDTPFPEMTTPPTDKESLQRKIKQIEDERKREREE
+HKREMAELQAQMRGLLKREAEKRDLPPHNPATSSLPAKSESPHPPIMYDSLPQPKYSFPV
+LSEHADAAAIHQHISKLRSIFTLMAAGYRYEPTAFEARKLAHAAQSLTGIRHLQFLEGDG
+LQCRTFDDWAKAFKSAMLPLGWVSETEKKIYSLQPQLLRLDKIPLAIMDFKQWFALLKDS
+DSPMSEDVATHWLRNHMTPGLLAELERDFGGENQLRQASLAELLKHMEICATRLLRYQSA
+FAPIAPTRTPIAAVSPEASSPIDIAQWLNPRLPLPKGGAGRRARAHLASEQRCFLCRQPG
+HKSPDCPKRKEPTAIAAVSTFDHEEAEFEQEEGEMFAEVLATHLAPELSVPPILLECRIG
+TNGSPFLALFDTGATVTLVDPSLITTHQLQTYPSEQRRVVSLAGGARGPALQRRVGVEVC
+IQNQVFALHGYVMPLHARYKVILGLDFIRSHGLLSGASRLGNLAPDLLGPVVASVTTADG
+YXDLRLAILHEYHDIFPDNIGEVANYPPICDANSKVRHHINLTPGAIPFKSASYRSPHMW
+RQQLIEEIQKHREAGRLRPSSSPWAAPAFLVKKENGKFRFICDYRGLNKVTTPDSTPVPN
+VDDILHRAACGKIFAKIDLSDAFFQTLMHEPDIEKTAITTELGLFEWVVMPQGACNSPAT
+QQRRLNEALRGLLGDSCEAYVDDIIVWAADAEDLDKRLRAVLAALRKSGLVCSPTKSEFF
+RHKVKFLGHVISANHIGPDPAKLRTIASWPLPQSVKELRSFLGLLQYLRKFIPSLATHTR
+TLTALLPPTPAAEKAWEKQQRALRKGQSPKDVLSWVWAWSSEATAAFEILKAKVAEISGL
+RPLDYAAALSGECPIYLFTDASNHGTGAWLGQGPDPDHAFPVAYDSRSLSAAERNYPTHE
+KELLAIVRALKLWRPLLLDVPIQVQTDHFTLKWFLQQRDLSERQKRWLGXLSRFDLRIDH
+ISGVNNFIADALSRLGGVDDEQDGMETAEVSVAVLGLLGQDTSTITKVAQGYAQDQVMGA
+WLQEEDRAPGVTLENVENGQGRSTSGVAMGRQAMCARYQX
+>Neptune2_Ap_1p:classI:LINE
+ANTRPVVDSENIDKYRNTAKKIGKYRNTASKVDEIPKLQKRGIRRNLLKDFDLFARRMRL
+KDIFSRERNKQHPFHVKSTWEPPVQQSVALETFLEEVEFELANSPSKRPKDNLSPGERRA
+LHNLLGDKTIIVKKADKGTTTVIMSREQKIKEGQILLNDLDNYRPLEKQMADETVEKIKQ
+LTTSMLTESHIDEMTVKWLSQTPNPPRIPEFYTLTKIHRPTLVGRPIISGCDGPTERISC
+FVDRLIQPIAQQQESYLKDSKDFINFIENTKLPKNTILASKDVTSLYTNIPQEEGITTVC
+KAYEDFYKNRLPIPTNFLRRMLCLILKENSFQFNKRHYLQTHGTAMGTKMAVAFANIFMA
+KIEKGIISKSIIKPLVWKRYIDDVFCLWDTNEDNIKEFVTRANHYHDNIKFTAEISDSEI
+AFLGHKSVQRREIQQRLPPSMCKRIINKQRPFNTRNFYSCHPPGVKKGFIKGEALHLLRT
+NSSHSTFNKNMQSFKTRLKNRGYPNEFLEKKGPX
+>ERV45_MD_I_2p:classI:LTR_retrotransposon
+ASSKHATGKLQTGPGPGPSKQWGNNCGAQSEKPIPSPTIRSLHRATPGSAGLDLCAASGL
+ILTPDSPPALIPTGVKGPLPEGTMGLIIGRSSVSLRGIVVTPGVIDSDFTGELQIIVQPP
+XKTVTIMKGQRIAQLLILPYVQTXNPILKTSRGQGGFGSSNWAFWVQEIKNSRPMKILNI
+SGKNIEGLLDTGADTSCIAGKHWPPTWPTKISPSNLIGIGKITNAAQSSQILTWSDGNLS
+GQFCPHVVPSLPVTIWGRDILSQMKVSLVTPEEDAQTPPNDEPLDFPEGPSPR
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_nt.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_nt.fa Tue Apr 30 14:33:21 2013 -0400
b
b'@@ -0,0 +1,501 @@\n+>Polinton-1_DY:classII:Polinton\n+aaaaaaaaaacaagttgggtgtcattggaaaggatttttcaagccctttccaatggtatg\n+cttttcacgattctgaaattaaaaatacgacttttcataaaaactctcaaaattacaatt\n+gcacttcttcgcgcacaagcaatcttggaagggggtgtacctgcgcgcacaagcagtcat\n+aaggatgatcgcaagggtgggggtgagatttctgttcaaggctatgcaccttactgtgtc\n+ctcaattttctgggaaggtaactggtggttggattattagttgagaaatatttagaaata\n+tcaaacaaactggttatagggtcgtgtatacgtatatataaaaaatcatataatcataaa\n+accgttaaaaaaccgtcgttcccgtatattccagctactttcagctaaaagctatggagc\n+cgttgagaagaaaatctaaacaatctgaagtcatagcgactttcagcttaaggctaccca\n+cgttttaaatgtatgccttgcaaaatgttacttataaagtaacaatgactagtagtcacc\n+aataatttcatgatccctgactgcggtcatccgaaataattccctacagagatatgtatt\n+tcagattatctgtgatctgtctccattttcattatgctaggagaccccataaaatcaaat\n+atttatgactctttgaacctattatcgagaaagtcaagcggctatttacctgcatccaaa\n+cccatagctcgccctccttttctgacttccccattgcagtctgtgcagtctgcagtctgt\n+gaaggctccttaaatcgagaaccaatcaggtggattctaagagtagacaccccctgtaga\n+tgtcaaactctctaatcctcgtgtataaaaatggatccccttaaattgctcaggatcagt\n+tcttataatacctcagcactgcattcttataatataatttcggtcgaacttgctcgggat\n+atcacgtctaaccgttttaattgtttgtgtggactcaaaccatttactgtgaaaaatttt\n+aaaaacgaattaaaatatataaataaaatatatttataaaaaattcaaaatggaatcatc\n+agtggcttgtgatttgtgcaaccaatgcatcgaaattagtgagtttgtctctcattacgc\n+gaagtgctcagaacccagtaaagcctgcgaggctttaatgttaaactctaacattagatg\n+tgacttatgctctaatctagtaaaaaattgtgaatttgatgatcactaccaacgctgtga\n+aggaccccgaaatgcatttagcattttaatgggtagaggaaaactggatgaagaagaccg\n+ccctatgacatcagctaagtctataaaacgattgaggatggaagaggataaggaaatatc\n+taaccccccaaaaagaaaacaaattgacttaagaagggaactcttatgtcctactcagag\n+aacgtcaacatattttgaccgatatacataaaaatgcgtcggccagagaagttaagaaca\n+atgcgaatatcatattgcttacatcagagacgacatttccagtaaaatcatatcgaattg\n+atagtccattatttaaaagagtatataagtgtatatggtacatatataaagcaaacagat\n+gaaggagttacagtggaaacaatgaagcactttaaaactgcctacaaatctgtatacata\n+aatgaagacattattgatgagctaaattcggccatagattgcttacttacattaagcagc\n+gaatttcaagaaaaagactcaggctgggctattaaaaattttaactattttgaaactact\n+ataataaaactggaaaacattcctgccagcggatacattaaagctcctcaaaaaataaga\n+gcccgtaatgcgctaataaacgtgcagaatgccgacgtattttgtttcaagtggtgtata\n+ctagcatttatagctaatcaaaaccatgaaaataggacttttacaaccccccagcaaagg\n+aaatattcgagagaaaggatgacaaagcctcaaagctataacattaatattaatgatgaa\n+actattgtgtatggcggaatgactttagatttttcgggaattaaatttccaatcgaaaaa\n+ataggagttcgacattttgagaaaaacaatgtcaactttagcataaatatttatgaaatt\n+gatgaagcaggtgaaaaaattgttggtcccacgattaaaacaaaagaaagaaaacggaac\n+cacattaatatattgggaatcgataatattacccaaaacattatgcattatgcgtatata\n+acaaatctttatacattatgctcttcacaattttctaaaggaaaatcgggaggatatttt\n+tgtgagaactgtcttcagtgttatcacgtgaaaagcactactcacaacaaactggaatgc\n+ggaaaagtgtcctcattttatccggaccctaatacgacaacatcgtttaaaggatatcac\n+aaaaaattatctcctccagtggtaatttatgcagacattgaggctgttcttgaaaattac\n+aaaacatgcttgaattcttctgcttcctcgtcaaccacgcaagtacaaaagcatacggca\n+tgtgcagtatctttttatgttgcccataaagattatcctaatcttaacgagctgtggacc\n+tacgaaggtaaacagaacaacatttttataaaattatatttaacttacatatttaaattt\n+tataattttataggcgttgattgcatacaaacattttgtaaaactcttaaagaaaaaact\n+ttaagcctgtattacaaatattgggttaactccaagaaaccgagggatgacactttcgat\n+gaacagcttcaaaagggaaattgttgcgcctgcgagaaagaaataaatgcgagcgacctg\n+gacaaattctttgatcaattttctggagaatacgtaggtcctatccatagaaattgtaag\n+cctaagtttagattaagtgaccctttctttcctgtagtgttccataatttatctaaatat\n+gatattcatttatttattactgaattagagggggacttaagtcccataccttgcaataaa\n+gagctctatatagcacttacgcaaactataaaaatcaatgctacaagcagatacaaaata\n+agatacatagattcagtccgatttttaaattcaagtttagataaactatcaagctatatg\n+gaagataaagattttaaaattctaagtacgaaatttcagggagaaaaatttaagcaaatg\n+aggaggaagggggtgtttccctatgactacttagatagttttgaaaagtttaatgatacc\n+caacttccaagcattgatagtttttataattctc
ttagtgaagaaaattgtagtatagat\n+aactttaatttcgcacagaaagtctgggagacatttaactgtcgaactattaaagactat\n+ttaaaactatatttagaaagcgatgttttaattttagcagatgtgtttgaaaattttagg\n+aaaatttgcaaaaaaatttataagttagatcccattaattatgttactgcgccatcaata\n+tcatgggatgctatgcttaagttcactaatgtaaatttagaactaataagtgatggtgat\n+atgtataattttttaaaaagagcaattcggggagggttgactcaatgcactcagcgcatt\n+tctatagcaaataataaatacttaaaaaacttcgatccaaaaaaaccgaacaatttttta\n+agttatattgatgccaataatttatatgggtgggctatgagtcaacccttaccactatca\n+gggtttcagtttttggataaagaagaagttgattcattcga'..b'tatac\n+gcaaaacctaggtcctgaacccatggacgtcgatcccacatcacgttctaaattcaggag\n+cgaacggagagctcgcagtagtcaacggttgaaccagaacaagagcagtcaaatgaccag\n+gagtatagggtaaagtcatcctacgaagcagcggaaatcgaggctgataacacgtccgat\n+tccgaatcatgtaattttttaggggacgtccctgctcccccaaataattcgttcgatagc\n+ggggcgagaaattaggttactgttggatacagaagcctccaaaaattacataaaacctct\n+aacagaattaaaacacttcaaaccggtggaaacaccatttgaagtcacatcaatccatgg\n+tcatacaaaaatagaacaaaagtgtctgatccatctattcaatgttaagtcatacttctt\n+cttgttaaacaacctgaacgaatatgaaggaattgttagactggatttgccaaaaaaggt\n+caatgcaaaaattgatctaacaaaaaacatcatcgagcatgatcatggtacggagcaaat\n+tttttactcaaaatgcaggaatggtaactttattaacatcgatgacgtggacgtgccgaa\n+agccataaacgaaaatttcaaaaagatgatcaaaaacagatcaaaagcctttgcggaccc\n+aaacgattccctccccttcaaaatgaatacggtcgccacgatccgcactgacggggaacc\n+cgtatattcaaaactttacccatatccgatgggtgtagccgatttcgtcaatacggaggt\n+taagcaacatctagcagacggaataataaggccatcccggtcgccttacaataacccaat\n+ttgggttgttgataagaagggttttggcggagaaggtcataggaagaaacgtctcgttat\n+taacttcaggaaactgaatcaaaaaacaattgatgacaagtatcctataccattcatatc\n+gaccatactgtcgaactttggaaaagctcagtacttcacgactcttgatctgaagtcggg\n+cttccatcaaattgagccctcggagcgctttcgagaaaagacagctttttagtagtatga\n+attctgcagacttccctttgctttaaaaaatgcgcctagtattttccaatagacgatgtt\n+ctgagagaacacatcggcaaaactcgctatgtctacgtcgatgacgtaatatttttctcc\n+caaacaatggagagtcatgccaacgatataaacacggttctgaaaactttgtgcgatgca\n+ggtatgagagtgtctgtagaaaaatctatgttctttaaagagaacgtagaatatttggga\n+ttcatagtgtcccgagggggaattaaaacttcacccgaaagggttaaggctataaaacaa\n+tttaaacctccatcgacattgttaagtctcaggtcatttctgggattggccagttattat\n+ataatagatgtttcataaaggggctttttagcatcgcaagacctctgacgaatattctaa\n+aaggtgacaacggaaaaattggtgctaccactcaaagaaagtcaaactggaacgagcagc\n+gaaaatcattcgaaaaactaagaaacaccctggagtctgaggatgtcattttggcatacc\n+cagattccactcagccatttgacttgaaaactgacgcctctggaagcggcctaggggctg\n+ttcttttacagatttaatcggcagcggcagaaatcaccgcaaattgcagaacgtgttccg\n+aagcaaaacaccaatcgtcacccagtgcaacaaaccatagcggaaacatcaattcctggt\n+tacactggggaaagtatccacatagatatattttggactgatcaaaagcattttctaacc\n+tgtatcgacaagttttcaaaatccgctatagtccaaccaatcgattcaagagcaatcgta\n+gatatcaaaactccgatactacaactaataaatctgttcaccaaaataaaaacagtttac\n+tgcgacaatgaaagatctatcaattcacaaaccatacgaaccatcctagaaaataggtat\n+ggtatacgggtctcaaatgcgcacccgttgcacagcacatctaatggccaagttgagaga\n+tttcatagcaccctaggggaaatcgcacggtgcatcaagatagatcaaaacataaccgag\n+acgagcgaccttattctattcggaacaatagaataggacagaactgtccactcggttaca\n+attaaaaagggtcatgaaatagttcacgctattccaccagattttacgagcaccataaga\n+gacaaaatcaaagaggcccaagagaaaacacttaggtactcaaatgcacacaaatgcaat\n+aaacagtaccaaataggcgaaaaaatctggttaaaaaaccaacagacgcctgggtaccaa\n+attaacgccactctgctcagaagaggtcatcgaggctgatctcggcacgacagtgcttat\n+tatggagcaacgaacattacatcgtacgagacttacgcggacgagacgaaacacgcgatg\n+gatttcttcgagaaggagcacatgagacgggtacttgaaacggactaggaacgaatagag\n+actcttctggacacactaaaggtacgtcacagacatgcccgtagtcttaatttctcttaa\n+ttttctcttaatttctcttaatttgctttgaaagcaatagagtggacacctgactctgac\n+ggctgagaccaggtgaggttttgacaggaacagctaacggactcggtaaacggacagata\n+gatttaaacaacaaaatacaattgcaattaaacacaatgacctcgtccatgaattctatt\n+ttaaaatcggacgacttagacacagaacatttgtacgagacgattttggcaaaaaaccgt\n+attgtaa
ttcaagaacttgaaaatttaatacttgcaatcaccctttccaaattaaacgta\n+ataagtccaataatcttgaatgacgttgacgtaagggagattgaaaaaacttttcaaaat\n+cgagacctattgtactttttaataaaatttccgaagcctttgttaacttgtagaaaaata\n+agaatattcccggtacagcatgaaaatagaatcttagatttcgaggacggtagcacggtc\n+gcggattgcgggacggaaaccttcgccgtcaaggactgcaatgtatcaccaccttctgca\n+ggagatcgaaagcgccaacctgcgcacaacaactcatctctggcatggtcgcccactgca\n+acacccagcctggacacttggacccactcaccatgatcgactagggaatgctcatcacga\n+acgatgtaacgataaatatcaccgacgaaaagggaataagccggataatatcaggaactt\n+acccggtatgatataccgaaaaaattaaaataaacggcacccttttacgttaacaatatc\n+ggaacatcaaagaagaaagccgcagtttcagctatggcccaagtaaacgttctgagacat\n+atagagcgccttactctgtcctggaaagaaagatgatgatctttcttatctaccgtttga\n+aagccaaaccaactaagaccattgaatccgaggacgaattcatcttaagacaaggagg\n+>blumeria_Grouper_14770_3\n+GGTTAGACCGATACAGTAAGTCTCGTATGAAGTTGTGGAAGGAAGAATTGACTGGAAACA\n+AGTGTATTGAGATGTATGGTTCTGAGTAAAACTGAGCTGATAAGAGTGAGATTGCGTGTG\n+CGCATATCTTATCAACTGAGGTTTTTGACCAGTATTTATCAATCTCCATGGATCATAAGT\n+TATGATCCCTGTAGATGTAAATCTATATACGTACAGATATATGAGCCAGAAATATAAAGC\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmbank_test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmbank_test Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,19248 @@\n+HMMER2.0  [2.3.2]\n+NAME  DUF234\n+ACC   PF03008.6\n+DESC  Archaea bacterial proteins of unknown function\n+LENG  100\n+ALPH  Amino\n+RF    no\n+CS    no\n+MAP   yes\n+COM   hmmbuild -f -F HMM_fs.ann SEED.ann\n+COM   hmmcalibrate --seed 0 HMM_fs.ann\n+NSEQ  19\n+DATE  Tue Apr 22 17:16:17 2008\n+CKSUM 5535\n+GA    15.0000 15.0000;\n+TC    15.4000 15.4000;\n+NC    14.9000 14.9000;\n+XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n+NULT      -4  -8455\n+NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n+EVD    -8.994637   0.724738\n+HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n+         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n+         -138      *  -3458\n+     1  -4805  -3913  -5457  -5685   -185  -5282  -1653  -3657  -5166  -3035   1529  -4003  -5160  -4085  -4629  -4531  -4683  -3846   5759   2382     1\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -13  -8474  -9516   -894  -1115   -701  -1378  -1138  -7629 \n+     2  -4703  -3842  -5413  -5626   4193  -5234   1199  -3686  -5187  -3174  -3160  -3955  -5117  -4054  -4635  -4458  -4581     97   -875   1388     2\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -13  -8474  -9516   -894  -1115   -701  -1378  -7767  -7622 \n+     3  -2371  -3264  -2698  -1965  -4162  -3111  -1597  -3684   -469  -3509  -2752  -2006  -3258    923   3603    330     86  -3304  -3493  -3205     3\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -13  -8474  -9516   -894  -1115   -701  -1378  -7767  -7615 \n+     4  -2617  -2306  -4759  -4273   3096  -4141  -2245    160  -3894    547   -831  -3607  -4045  -3290  -3615  -3271  -2541    365  -1682   3149     4\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -13  -8474  -9516   -894  -1115   -701  -1378  -7767  -7607 \n+     5  -2826  -2347  -5502  -5177  -3043  -5232  -5201   2131  -5110  -1855  -1760  -4940  -5049  -5047  -5246  -4584      0   3252  -4681  -4169     5\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -14  -8475  -9517   -894  -1115   -701  -1378  -7767  -7601 \n+     6   -257  -1279  -2599  -2020    892  -2638   1046   -810  -1792    542   -474    -22   -338  -1580  -1919   -385  -1175   -710  -1648   3385     6\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -14  -8474  -9516   -894  -1115   -701  -1378  -7767  -7592 \n+     7  -3058  -3845  -2245  -2210  -4348  -3380   1529  -4491    177  -4262  -3625  -2331   3930  -1887  -1298  -2975  -3059  -4092  -3954  -3528     7\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -14  -8474  -9516   -894  -1115   -701  
-1378  -7767  -7585 \n+     8  -1984  -3171  -1772  -1234  -3094  -2795   1043  -3212   -431  -3092  -2294   3505  -2878    756    909  -1867  -1870  -2866  -2958   1283     8\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -14  -8474  -9516   -894  -1115   -701  -1378  -7767  -7577 \n+     9   -240  -1075  -3406  -2780    332  -2753  -1615   1091    271    498   2206  -2344  -2804'..b'140  -5079  -4396     8\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -55 -11429 -12472   -894  -1115   -701  -1378  -5107  -4757 \n+     9   -712  -4440  -1806   1466   -633  -1548  -1315   -160   -363  -1082   1001   -933   2065  -1793   -142   -432    819   -302  -4473   -746     9\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -57 -11431 -12474   -894  -1115   -701  -1378  -5107  -4702 \n+    10  -3621  -7917   -257   -766  -2799   3663  -5030  -8037  -3067  -7859  -7302  -1529  -6174  -4722  -2416  -5451  -5952  -7471  -8058  -6924    10\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -60 -11435 -12477   -894  -1115   -701  -1378  -5107  -4647 \n+    11  -9856  -8225  -9114  -9499  -9330  -8179   5478 -10798  -9690 -10071 -10090  -9535  -8612  -9585  -9118 -10419  -9988 -10522  -8262  -9210    11\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -62 -11436 -12478   -894  -1115   -701  -1378  -5107  -4587 \n+    12  -3703  -3532  -6027  -5393   1774  -5248  -2191   1429   -354    717   1833  -2455  -5298    579    398  -2082   -829    470   2225   1129    12\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -65 -11439 -12482   -894  -1115   -701  -1378  -5107  -4527 \n+    13   2405  -4466  -3571  -1766  -2533  -1812  -3231  -1681    967  -2690   1010  -3283  -4633    469   -140   1477  -3419   -962  -4758  -4194    13\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -68 -11442 -12484   -894  -1115   -701  -1378  -5107  -4462 \n+    14    466  -4892  -1785  -2120  -1083  -4397  -1037  -1763   1748  -4907  -3981     48  -1842  -1695   2893    306   -581  -1839  -5076  -1979    14\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -71 -11445 -12487   -894  -1115   -701  -1378  -5107  -4395 \n+    15  -1558  -4443   2798   1932  -1194  -2341  -2946  -2873   -635  -4816  -1016    749  -4357   1472  -1620  -1163  -2311  -2885  -4430  -2016    15\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -75 -11449 -12492   -894  -1115   -701  -1378  -5107  -4326 \n+    16  -8641   5869  -9075  -9458  -2879  -7931  -8315 -10103  -9573  -9532  -9450  -9114  -8419  -9344  -9000  -9002  -8953  -9678  -7851  -7886    16\n+  
   -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -79 -11453 -12495   -894  -1115   -701  -1378  -5107  -4252 \n+    17  -1345  -4887   -463  -2725  -1179  -3060   -573  -4952    927  -2822  -1697  -2003   2836   -351   1498   -851   1103  -4507    661   -796    17\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -83 -11457 -12499   -894  -1115   -701  -1378  -5107  -4174 \n+    18    172  -4895  -1008   1064  -5216  -1138    569  -1324   1288  -2109   -325   1475  -4489   1525  -1288    838    -27  -1150  -5078  -4395    18\n+     -      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      * \n+     -      *      *      *      *      *      *      *  -5107      0 \n+//\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmpfamOut.align.clean_match.path
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmpfamOut.align.clean_match.path Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,16 @@
+1 blumeria_Grouper_28830_3 2424 2882 RVT_1 242 23 4.1e-39 132 0
+10 blumeria_Grouper_4152_12:NoCat 428 478 DUF1602 23 39 0.0013 5 0
+11 blumeria_Grouper_4152_12:NoCat 382 407 V-ATPase_G 2 10 0.017 2 0
+12 blumeria_Grouper_4152_12:NoCat 136 228 XhoI 202 172 4.3e-05 9 0
+13 blumeria_Grouper_4152_12:NoCat 479 497 DUF1798 40 35 0.011 1 0
+14 blumeria_Grouper_4152_12:NoCat 294 365 DUF881 237 214 0.0052 4 0
+15 blumeria_Grouper_4152_12:NoCat 84 131 LBP_BPI_CETP 209 191 0.012 1 0
+16 blumeria_Grouper_4152_12:NoCat 230 247 SOCS_box 6 1 0.031 2 0
+2 blumeria_Grouper_28830_3 786 1229 rve 169 1 1.4e-23 77 0
+3 blumeria_Grouper_28830_3 4061 4108 zf-CCHC 18 3 1.7e-09 29 0
+4 blumeria_Grouper_590_20:NoCat 271 324 DUF234 5 22 0.0047 3 0
+5 blumeria_Grouper_590_20:NoCat 331 357 DUF1414 1 9 0.02 2 0
+6 blumeria_Grouper_590_20:NoCat 90 113 CPW_WPC 1 9 0.024 1 0
+7 blumeria_Grouper_590_20:NoCat 119 148 DUF46 182 173 0.00034 6 0
+8 blumeria_Grouper_4152_12:NoCat 337 381 Peptidase_S29 1 15 0.0034 4 0
+9 blumeria_Grouper_4152_12:NoCat 62 79 Toxin_18 50 55 0.014 4 0
b
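Note: hmmpfamOut.align.clean_match.path above is matcher output in the REPET "path" format. A small reader sketch follows; the column meaning (path id, query name, query start/end, profile name, profile start/end, E-value, score, identity) is inferred from this test file itself and is hedged, not taken from the matcher documentation.

# Hedged reader for the 10-column path records shown above.
def read_path(path_file):
    hits = []
    for line in open(path_file):
        f = line.split()
        if len(f) != 10:
            continue
        hits.append({"path_id": int(f[0]),
                     "query": f[1], "q_start": int(f[2]), "q_end": int(f[3]),
                     "subject": f[4], "s_start": int(f[5]), "s_end": int(f[6]),
                     "evalue": float(f[7]), "score": float(f[8]),
                     "identity": float(f[9])})
    return hits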
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestAcceptanceHmmpfamAndParse2alignInparallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestAcceptanceHmmpfamAndParse2alignInparallel.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,94 @@
+import os
+import sys
+import unittest
+import pyRepet.seq.fastaDB
+from sets import Set
+import ConfigParser 
+from pyRepet.launcher.programLauncher import *
+from pyRepet.launcher.Launcher import HmmpfamAndParse2alignLauncher
+from pyRepet.sql.RepetJobMySQL import *
+from pyRepet.util.file.FileUtils import *
+import commons.pyRepetUnit.dbSplit.LauncherDbSplit
+import commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align
+
+CONFIG_FILE = "datas/configTestAcceptanceHmmpfamAndParse2alignLauncherInparallel.cfg"
+CURRENT_DIR = os.getcwd()
+
+class TestAcceptanceHmmpfamLauncherAndParse2alignInparallel(unittest.TestCase):
+
+    def setUp(self):
+        self._config = ConfigParser.ConfigParser()
+        self._config.readfp( open(CONFIG_FILE) )
+        self._batchFilesList = []
+        self._batchFilesBatchesList = []
+        self._inputFile = self._config.get("dbSplit_config", "input_file")
+        self._programLauncherInstance = programLauncher()   
+        self._outputFileNotInparallel = self._config.get("hmmer_config", "output_file")
+        self._hmmpfamOutput2align = commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align.HmmpfamOutput2align()
+        os.chdir(CURRENT_DIR)
+
+    def _launchHmmpfamAndParseNotInParallel(self):
+        self._programLauncherInstance.reset(self._inputFile)
+        self._programLauncherInstance.setOutputFileName(self._outputFileNotInparallel)
+        self._programLauncherInstance.launchHmmpfam(evalFilter=self._config.get("profil_search", "Evalue"), inputFormat=self._config.get("profil_search", "InputFormat"), profilDatabank=self._config.get("profil_search", "ProfilDatabank"))
+        self._hmmpfamOutput2align.setInputFile(self._outputFileNotInparallel)
+        self._outputFileParsingNotInparallel = self._config.get("hmmpfam2align_config", "output_file")
+        self._hmmpfamOutput2align.setOutputFile(self._outputFileParsingNotInparallel)
+        self._hmmpfamOutput2align.run()
+
+
+    def _countNblinesInAllResultsFilesInParallel(self, fileUtils):
+        result_dir = self._config.get("hmmer_config", "result_dir")
+        dirFiles = fileUtils.listFilesInDir(result_dir)
+        listPath = []
+        for file in dirFiles:
+            listPath.append(file)
+        
+        nbLinesInAlignFilesInparallel = fileUtils.countLinesInFiles(listPath)
+        return nbLinesInAlignFilesInparallel
+
+
+    def _launchHmmpfamAndParseInParallel(self):
+        user = self._config.get("db_config", "user")
+        host = self._config.get("db_config", "host")
+        passwd = self._config.get("db_config", "passwd")
+        dbName = self._config.get("db_config", "dbname")
+        jobDb = RepetJob(user, host, passwd, dbName)
+        params = {"param": "--informat " + self._config.get("profil_search", "InputFormat") + " -E " + self._config.get("profil_search", "Evalue"), "profilDB": self._config.get("profil_search", "ProfilDatabank"), "outputDir": self._config.get("hmmer_config", "result_dir"), "query": self._config.get("hmmer_config", "query_dir"), "job_table": "jobs", "queue": self._config.get("sge_config", "queue"), "groupid": self._config.get("sge_config", "groupid"), "tmpDir": self._config.get("hmmer_config", "tmp_dir"), "scriptToLaunch": self._config.get("parse_config", "scriptToLaunch"), "cDir" : "."}
+        self.hmmpfamAndParse2align = HmmpfamAndParse2alignLauncher(jobDb, params)
+        self.hmmpfamAndParse2align.run()
+
+
+    def _splitInputFile(self, dbSplit):
+        dbSplit.setInFileName(self._inputFile)
+        dbSplit.setIsNewDir(True)
+        dbSplit.setNbSequences(3)
+        dbSplit.run()
+
+    def testAcceptanceHmmpfamAndParse2alignHasRunInparallel (self):
+        
+        dbSplit = commons.pyRepetUnit.dbSplit.LauncherDbSplit.LauncherDbSplit()
+        self._splitInputFile(dbSplit)         
+        self._launchHmmpfamAndParseInParallel()
+        fileUtils = FileUtils()
+        nbLinesInAlignFilesInparallel = self._countNblinesInAllResultsFilesInParallel(fileUtils)
+        dbSplit.clean()
+        self._launchHmmpfamAndParseNotInParallel()
+        nbLinesInAlignFileNotInparallel = fileUtils.countLinesInAFile(self._outputFileParsingNotInparallel)
+        self.assertEqual(nbLinesInAlignFilesInparallel,nbLinesInAlignFileNotInparallel)
+        self._clean()
+
+    def _clean(self):
+        
+        dirToClean = self._config.get( "hmmer_config", "tmp_dir" )
+        os.system("rm  " + dirToClean + "/*")
+        dirToClean = self._config.get( "hmmer_config", "result_dir" )
+        os.system("rm  " + dirToClean + "/*")
+        os.system("rm  HmmpfamJob*.e*")
+        os.system("rm  HmmpfamJob*.o*")
+        os.remove(self._outputFileNotInparallel)
+        os.remove(self._outputFileParsingNotInparallel)
+
+
+if __name__ == "__main__":
+        unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamAndParse2alignLauncherInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamAndParse2alignLauncherInParallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,56 @@
+import os
+import sys
+import unittest
+from sets import Set
+import ConfigParser 
+from pyRepet.launcher.Launcher import HmmpfamAndParse2alignLauncher
+from pyRepet.sql.RepetJobMySQL import *
+from pyRepet.util.file.FileUtils import *
+
+CONFIG_FILE = "datas/configTestFunctionalHmmpfamLauncherInparallel.cfg"
+CURRENT_DIR = os.getcwd()
+
+class TestFunctionalHmmpfamLauncherAndParse2alignInparallel(unittest.TestCase):
+    
+    def setUp(self):
+        self._config = ConfigParser.ConfigParser()
+        self._config.readfp( open(CONFIG_FILE) )
+        os.chdir(CURRENT_DIR)
+
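+    # Functional check: after the launcher has run, the result directory must
+    # contain a parsed batch_<n>.fa.hmmpfamOut.align file for each input batch.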
+    def testHmmpfamAndParse2alignHasRunInparallel (self):
+        user = self._config.get( "db_config","user" )
+        host = self._config.get( "db_config","host" )
+        passwd = self._config.get( "db_config","passwd" )
+        dbName = self._config.get( "db_config","dbname" )
+        jobDb = RepetJob(user , host , passwd , dbName)
+        params = {"param":"--informat FASTA -E 10", 
+                  "profilDB": self._config.get("profil_search", "ProfilDatabank"), 
+                  "outputDir": self._config.get("hmmer_config", "result_dir"),
+                  "query": self._config.get("hmmer_config", "query_dir"),
+                  "job_table":"jobs", 
+                  "queue" : self._config.get("sge_config", "queue"), 
+                  "groupid" : self._config.get("sge_config", "groupid"),
+                  "tmpDir": self._config.get("hmmer_config", "tmp_dir"),
+                  "scriptToLaunch": self._config.get("parse_config", "scriptToLaunch"),
+                  "cDir" : "."}
+        self.hmmpfamAndParse2align = HmmpfamAndParse2alignLauncher(jobDb, params)
+        self.hmmpfamAndParse2align.run()
+        fileUtils = FileUtils()
+        dirFiles = fileUtils.listDir(self._config.get("hmmer_config", "result_dir"))
+        dirSet = Set(dirFiles)
+        alignFileSet = Set([self._config.get("hmmer_config", "result_dir") + "/" + "batch_1.fa.hmmpfamOut.align", self._config.get("hmmer_config", "result_dir") + "/" + "batch_2.fa.hmmpfamOut.align"])
+        self.assertTrue( alignFileSet.issubset(dirSet))
+        self._clean()
+
+    def _clean(self):
+        
+        dirToClean = self._config.get( "hmmer_config", "tmp_dir" )
+        os.system("rm  " + dirToClean + "/*")
+        dirToClean = self._config.get( "hmmer_config", "result_dir" )
+        os.system("rm  " + dirToClean + "/*")
+        os.system("rm  HmmpfamJob*.e*")
+        os.system("rm  HmmpfamJob*.o*")
+
+
+if __name__ == "__main__":
+        unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamClusterComponent.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamClusterComponent.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,38 @@
+import os
+import sys
+import unittest
+from sets import Set
+import ConfigParser 
+from pyRepet.sql.RepetJobMySQL import *
+from pyRepet.util.file.FileUtils import *
+import commons.pyRepetUnit.hmmer.HmmpfamClusterComponent
+
+CONFIG_FILE = "datas/configTestFunctionalHmmpfamLauncherInparallel.cfg"
+CURRENT_DIR = os.getcwd()
+
+class TestFunctionalHmmpfamClusterComponent(unittest.TestCase):
+    
+    def setUp(self):
+        os.chdir(CURRENT_DIR)
+        self._config = ConfigParser.ConfigParser()
+        self._config.readfp( open(CONFIG_FILE) )
+        self._hmmer = commons.pyRepetUnit.hmmer.HmmpfamClusterComponent.HmmpfamClusterComponent()
+
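+    # Functional check on the cluster component: the result directory must contain
+    # the raw batch_<n>.fa.hmmpfamOut file for each input batch (no .align parsing
+    # step at this level).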
+    def testHmmpfamHasRunInparallel (self):
+        user = self._config.get( "db_config","user" )
+        host = self._config.get( "db_config","host" )
+        passwd = self._config.get( "db_config","passwd" )
+        dbName = self._config.get( "db_config","dbname" )
+        jobDb = RepetJob(user , host , passwd , dbName)
+        self._hmmer.setJobDb(jobDb)
+        self._hmmer.loadConfig(self._config)
+        self._hmmer.run()
+        fileUtils = FileUtils()
+        dirFiles = fileUtils.listDir(self._config.get("hmmer_config", "result_dir"))
+        dirSet = Set(dirFiles)
+        alignFileSet = Set([self._config.get("hmmer_config", "result_dir") + "/" + "batch_1.fa.hmmpfamOut", self._config.get("hmmer_config", "result_dir") + "/" + "batch_2.fa.hmmpfamOut"])
+        self.assertTrue(alignFileSet.issubset(dirSet))
+        self._hmmer.clean()
+        
+if __name__ == "__main__":
+        unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestHmmpfamAndParse2alignLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestHmmpfamAndParse2alignLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,87 @@
+import unittest
+import os
+import sys
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "*** Error: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+from pyRepet.launcher.Launcher import HmmpfamAndParse2alignLauncher
+from pyRepet.sql.RepetJobMySQL import *
+
+class TestHmmpfamAndParse2alignLauncher(unittest.TestCase):
+    
+    def testConstructor(self):
+        
+        params = {
+                  "query" : "toto",
+                  "job_table":"fakeTable", 
+                  "queue" : "fakeQueue", 
+                  "groupid" : "fakeId",
+                  "tmpDir" : "fakeDir",
+                  "cDir" : "."
+        }
+      
+        hmmpfam = HmmpfamAndParse2alignLauncher("toto", params)
+        
+        self.assertEquals("toto", hmmpfam.query)
+        self.assertEquals("fakeTable", hmmpfam.job_table)
+        self.assertEquals("fakeQueue", hmmpfam.queue)
+        self.assertEquals("fakeId", hmmpfam.groupid)
+        self.assertEquals("fakeDir", hmmpfam.tmpdir)
+       
+       
+    def testCmd_start(self):
+        
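+        # cmd_start is expected to build the hmmpfam command line: the options from
+        # "param", the profile databank, the input file (basename only) and a
+        # redirection of stdout to <file>.hmmpfamOut in the output directory.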
+        fullInFileName = "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa"
+        expectedCmdStart = "hmmpfam --informat FASTA -E 10 /workspace/repet_pipe/pyRepet/launcher/datas/hmmpfam/myhmms test_input_aa.fa > /workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa.hmmpfamOut"
+
+        user = "oinizan"
+        host = "pisano"
+        passwd = "oinizan"
+        dbName = "repet_oinizan"
+        jobDb = RepetJob(user , host , passwd , dbName)
+        params = {"param":"--informat FASTA -E 10",
+                  "query" : "toto",
+                  "job_table":"fake", 
+                  "queue" : "fake", 
+                  "groupid" : "fake",
+                  "profilDB": "/workspace/repet_pipe/pyRepet/launcher/datas/hmmpfam/myhmms",
+                  "tmpDir" : "fakeDir", 
+                  "outputDir": "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas",
+                  "scriptToLaunch": "/workspace/repet_pipe/repet_base/HmmpfamOutput2align_script.py", 
+                  "cDir" : "."
+        }
+        self.hmmpfam = HmmpfamAndParse2alignLauncher(jobDb, params)
+        self.assertEquals(expectedCmdStart, self.hmmpfam.cmd_start ( fullInFileName ))    
+        
+        
+    def testCmd_finish(self):  
+        
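+        # cmd_finish is expected to return the python statement appended to the job
+        # script: an os.system() call running the parsing script on the .hmmpfamOut
+        # file to produce the corresponding .align file.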
+        fullInFileName = "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa"
+        user = "oinizan"
+        host = "pisano"
+        passwd = "oinizan"
+        dbName = "repet_oinizan"
+        jobDb = RepetJob(user , host , passwd , dbName)
+        params = {"param":"--informat FASTA -E 10",
+                  "query" : "toto",
+                  "job_table":"fake", 
+                  "queue" : "fake", 
+                  "groupid" : "fake",
+                  "profilDB": "/workspace/repet_pipe/pyRepet/launcher/datas/hmmpfam/myhmms",
+                  "tmpDir" : "fakeDir", 
+                  "outputDir": "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas",
+                  "scriptToLaunch": "/workspace/repet_pipe/repet_base/HmmpfamOutput2align_script.py",
+                  "cDir" : "."
+        }
+        self.hmmpfamAndParse2align = HmmpfamAndParse2alignLauncher(jobDb, params)
+        expectedCmdFinish = "os.system( \"/workspace/repet_pipe/repet_base/HmmpfamOutput2align_script.py -i /workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa.hmmpfamOut -o /workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa.hmmpfamOut.align -c\" )\n"
+        self.assertEquals(expectedCmdFinish, self.hmmpfamAndParse2align.cmd_finish ( fullInFileName ))
+
+
+
+if __name__ == "__main__":                 
+ unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestHmmpfamClusterComponent.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestHmmpfamClusterComponent.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,76 @@
+import os
+import unittest
+import ConfigParser
+import commons.pyRepetUnit.hmmer.HmmpfamClusterComponent
+from pyRepet.sql.RepetJobMySQL import *
+from pyRepet.util.file.FileUtils import *
+
+class TestHmmpfamClusterComponent(unittest.TestCase):
+
+     def setUp(self):
+         
+        self.component = commons.pyRepetUnit.hmmer.HmmpfamClusterComponent.HmmpfamClusterComponent()
+        
+     def testLoadConfig(self):
+        config = open("tmpConfig.cfg", "w")
+        config.write("[profil_search]\n")
+        config.write("ProfilDatabank: /repet_pipe/commons/pyRepetUnit/hmmer/datas/myhmms\n")
+        config.write("[hmmer_config]\n")
+        config.write("result_dir: /repet_results\n")
+        config.write("query_dir: /repet_datas/hmmer_in_parallel\n")
+        config.write("tmp_dir: /repet_tmp\n")
+        config.write("[sge_config]\n")
+        config.write("queue: main.q\n")
+        config.write("groupid: fake\n") 
+        config.close()
+        config = ConfigParser.ConfigParser()
+        config.readfp( open("tmpConfig.cfg") )
+        self.component.loadConfig(config)
+        self.assertEquals( "/repet_pipe/commons/pyRepetUnit/hmmer/datas/myhmms", self.component.getProfilDatabank() )
+        self.assertEquals( "/repet_results", self.component.getResultDir() )
+        self.assertEquals( "/repet_datas/hmmer_in_parallel", self.component.getQueryDir() )
+        self.assertEquals( "/repet_tmp", self.component.getTmpDir() )
+        self.assertEquals( "main.q", self.component.getSGEQueue() )
+        self.assertEquals( "fake", self.component.getSGEGroupId() )
+        os.system("rm tmpConfig.cfg") 
+        
+     def testClean (self):
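+        # clean() must delete the generated job wrapper scripts from the tmp
+        # directory and the batch_*.fa.hmmpfamOut files from the result directory.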
+        fileUtils = FileUtils() 
+        if (fileUtils.isRessourceExists("dummyTmpDir")):
+             os.system("rm -r dummyTmpDir")
+         
+        if (fileUtils.isRessourceExists("dummyResultDir")):
+             os.system("rm -r dummyResultDir") 
+         
+        os.mkdir("dummyTmpDir")
+        os.system("touch dummyTmpDir/srptJob1_fake_HmmpfamJob1_2009-3-11_12394.py")
+        os.system("touch dummyTmpDir/srptJob2_fake_HmmpfamJob1_2009-3-11_12394.py")
+        os.system("touch dummyTmpDir/srptJob3_fake_HmmpfamJob1_2009-3-11_12394.py")
+        os.system("touch dummyTmpDir/srptJob4_fake_HmmpfamJob1_2009-3-11_12394.py")
+        
+        os.mkdir("dummyResultDir")
+        os.system("touch dummyResultDir/batch_1.fa.hmmpfamOut")
+        os.system("touch dummyResultDir/batch_2.fa.hmmpfamOut")
+        os.system("touch dummyResultDir/batch_3.fa.hmmpfamOut")
+        os.system("touch dummyResultDir/batch_4.fa.hmmpfamOut")
+
+        self.component.setTmpDir("dummyTmpDir")
+        self.component.setResultDir("dummyResultDir")
+        
+        self.component.clean()
+        
+        self.assertFalse(fileUtils.isRessourceExists("dummyTmpDir/srptJob1_fake_HmmpfamJob1_2009-3-11_12394.py"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyTmpDir/srptJob1_fake_HmmpfamJob2_2009-3-11_12394.py"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyTmpDir/srptJob1_fake_HmmpfamJob3_2009-3-11_12394.py"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyTmpDir/srptJob1_fake_HmmpfamJob4_2009-3-11_12394.py"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyResultDir/batch_1.fa.hmmpfamOut"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyResultDir/batch_2.fa.hmmpfamOut"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyResultDir/batch_3.fa.hmmpfamOut"))
+        self.assertFalse(fileUtils.isRessourceExists("dummyResultDir/batch_4.fa.hmmpfamOut"))
+        
+        os.system("rm -r dummyTmpDir")
+        os.system("rm -r dummyResultDir")
+       
+
+if __name__ == "__main__":
+ unittest.main() 
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestHmmpfamLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestHmmpfamLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,80 @@
+import unittest
+import os
+import sys
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "*** Error: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+from pyRepet.launcher.Launcher import HmmpfamLauncher
+from pyRepet.sql.RepetJobMySQL import *
+
+class TestHmmpfamLauncher(unittest.TestCase):
+    
+    def testConstructor(self):
+        
+        params = {
+                  "query" : "toto",
+                  "job_table":"fakeTable", 
+                  "queue" : "fakeQueue", 
+                  "groupid" : "fakeId",
+                  "tmpDir" : "fakeDir",
+                  "cDir" : "."
+        }
+      
+        hmmpfam = HmmpfamLauncher("toto", params)
+        
+        self.assertEquals("toto", hmmpfam.query)
+        self.assertEquals("fakeTable", hmmpfam.job_table)
+        self.assertEquals("fakeQueue", hmmpfam.queue)
+        self.assertEquals("fakeId", hmmpfam.groupid)
+        self.assertEquals("fakeDir", hmmpfam.tmpdir)
+       
+       
+    def testCmd_start(self):
+        
+        fullInFileName = "/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa"
+        expectedCmdStart = "hmmpfam --informat FASTA -E 10 /workspace/repet_pipe/pyRepet/launcher/datas/hmmpfam/myhmms test_input_aa.fa > /workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa.hmmpfamOut"
+        user = "oinizan"
+        host = "pisano"
+        passwd = "oinizan"
+        dbName = "repet_oinizan"
+        jobDb = RepetJob(user , host , passwd , dbName)
+        params = {"param":"--informat FASTA -E 10",
+                  "query" : "toto",
+                  "job_table":"fake", 
+                  "queue" : "fake", 
+                  "groupid" : "fake",
+                  "profilDB": "/workspace/repet_pipe/pyRepet/launcher/datas/hmmpfam/myhmms",
+                  "tmpDir" : "fakeDir", 
+                  "outputDir": "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas", 
+                  "cDir" : "."}
+        self.hmmpfam = HmmpfamLauncher(jobDb, params)
+        self.assertEquals(expectedCmdStart, self.hmmpfam.cmd_start ( fullInFileName ))    
+        
+    def testCmd_finish(self):  
+        
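+        # For the plain HmmpfamLauncher no post-processing is launched, so
+        # cmd_finish is expected to return an empty string.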
+        fullInFileName = "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/test_input_aa.fa"
+        expectedCmdFinish = ""
+
+        user = "oinizan"
+        host = "pisano"
+        passwd = "oinizan"
+        dbName = "repet_oinizan"
+        jobDb = RepetJob(user , host , passwd , dbName)
+        params = {"param":"--informat FASTA -E 10",
+                  "query" : "toto",
+                  "job_table":"fake", 
+                  "queue" : "fake", 
+                  "groupid" : "fake",
+                  "tmpDir" : "fakeDir",
+                  "cDir" : "." 
+                  }
+        self.hmmpfam = HmmpfamLauncher(jobDb, params)
+        self.assertEquals(expectedCmdFinish, self.hmmpfam.cmd_finish ( fullInFileName ))
+
+
+if __name__ == "__main__":                 
+ unittest.main()
+
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestLaunchPreProcessHmmpfamPostProcessNotInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestLaunchPreProcessHmmpfamPostProcessNotInParallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,63 @@
+import os
+import sys
+import unittest
+from commons.pyRepetUnit.hmmer.LaunchPreProcessHmmpfamPostProcessNotInParallel import *
+from pyRepet.util.file.FileUtils import *
+import ConfigParser 
+
+exitStatus = 0
+if not os.environ.has_key("REPET_PATH"):
+    print "*** Error: no environment variable REPET_PATH"
+    exitStatus = 1
+
+if not os.environ.has_key("PYTHONPATH"):
+    print "*** Error: no environment variable PYTHONPATH"
+    exitStatus = 1
+
+if exitStatus == 1:
+    sys.exit(1)
+
+CONFIG_FILE = "datas/configTestLaunchPreProcessHmmpfamPostProcessNotInParallel.cfg"
+CURRENT_DIR = os.getcwd()
+
+class TestLaunchPreProcessHmmpfamPostProcessNotInParallel(unittest.TestCase):
+  
+    def setUp( self ):
+        os.chdir(CURRENT_DIR)
+        self._LaunchPreProcessHmmpfamPostProcess = LaunchPreProcessHmmpfamPostProcessNotInParallel()
+        self._configFile = CONFIG_FILE
+        config = ConfigParser.ConfigParser()
+        config.readfp(open(self._configFile))
+        self._InputFile = config.get("hmmer_config", "input_file")
+        self._OutputFile = config.get("hmmer_config", "output_file")        
+        self.fileUtils = FileUtils()
+        
+    def testRun(self):
+        #Normal launch
+        self._LaunchPreProcessHmmpfamPostProcess.setInputFile( self._InputFile )
+        self._LaunchPreProcessHmmpfamPostProcess.setOutputFile( self._OutputFile )
+        self._LaunchPreProcessHmmpfamPostProcess.setConfigFile( self._configFile )
+        self._LaunchPreProcessHmmpfamPostProcess.run()
+        self.assertTrue(self.fileUtils.isRessourceExists(self._OutputFile))  
+        self.assertFalse(self.fileUtils.isFileEmpty(self._OutputFile)) 
+        os.system("rm " + self._OutputFile)
+        #without config file
+        configFile = "/workspace/repet_pipe/commons/pyRepetUnit/hmmer/datas/configdummy.cfg"
+        self._LaunchPreProcessHmmpfamPostProcess.setConfigFile( configFile )
+        self._LaunchPreProcessHmmpfamPostProcess.run()
+        self.assertFalse(self.fileUtils.isRessourceExists(self._OutputFile)) 
+        #without input file
+        InputFile = ""
+        self._LaunchPreProcessHmmpfamPostProcess.setInputFile( InputFile )
+        self._LaunchPreProcessHmmpfamPostProcess.run()
+        self.assertFalse(self.fileUtils.isRessourceExists(self._OutputFile))
+        #without output file
+        self._LaunchPreProcessHmmpfamPostProcess.setConfigFile( self._configFile )
+        self._LaunchPreProcessHmmpfamPostProcess.setInputFile( self._InputFile )
+        OutputFile = ""
+        self._LaunchPreProcessHmmpfamPostProcess.setOutputFile( OutputFile )
+        self._LaunchPreProcessHmmpfamPostProcess.run()
+        self.assertFalse(self.fileUtils.isRessourceExists(self._OutputFile))
+        
+        
+        
+if __name__ == "__main__":
+        unittest.main()        
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/TestProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/TestProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,65 @@
+import os
+import sys
+import unittest
+from pyRepet.launcher.programLauncher import *
+from pyRepet.util.file.FileUtils import *
+import ConfigParser 
+
+exitStatus = 0
+if not os.environ.has_key("REPET_PATH"):
+    print "*** Error: no environment variable REPET_PATH"
+    exitStatus = 1
+
+if not os.environ.has_key("PYTHONPATH"):
+    print "*** Error: no environment variable PYTHONPATH"
+    exitStatus = 1
+
+if exitStatus == 1:
+    sys.exit(1)
+
+CONFIG_FILE = "datas/config.cfg"
+CURRENT_DIR = os.getcwd()
+
+class TestProgramLauncher(unittest.TestCase):
+  
+    def setUp(self):
+        os.chdir(CURRENT_DIR)
+        self._programLauncherInstance = programLauncher()
+        self._configFile = CONFIG_FILE
+        config = ConfigParser.ConfigParser()
+        config.readfp( open(self._configFile) )
+        self._inputFile = config.get("hmmer_config", "input_file")
+        self._outputFile = config.get("hmmer_config", "output_file")
+        self.fileUtils = FileUtils()
+    
+    def testLaunchHmmpfam(self):
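+        # The first launch must create a non-empty output file; the later launches
+        # (different input file, then an empty output name) must not leave an
+        # output file behind.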
+        config = ConfigParser.ConfigParser()
+        config.readfp( open(self._configFile) )
+        self._programLauncherInstance.reset( self._inputFile )
+        self._programLauncherInstance.setOutputFileName( self._outputFile )
+        self._programLauncherInstance.launchHmmpfam( evalFilter=config.get("profil_search","Evalue"),
+                              inputFormat=config.get("profil_search","InputFormat"),
+                              profilDatabank=config.get("profil_search","ProfilDatabank"))        
+        self.assertTrue(self.fileUtils.isRessourceExists(self._outputFile))  
+        self.assertFalse(self.fileUtils.isFileEmpty(self._outputFile))        
+        os.system( "rm " + self._outputFile)
+        self._inputFile = "repet_pipe/pyRepet/launcher/datas/hmmpfam/test_input_aa2.fa"
+        self._programLauncherInstance.reset( self._inputFile )
+        self._programLauncherInstance.launchHmmpfam( evalFilter=config.get("profil_search","Evalue"),
+                              inputFormat=config.get("profil_search","InputFormat"),
+                              profilDatabank=config.get("profil_search","ProfilDatabank"))
+        self.assertFalse(self.fileUtils.isRessourceExists(self._outputFile))
+        #No output file
+        self._outputFile = ""
+        self._inputFile = "repet_pipe/pyRepet/launcher/datas/hmmpfam/test_input_aa.fa"
+        self._programLauncherInstance.reset( self._inputFile )
+        self._programLauncherInstance.setOutputFileName( self._outputFile )
+        self._programLauncherInstance.launchHmmpfam( evalFilter=config.get("profil_search","Evalue"),
+                              inputFormat=config.get("profil_search","InputFormat"),
+                              profilDatabank=config.get("profil_search","ProfilDatabank"))
+        self.assertFalse(self.fileUtils.isRessourceExists(self._outputFile))
+        os.chdir(CURRENT_DIR)
+    
+if __name__ == "__main__":                 
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/Outputhmmpfam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/Outputhmmpfam Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,148 @@
+hmmpfam - search one or more sequences against HMM database
+HMMER 2.3.2 (Oct 2003)
+Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
+Freely distributed under the GNU General Public License (GPL)
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                 myhmms
+Sequence file:            7LES_DROME
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+Query sequence: 7LES_DROME
+Accession:      P13368
+Description:    SEVENLESS PROTEIN (EC 2.7.1.112).
+
+Scores for sequence family classification (score includes all domains):
+Model    Description                                    Score    E-value  N 
+-------- -----------                                    -----    ------- ---
+pkinase  Protein kinase domain                          314.6      6e-95   1
+fn3      Fibronectin type III domain                    176.6      2e-53   6
+rrm                                                     -40.4       0.72   1
+
+Parsed for domains:
+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+-------- ------- ----- -----    ----- -----      -----  -------
+fn3        1/6     437   522 ..     1    84 []    48.3  8.8e-15
+fn3        2/6     825   914 ..     1    84 []    13.4  2.2e-06
+fn3        3/6    1292  1389 ..     1    84 []    15.9  1.2e-06
+fn3        4/6    1799  1891 ..     1    84 []    63.5  2.3e-19
+fn3        5/6    1899  1978 ..     1    84 []    15.2  1.5e-06
+fn3        6/6    1993  2107 ..     1    84 []    20.3  4.3e-07
+pkinase    1/1    2209  2483 ..     1   294 []   314.6    6e-95
+rrm        1/1    2223  2284 ..     1    77 []   -40.4     0.72
+
+Alignments of top-scoring domains:
+fn3: domain 1 of 6, from 437 to 522: score 48.3, E = 8.8e-15
+                CS    C CCCCEEEEEECCTTCCEEEEECCC CCCCCCCEEEEE.ECCCCCC
+                   *->P.saPtnltvtdvtstsltlsWsppt.gngpitgYevtyRqpkngge
+                      P saP   + +++ ++ l ++W p +  ngpi+gY++++ +++ g+ 
+  7LES_DROME   437    PiSAPVIEHLMGLDDSHLAVHWHPGRfTNGPIEGYRLRL-SSSEGNA 482  
+
+                CS CCCCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   
+                   wneltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*
+                   + e+ vp    sy+++ L++gt+Y++ +  +n +G+Gp     
+  7LES_DROME   483 TSEQLVPAGRGSYIFSQLQAGTNYTLALSMINKQGeGPVA    522  
+
+fn3: domain 2 of 6, from 825 to 914: score 13.4, E = 2.2e-06
+                CS    CCCCCEEEEEECCTTCCEEEEECCC       CCCCCCCEEEEE.EC
+                   *->PsaPtnltvtdvtstsltlsWsppt.......gngpitgYevtyRqp
+                       ++P  l++   ++  + +sW+ p++++ ++ + +   +Ye+++  +
+  7LES_DROME   825    GGKPHSLKALL-GAQAAKISWKEPErnpyqsaDAARSWSYELEV-LD 869  
+
+                CS CCCCCCCCCE EECCCCCECECCEEEEECCCCEEEEEECCC  CCCC   
+                   knggewnelt.vpgtttsytltgLkPgteYevrVqAvnggG..GpeS<-*
+                   + +++ ++++++ g+   + l+ L+P+  Y++rV+A+n +G++G+++   
+  7LES_DROME   870 VASQSAFSIRnIRGPI--FGLQRLQPDNLYQLRVRAINVDGepGEWT    914  
+
+fn3: domain 3 of 6, from 1292 to 1389: score 15.9, E = 1.2e-06
+                CS    CCCCCEEEE      EEC  CTTCCEEEEECCC    CCCCCCCEEE
+                   *->PsaPtnltv......tdv..tstsltlsWsppt....gngpitgYev
+                      Ps+P++l+v  ++  t++++   s++l+W++p+++++       Y +
+  7LES_DROME  1292    PSQPRRLRVfverlaTALqeANVSAVLRWDAPEqgqeAPMQALEYHI 1338 
+
+                CS EE.ECCCCCCCCCCEEECCCC CECECCEEEEECCCCEEEEEECCC    
+                   tyRqpknggewneltvpgttt.sytltgLkPgteYevrVqAvnggG....
+                   ++ +   g e +e    +++  ++ ++ L+P+ +Y+++V A+ ++++   
+  7LES_DROME  1339 SC-W--VGSELHEELRLNQSAlEARVEHLQPDQTYHFQVEARVAATgaaa 1385 
+
+                CS CCCC   
+                   GpeS<-*
+                   G++S   
+  7LES_DROME  1386 GAAS    1389 
+
+fn3: domain 4 of 6, from 1799 to 1891: score 63.5, E = 2.3e-19
+                CS    CCCCCEEEEEECCTTCCEEEEECCC CCCCCCCEEEEE.ECCCCCC 
+                   *->PsaPtnltvtdvtstsltlsWsppt.gngpitgYevtyRqpkngge.
+                      Ps+P+n++v+ +++ +l +sW pp++ +++  +Y++++ q++ +ge+
+  7LES_DROME  1799    PSPPRNFSVRVLSPRELEVSWLPPEqLRSESVYYTLHW-QQELDGEn 1844 
+
+                CS        CCCCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   
+                   .......wneltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*
+                    +++++ + + +   t+ ++ ltg kPg+ Y+++VqA+ + +++  S   
+  7LES_DROME  1845 vqdrrewEAHERRLETAGTHRLTGIKPGSGYSLWVQAHATPTkSNSS    1891 
+
+fn3: domain 5 of 6, from 1899 to 1978: score 15.2, E = 1.5e-06
+                CS    CCCCCEEEEEECCTTCCEEEEECCCCCCCCCCEEEEE.ECCCCCCCC
+                   *->PsaPtnltvtdvtstsltlsWspptgngpitgYevtyRqpknggewn
+                        +   l+  +++++sl+l+W       p+ + ++++R   ++ e  
+  7LES_DROME  1899    FAELPELQLLELGPYSLSLTWAGT--PDPLGSLQLECR---SSAEQL 1940 
+
+                CS CCEEECCCCCECECCEEEEECCCCEEEEEECCC CCCC   
+                   eltvpgtttsytltgLkPgteYevrVqAvnggG.GpeS<-*
+                   +++v g+ t ++++ L+P t+Y+ r+    ++++G++    
+  7LES_DROME  1941 RRNVAGNHTKMVVEPLQPRTRYQCRLLLGYAATpGAPL    1978 
+
+fn3: domain 6 of 6, from 1993 to 2107: score 20.3, E = 4.3e-07
+                CS    CCCCCEEEEEECCTTCCEEEEECCC CCCCCCCEEEEE.ECCCCCC 
+                   *->PsaPtnltvtdvtstsltlsWsppt.gngpitgYevtyRqpkngge.
+                      Ps+P+ ++ + + +  ++++W++++++++pi  Y+++   ++++  +
+  7LES_DROME  1993    PSQPGKPQLEHIAEEVFRVTWTAARgNGAPIALYNLEA-LQARSDIr 2038 
+
+                CS                            CCCCEEECCCC CECECCEEEEE
+                   ...........................wneltvpgttt.sytltgLkPgt
+                   +++++++++++++ ++ +  +++   ++++l+  +tt  s++++ L   +
+  7LES_DROME  2039 rrrrrrrrnsggsleqlpwaeepvvveDQWLDFCNTTElSCIVKSLHSSR 2088 
+
+                CS CCCCEEEEEE CCC CCCC   
+                   eYevrVqAvn.ggG.GpeS<-*
+                      +rV+A++ ++G Gp+S   
+  7LES_DROME  2089 LLLFRVRARSlEHGwGPYS    2107 
+
+pkinase: domain 1 of 1, from 2209 to 2483: score 314.6, E = 6e-95
+                   *->yelleklGeGsfGkVykakhkd...ktgkiVAvKilkkekesikekr
+                      ++ll+ lG+G+fG+Vy++++k+++++  ++VA+K l+k+++++ e  
+  7LES_DROME  2209    LKLLRFLGSGAFGEVYEGQLKTedsEEPQRVAIKSLRKGASEFAE-- 2253 
+
+                   flrEiqilkrLsHpNIvrligvfedtddhlylvmEymegGdLfdylrrng
+                   +l E+q++ +++H+NIvrl g++  + +++ l+mE+me GdL++ylr+ +
+  7LES_DROME  2254 LLQEAQLMSNFKHENIVRLVGICF-DTESISLIMEHMEAGDLLSYLRAAR 2302 
+
+                   ..........gplsekeakkialQilrGleYLHsngivHRDLKpeNILld
+                    +++++++++  ls  e++ ++ ++++G +YL+++++vHRDL+ +N+L++
+  7LES_DROME  2303 atstqepqptAGLSLSELLAMCIDVANGCSYLEDMHFVHRDLACRNCLVT 2352 
+
+                   en......dgtvKiaDFGLArlle..sssklttfvGTpwYmmAPEvileg
+                   e +++++++ tvKi+DFGLAr++++++++++ + +  p+++m+PE  l +
+  7LES_DROME  2353 EStgstdrRRTVKIGDFGLARDIYksDYYRKEGEGLLPVRWMSPES-LVD 2401 
+
+                   rgysskvDvWSlGviLyElltggplfpgadlpaftggdevdqliifvlkl
+                     +++++DvW++Gv+++E+lt g                         ++
+  7LES_DROME  2402 GLFTTQSDVWAFGVLCWEILTLG-------------------------QQ 2426 
+
+                   PfsdelpktridpleelfriikrpglrlplpsncSeelkdLlkkcLnkDP
+                   P+         ++ +e+++++k+ g+rl +p+ c e l++Ll  c++ DP
+  7LES_DROME  2427 PYAA-------RNNFEVLAHVKE-GGRLQQPPMCTEKLYSLLLLCWRTDP 2468 
+
+                   skRpGsatakeilnhpwf<-*
+                   ++Rp   +++ + n +     
+  7LES_DROME  2469 WERP---SFRRCYNTLHA    2483 
+
+rrm: domain 1 of 1, from 2223 to 2284: score -40.4, E = 0.72
+                   *->lfVgNL.......ppdvteedLkdlFskfGpi.vsikivkDhkektk
+                      ++ g L+++++++p+ v  + L++  s+f +   +++++ +      
+  7LES_DROME  2223    VYEGQLktedseePQRVAIKSLRKGASEFAELlQEAQLMSN------ 2263 
+
+                   etgkskGfaFVeFeseedAekAlealnGkelggrklrv<-*
+                               F+     e+ ++ l G+ ++   +     
+  7LES_DROME  2264 ------------FKH----ENIVR-LVGICFDTESISL    2284 
+
+//
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/config.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/config.cfg Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,10 @@
+[profil_search]
+
+Evalue: 10
+InputFormat: FASTA
+ProfilDatabank: ./datas/myhmms
+
+[hmmer_config]
+input_file: ./datas/test_input_aa.fa
+output_file: ./datas/hmmpfam_output
+        
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/configTestAcceptanceHmmpfamAndParse2alignLauncherInparallel.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/configTestAcceptanceHmmpfamAndParse2alignLauncherInparallel.cfg Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,29 @@
+[db_config]
+user: oinizan
+host: pisano
+passwd: oinizan
+dbname:repet_oinizan
+
+[profil_search]
+Evalue: 10
+InputFormat: FASTA
+ProfilDatabank: /home/oinizan/repet_integration/repet_pipe/pyRepetUnit/hmmer/tests/datas/myhmms
+
+[hmmer_config]
+output_file: ./datas/hmmpfam_output
+result_dir: /home/oinizan/repet_integration/repet_results
+query_dir: /home/oinizan/repet_integration/repet_pipe/pyRepetUnit/hmmer/tests/batches
+tmp_dir: /home/oinizan/repet_integration/repet_tmp
+
+[sge_config]
+queue: main.q
+groupid: fake
+
+[parse_config]
+scriptToLaunch: /home/oinizan/repet_integration/repet_pipe/repet_base/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
+
+[dbSplit_config]
+input_file: /home/oinizan/repet_integration/repet_pipe/pyRepetUnit/hmmer/tests/datas/test_input_aa.fa
+
+[hmmpfam2align_config]
+output_file: ./datas/hmmpfam_outputNotInParallel.align
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/configTestFunctionalHmmpfamLauncherInparallel.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/configTestFunctionalHmmpfamLauncherInparallel.cfg Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,24 @@
+[db_config]
+user: oinizan
+host: pisano
+passwd: oinizan
+dbname:repet_oinizan
+
+[profil_search]
+Evalue: 10
+InputFormat: FASTA
+ProfilDatabank: /home/oinizan/repet_integration/repet_pipe/pyRepetUnit/hmmer/tests/datas/myhmms
+
+[hmmer_config]
+input_file: ./datas/test_input_nt.fa
+output_file: ./datas/hmmpfam_output.align
+result_dir: /home/oinizan/repet_integration/repet_results
+query_dir: /home/oinizan/repet_integration/repet_datas/hmmer_in_parallel
+tmp_dir: /home/oinizan/repet_integration/repet_tmp
+
+[sge_config]
+queue: main.q
+groupid: fake
+
+[parse_config]
+scriptToLaunch: /home/oinizan/repet_integration/repet_pipe/repet_base/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/configTestLaunchPreProcessHmmpfamPostProcessNotInParallel.cfg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/configTestLaunchPreProcessHmmpfamPostProcessNotInParallel.cfg Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,9 @@
+[profil_search]
+
+Evalue: 10
+InputFormat: FASTA
+ProfilDatabank: ./datas/myhmms
+
+[hmmer_config]
+input_file: ./datas/test_input_nt.fa
+output_file: ./hmmpfam_output.align
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/myhmms
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/myhmms Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,1433 @@
[1433 added lines not shown: the changeset viewer rendered this file as a truncated byte string. Its visible header identifies an HMMER2.0 profile databank named "myhmms", built with "hmmbuild -A myhmms rrm.sto" and calibrated with "hmmcalibrate myhmms"; it holds the profiles (rrm and others) that the hmmpfam tests search against.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/test_input_aa.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/test_input_aa.fa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,258 @@
[258 added lines not shown: the changeset viewer rendered this file as a truncated byte string. It is a protein FASTA file with the amino-acid translations of the blumeria_Grouper test sequences (headers such as >blumeria_Grouper_28830_3_1, >blumeria_Grouper_14770_3_1 and >blumeria_Grouper_1717_12_1) used as hmmpfam input.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/hmmer/tests/datas/test_input_nt.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/tests/datas/test_input_nt.fa Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,124 @@
+>blumeria_Grouper_28830_3
+TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTC
+CGACTAATCAACAATATAATGCGAGTAGAGCTTGAGACTAATAAAACGTGAATTTNATTC
+TATAATACAGGATCGCCAACTTATTTAAATATTCCCCCGCACCACGTGAGCCAATCAACA
+GCGCACAATTTGCACGGTCGGGTTTCCATCAATTGGGATCCACAAACTATCCCCCTTCTC
+AAAACTAGCTGTCCACAGCTTATTTAAATTACAAGTTAATTAGAAATTTTATTATGGTAT
+CACTCAAACTAAATCCTTGCTGAATTTTTGAGTGTCTTTCTATCCTGCTCCAGCATACGG
+CGGCTACCATACCATAGACTACGAACTGGGTGTCCTTCACGTACATATGCCGGGAATAGG
+TTTGTGCCGTTGTACTTGTTGCCTAATTTGTATCCACCCGGTGATATTAAGGAGTAGGTA
+TTATTATTATGACAAGCTGCTACGGCCCATGGCCCGTCATAGTAGGGCTCCATCTTGTGT
+CTTCGCTGGCGTACTCGCAGTACCCAGTCTCCGGGGGCGTAAGTACGTATAAGTGCCTTT
+TCTTCCTGGAGGTAAGCCCGTATTTTATCCCTAGTTGCTTTCATACTACTAACGTAGGAG
+CGTGCTATTGGTGCGGGGTGTTTCCTTCCGAGTTCTTCGGCCCAGAGGCGCTCCTCTGAT
+TCTGTGGCTTCACGGGTATATACTGGATAAGAATTTTCCTCCTCCGGGGGCTGAGTGCCG
+AAGAGTAAGAAGTAAGGGCTATACCCGCTGGGTGAGACTCTTCGATTGAAATTCATTACC
+GCGAGTGAGAGAGCGCCAGCTAGTGTAATTTTAGGAGAATCCAGGAGAACTCTGACCAGT
+ATGGCTTTCAGAACGCCATTTGCCTGTTCGACTTTGCCATTTCCTCTTGGCCTAGCTGGT
+GTGGTATGGACAACCTGCAGTTGTTGGCTAGCTTGAAATCGTTGTGCTTCTATGCCTCCG
+AAGCAGCCAGCATTGTCACTGATAATCTGCTTTGGCTTTCCAAAGTTCTGGATAATATAC
+GTGAGCAGAGGCACAGTGTTAGCGAAGTCTGCATTGGGAACTAAGCGGGATTCCAGCCAG
+CCTGTTGCGTACTCAATTGCATTGAGCAAGACTTGGGGTCCTACTTGAGTATGATCTATA
+CCCCACCGTGTCAGAGGAGGTGCTGGTACTATAGGAGTTAAGCTTCCCAGTAACGGGTCT
+GGAGGTTTCATGAGCTGACAGGACTTGCATTTTCGGACAGCTTCGTGAGCCGCGAGGATC
+AGTTCGGGGTGCCAAAACCTACGCCCTAATTCCCGCATAGTATTGCCGATTGACGCATGG
+CCTTGGCCCTCATGAACATTTACGGCCGTCGATACTAGATCCTCATATTCCAAGATTTCT
+AGTAGGGTTGCGGCGCCAGATGGACCAGGTGCATCGCCTACTTGTTGCGACGGGTTCTTT
+TTGATGAGATGCAATTTGGAATTGTACATCGCAAAATTATCTCGGACCCAAGTGGCAGCT
+ATATTTAGGGGAAGTGCCTCTCCTAAGGTTAGAAATTCGGTGATGCACTGCAGGTCGATG
+CGGTTGAGTTCGTGTGGGTGCTTTATTTCCTCCATCTCAAGTTCTCTTTCTGCACCACTT
+TGGATTCTAGAGTTTTCGTTGTCGTCAGTAGCTACATTCCCTTCCCCTTCTTCCCTAGAG
+AGAAATAGAACATCACAGGGACGGGAGAGAAAATCAGCGAGAACATTTGCTTTTCCGCGA
+CGGTAGATAATCCTGATATTAAAATGCTCTATAGCATCTAAAAAACGGACCATTCTGACG
+GGCTGCTCGGCTTTGGTCTGAATAGTTCGTAGAGATTCATGGTCAGTTATCACCGTTACG
+TCGCCGCCTTCTACCCAGTGACGCCAATGTTGTAGTGACAGTAAAATCCCAAGTAATTCC
+CTCTCTTGTGCTGAATAACGCTGCTGGGTATCGTTTAGTTTTCTTGAGAAGTAAGCTATG
+GGATGCAGCACTGAACGATCGGAATTGTTGGAGCATCGCATGTACGGTTGGAGTATGACT
+GCACCCACGAACTTTTGGGAAGCGTCAGTTTCCAATATGACGGGAAGTCTCCAATCAAAT
+TTTTTTACAACTGGTGATGTTGTAAGCGCTAGTTTGATACTTTTCCATGATTGAATCGAT
+TGGTCCGTCAGAATGACAGGGCAATTTTTAGGTCCAGAGGATTGGTCCGTGAGTGGCCCT
+GCTAGTTGGGAGAAATTTTTAATGAGACAACGTAGGTAACCAGCTGCATTTACAAATCCT
+CTTATTTCCGAGGTAGTTGTTGGCATAGGCCTGTCTCTGATCGCTGAAACTTTTTCGGGG
+TCTGCTGCAATGCCATGTTTAGAAATATGGAAACCAAGGAAGATTCCCTCTGAAACAAAT
+AGCCTGCACTTTGAAGCATTGAGGAGGAGACCATAGGAAGCGAATCTAGAAAAAATAAGT
+ATCAACCGTGAGTGCAATTCTGCCATTGAGTCTGCCCAGACAGCTACATCGTCCAGGAGC
+CCTGCGACAAATGGACGGAATGGCTCTAGAACTTCTTCCATAAAACACTGGAAAGTAGCT
+GGTGCGTTGCATAGGCCGAAAGGCATAACTAGCCATTCATAGACACCGAAAGGAGTCACA
+AATGCTGTTTTCTCTGAAGAGTCTTTGTGCATACGTATCTGCCAAAATCCGTTTTCCAGG
+TCGAGGAATGCATAGAAGTTATGTCCTGCTGCGCGCTGAATTTCATCTTGAGCATTTGGT
+AGGGGATGGGCGTGTTTCTTCGTAATCTTGTTAAGCTCTCTATAGTCAACACAAATACGC
+CAAACGGTTTGGTTAGTCTGGGTTAGCGTCTTTTTTTGGTGTTAATAACAGAGGTGCAGC
+CCATGGTCCCTTACATTTGCGTATGAATTGAGCCGGTCATGTGTTGCACAAGAAAGTCCC
+CGGATGGCTTGTTGTTGCGCCGGGGCTATATCGCCTCGGAGGGGAGTTTTAAATTGGGTT
+GACAATTACCCCGTTTCTATGCGGGTGTTTCTGTCTTGGGAGAGTTTTATCCCATTTGGC
+GGGAGAGATTTATCGAAAAGTTGAAGGGGAATTTAACTTTGATATTGTAGGGAAAAACTG
+TAATGCGGAATGAAGGGTGGGCTGGGTGCCATTTGACCATTGGAATATTTGAAGTAGTGA
+AGACTTTTCGGTTTGGGAGAGTATTAATTATAGGATTAAGAATTGGTTGGTCAATTAAGT
+TGAGAAGTTGCAGTTTGCCATTTGATAAACATCGTAGCCCCAGTTCATGGAATAAAGTTT
+GTCCAAGTGTCACATCTGCTGGGAATGTGCCATCTGGGACAACTCCTAGAGACACTGTGA
+ATTCCTGAGTCCAGAATGATTCAAGGTGATTCGAGCAAAGAAACTGTGCAGTTAAAATTG
+CATCCTTAGAAATTGTTGGGCCAGGGCCACCAATGCCTGACATTTTAGAAATAATAGATG
+TATTATAAGAAGTCAAATATTTAGAGAAAGACTGTGAAACATAACATGCTGAAGAGCCGT
+AGTCAAAAAAAACTGAAAATTGGACATAGGGAAAATGAGTTATTCGTGCAAAAAACATTC
+TGTGTGGCTTGCCATATCCTTCCCGTAGAGGTTGTCCTTTTGGGGGTGGCTCAAATTTGC
+CCTTTCCTACGGCCACAAAGTAGGTATTCGGGCTGTCCTATTCATAAATTTCTTCATTTC
+GCATTCGATATAGAATGGCTTCAATTTCGTCATCGAGCTGCTCATCTTCTATTTGGCCTT
+CAGGGCTTATTTGCTCTTGATCCAAATCTTCTGGCTCAACAAGGAAAGTAGAAGTGACAG
+GCTTTGATGAACGATCGTGTGGTGTTACAGGTTTTACATATTTTTTATTTTTCATTGATT
+GGTAAGTTTTCCTGAATTTATCGCTGAAACGCGACGAGGTGTATTTAGCGTTTGGTGCAT
+ATTCCCTTTGTTCTTTTGATTCCACCGGCGCTGTTTTCATCAACCGGCAGTCTTTCGCCC
+AGTGGCCTGTCTTTCCGCAATTGAAGCATTCGTAACGCTTGTCGTGAATTGGGTATGATT
+TATCTGTTTCTGCAATGTCTTGTGACGCAAAGACGTCTTGCCCATACGACCTTGGATCAG
+CTATGGATGCGTCAAATGCTGGAAGCTTTTCGCCTTTTGACGCTGAGATCGGAATTGGTG
+CATTGGACTCGGAATGTTTGAATACATCATCCTCAAGTGTCCACCTTGTAATACCGCGTG
+TGACTTGAACCACTCGATCCGCAAGCTCCGCGCTGGTGAGTTTGGGGAGATTATTGGAAA
+TGATGGTCCAGGTACGTGGAAGTGAGAGTCGAATATGCTGCCTCAGTATATCTCTAGTCT
+GAGGCCCTACCATGACATCCGAAGGTAGGCTAAAGAAAGCTTTCCGGAGTCGTCGCGCAA
+ATTGTAATTGAGTCTCGTCGTTTCTTGCAGTTATTCGTGCAAATGTTGTCAAAGGCGACC
+CGAGTACATCATTGTCGAACAGTATTTTAAGGATACAGTCTACAGCTCCTGCCCAATCGA
+GGTGTTGACTGATGATTGTTCTTCGCGCCGAGATGAAATCTTCATCCATTTCCATTGCAA
+TTCTGATAGCCCAAGCAGAATATGGAAGCATATGTTGAATCATCAATGATTGCATGTGGT
+ATAATATATTCATTATGTTCTCATCAGCAAAGTGCAATTTCCATTGTGGTGGGAATACCG
+CGCGTGTTTTTCTATTTGCCTCTTTTAAAGGGCCGTGCGGATTTTCAGCTTCGGAACGCG
+AAAAGAAATCTGGGAAGTACCGCTTTATTTTTCGCTCTTGTTCAGTAACATTCAATGGTT
+CGTTAACTGACTGGCGGTATAAATTAATAGTCCCATCTTTGTTTTGCTCATCATTGCTTA
+GGTTGGGCGTTGTTTCATAGAATTGCGCCGAGGTAAAGGGTCTTTTCTTAGGCTGAAACA
+CGATTGGCCTGGTTGCGGGTCGCGAGGCGCTGCCTGCTGAAAAGTCAAATAGCTCCTCCC
+GGTATGGTTTTTTATTTTCAGATGGCCCTGCTTCCGAAAGTTGAAAGGGGACTGAATTTT
+TCGAAGAAGTTTTTGTTTCCACGTCTTCATTTCTACTTACTAGTTTGGAAATTAAGTCGT
+TCATTGCAGACATTTGTTTGACTAGAAGAGAAATTTGTGATTGGGTCTCTTCAAACCTTT
+GATCGATTCTCTTGGAGAGATCTTTTGTCACTGCCTTTGCAGCTTCATTAGCTTCGGAAA
+AATTTTCGTTGGCTTTGGTTTGCCCGGCAATGTGATCCTTCATCAAGCTTTCCAACTGAG
+CATTATTCAGGCCGACCATGATGATTGAATAGGTGGTGAAATGCGTAAAAAATTTGGAGA
+CGTCTTAAATTGGGATGACCTAGAATTGTGAGGTTCTCTCAACAAAAATATATAATTTTG
+TGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCG
+ACAAATCAACAATATAATGCGAGTAGAGCTTGAGACTAATAAAACGTGAATTTCATTCTA
+TAATACAGGACCGCCAACTTATTTAAATATTCCCCCGCACCACGTGAGCCAATCAACAGC
+ACACAATTTGCACGGTCGGGTTTCCATCAATTGGGATCCACA
+>blumeria_Grouper_14770_3
+GGTTAGACCGATACAGTAAGTCTCGTATGAAGTTGTGGAAGGAAGAATTGACTGGAAACA
+AGTGTATTGAGATGTATGGTTCTGAGTAAAACTGAGCTGATAAGAGTGAGATTGCGTGTG
+CGCATATCTTATCAACTGAGGTTTTTGACCAGTATTTATCAATCTCCATGGATCATAAGT
+TATGATCCCTGTAGATGTAAATCTATATACGTACAGATATATGAGCCAGAAATATAAAGC
+TAAATACATGTGTGTCGGTAATAGATCCGAGAAACCTTTAACCTGCATTTGATACATAAT
+CATCTGATGGAATGCTAGTGTGAAATAAATGCCAATGTACGTTAAACTTTCCGTGAGGAA
+TGTGCTGATGACAATGATATAGGAAGGTGAATTTGATAACCTAATTAGCCGGACTTACAA
+ATGAGTTATCCTTGGCATTCCAGGATATACAATATTTCAACACCCCTCCTCACTTTGCAT
+CGAGGATAGATACATAAGTGAAAAACTGTTAGTATATTTATTGTCATTTCTCATCTAATG
+CTGGTATGGTAGACACGCCAGTGGTTGTACTAGTACCAAAAACTGAAGTGCATAAATCGG
+TGTGCCTTGGTCTCGGTAAGCCTTTTGTACATATATCTGCGAGATTTTTAGCTGAAGGAA
+CATGTAATAGTGTAAGCGAGCCGTCTTCAATTCTTTCCCTTGTAAAATGATAAGCGATGT
+CGATATGTTTAGAAGCATCATTTAGTTTTGGATTGTGAGCAAGTTCAATGGCAGCCGTAT
+TGTCAGTCGAGATTGCTGCTGGGATGCTGTCTCCAAGGAATCGGTGAAGTCCGCGCAAAA
+TCCAGATATATTGTTTGGATGCGAGCGCTATTGCCATGTACTCAGCTTCGCAGGTTGAAT
+G
+>blumeria_Grouper_1717_12
+ATTTCCAGTATCTTGCCCTGAAGGGCCAGTAAGTCCTGATCGAATAGTTTTCGAAGTCTC
+TCGTCAGTGACTGGTTCCAACAGCTTGCAGAACCCAATGATGAAGCGCTCTGCTTGTGCG
+GAGCGCGTCTGGAATATCCTGCCCTGTTCGGCGAGGAATTTGGTCGCGACGATTTGAGGC
+GACGGAGTTTGTTGGTGCGGTGAGCTTCGATGACTCGCCTCGTAGATTGGCTCTACAAAT
+TGTTGTGTGGGTTGTTGTGATGGCGGTTGTGGCCCGTTAAGGACCATTTCGACGTCTTGA
+CGCGTCGAACATTGAGTTGGGAGGGGGGGCCAAGAGCCAGCCTCTGCGGAGGACTGGGGC
+GGCATGAGTGAGTGAGGTTTGTGGCAGTGTGAGATCGTAGATGGTATACGGCGCGCTTGA
+AGGG
+
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,79 @@
+from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
+import re
+import sys
+
+
+class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
+    '''
+    Check whether the profiles of a given list are already present in a profiles DB;
+    for the missing ones, a getz command can be generated to add them.
+    By default the list contains profile names (use setPfamAccNumberKeys() for Pfam accession numbers).
+    '''
+
+    def __init__(self):
+        '''
+        Constructor
+        '''
+        self.profilesDBUtils = ProfilesDatabankUtils()
+        self.profilesToAddNotInDB = []
+        self._pfamAccNumber = False
+
+    def setProfilesDBFile ( self, profilesDBFile ):
+        self._profilesDBFile = profilesDBFile
+        
+    def setProfilesToAdd ( self, profilesFile ):
+        self._profilesToAdd = profilesFile
+        
+    def setPfamAccNumberKeys ( self ):
+        self._pfamAccNumber = True
+
+    def _IsProfilInDB(self, pfamDBList, profil):
+        IsProfilInDB = False
+        for profilInstance in pfamDBList.getList():
+            if (self._pfamAccNumber == False and profilInstance.name == profil) or (self._pfamAccNumber == True and re.match(profil + "\.\d+", profilInstance.accNumber)):
+                IsProfilInDB = True
+                break 
+        return IsProfilInDB
+    
+    def _generateProfilesList(self):
+        f = open(self._profilesToAdd)
+        profilesToAddList = f.readlines()
+        f.close()
+        return profilesToAddList
+
+    def generateNotExistingProfilesList ( self ):
+        '''
+        Generate the list of profiles (given by name or accession number) that are not yet present in the profiles DB.
+        '''        
+        self.profilesToAddNotInDB = []
+        profilesToAddList = self._generateProfilesList()
+        pfamDBList = self.profilesDBUtils.read( self._profilesDBFile )
+        if pfamDBList.getList( ) != []:           
+            for profil in profilesToAddList: 
+                if profil != "\n":
+                    sys.stdout.flush()
+                    profil = profil.rstrip( )
+                    IsProfilInDB = self._IsProfilInDB(pfamDBList, profil)
+                    if ( IsProfilInDB == False):
+                        self.profilesToAddNotInDB.append( profil )
+        return ( self.profilesToAddNotInDB )    
+      
+    def generateGetzCmdProfilesList ( self, profilesList ):
+        '''
+        Generate the getz command that retrieves the given profiles, identified by name or accession number.
+        ''' 
+        getzCmd = "getz -e \'"
+        if (self._pfamAccNumber == False):
+            for profileName in profilesList:
+                getzCmd += "[pfamhmm-Id:\"" + profileName + "*\"] | "
+        else:
+            for profileAccNumber in profilesList:
+                getzCmd += "[pfamhmm-AccNumber:\"" + profileAccNumber + "\"] | "
+        getzCmd = getzCmd[ 0:len( getzCmd )-3 ]
+        getzCmd += "\'"
+        return getzCmd
+    
+    def CmdToCompleteProfileDB (self):
+        '''
+        Generate the getz command that retrieves only the profiles that are not yet in the profiles DB.
+        ''' 
+        profilesList2Add = self.generateNotExistingProfilesList()
+        return self.generateGetzCmdProfilesList ( profilesList2Add )
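A minimal usage sketch of the class added above, assuming the repository is on PYTHONPATH; the two input file names are hypothetical placeholders.

    from commons.pyRepetUnit.profilesDB.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber import CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber

    completer = CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber()
    completer.setProfilesDBFile("profilesDB.hmm")      # hypothetical existing profiles DB
    completer.setProfilesToAdd("wantedProfiles.txt")   # hypothetical list, one profile name per line
    # completer.setPfamAccNumberKeys()                 # uncomment if the list holds Pfam accession numbers
    print completer.CmdToCompleteProfileDB()           # getz command for the profiles missing from the DB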
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/InsertProfilesMapFileInDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/InsertProfilesMapFileInDB.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,35 @@
+from commons.core.coord.Map import Map
+from pyRepet.sql.TableAdaptator import TableMapAdaptator
+
+class InsertProfilesMapFileInDB(object):
+    '''
+    Insert a map file into a database.
+    The input file name, the table name and the database (DbMySql) object have to be specified when the object is created.
+    '''
+
+    def __init__(self, inputFileName, tableName, db):
+        '''
+        Constructor
+        '''
+        self.inputFileName = inputFileName
+        self.tableName = tableName
+        self.db = db
+        
+    def createAndLoadTable(self):
+        '''
+        Create the table and load the map data from the input file
+        '''
+        self.db.createTable(self.tableName, "map", overwrite = True)
+        f = open (self.inputFileName, "r")
+        iMap = Map()
+        lMap = []
+        while iMap.read( f ):
+            lMap.append(iMap)
+            iMap = Map()
+        f.close()
+        self._tMapA = TableMapAdaptator( self.db, self.tableName )
+        self._tMapA.insMapList( lMap )
+        
+        
+if __name__ == "__main__":
+    # No standalone entry point: the module only provides the InsertProfilesMapFileInDB class.
+    pass
\ No newline at end of file
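A minimal sketch of how the class above is meant to be used, modelled on the accompanying test further down; the connection parameters, file name and table name are hypothetical placeholders.

    from commons.core.sql.DbMySql import DbMySql
    from commons.pyRepetUnit.profilesDB.InsertProfilesMapFileInDB import InsertProfilesMapFileInDB

    db = DbMySql("repet_user", "localhost", "repet_password", "repet_db")   # hypothetical credentials
    loader = InsertProfilesMapFileInDB("profiles.map", "profiles_map", db)  # hypothetical file and table names
    loader.createAndLoadTable()   # creates the "map" table and inserts every Map record read from the file
    db.close()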
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/Profiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/Profiles.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,91 @@
+import re
+
+#------------------------------------------------------------------------------
+
+class Profiles:
+    '''
+    HMM profile class
+    Attributes are name, desc, length, accNumber, GA_cut_off and retrieve
+    '''
+    
+    #--------------------------------------------------------------------------
+
+    def __init__( self, name="", desc="", length=0, accNumber = "", GA_cut_off = 0, retrieve = False ):
+        self.name = name
+        self.desc = desc
+        self.length = length
+        self.accNumber = accNumber
+        self.GA_cut_off = GA_cut_off
+        self.retrieve = retrieve
+        self.tab_profile = []
+        
+    #--------------------------------------------------------------------------
+
+    def _noProfileInFile(self):
+        self.name = None
+        self.desc = None
+        self.length = None
+        self.accNumber = None
+        self.GA_cut_off = None
+
+    #--------------------------------------------------------------------------
+
+    def _initialisation(self):
+        self.name = ""
+        self.desc = ""
+        self.length = 0
+        self.accNumber = ""
+        self.GA_cut_off = 0
+        self.tab_profile = []
+        
+    #--------------------------------------------------------------------------
+
+    def read( self, hmmFile ):
+        '''
+        Read one profile from the file and fill in the object:
+        the name, length, desc, accNumber and GA_cut_off attributes are set.
+        '''
+        line = hmmFile.readline()
+        if line == "":
+            self._noProfileInFile()
+            return
+        self._initialisation()
+        if self.retrieve:
+            self.tab_profile.append(line)        
+        while not re.match("\/\/.*", line):            
+            line = hmmFile.readline()
+            if self.retrieve:
+                self.tab_profile.append(line)                  
+            name = re.match("NAME\s*(.*)", line)
+            if name:                    
+                self.name = name.group(1)
+            desc = re.match("DESC\s*(.*)", line)
+            if desc:                    
+                self.desc = desc.group(1)
+            length = re.match("LENG\s*(.*)", line)
+            if length:                    
+                self.length = int(length.group(1))
+            accNumber = re.match("ACC\s*(.*)", line)
+            if accNumber:                    
+                self.accNumber = accNumber.group(1)
+            GA_cut_off = re.match("GA\s*\d*\.\d*\s*(.*);", line)
+            if GA_cut_off:                    
+                self.GA_cut_off = float(GA_cut_off.group(1))
+            elif self.GA_cut_off == 0:
+                self.GA_cut_off = "NA"
+        if self.retrieve:
+            return self.tab_profile
+        else:
+            return 1
+            
+    #--------------------------------------------------------------------------
+
+    def readAndRetrieve( self, hmmFile ):  
+        '''
+        Read one profile from the file and fill in the object
+        (name, length, desc, accNumber and GA_cut_off are set),
+        and return the list of the profile's lines.
+        '''
+        self.retrieve = True
+        return self.read(hmmFile)
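A short sketch of iterating over the profiles of an HMMER2-format file with the Profiles class above; "myhmms" is a hypothetical input file.

    from commons.pyRepetUnit.profilesDB.Profiles import Profiles

    f = open("myhmms")            # hypothetical HMMER2-format profiles DB
    profile = Profiles()
    while profile.read(f):        # read() returns a true value as long as a profile was parsed
        print profile.accNumber, profile.name, profile.length, profile.GA_cut_off
        profile = Profiles()
    f.close()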
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/ProfilesDB2Map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/ProfilesDB2Map.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,40 @@
+from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
+
+
+class ProfilesDB2Map ( object ):
+    """
+    Write a file in map format from a ProfilesDatabank object.
+    The input and output file names have to be set before calling run().
+    """
+
+    def __init__(self):
+        self.profilesDBUtils = ProfilesDatabankUtils()
+        self._inputFile = "" 
+        self._outputFile =  ""    
+        
+    def setInputFile(self, input):
+        self._inputFile = input
+         
+    def setOutputFile(self, output):
+        self._outputFile = output   
+         
+    def _readProfilesDB( self ):
+        pfamDB = self.profilesDBUtils.read( self._inputFile )
+        return pfamDB
+    
+    def _writeMapFile( self, pfamDBList ):
+        """
+        Write a file in map format from a ProfilesDatabank object
+        """
+        if pfamDBList.getList() != []:
+            f = open( self._outputFile , "w")
+            for ProfilInstance in pfamDBList.getList():
+                f.write(ProfilInstance.name + "\t" + ProfilInstance.desc + "\t1\t" + str(ProfilInstance.length) + "\n")
+            f.close()   
+    
+    def run( self ):
+        """
+        Read a profiles DB file, parse it and write the corresponding .map file
+        """
+        pfamDBList = self._readProfilesDB()
+        self._writeMapFile(pfamDBList)
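A minimal sketch of converting a profiles DB into a .map file with the class above; both file names are hypothetical placeholders.

    from commons.pyRepetUnit.profilesDB.ProfilesDB2Map import ProfilesDB2Map

    converter = ProfilesDB2Map()
    converter.setInputFile("myhmms")         # hypothetical HMM profiles DB
    converter.setOutputFile("profiles.map")  # one "name<TAB>desc<TAB>1<TAB>length" line per profile
    converter.run()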
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/ProfilesDB4Repet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/ProfilesDB4Repet.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,226 @@
[The 226-line hunk body is truncated in the source. The recoverable content adds the ProfilesDB4Repet class: a command-line script (options -h, -i <input profiles DB>, -o <output profiles DB>) that reads an HMMER profiles DB, guesses the transposable-element domain of each profile (GAG, AP, RPA, C-INT, ATP, CYP, POLB, HEL, otherwise OTHER, with a warning logged for OTHER) from its NAME/DESC fields, and rewrites each NAME line as accessionNumber_name_domain_GAcutoff before writing the reformatted DB.]
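Judging from the -i/-o options and the functional test added further below (Test_F_ProfilesDB4Repet.py), the script is meant to be run from the command line; the file names here are hypothetical.

    import os
    os.system("python ProfilesDB4Repet.py -i profilesDB.hmm -o profilesDB4repet.hmm")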
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/ProfilesDatabank.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/ProfilesDatabank.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,22 @@
+class ProfilesDatabank:
+    
+    """
+    List of profiles objects.
+    """
+    
+    
+    def __init__( self ):
+        self._profilesDatabank = []
+    
+    def append( self, profiles ):
+        self._profilesDatabank.append(profiles)
+       
+    def len (self):
+        return len(self._profilesDatabank)
+    
+    def get(self, index):
+        return self._profilesDatabank[index]
+    
+    def getList(self):
+        return self._profilesDatabank
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/ProfilesDatabankUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/ProfilesDatabankUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,38 @@
+"""
+Utility to handle a databank of HMM profiles.
+"""
+
+import sys
+from commons.pyRepetUnit.profilesDB.Profiles import Profiles
+from commons.pyRepetUnit.profilesDB.ProfilesDatabank import ProfilesDatabank
+from commons.core.utils.FileUtils import FileUtils
+
+class ProfilesDatabankUtils:
+    """
+    Utility to handle a databank of HMM profiles.
+    """
+    
+    def read( inFileName, verbose=0 ):
+        """
+        Read a file in Pfam format and return a L{ProfilesDatabank<commons.core.ProfilesDatabank>} instance.
+        @param inFileName: name of the input file
+        @type inFileName: string
+        @param verbose: verbosity level
+        @type verbose: integer
+        """ 
+        if verbose > 0: print "reading file '%s'..." % ( inFileName ); sys.stdout.flush()
+        
+    
+        if FileUtils.isEmpty(inFileName):
+            return (None)
+        profilesInstance = Profiles() 
+        profilesDBInstance = ProfilesDatabank()
+        f = open( inFileName , "r")
+        while profilesInstance.read( f ):
+            profilesDBInstance.append( profilesInstance )
+            profilesInstance = Profiles()
+        f.close()
+        if verbose > 0: print "file '%s' is loaded" % ( inFileName ); sys.stdout.flush()
+        return (profilesDBInstance)
+    
+    read = staticmethod( read )
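A short sketch of loading a whole profiles databank with the static read() above; "myhmms" is a hypothetical input file.

    from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils

    pfamDB = ProfilesDatabankUtils.read("myhmms", verbose=1)   # returns None for an empty file
    if pfamDB is not None:
        for profile in pfamDB.getList():
            print profile.name, profile.desc, profile.length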
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/InsertProfilesMapFileInDBTestRessources.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/InsertProfilesMapFileInDBTestRessources.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,14 @@
+'''
+Created on 24 June 2009
+
+@author: choede
+'''
+
+class InsertProfilesMapFileInDBTestRessources(object):
+    '''
+    Put here resources not present in the config file
+    '''
+    REPET_DB_USER = "ochoede"
+    REPET_DB_HOST = "pisano"
+    REPET_DB_PW = "ochoede"
+    REPET_DB_NAME = "repet_ochoede"
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/TestCompleteProfilesDBFromProfilesNameListOrAccNumber.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/TestCompleteProfilesDBFromProfilesNameListOrAccNumber.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,67 @@
+'''
+Created on 18 May 2009
+
+@author: choede
+'''
+import unittest
+import commons.pyRepetUnit.profilesDB.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber
+
+
+class TestCompleteProfilesDBFromProfilesNameListOrAccNumber(unittest.TestCase):
+
+
+    def setUp(self):
+        self._fileProfilesList = "./datas/ListPfamProfilsInRepbase.txt"
+        self._filePfamList = "./datas/ListpfamAccNumber.txt"
+        self._profilesDB = "./datas/profilesDBTest.hmm"
+        self.CompleteProfilesDB = commons.pyRepetUnit.profilesDB.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber()
+        self.CompleteProfilesDB.setProfilesDBFile ( self._profilesDB )
+        self._expectedList = ["zf-CCHC", "InvE", "Chromo", "TLV_coat"]
+        self._expectedList2 = ["PF00075", "PF00424", "PF00469"]
+        self._expectedGetzCmd = "getz -e \'[pfamhmm-Id:\"zf-CCHC*\"] | [pfamhmm-Id:\"InvE*\"] | [pfamhmm-Id:\"Chromo*\"] | [pfamhmm-Id:\"TLV_coat*\"]\'"
+        self._expectedGetzCmd2 = "getz -e \'[pfamhmm-AccNumber:\"PF00075\"] | [pfamhmm-AccNumber:\"PF00424\"] | [pfamhmm-AccNumber:\"PF00469\"]\'"
+        
+        
+    def tearDown(self):
+        pass
+
+
+    def testListNotExistingProfiles(self):
+        #with name
+        self.CompleteProfilesDB.setProfilesToAdd ( self._fileProfilesList )
+        profilesNotInDBList = self.CompleteProfilesDB.generateNotExistingProfilesList( )
+        i = 0
+        if profilesNotInDBList != []:
+            for profil in profilesNotInDBList:
+                self.assertEquals(profil, self._expectedList[i])
+                i = i + 1
+        #with pfam accession number
+        self.CompleteProfilesDB.setProfilesToAdd ( self._filePfamList )
+        self.CompleteProfilesDB.setPfamAccNumberKeys ()
+        profilesNotInDBList = self.CompleteProfilesDB.generateNotExistingProfilesList( )
+        i = 0
+        if profilesNotInDBList != []:
+            for profil in profilesNotInDBList:
+                self.assertEquals(profil, self._expectedList2[i])
+                i = i + 1
+    
+    def testCreateGetzCommand(self):  
+        getzCmd = self.CompleteProfilesDB.generateGetzCmdProfilesList( self._expectedList )
+        # For now the query searches both fs and ls entries; to restrict it to fs only, specify type = fs, which will most likely be needed eventually
+        self.assertEqual (getzCmd, self._expectedGetzCmd)
+        
+    def testCmdToCompleteProfileDBFromList(self): 
+        #names list
+        self.CompleteProfilesDB.setProfilesToAdd ( self._fileProfilesList ) 
+        getzCmd = self.CompleteProfilesDB.CmdToCompleteProfileDB()
+        self.assertEqual (getzCmd, self._expectedGetzCmd)  
+        #pfam accession Number list
+        self.CompleteProfilesDB.setPfamAccNumberKeys ()
+        self.CompleteProfilesDB.setProfilesToAdd ( self._filePfamList ) 
+        getzCmd = self.CompleteProfilesDB.CmdToCompleteProfileDB()
+        self.assertEqual (getzCmd, self._expectedGetzCmd2)
+        
+    
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/TestInsertProfilesMapFileInDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/TestInsertProfilesMapFileInDB.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,65 @@
+import unittest
+import time
+import os
+from commons.core.sql.TableMapAdaptator import TableMapAdaptator
+from commons.core.coord.Map import Map
+#from commons.pyRepetUnit.profilesDB.tests.InsertProfilesMapFileInDBTestRessources import InsertProfilesMapFileInDBTestRessources
+from commons.pyRepetUnit.profilesDB.InsertProfilesMapFileInDB import InsertProfilesMapFileInDB
+from commons.core.sql.DbMySql import DbMySql
+
+
+class TestInsertProfilesMapFileInDB(unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFileName = "./datas/profiles.map"
+        self._createProfilesMapFile()
+#        self._host = InsertProfilesMapFileInDBTestRessources.REPET_DB_HOST
+#        self._user = InsertProfilesMapFileInDBTestRessources.REPET_DB_USER
+#        self._passwd = InsertProfilesMapFileInDBTestRessources.REPET_DB_PW
+#        self._dbname = InsertProfilesMapFileInDBTestRessources.REPET_DB_NAME
+#        self._db = DbMySql( self._user, self._host, self._passwd, self._dbname )
+        self._db = DbMySql( os.environ["REPET_USER"],
+                            os.environ["REPET_HOST"],
+                            os.environ["REPET_PW"],
+                            os.environ["REPET_DB"] )
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
+        self._table = "dummyMapTable_%s" % ( self._uniqId )
+
+    def tearDown(self):
+        os.remove(self._inputFileName)
+        self._deleteDBtables()
+        self._db.close()
+
+    def testInsertProfilesMapFileInDB(self):
+        resultFromFileMapUtils = self._readMapResultsFromFileAndFillList()
+        InsertProfilesMapFileInDBinstance = InsertProfilesMapFileInDB(self._inputFileName, self._table,  self._db)
+        InsertProfilesMapFileInDBinstance.createAndLoadTable()
+        resultFromTableMapUtils = self._readMapResultsFromTableAndFillList() 
+        self.assertEquals(resultFromFileMapUtils, resultFromTableMapUtils)
+    
+    def _createProfilesMapFile(self):
+        f = open(self._inputFileName, "w")
+        f.write("rrm\t\t1\t77\n")
+        f.write("fn3\tFibronectin type III domain\t1\t84\n")
+        f.write("pkinase\tProtein kinase domain\t1\t294\n")
+        f.close()
+        
+    def _readMapResultsFromTableAndFillList(self):
+        tableMapAdaptatorInstance = TableMapAdaptator (self._db, self._table)
+        return tableMapAdaptatorInstance.getListOfAllMaps()
+
+    def _readMapResultsFromFileAndFillList(self):
+        mapInstance = Map()
+        lMap = []
+        f = open( self._inputFileName , "r")
+        while mapInstance.read( f ):
+            lMap.append (mapInstance)
+            mapInstance = Map()
+        f.close()
+        return lMap
+   
+    def _deleteDBtables(self):
+        self._db.dropTable(self._table)
+        
+if __name__ == "__main__":
+    unittest.main()
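The test above connects to MySQL through environment variables; a sketch of the variables it expects, with hypothetical placeholder values to adapt to the local setup.

    import os
    os.environ.setdefault("REPET_USER", "repet_user")     # hypothetical values
    os.environ.setdefault("REPET_HOST", "localhost")
    os.environ.setdefault("REPET_PW", "repet_password")
    os.environ.setdefault("REPET_DB", "repet_test")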
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/TestProfiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/TestProfiles.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,151 @@
+import os
+import unittest
+from commons.pyRepetUnit.profilesDB.Profiles import Profiles
+from commons.core.utils.FileUtils import FileUtils
+
+
+class TestProfiles(unittest.TestCase):
+
+    def createInputDBFile(self):
+        f = open(self._inputDBFile, "w")
+        f.write("HMMER2.0  [2.3.2]\n")
+        f.write("NAME  fn3\n")
+        f.write("ACC   PF00041\n")
+        f.write("DESC  Fibronectin type III domain\n")
+        f.write("LENG  84\n")
+        f.write("ALPH  Amino\n")
+        f.write("RF    no\n")
+        f.write("CS    yes\n")
+        f.write("MAP   yes\n")
+        f.write("COM   hmmbuild -A myhmms fn3.sto\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("NSEQ  108\n")
+        f.write("DATE  Thu Jan  8 17:50:26 2009\n")
+        f.write("CKSUM 6130\n")
+        f.write("GA    7.8 2.0;\n")
+        f.write("TC    7.9 2.1;\n")
+        f.write("NC    7.0 1.1;\n")
+        f.write("XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n")
+        f.write("NULT      -4  -8455\n")
+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n")
+        f.write("EVD   -45.973442   0.237545\n")
+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n")
+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("          -13      *  -6756\n")
+        f.write("     1  -1698  -4236  -5399   -847  -4220  -2885  -1259   -930  -2438    406  -3428  -4768   3631  -1835  -4773  -1187  -1331   -120  -4666  -1510     1\n")
+        f.write("     -   -150   -501    232     46   -382    399    104   -628    211   -461   -722    274    395     44     95    358    118   -368   -296   -251 \n")
+        f.write("     C   -144  -3400 -12951    -19  -6286   -701  -1378    -13      * \n")
+        f.write("//\n")
+        f.write("HMMER2.0  [2.3.2]\n")
+        f.write("NAME  truc\n")
+        f.write("ACC   PF00042.012\n")
+        f.write("LENG  123\n")
+        f.write("ALPH  Amino\n")
+        f.write("RF    no\n")
+        f.write("CS    yes\n")
+        f.write("MAP   yes\n")
+        f.write("COM   hmmbuild -A myhmms fn3.sto\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("NSEQ  108\n")
+        f.write("DATE  Thu Jan  8 17:50:26 2009\n")
+        f.write("CKSUM 6130\n")
+        f.write("GA    7.8 -1.0;\n")
+        f.write("TC    7.9 0.1;\n")
+        f.write("NC    7.8 0.1;\n")
+        f.write("XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n")
+        f.write("NULT      -4  -8455\n")
+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n")
+        f.write("EVD   -45.973442   0.237545\n")
+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n")
+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("          -13      *  -6756\n")
+        f.write("     1  -1698  -4236  -5399   -847  -4220  -2885  -1259   -930  -2438    406  -3428  -4768   3631  -1835  -4773  -1187  -1331   -120  -4666  -1510     1\n")
+        f.write("     -   -150   -501    232     46   -382    399    104   -628    211   -461   -722    274    395     44     95    358    118   -368   -296   -251 \n")
+        f.write("     C   -144  -3400 -12951    -19  -6286   -701  -1378    -13      * \n")
+        f.write("//\n")
+        f.close()
+        
+        
+    def setUp(self):
+        self.profiles = Profiles()
+        self._inputDBFile = "dummy.hmm"
+        
+        
+    def testRead(self):
+        # atomic profiles file
+        self.createInputDBFile()
+        file = open( self._inputDBFile )
+        profile = self.profiles
+        profile.read(file)
+        self.assertEqual("fn3",profile.name)
+        self.assertEqual("Fibronectin type III domain", profile.desc)
+        self.assertEqual(84, profile.length)
+        self.assertEqual("PF00041", profile.accNumber)
+        self.assertEqual(2.0, profile.GA_cut_off)
+        profile.read(file)
+        self.assertEqual("truc",profile.name)
+        self.assertEqual("", profile.desc)
+        self.assertEqual(123, profile.length)
+        self.assertEqual("PF00042.012", profile.accNumber)
+        self.assertEqual(-1.0, profile.GA_cut_off)        
+        os.system("rm " + self._inputDBFile)        
+        # empty file
+        f = open(self._inputDBFile, "w")
+        f.close()
+        file = open( self._inputDBFile )
+        profile = self.profiles
+        profile.read(file)
+        self.assertEqual(None, profile.name)
+        self.assertEqual(None, profile.desc)
+        self.assertEqual(None, profile.length)
+        self.assertEqual(None, profile.accNumber)
+        self.assertEqual(None, profile.GA_cut_off)
+        os.system("rm " + self._inputDBFile)
+        
+        
+    def testReadAndRetrieve(self):
+        # atomic profiles file
+        self.createInputDBFile()
+        file = open( self._inputDBFile )
+        profile = self.profiles
+        myProfile = profile.readAndRetrieve(file)
+        self.assertEqual("fn3",profile.name)
+        self.assertEqual("Fibronectin type III domain", profile.desc)
+        self.assertEqual(84, profile.length)
+        self.assertEqual("PF00041", profile.accNumber)
+        self.assertEqual(2.0, profile.GA_cut_off)
+        myProfile2 = profile.readAndRetrieve(file)
+        self.assertEqual("truc",profile.name)
+        self.assertEqual("", profile.desc)
+        self.assertEqual(123, profile.length)
+        self.assertEqual("PF00042.012", profile.accNumber)
+        self.assertEqual(-1.0, profile.GA_cut_off)
+        # Check that retrieve returns the profile lines as expected
+        f = open("retrieveResult", "w")
+        for item in myProfile:
+            f.write(item)
+        for item in myProfile2:
+            f.write(item)
+        f.close()
+        self.assertTrue(FileUtils.are2FilesIdentical("retrieveResult", self._inputDBFile))
+        os.system("rm " + self._inputDBFile) 
+        os.system("rm retrieveResult")       
+        # empty file
+        f = open(self._inputDBFile, "w")
+        f.close()
+        file = open( self._inputDBFile )
+        profile = self.profiles
+        myProfile3 = profile.readAndRetrieve(file)
+        self.assertEqual(None, profile.name)
+        self.assertEqual(None, profile.desc)
+        self.assertEqual(None, profile.length)
+        self.assertEqual(None, profile.accNumber)
+        self.assertEqual(None, profile.GA_cut_off)
+        self.assertEqual(None, myProfile3)
+        os.system("rm " + self._inputDBFile)    
+    
+        
+if __name__ == "__main__":                 
+    unittest.main()            
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/TestProfilesDB2Map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/TestProfilesDB2Map.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,199 @@
[The 199-line hunk body is truncated in the source. The recoverable content adds TestProfilesDB2Map, a unittest suite that builds a small HMMER2 profiles file containing the profiles fn3 (Fibronectin type III domain, length 84) and truc (length 123), then checks that _writeMapFile writes the expected .map lines, that an empty ProfilesDatabank produces no output file, and that run() converts the profiles DB into the expected .map file.]
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/TestProfilesDatabankUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/TestProfilesDatabankUtils.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,101 @@
+import unittest
+import os
+from commons.pyRepetUnit.profilesDB.Profiles import Profiles
+from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils
+
+
+class TestProfilesDatabankUtils( unittest.TestCase ):
+    
+    def setUp( self ):
+        self.profilesDBUtils = ProfilesDatabankUtils()        
+        
+        
+    def test_read_emptyFile( self ):
+        file = open( "pfamDBEmpty", "w" )
+        file.close()
+        pfamDB2 = self.profilesDBUtils.read( "pfamDBEmpty" )
+        self.assertEquals( None, pfamDB2 )
+        os.remove( "pfamDBEmpty" )
+        
+        
+    def test_read( self ):
+        f = open("dummy.hmm", "w")
+        f.write("HMMER2.0  [2.3.2]\n")
+        f.write("NAME  fn3\n")
+        f.write("ACC   PF00041\n")
+        f.write("DESC  Fibronectin type III domain\n")
+        f.write("LENG  84\n")
+        f.write("ALPH  Amino\n")
+        f.write("RF    no\n")
+        f.write("CS    yes\n")
+        f.write("MAP   yes\n")
+        f.write("COM   hmmbuild -A myhmms fn3.sto\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("NSEQ  108\n")
+        f.write("DATE  Thu Jan  8 17:50:26 2009\n")
+        f.write("CKSUM 6130\n")
+        f.write("GA    7.8 -1.0\n")
+        f.write("TC    7.9 0.1\n")
+        f.write("XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n")
+        f.write("NULT      -4  -8455\n")
+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n")
+        f.write("EVD   -45.973442   0.237545\n")
+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n")
+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("          -13      *  -6756\n")
+        f.write("     1  -1698  -4236  -5399   -847  -4220  -2885  -1259   -930  -2438    406  -3428  -4768   3631  -1835  -4773  -1187  -1331   -120  -4666  -1510     1\n")
+        f.write("     -   -150   -501    232     46   -382    399    104   -628    211   -461   -722    274    395     44     95    358    118   -368   -296   -251 \n")
+        f.write("     C   -144  -3400 -12951    -19  -6286   -701  -1378    -13      * \n")
+        f.write("//\n")
+        f.write("HMMER2.0  [2.3.2]\n")
+        f.write("NAME  truc\n")
+        f.write("ACC   PF00041\n")
+        f.write("LENG  123\n")
+        f.write("ALPH  Amino\n")
+        f.write("RF    no\n")
+        f.write("CS    yes\n")
+        f.write("MAP   yes\n")
+        f.write("COM   hmmbuild -A myhmms fn3.sto\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("NSEQ  108\n")
+        f.write("DATE  Thu Jan  8 17:50:26 2009\n")
+        f.write("CKSUM 6130\n")
+        f.write("GA    7.8 -1.0\n")
+        f.write("TC    7.9 0.1\n")
+        f.write("XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n")
+        f.write("NULT      -4  -8455\n")
+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n")
+        f.write("EVD   -45.973442   0.237545\n")
+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n")
+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n")
+        f.write("COM   hmmcalibrate myhmms\n")
+        f.write("          -13      *  -6756\n")
+        f.write("     1  -1698  -4236  -5399   -847  -4220  -2885  -1259   -930  -2438    406  -3428  -4768   3631  -1835  -4773  -1187  -1331   -120  -4666  -1510     1\n")
+        f.write("     -   -150   -501    232     46   -382    399    104   -628    211   -461   -722    274    395     44     95    358    118   -368   -296   -251 \n")
+        f.write("     C   -144  -3400 -12951    -19  -6286   -701  -1378    -13      * \n")
+        f.write("//\n")        
+        f.close()
+        pfamDB = self.profilesDBUtils.read("dummy.hmm")
+        expectedProfiles1 = Profiles()
+        expectedProfiles1.name = "fn3"
+        expectedProfiles1.desc = "Fibronectin type III domain"
+        expectedProfiles1.length = 84
+        expectedProfiles2 = Profiles()
+        expectedProfiles2.name = "truc"
+        expectedProfiles2.desc = ""
+        expectedProfiles2.length = 123
+        
+        self.assertEqual( expectedProfiles1.name, pfamDB.get(0).name )
+        self.assertEqual( expectedProfiles1.desc, pfamDB.get(0).desc )
+        self.assertEqual( expectedProfiles1.length, pfamDB.get(0).length )
+        self.assertEqual( expectedProfiles2.name, pfamDB.get(1).name )
+        self.assertEqual( expectedProfiles2.desc, pfamDB.get(1).desc )
+        self.assertEqual( expectedProfiles2.length, pfamDB.get(1).length )
+        
+        os.remove("dummy.hmm")
+        
+        
+if __name__ == "__main__":                 
+    unittest.main()         
+        
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/Test_F_CompleteProfilDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/Test_F_CompleteProfilDB.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,40 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_CompleteProfilDB(unittest.TestCase):
+
+    def setUp(self):
+        self._fileProfilesList = "./datas/ListPfamProfilsInRepbase.txt"
+        self._filePfamList = "./datas/ListpfamAccNumber.txt"
+        self._profilesDB = "./datas/profilesDBTest.hmm"
+        self._expectedGetzCmd = "getz -e \'[pfamhmm-Id:\"zf-CCHC*\"] | [pfamhmm-Id:\"InvE*\"] | [pfamhmm-Id:\"Chromo*\"] | [pfamhmm-Id:\"TLV_coat*\"]\'"
+        self._expectedFile(self._expectedGetzCmd)
+        self._outputFileName = "./datas/outputFile"
+        self._expectedGetzCmd2 = "getz -e \'[pfamhmm-AccNumber:\"PF00075\"] | [pfamhmm-AccNumber:\"PF00424\"] | [pfamhmm-AccNumber:\"PF00469\"]\'"
+        
+    def tearDown(self):
+        os.remove(self._outputFileName)
+        os.remove(self._expectedGetzCmdFile)
+
+    def testCompleteProfilesDBScriptByName(self):
+        Cmd = "completeProfilesDBFromAFileWithProfilesList_script.py -d " + self._profilesDB + " -l " + self._fileProfilesList + " -o " + self._outputFileName + "\n"
+        os.system (Cmd)
+        self.assertTrue (FileUtils.are2FilesIdentical(self._outputFileName, self._expectedGetzCmdFile))
+    
+    def testCompleteProfilesDBScriptByAccession(self):
+        self._expectedFile(self._expectedGetzCmd2)
+        Cmd = "completeProfilesDBFromAFileWithProfilesList_script.py -d " + self._profilesDB + " -l " + self._filePfamList + " -o " + self._outputFileName + " -n\n"
+        os.system(Cmd)
+        self.assertTrue(FileUtils.are2FilesIdentical(self._outputFileName, self._expectedGetzCmdFile))
+
+    def _expectedFile(self, expectedGetzCmd):
+        self._expectedGetzCmdFile = "./datas/expectedCmdFile"
+        f = open(self._expectedGetzCmdFile, 'w')
+        f.write(expectedGetzCmd)
+        f.close()
+
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB2Map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB2Map.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,34 @@
+import os
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+from commons.pyRepetUnit.profilesDB.ProfilesDB2Map import ProfilesDB2Map
+
+class Test_F_ProfilesDB2Map(unittest.TestCase):
+    
+    def setUp(self):
+        self.profilesDB2Map = ProfilesDB2Map()
+        self._inputFile = "./datas/myhmms"
+        self._outputFile = "./datas/outputFile.map"
+        self._expectedFile = "./datas/profiles.map" 
+        self._createProfilesMapFile()
+        
+    def tearDown(self):
+        os.remove(self._outputFile)
+        os.remove(self._expectedFile)
+        
+    def testRun(self):
+        self.profilesDB2Map.setInputFile(self._inputFile)
+        self.profilesDB2Map.setOutputFile(self._outputFile)
+        self.profilesDB2Map.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedFile, self._outputFile))
+        
+    def _createProfilesMapFile(self):
+        f = open(self._expectedFile, "w")
+        f.write("rrm\t\t1\t77\n")
+        f.write("fn3\tFibronectin type III domain\t1\t84\n")
+        f.write("pkinase\tProtein kinase domain\t1\t294\n")
+        f.close()   
+                   
+        
+if __name__ == "__main__":                 
+    unittest.main()       
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB4Repet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB4Repet.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,20 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_ProfilesDB4Repet(unittest.TestCase):
+
+    def setUp(self):
+        self._inputDBFile = "%s/Tools/ProfilesDB4Repet_v2.hmm" % os.environ["REPET_DATA"]
+        self._outputDBFile = "output.hmm"
+
+    def tearDown(self):
+        os.remove(self._outputDBFile)
+
+    def test_run_as_script(self):
+        cmd = "python ../ProfilesDB4Repet.py -i " + self._inputDBFile + " -o " + self._outputDBFile
+        os.system(cmd)
+        self.assertEqual(FileUtils.getNbLinesInSingleFile(self._outputDBFile), 375854)
+        
+if __name__ == "__main__":
+    unittest.main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/Test_ProfilesDB4Repet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/Test_ProfilesDB4Repet.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,442 @@\n+import unittest\n+import os\n+from commons.pyRepetUnit.profilesDB.ProfilesDB4Repet import ProfilesDB4Repet\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_ProfilesDB4Repet(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inputDBFile = "./datas/dummy.hmm"\n+        self._expectedDBFile = "./datas/expected.hmm"\n+        self._outputDBFile = "./datas/output.hmm"\n+        self._profilesDB4Repet = ProfilesDB4Repet()\n+\n+    def tearDown(self):\n+        os.remove(self._expectedDBFile)\n+        os.remove(self._outputDBFile)\n+        os.remove(self._inputDBFile)\n+\n+    def testFormateProfilesDB4Repet(self):\n+        self._createHmmFile()\n+        self._createExpectedHmmFile()\n+        self._profilesDB4Repet.setInputFile(self._inputDBFile)\n+        self._profilesDB4Repet.setOutputFile(self._outputDBFile)\n+        self._profilesDB4Repet.run()\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedDBFile, self._outputDBFile))\n+\n+    def test_formateProfilesDB4Repet_GypsyDB(self):\n+        self._createHmmFile_GypsyDB()\n+        self._createExpectedHmmFile_GypsyDB()\n+        self._profilesDB4Repet.setInputFile(self._inputDBFile)\n+        self._profilesDB4Repet.setOutputFile(self._outputDBFile)\n+        self._profilesDB4Repet.run()\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedDBFile, self._outputDBFile))\n+   \n+    def _createExpectedHmmFile(self):\n+        f = open(self._expectedDBFile, "w")\n+        f.write("HMMER2.0  [2.3.2]\\n")\n+        f.write("NAME  PF07582.4_AP_endonuc_2_N_APE_20.9\\n")\n+        f.write("ACC   PF07582.4\\n")\n+        f.write("DESC  AP endonuclease family 2 C terminus\\n")\n+        f.write("LENG  84\\n")\n+        f.write("ALPH  Amino\\n")\n+        f.write("RF    no\\n")\n+        f.write("CS    yes\\n")\n+        f.write("MAP   yes\\n")\n+        f.write("COM   hmmbuild -A myhmms fn3.sto\\n")\n+        f.write("COM   hmmcalibrate myhmms\\n")\n+        f.write("NSEQ  108\\n")\n+        f.write("DATE  Thu Jan  8 17:50:26 2009\\n")\n+        f.write("CKSUM 6130\\n")\n+        f.write("GA    20.9000 20.9000;\\n")\n+        f.write("TC    21.2000 21.2000;\\n")\n+        f.write("NC    20.8000 20.8000;\\n")\n+        f.write("XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \\n")\n+        f.write("NULT      -4  -8455\\n")\n+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \\n")\n+        f.write("EVD   -45.973442   0.237545\\n")\n+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \\n")\n+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\\n")\n+        f.write("COM   hmmcalibrate myhmms\\n")\n+        f.write("          -13      *  -6756\\n")\n+        f.write("     1  -1698  -4236  -5399   -847  -4220  -2885  -1259   -930  -2438    406  -3428  -4768   3631  -1835  -4773  -1187  -1331   -120  -4666  -1510     1\\n")\n+        f.write("     -   -150   -501    232     46   -382    399    104   -628    211   -461   -722    274    395     44     95    358    118   -368   -296   -251 \\n")\n+        f.write("     C   -144  -3400 -12951    -19  -6286   -701  -1378    -13      * \\n")\n+        f.write("//\\n")\n+        f.write("HMMER2.0  [2.3.2]\\n")\n+        f.write("NAME  
PF01606.8_Arteri_env_ENV_25.0\\n")\n+        f.write("ACC   PF01606.8\\n")\n+        f.write("DESC  DESC  Arterivirus envelope protein\\n")\n+        f.write("LENG  123\\n")\n+        f.write("ALPH  Amino\\n")\n+        f.write("RF    no\\n")\n+        f.write("CS    yes\\n")\n+        f.write("MAP   yes\\n")\n+        f.write("COM   hmmbuild -A myhmms fn3.sto\\n")\n+        f.write("COM   hmmcalibrate myhmms\\n")\n+        f.write("NSEQ  108\\n")\n+        f.write("DATE  Thu Jan  8 17:50:26 2009\\n")\n+        f.write("CKSUM 6130\\n")\n+        f.write("GA    21.0000 25.0000;\\n")'..b'  -4  -8455\\n")\n+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644\\n") \n+        f.write("EVD   -12.496953   0.641902\\n")\n+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y\\n")    \n+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\\n")\n+        f.write("         -576      *  -1603\\n")\n+        f.write("1   -995  -1009  -2161  -1884   -298  -2046  -1336    348  -1429    591   4179  -1738  -2255  -1394  -1460  -1476  -1060     98  -1303   -906     1\\n")\n+        f.write("-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249\\n") \n+        f.write("-    -33  -6075  -7117   -894  -1115   -701  -1378  -1576 -10335\\n") \n+        f.write("2     95   -518   -873   -904  -1785   -691   -984  -1664   -955  -1934  -1254   -652  -1327   -861  -1134   2815   -243  -1070  -2024  -1564     2\\n")\n+        f.write("-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249\\n") \n+        f.write("-    -33  -6075  -7117   -894  -1115   -701  -1378 -10911 -10334\\n") \n+        f.write("//\\n")\n+        f.write("HMMER2.0  [2.3.2]\\n")\n+        f.write("NAME  MOV_cavemovirus\\n")\n+        f.write("LENG  302\\n")\n+        f.write("ALPH  Amino\\n")\n+        f.write("RF    no\\n")\n+        f.write("CS    no\\n")\n+        f.write("MAP   yes\\n")\n+        f.write("COM   hmmbuild output/MOV_cavemovirus.hmm input/MOV_cavemovirus.sto\\n")\n+        f.write("COM   hmmcalibrate output/MOV_cavemovirus.hmm\\n")\n+        f.write("NSEQ  2\\n")\n+        f.write("DATE  Tue Nov  3 11:25:12 2009\\n")\n+        f.write("CKSUM 1918\\n")\n+        f.write("XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4\\n") \n+        f.write("NULT      -4  -8455\\n")\n+        f.write("NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644\\n") \n+        f.write("EVD   -135.876633   0.154850\\n")\n+        f.write("HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y\\n")    \n+        f.write("         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\\n")\n+        f.write("         -585      *  -1585\\n")\n+        f.write("1  -1028  -1686   -800   -442  -2171  -1521   -263  -1873   3103  -1859  -1251   -547  -1750     74    599  -1004   -972  -1620  -1812  -1528     5\\n")\n+        f.write("-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96  
  359    117   -369   -294   -249\\n") \n+        f.write("-    -33  -6045  -7087   -894  -1115   -701  -1378   -585      *\\n") \n+        f.write("2  -1016   -805  -2637  -2367   -591  -2470  -1853   3039  -2040    329    346  -2180  -2589  -1943  -2070  -1879  -1051   1184  -1698  -1261     6\\n")\n+        f.write("-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249\\n") \n+        f.write("-    -33  -6045  -7087   -894  -1115   -701  -1378      *      *\\n") \n+        f.write("3  -1016   -805  -2637  -2367   -591  -2470  -1853   3039  -2040    329    346  -2180  -2589  -1943  -2070  -1879  -1051   1184  -1698  -1261     7\\n")\n+        f.write("-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249\\n") \n+        f.write("-    -33  -6045  -7087   -894  -1115   -701  -1378      *      *\\n") \n+        f.write("//\\n")\n+        f.close()     \n+\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
b
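The test above writes a dummy HMM databank, runs ProfilesDB4Repet on it and asserts byte-for-byte identity between the expected and produced files via FileUtils.are2FilesIdentical. A minimal sketch of such a comparison, assuming nothing about the real commons.core.utils.FileUtils implementation (the helper name below is hypothetical):

# Hypothetical stand-in for FileUtils.are2FilesIdentical: line-by-line comparison.
def are_files_identical(path1, path2):
    with open(path1) as f1, open(path2) as f2:
        for line1, line2 in zip(f1, f2):
            if line1 != line2:
                return False
        # both files must also end at the same position (same number of lines)
        return f1.readline() == "" and f2.readline() == ""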
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/completeProfilesDBFromAFileWithProfilesList_script.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/completeProfilesDBFromAFileWithProfilesList_script.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+'''
+Created on 19 mai 2009
+
+@author: choede
+'''
+
+import commons.pyRepetUnit.profilesDB.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber
+import user, os, sys, getopt, exceptions
+from pyRepet.util.file.FileUtils import *
+
+#------------------------------------------------------------------------------
+
+def help():
+
+    """
+    Give the command-line parameters.
+    """
+
+    print ""
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -d: name of profiles databank (format='pfam')"
+    print "     -l: name of the profiles list file (name or accession number, default=profiles name)"
+    print "     -o: name of the output file (default=profiles list file+'.getz')"
+    print "     -v: verbose (default=0/1/2)"
+    print "     -n: pfam accession number (default=Not set)"
+    print ""
+
+#------------------------------------------------------------------------------
+
+def main():
+
+    DBFileName = ""
+    outFileName = ""
+    verbose = 0
+    listFileName = ""
+    accNumber = False
+
+    try:
+        opts,args=getopt.getopt(sys.argv[1:],"hd:l:o:v:n")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-d":
+            DBFileName = a
+        elif o == "-l":
+            listFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+        elif o == "-n":
+            accNumber = True
+        
+            
+    if DBFileName == "" or listFileName == "":
+        print "*** Error: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "beginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    if outFileName == "":
+        outFileName = "%s.getz" % ( listFileName )
+     
+    CompleteProfilesDB = commons.pyRepetUnit.profilesDB.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber()
+    CompleteProfilesDB.setProfilesDBFile ( DBFileName )
+    CompleteProfilesDB.setProfilesToAdd ( listFileName )
+    if accNumber == True:
+        CompleteProfilesDB.setPfamAccNumberKeys ()
+        if verbose > 0:
+            print "The profiles list is in Accession number"
+            sys.stdout.flush()
+    getzCmd = CompleteProfilesDB.CmdToCompleteProfileDB()
+    f = open( outFileName , 'w')
+    f.write ( getzCmd )
+    f.close()
+
+    if verbose > 0:
+        fileUtils = FileUtils( )
+        if fileUtils.isRessourceExists( outFileName ) and not(fileUtils.isFileEmpty( outFileName )):
+            print "%s finished successfully" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+        else:
+            print "warning %s execution failed" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+
+    return 0
+
+#------------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
b
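Note that completeProfilesDBFromAFileWithProfilesList_script.py does not query the databank itself: it only writes the 'getz' command returned by CmdToCompleteProfileDB() into the output file. A hedged follow-up sketch, assuming the list file was named profiles.lst, the default -o was kept, and a getz binary is available on the PATH (none of which is stated in the changeset):

# Assumption: profiles.lst.getz was produced by the script above with the default -o.
import subprocess

with open("profiles.lst.getz") as f:
    getzCmd = f.read().strip()
status = subprocess.call(getzCmd, shell=True)  # run the stored getz command
print "getz exit status:", status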
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/datas/ListPfamProfilsInRepbase.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/datas/ListPfamProfilsInRepbase.txt Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,17 @@
+RVT_1
+RVT_2
+rve
+zf-CCHC
+DDE
+hATC
+Hermes_DBD
+MULE
+Exo_endo_phos
+Peptidase_C27
+Transposase_mut
+InvE
+Retrotrans_gag
+Chromo
+TLV_coat
+
+
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/datas/ListpfamAccNumber.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/datas/ListpfamAccNumber.txt Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,6 @@
+PF00075
+PF00077
+PF00078
+PF00424
+PF05407
+PF00469
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/datas/myhmms
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/datas/myhmms Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,1433 @@\n+HMMER2.0  [2.3.2]\n+NAME  rrm\n+LENG  77\n+ALPH  Amino\n+RF    no\n+CS    no\n+MAP   yes\n+COM   hmmbuild -A myhmms rrm.sto\n+COM   hmmcalibrate myhmms\n+NSEQ  90\n+DATE  Thu Jan  8 17:50:18 2009\n+CKSUM 8325\n+XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n+NULT      -4  -8455\n+NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n+EVD   -45.953121   0.233439\n+HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n+         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n+          -16      *  -6492\n+     1  -1084    390  -8597  -8255  -5793  -8424  -8268   2395  -8202   2081  -1197  -8080  -8115  -8020  -8297  -7789  -5911   1827  -7525  -7140     1\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378    -16      * \n+     2  -2140  -3785  -6293  -2251   3226  -2495   -727   -638  -2421   -545   -675  -5146  -5554  -4879  -1183  -2536  -1928    267     76   3171     2\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378      *      * \n+     3  -2542    458  -8584  -8273  -6055  -8452  -8531   2304  -8255   -324    101  -8104  -8170  -8221  -8440  -7840  -5878   3145  -7857  -7333     3\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378      *      * \n+     4  -1505  -5144  -1922   -558  -1842   2472  -3303  -2213   1099  -5160  -4233    372  -4738   -530   1147    168    498  -4766  -5327  -1476     4\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378      *      * \n+     5  -3724  -5184    300  -3013  -1655   1803  -3353  -5245  -1569  -2686  -4276   3495  -1963  -1331  -1054  -1472  -3664  -4803  -5369     -2     5\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378      *      * \n+     6  -1569  -6106  -8967  -8363    555  -8531  -7279    654  -8092   2953    -94  -8220  -7908  -1643  -7682  -7771  -6460    -59  -6191  -6284     6\n+     -   -151   -504    230     45   -380    399    101   -621    211   -470   -713    278    399     48     91    360    113   -364   -299   -254 \n+     -   -178  -3113 -12684  -1600   -578   -701  -1378      *      * \n+     7   -409  -5130   -215  -2987  -1709   -956    690  -5188   -395  -5144  -4224    729   3054  -2862  -3409    354   1293  -1381  -5321  -4644    13\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378      *      * \n+     8  -3674  -5118  -1004    639    420  -4652    176  -2050    404  -1039   -935     16   1755    168    147   -275    198  -1472   1889   1977    
14\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11642 -12684   -894  -1115   -701  -1378      *      * \n+     9   -408  -5134   2415   1299   -950    -66   -767  -1296  -2889  -1843  -4224   1084   -968  -1439  -1854    540   -314  -2304  -5320    -60    15\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96 '..b'246  -4848  -4187   461\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11183 -12226   -894  -1115   -186  -3045      *      * \n+   285   2163    763  -1619  -5296   2250  -2060  -4007   1241  -4891   -489    484  -4781   -226  -4515  -4692   -678  -1688   -813    264  -3530   462\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11253 -12295   -894  -1115   -701  -1378      *      * \n+   286   -268   -329   -158    917   -541  -1990    350  -4851   1273  -1075    388  -1130    233    840    993   -602    801   -595  -4964   -857   463\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11253 -12295   -894  -1115   -701  -1378      *      * \n+   287    109   -243    672   2304  -5103  -4283    488  -4854  -1317  -2269   -656   -492  -1519   2679   -655   -618  -3248  -4404  -4965  -1114   464\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11253 -12295   -894  -1115   -701  -1378      *      * \n+   288   1312   1294  -6215  -5593   -206  -1244  -4339   2188  -5201   1409    395  -5091  -5478  -4828  -5009  -4538  -3794   1162  -4188  -3846   465\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -23 -11253  -6022   -894  -1115   -701  -1378      *      * \n+   289  -3562    799  -5767  -2054  -1235  -2075    318    138    237   2164   1713  -1454  -5145  -1272   -730  -4172  -1640   1071  -3865    -34   466\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11231 -12273   -894  -1115  -1470   -646      *      * \n+   290     73   1351   -674   1236  -1549  -2008   1350  -4834   1049  -2498  -3851   1801  -4356   1813   -115   -223  -1582  -1052  -4945  -4262   467\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11231 -12273   -894  -1115   -369  -2147      *      * \n+   291  -1739   -320    777  -2654  -1419  -2051   4360  -4707  -1358  -2412   -689  -1300  -4399   -224    537    531   -289  -2010  -4905  -1057   468\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -11253 -12295   -894  -1115   -701  -1378      *      * \n+   292  -3345  -4494   -233   -332   -563  -1986  -3051    333     99   1063  -3616  -3072   2953  -1026  -1490   -943  -1528  -1070  -4753  -4151   469\n+     
-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -10815 -11857   -894  -1115   -701  -1378      *      * \n+   293  -6409  -5751  -7614  -7636   2593  -7311  -4003  -5084  -7219   -150   -151  -6210  -7172   -849  -6723  -6510  -6299  -1387   4881   2807   470\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -1 -10749 -11791   -894  -1115   -701  -1378      *      * \n+   294  -4057  -3817  -6415  -5791   3203  -1638  -4541   1679  -5412    765   1434  -5333  -5617  -4930  -5182  -4791  -3987   1226    750  -3959   471\n+     -      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      * \n+     -      *      *      *      *      *      *      *      *      0 \n+//\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/datas/profilesDBTest.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/datas/profilesDBTest.hmm Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,7162 @@\n+HMMER2.0  [2.3.2]\n+NAME  RVT_1\n+ACC   PF00078.19\n+DESC  Reverse transcriptase (RNA-dependent DNA polymerase)\n+LENG  242\n+ALPH  Amino\n+RF    no\n+CS    no\n+MAP   yes\n+COM   hmmbuild -f -F --prior PRIOR HMM_fs.ann SEED.ann\n+COM   hmmcalibrate --seed 0 HMM_fs.ann\n+NSEQ  156\n+DATE  Mon Jun  2 18:30:00 2008\n+CKSUM 7481\n+GA    35.2000 35.2000;\n+TC    35.2000 35.2000;\n+NC    35.1000 35.1000;\n+XT      -8455     -4  -1000  -1000  -8455     -4  -8455     -4 \n+NULT      -4  -8455\n+NULE     595  -1558     85    338   -294    453  -1158    197    249    902  -1085   -142    -21   -313     45    531    201    384  -1998   -644 \n+EVD   -10.513688   0.618327\n+HMM        A      C      D      E      F      G      H      I      K      L      M      N      P      Q      R      S      T      V      W      Y    \n+         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n+          -13      *  -6834\n+     1    254   -362  -1116     20   -891   -782   -593  -1348    663  -2265  -4560    682    522    340    223   2070  -1876  -2798   -263  -4972     1\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -3 -26756 -26756      0 -19932      0 -19932  -1013  -8913 \n+     2   -419   -233   -396  -1108   1220  -2792  -3751   -250   -584  -1669  -4321  -1660   2448   -323   -379  -1635   -527  -1337   3626    168     2\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -12 -26767  -7314      0 -19932      0 -19932  -8926  -8910 \n+     3  -3036   1124  -1949  -2086   -484  -1024   1155   -973   1367  -1738   -250   1015   -902    -89   1143   -814   -125  -1397   2962   1591     3\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -3 -26758 -26758      0 -19932   -781  -1259  -8926  -8907 \n+     4    516   -987   -771   -323  -5789  -1603   1192   -740     80  -1399    452   1966   1252   -961    178    229    252  -1273   -632  -4972     4\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -3 -26758 -26758      0 -19932   -781  -1259  -8926  -8904 \n+     5   1343  -1132  -6596  -5963  -4077   -340  -4703    201  -5567  -2165   -675   -770  -2432  -5197   -287   1192   2065    481  -4580    975     5\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -3 -26758 -26758      0 -19932      0 -20712  -8926  -8901 \n+     6  -1152  -5367  -3923  -1459  -2119   -869   -642  -1019    908  -1501  -4470    334   2500  -1749   1764  -1025    141    -13   -594  -1817     6\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -3 -26767 -26767      0 -19932      0 -19932  -8926  -8898 \n+     7    414  -4522  -2360  -2529  -4542  -1623  -4197   1934   -816    267    417  -4460  -2354  -1999   1302  -1950    186   1758  -4919  -4490     7\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -10 -26767  -7603      
0 -19932      0 -19932  -8926  -8895 \n+     8  -1232    249  -2517  -1894   2481  -2907  -1402   1076   -826   -485   -144  -5476  -2696  -5208  -3149  -1959    -20   2026   1351    503     8\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -     -3 -26760 -26760      0 -19932      0 -20598  -8926  -8892 \n+     9   -326   -780  -6569  -1098   1014  -1723  -4705    257  -1888   1172 '..b'179  -3382  -3017   122\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -15 -10636 -11678   -894  -1115   -701  -1378  -7662  -6768 \n+    91  -1419  -2874  -4996   -166  -2834  -4498  -3350   2192  -4047    893   2115  -1277  -4552    898   -425  -1951  -2931   1569  -3326  -2975   123\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -15 -10636 -11678   -894  -1115   -701  -1378  -7662  -6755 \n+    92    -76  -4209   -341   1004   -950   -235   1846  -4278  -1226  -1838   -551   -151  -3808   -565   1304   1164    477     53  -4394   -721   124\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -35 -10636  -6161   -894  -1115   -701  -1378  -7662  -6741 \n+    93   1500  -3036  -4100  -3522   -848  -4290  -3083   -269   1297  -1148   1015    344  -4355   -811   1917  -3310    743    248  -3462  -3074   125\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -82 -10616  -4462   -894  -1115  -1200   -824  -7662  -6728 \n+    94   -232    623  -5382  -4748   3758  -4590  -3444   -657  -4344    426     -2  -4233  -4630  -3956  -1660  -3676  -2979  -2255  -3285   1780   126\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -15 -10549 -11591   -894  -1115  -2164   -364  -7662  -6714 \n+    95  -2923  -2753  -5240  -4606   -782  -2061  -3337   1062  -1449   1644   2573  -4104  -4517   -862    878  -1538    150    -96  -3210   1568   127\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -15 -10549 -11592   -894  -1115  -2164   -364  -7662  -6700 \n+    96    978  -4137   -106    675  -4458      8    289  -4209    991  -4153   -379   1210  -3731    767   1934   -484  -1280  -3759  -4320  -3637   128\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -16 -10551 -11593   -894  -1115  -2164   -364  -7662  -6688 \n+    97  -1890  -5006  -7160  -7517  -7033   3792  -6646  -7516  -7403  -7615  -6806  -6070  -6009  -6947  -7027  -4766  -4978  -6286    719  -6963   129\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -37 -10550  -6074   -894  -1115   -576  -1603  -7662  -6672 \n+    98  -1954  -5230  -8287  -7756  -3698  -8091  -6961   -272  -7594   3060   1462  -7802  -7231  -6491  -7169  -7468  -5696    303  -5529  -5708   130\n+     
-   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -16 -10591 -11633   -894  -1115   -320  -2331  -7662  -6658 \n+    99   -977  -4213    252  -2039  -4534  -1329    -71  -4284    999   -787  -3302   2027   -922    385   2239   -660   -231  -3835   1268   -871   131\n+     -   -149   -500    233     43   -381    399    106   -626    210   -466   -720    275    394     45     96    359    117   -369   -294   -249 \n+     -    -16 -10637 -11679   -894  -1115   -701  -1378  -7662  -6644 \n+   100  -1418  -4205   1277   1300  -1156   -249    666   -936   -491  -2013  -3295   -979   1463     75    844  -2623    643  -1003    406     42   132\n+     -      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      * \n+     -      *      *      *      *      *      *      *  -7662      0 \n+//\n'
b
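The two data files above (myhmms and profilesDBTest.hmm) are HMMER 2 profile databanks: each record opens with a 'HMMER2.0' line, carries header fields such as NAME, ACC, DESC and LENG, and is closed by '//'. A minimal sketch for pulling those header fields out of such a file; the function is illustrative only and not part of the repository:

# Hypothetical reader for the NAME/ACC/DESC/LENG header fields of each HMMER2 profile.
def read_hmm_headers(fileName):
    profiles = []
    current = {}
    with open(fileName) as f:
        for line in f:
            if line.startswith("HMMER2.0"):
                current = {}
            elif line.startswith("//"):
                profiles.append(current)
            elif line[:5].strip() in ("NAME", "ACC", "DESC", "LENG"):
                current[line[:5].strip()] = line[5:].strip()
    return profiles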
diff -r 5677346472b5 -r 0ab839023fe4 commons/pyRepetUnit/profilesDB/tests/profilesDBTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/profilesDB/tests/profilesDBTestSuite.py Tue Apr 30 14:33:21 2013 -0400
b
@@ -0,0 +1,27 @@
+import unittest
+import sys
+import TestProfilesDB2Map
+import TestAcceptanceProfilesDB2Map
+import TestCompleteProfilesDBFromProfilesNameListOrAccNumber
+import TestAcceptanceCompleteProfilDB_script
+import TestProfilesDB4Repet
+import TestAcceptanceProfilesDB4Repet
+import TestInsertProfilesMapFileInDB
+
+def main():
+
+        profilesDBTestSuite = unittest.TestSuite() 
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestProfilesDB2Map.TestProfilesDB2Map,'test'))
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestAcceptanceProfilesDB2Map.TestAcceptanceProfilesDB2Map,'test'))
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestCompleteProfilesDBFromProfilesNameListOrAccNumber.TestCompleteProfilesDBFromProfilesNameListOrAccNumber, 'test'))
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestAcceptanceCompleteProfilDB_script.TestAcceptanceCompleteProfilDB_script, 'test'))
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestProfilesDB4Repet.TestProfilesDB4Repet, 'test'))
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestAcceptanceProfilesDB4Repet.TestAcceptanceProfilesDB4Repet, 'test'))
+        profilesDBTestSuite.addTest(unittest.makeSuite(TestInsertProfilesMapFileInDB.TestInsertProfilesMapFileInDB, 'test'))
+        
+        runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+        runner.run(profilesDBTestSuite)
+
+
+if __name__ == '__main__':
+    main()
b
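profilesDBTestSuite.py wires the seven test classes together by hand with unittest.makeSuite. For comparison only (this runner is not shipped with the changeset), the same result can be obtained on Python 2.7+ with automatic discovery, assuming the suite is launched from the tests/ directory:

# Sketch: discover every Test*.py module in the current directory and run it.
import sys
import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.discover(".", pattern="Test*.py")
    unittest.TextTestRunner(sys.stderr, verbosity=2).run(suite)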
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/AlignTEOnGenomeAccordingToAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/AlignTEOnGenomeAccordingToAnnotation.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,282 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import re\n+import os\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.checker.ConfigChecker import ConfigRules, ConfigChecker\n+import subprocess\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.coord.SetUtils import SetUtils\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbFactory import DbFactory\n+\n+## Align a TE on genome according to annotation\n+\n+LOG_DEPTH = "repet.tools"\n+\n+class AlignTEOnGenomeAccordingToAnnotation(object):\n+    \n+    def __init__(self, pathTableName = "", queryTableName = "", subjectTableName = "", mergeSamePathId = False, outTableName = "", matchPenality=10, mism=8, gapo=16, gape=4, gapl=20, configFileName = "", doClean = False, verbosity = 0):\n+        self._pathTableName = pathTableName\n+        self._queryTableName = queryTableName\n+        self._subjectTableName = subjectTableName\n+        self._mergeSamePathId = mergeSamePathId\n+        self.setOutTableName(outTableName)\n+        self._matchPenality = matchPenality\n+        self._mismatch = mism\n+        self._gapOpening = gapo\n+        self._gapExtend = gape\n+        self._gapLength = gapl\n+        self._configFileName = configFileName\n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._iDb = None\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Align a TE on genome according to annotation."\n+        epilog = "\\nExample 1: 
launch without verbosity and keep temporary files.\\n"\n+        epilog += "\\t$ python AlignTEOnGenomeAccordingToAnnotation.py -p DmelChr4_chr_allTEs_nr_noSSR_join_path -q DmelChr4_chr_seq -s DmelChr4_refTEs_seq -v 0"\n+        epilog += "\\n"\n+        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parser.add_option("-p", "--path",       dest = "pathTableName",     action = "store",       type = "string", help = "path table name [compulsory] [format: path]", default = "")\n+        parser.ad'..b's : is it correct ? matcher allow overlap on query and not on subject ?\n+                iSubjectBioseq = iSubjectTSA.getBioseqFromSetList(lSubjectSets)\n+                iAlignedBioseqDB = self.alignBioseqWithNWalign(iQueryBioseq, iSubjectBioseq)\n+                self._insertAlignedBioseqDBWithScoreAndIdentityInTable(pathNum, iAlignedBioseqDB)\n+        else:\n+            lPathId = iTPA.getIdList()\n+            pathNb = len(lPathId)\n+            count = 0\n+            for pathNum in lPathId:\n+                self._log.debug(count,"/",pathNb,"=>path",pathNum,"...")\n+                lPaths = iTPA.getPathListFromId(pathNum)\n+                queryName = lPaths[0].getQueryName()\n+                subjectName = lPaths[0].getSubjectName()\n+                lQueryStart = []\n+                lQueryEnd = []\n+                lSubjectStart = []\n+                lSubjectEnd = []\n+                isReversed = not lPaths[0].isSubjectOnDirectStrand()\n+                for iPath in lPaths:\n+                    lQueryStart.append(iPath.getQueryStart())\n+                    lQueryEnd.append(iPath.getQueryEnd())\n+                    lSubjectStart.append(iPath.getSubjectStart())\n+                    lSubjectEnd.append(iPath.getSubjectEnd())\n+                queryStart = min(lQueryStart)\n+                queryEnd = max(lQueryEnd)\n+                if isReversed:\n+                    subjectStart = max(lSubjectStart)\n+                    subjectEnd = min(lSubjectEnd)\n+                else:\n+                    subjectStart = min(lSubjectStart)\n+                    subjectEnd = max(lSubjectEnd)\n+                lQuerySets = [Set(pathNum,subjectName, queryName,queryStart,queryEnd)]\n+                lSubjectSets = [Set(pathNum,queryName, subjectName,subjectStart,subjectEnd)]\n+                \n+                iQueryBioseq = iQueryTSA.getBioseqFromSetList(lQuerySets)\n+                iSubjectBioseq = iSubjectTSA.getBioseqFromSetList(lSubjectSets)\n+                iAlignedBioseqDB = self.alignBioseqWithNWalign(iQueryBioseq, iSubjectBioseq)\n+                self._insertAlignedBioseqDBWithScoreAndIdentityInTable(pathNum, iAlignedBioseqDB)\n+                    \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        if self._configFileName:\n+            self._checkConfig()\n+        self._iDb = DbFactory.createInstance()\n+        self._checkOptions()\n+        self._log.info("START AlignTEOnGenomeAccordingToAnnotation")\n+        self._log.debug("path table name: %s" % self._pathTableName)\n+        self._log.debug("query table name: %s" % self._queryTableName)\n+        self._log.debug("subject table name: %s" % self._subjectTableName)\n+        self.alignSeqAccordingToPathAndBuildAlignedSeqTable()\n+        self._iDb.close()\n+        self._log.info("END AlignTEOnGenomeAccordingToAnnotation")\n+\n+    def _insertAlignedBioseqDBWithScoreAndIdentityInTable(self, pathNum, iAlignedBioseqDB):\n+        scoreWithEndLine = 
re.split("Score=", iAlignedBioseqDB.db[0].header)[1]\n+        score = int(scoreWithEndLine.split()[0])\n+        \n+        identity = re.split("Identity=", scoreWithEndLine)[1]\n+        if identity == "nan":\n+            identity = "0.0"\n+        identity = float(identity)*100.0\n+        \n+        #TODO: create TableAlignedSeqAdaptator (to use insert...)\n+        sqlCmd = \'INSERT INTO %s VALUES (%d,"%s","%s", %d,%f);\' % (self._outTableName, pathNum, iAlignedBioseqDB.db[0].sequence, iAlignedBioseqDB.db[1].sequence, score, identity)\n+        self._iDb.execute(sqlCmd)\n+        \n+        self._log.debug("header:", iAlignedBioseqDB.db[0].header)\n+        self._log.debug("path", pathNum, "Score=", score, "Identity=", identity, "ok")\n+        self._log.debug(iAlignedBioseqDB.db[0].sequence[:80])\n+        self._log.debug(iAlignedBioseqDB.db[1].sequence[:80])\n+\n+if __name__ == "__main__":\n+    iLaunch = AlignTEOnGenomeAccordingToAnnotation()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
b
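AlignTEOnGenomeAccordingToAnnotation._insertAlignedBioseqDBWithScoreAndIdentityInTable recovers the alignment score and identity by splitting the header of the first aligned sequence on 'Score=' and 'Identity='. A hedged illustration of that parsing step on a made-up header (the real header layout comes from the NWalign wrapper and is an assumption here):

import re

# Hypothetical header carried by the first aligned sequence.
header = "path42 Score=1234 Identity=0.875"
scorePart = re.split("Score=", header)[1]          # "1234 Identity=0.875"
score = int(scorePart.split()[0])                  # 1234
identity = re.split("Identity=", scorePart)[1]     # "0.875"
if identity == "nan":
    identity = "0.0"
identity = float(identity) * 100.0                 # 87.5, as stored in the table
print score, identity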
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/AnnotationStats.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/AnnotationStats.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,374 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+##@file\n+# Give summary information on a TE annotation table.\n+# options:\n+#     -h: this help\n+#     -t: analysis type (default = 1, 1: per transposable element (TE), 2: per cluster, 3: per classification, 4: with map input file)\n+#     -p: name of the table (_path) or file (.path) with the annotated TE copies\n+#     -s: name of the table (_seq) or file (.fasta or .fa) with the TE reference sequences\n+#     -g: length of the genome (in bp)\n+#     -m: name of the file with the group and the corresponding TE names (format = \'map\')\n+#     -o: name of the output file (default = pathTableName + \'_stats.txt\')\n+#     -C: name of the configuration file to access MySQL (e.g. 
\'TEannot.cfg\')\n+#     -c: remove map files and blastclust file (if analysis type is 2 or 3)\n+#     -I: identity coverage threshold (default = 0)\n+#     -L: length coverage threshold (default=0.8)\n+#     -v: verbosity level (default = 0)\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.stat.Stat import Stat\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.tools.getCumulLengthFromTEannot import getCumulLengthFromTEannot\n+\n+LOG_DEPTH = "repet.tools"\n+\n+#TODO: use templating engine instead of raw strings for AnnotationStatsWriter\n+class AnnotationStats( object ):\n+\n+    def __init__(self, analysisName="TE", clusterFileName="",seqTableName="", pathTableName="", genomeLength=0, statsFileName="", globalStatsFileName="", verbosity=3):\n+        self._analysisName = analysisName\n+        self._clusterFileName = clusterFileName\n+        self._seqTableName = seqTableName\n+        self._pathTableName = pathTableName\n+        self._genomeLength = genomeLength\n+        self._statsFileName = statsFileName\n+        self._globalStatsFileName = globalStatsFileName\n+        self._iDb = None\n+        self._iTablePathAdaptator = None\n+        self._iTableSeqAdaptator = None\n+        self._save = False\n+        self._clean = False\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)'..b'tsAllCopiesMedIdentity" ].add( dOneTErefseq[ "statsIdentityPerChain" ].median() )\n+        \n+        if dOneTErefseq[ "statsLengthPerChainPerc" ].getValuesNumber() != 0:\n+            self._dAllTErefseqs[ "statsAllCopiesMedLengthPerc" ].add( dOneTErefseq[ "statsLengthPerChainPerc" ].median() )\n+\n+    def printStatsForAllTEs(self, TEnb):\n+#        statString += "(sum of cumulative coverages: %i bp)" % ( self._dAllTErefseqs[ "sumCumulCoverage" ] )\n+        statString = "total nb of TE fragments: %i\\n" % ( self._dAllTErefseqs[ "totalNbFragments" ] )\n+        \n+        if self._dAllTErefseqs[ "totalNbFragments" ] != 0:\n+            \n+            statString += "total nb full-length fragments: %i (%.2f%%)\\n" % \\\n+            ( self._dAllTErefseqs[ "totalNbFullLengthFragments" ], \\\n+              100*self._dAllTErefseqs[ "totalNbFullLengthFragments" ] / float(self._dAllTErefseqs[ "totalNbFragments" ]) )\n+            \n+            statString += "total nb of TE copies: %i\\n" % ( self._dAllTErefseqs[ "totalNbCopies" ] )\n+            \n+            statString += "total nb full-length copies: %i (%.2f%%)\\n" % \\\n+            ( self._dAllTErefseqs[ "totalNbFullLengthCopies" ], \\\n+              100*self._dAllTErefseqs[ "totalNbFullLengthCopies" ] / float(self._dAllTErefseqs[ "totalNbCopies" ]) )\n+            \n+            statString += "families with full-length fragments: %i (%.2f%%)\\n" % \\\n+            ( self._dAllTErefseqs[ "nbFamWithFullLengthFragments" ], \\\n+              100*self._dAllTErefseqs[ "nbFamWithFullLengthFragments" ] / float(TEnb) )\n+            statString += " with only one full-length fragment: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithOneFullLengthFragment" ] )\n+            statString += " with only two full-length fragments: %i\\n" % ( self._dAllTErefseqs[ 
"nbFamWithTwoFullLengthFragments" ] )\n+            statString += " with only three full-length fragments: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithThreeFullLengthFragments" ] )\n+            statString += " with more than three full-length fragments: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithMoreThanThreeFullLengthFragments" ] )\n+            \n+            statString += "families with full-length copies: %i (%.2f%%)\\n" % \\\n+            ( self._dAllTErefseqs[ "nbFamWithFullLengthCopies" ], \\\n+              100*self._dAllTErefseqs[ "nbFamWithFullLengthCopies" ] / float(TEnb) )\n+            statString += " with only one full-length copy: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithOneFullLengthCopy" ] )\n+            statString += " with only two full-length copies: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithTwoFullLengthCopies" ] )\n+            statString += " with only three full-length copies: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithThreeFullLengthCopies" ] )\n+            statString += " with more than three full-length copies: %i\\n" % ( self._dAllTErefseqs[ "nbFamWithMoreThanThreeFullLengthCopies" ] )\n+            \n+            statString += "mean of median identity of all families: %.2f +- %.2f\\n" % \\\n+            ( self._dAllTErefseqs[ "statsAllCopiesMedIdentity" ].mean(), \\\n+              self._dAllTErefseqs[ "statsAllCopiesMedIdentity" ].sd() )\n+            \n+            statString += "mean of median length percentage of all families: %.2f +- %.2f\\n" % \\\n+            ( self._dAllTErefseqs[ "statsAllCopiesMedLengthPerc" ].mean(), \\\n+              self._dAllTErefseqs[ "statsAllCopiesMedLengthPerc" ].sd() )\n+        return statString\n+            \n+    def printResume(self, lNamesTErefseq, lDistinctSubjects, totalCumulCoverage, genomeLength):\n+        statString = "nb of sequences: %i\\n" % len(lNamesTErefseq)\n+        statString += "nb of matched sequences: %i\\n" % len(lDistinctSubjects)\n+        statString += "cumulative coverage: %i bp\\n" % totalCumulCoverage\n+        statString += "coverage percentage: %.2f%%\\n" % ( 100 * totalCumulCoverage / float(genomeLength) )\n+#            statString += "processing the %i TE families..." % len(lNamesTErefseq)\n+        return statString\n'
b
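AnnotationStats.printStatsForAllTEs reports each count together with its percentage of the total, for instance full-length fragments over all annotated TE fragments. A worked example of that formula with invented values:

# Assumption: 37 full-length fragments out of 412 annotated TE fragments.
totalNbFragments = 412
totalNbFullLengthFragments = 37
percentage = 100 * totalNbFullLengthFragments / float(totalNbFragments)
print "total nb full-length fragments: %i (%.2f%%)" % (totalNbFullLengthFragments, percentage)
# -> total nb full-length fragments: 37 (8.98%)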
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/BenchmarkTEconsensus.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/BenchmarkTEconsensus.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,567 @@\n+#!/usr/bin/env python\n+\n+##@file\n+# Compare two fasta files of TEs to assess how reference sequences are recovered by de novo consensus.\n+\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import getopt\n+import shutil\n+import glob\n+\n+\n+import pyRepet.launcher.programLauncher\n+from commons.core.coord.AlignUtils import AlignUtils\n+from commons.core.coord.MatchUtils import MatchUtils\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders\n+\n+\n+class BenchmarkTEconsensus( object ):\n+    \n+    def __init__( self ):\n+        self._qryFile = ""\n+        self._sbjFile = ""\n+        self._method = 1\n+        self._keepConflictSbj = False\n+        self._thresholdCoverage = 95\n+        self._thresholdIdentity = 80\n+        self._thresholdEvalue = 1e-10\n+        self._thresholdCoverageMatch = 90\n+        self._useCluster = False\n+        self._queue = ""\n+        self._configFileName = ""\n+        self._clean = False\n+        self._verbose = 0\n+        self._pL = pyRepet.launcher.programLauncher.programLauncher()\n+        \n+        \n+    def help( self ):\n+        print\n+        print "usage: BenchmarkTEconsensus.py [ options ]"\n+        print "options:"\n+        print "     -h: this help"\n+        print "     -q: name of the query file (de novo consensus, format=\'fasta\')"\n+        print "     -s: name of the subject file (reference sequences, format=\'fasta\')"\n+        print "     -m: method"\n+        print "         1: Blaster + Matcher (default)"\n+        print "         2: Blaster + merge + Matcher (not with \'-Q\')"\n+        print "         3: Orienter + Mafft + Matcher"\n+        print "         4: Yass + Matcher"\n+        print "     -a: keep all 
conflicting subjects in Matcher"\n+        print "     -t: coverage threshold over which the match is \'complete\' (in %% of the seq length, default=%i)" % self._thresholdCoverage\n+        print "     -I: identity threshold for \'CC\' matches (default=%i)" % self._thresholdIdentity\n+        print "     -E: E-value threshold for \'CC\' matches (default=1e-10)"\n+        print "     -T: coverage threshold for match length on query compare to subject length (default=%i)" % self._thresholdCoverageMatch\n+        print "     -Q: queue name to run in paral'..b'\n+                                                               self._sbjFile,\n+                                                               self._method ),\n+                                                               tmpFile )\n+            else:\n+                tmpFile = "%s_vs_%s.m%i.align" % ( self._qryFile,\n+                                                   self._sbjFile,\n+                                                   self._method )\n+                \n+            prg = os.environ["REPET_PATH"] + "/bin/matcher"\n+            cmd = prg\n+            cmd += " -m %s" % ( tmpFile )\n+            cmd += " -q %s" % ( self._qryFile )\n+            cmd += " -s %s" % ( self._sbjFile )\n+            cmd += " -j"\n+            if self._keepConflictSbj:\n+                cmd += " -a"\n+            cmd += " -v %i" % ( self._verbose )\n+            self._pL.launch( prg, cmd )\n+            \n+        csh = ChangeSequenceHeaders()\n+        csh.setInputFile( matchFile )\n+        csh.setFormat( "tab" )\n+        csh.setStep( 2 )\n+        csh.setLinkFile( "%slink" % ( self._qryFile ) )\n+        csh.setOutputFile( matchFile.replace(".newH","") )\n+        csh.run()\n+        \n+        csh.setInputFile( pathFile )\n+        csh.setFormat( "path" )\n+        csh.setStep( 2 )\n+        csh.setOutputFile( pathFile.replace(".newH","") )\n+        csh.run()\n+        \n+        return matchFile.replace(".newH",""), pathFile.replace(".newH","")\n+    \n+    \n+    def analyzeMatchFile( self, matchFile, pathFile ):\n+        if matchFile != "":\n+            if self._verbose > 0:\n+                print "analyze the \'tab\' file..."\n+                sys.stdout.flush()\n+            prg = os.environ["REPET_PATH"] + "/bin/tabFileReader.py"\n+            cmd = prg\n+            cmd += " -m %s" % ( matchFile )\n+            cmd += " -q %s" % ( self._qryFile.replace(".newH","") )\n+            cmd += " -s %s" % ( self._sbjFile.replace(".newH","") )\n+            cmd += " -t %i" % ( self._thresholdCoverage )\n+            cmd += " -I %i" % ( self._thresholdIdentity )\n+            cmd += " -E %g" % ( self._thresholdEvalue )\n+            cmd += " -T %i" % ( self._thresholdCoverageMatch )\n+            cmd += " -v %i" % ( self._verbose - 1 )\n+            self._pL.launch( prg, cmd )\n+            for f in [ matchFile, pathFile,\n+                      "%s_tabFileReader.txt" %  matchFile,\n+                      "%s_qryCategories.txt" %  matchFile,\n+                      "%s_sbjCategories.txt" %  matchFile ]:\n+                shutil.copy( f, ".." )\n+        os.chdir( ".." 
)\n+        \n+        \n+    def start( self ):\n+        self.checkAttributes()\n+        if self._verbose > 0:\n+            print "START BenchmarkTEconsensus.py"\n+            sys.stdout.flush()\n+            \n+            \n+    def end( self ):\n+        if self._clean:\n+            tmpDir = "tmp%s_t%i_m%i_I%i" % ( os.getpid(),\n+                                             self._thresholdCoverage,\n+                                             self._method,\n+                                             self._thresholdIdentity )\n+            shutil.rmtree( tmpDir )\n+        if self._verbose > 0:\n+            print "END BenchmarkTEconsensus.py"\n+            sys.stdout.flush()\n+            \n+            \n+    def run( self ):\n+        self.start()\n+        \n+        self.preprocess()\n+        \n+        if self._method == 1:\n+            matchFile, pathFile = self.compareFastaViaBlasterMatcher()\n+        elif self._method == 2:\n+            matchFile, pathFile = self.compareFastaViaBlasterMatcher( merged=True )\n+        elif self._method == 3:\n+            matchFile, pathFile = self.compareFastaViaMafft()\n+        elif self._method == 4:\n+            matchFile, pathFile = self.compareFastaViaYassMatcher()\n+            \n+        self.analyzeMatchFile( matchFile, pathFile )\n+        \n+        self.end()\n+        \n+        \n+if __name__ == "__main__":\n+    i = BenchmarkTEconsensus()\n+    i.setAttributesFromCmdLine()\n+    i.run()\n'
b
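In BenchmarkTEconsensus a match is called 'complete' when it covers at least -t percent of the sequence length (95 by default); the actual categorisation is delegated to tabFileReader.py. A small sketch of that coverage criterion, with names chosen here for illustration:

# Assumption: matchLength and seqLength in bp, thresholdCoverage in percent (default 95).
def isCompleteMatch(matchLength, seqLength, thresholdCoverage=95):
    coverage = 100.0 * matchLength / seqLength
    return coverage >= thresholdCoverage

print isCompleteMatch(960, 1000)   # True  (96% coverage)
print isCompleteMatch(900, 1000)   # False (90% coverage)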
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/CalcCoordCumulLength.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/CalcCoordCumulLength.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+
+"""
+Calculate the cumulative length of coordinates data in the L{Map<commons.core.coord.Map>} format.
+"""
+
+import os
+import sys
+import getopt
+from pyRepet.launcher.programLauncher import programLauncher
+from pyRepet.util.Stat import Stat
+from commons.core.checker.CheckerUtils import CheckerUtils
+
+
+class CalcCoordCumulLength( object ):
+    """
+    Compute the coverage of coordinates data in the L{Map<commons.core.coord.Map>} format.
+    """
+    
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        self._inFileName = ""
+        self._outFileName = ""
+        self._verbose = 0
+        
+        
+    def help( self ):
+        """
+        Display the help on stdout.
+        """
+        print
+        print "usage:",sys.argv[0]," [ options ]"
+        print "options:"
+        print "     -h: this help"
+        print "     -i: name of the input file (format='map')"
+        print "     -o: name of the output file (default=inFileName+'.coverage')"
+        print
+        
+        
+    def setAttributesFromCmdLine( self ):
+        """
+        Set the attributes from the command-line.
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:],"hi:o:v:")
+        except getopt.GetoptError, err:
+            print str(err); self.help(); sys.exit(1)
+        for o,a in opts:
+            if o == "-h":
+                self.help(); sys.exit(0)
+            elif o == "-i":
+                self.setInputFileName( a )
+            elif o == "-o":
+                self._outFileName = a
+            elif o == "-v":
+                self._verbose = int(a)
+                
+                
+    def setInputFileName( self, inFileName ):
+        self._inFileName = inFileName
+        
+    def setVerbose( self, verbose ):
+        self._verbose = int(verbose)
+        
+    def checkAttributes( self ):
+        """
+        Check the attributes are valid before running the algorithm.
+        """
+        if self._inFileName == "":
+            print "ERROR: missing input file"
+            self.help(); sys.exit(1)
+        if not os.path.exists( self._inFileName ):
+            print "ERROR: can't find file '%s'" % ( self._inFileName )
+            self.help(); sys.exit(1)
+        if self._outFileName == "":
+            self._outFileName = "%s.coverage" % ( self._inFileName )
+            
+            
+    def mergeCoordinates( self ):
+        """
+        Merge the coordinates with 'mapOp'.
+        """
+        if self._verbose > 0:
+            print "merge the coordinates with mapOp..."; sys.stdout.flush()
+        if not CheckerUtils.isExecutableInUserPath( "mapOp" ):
+            msg = "ERROR: 'mapOp' is not in your PATH"
+            sys.stderr.write( "%s\n" % msg )
+            sys.exit(1)
+        pL = programLauncher()
+        prg = os.environ["REPET_PATH"] + "/bin/mapOp"
+        cmd = prg
+        cmd += " -q %s" % ( self._inFileName )
+        cmd += " -m"
+        cmd += " > /dev/null"
+        pL.launch( prg, cmd, self._verbose - 1 )
+        if self._verbose > 0:
+            print "coordinates merged !"; sys.stdout.flush()
+        mergeFileName = "%s.merge" % ( self._inFileName )
+        inPath, inName = os.path.split( self._inFileName )
+        if inPath != "":
+            os.system( "mv %s.merge %s" % ( inName, inPath ) )
+        return mergeFileName
+    
+    
+    def getStatsPerChr( self, mergeFileName ):
+        """
+        Return summary statistics on the coordinates, per chromosome.
+        @param mergeFileName: name of the merged L{Map<commons.core.coord.Map>} file
+        @type mergeFileName: string
+        @return: dictionary whose keys are the chromosomes of the 'map' file and values are L{Stat<pyRepet.util.Stat>} instances
+        """
+        dChr2Stats = {}
+        if self._verbose > 0:
+            print "compute the coverage of the coordinates..."; sys.stdout.flush()
+        mergeF = open( mergeFileName, "r" )
+        line = mergeF.readline()
+        while True:
+            if line == "": break
+            tokens = line[:-1].split("\t")
+            if int(tokens[2]) < int(tokens[3]):
+                matchLength = int(tokens[3]) - int(tokens[2]) + 1
+            else:
+                # start equals end: the match covers a single position
+                matchLength = int(tokens[2]) - int(tokens[3]) + 1
+            if not dChr2Stats.has_key( tokens[1] ):
+                dChr2Stats[ tokens[1] ] = Stat()
+            dChr2Stats[ tokens[1] ].add( matchLength )
+            line = mergeF.readline()
+        mergeF.close()
+        os.remove( mergeFileName )
+        return dChr2Stats
+    
+    
+    def saveCumulLength( self, dChr2Stats ):
+        """
+        Write the stats in the output file.
+        """
+        outF = open( self._outFileName, "w" )
+        totalLength = 0
+        for i in dChr2Stats.keys():
+            totalLength += dChr2Stats[i].sum
+            string = "cumulative length (in bp) on '%s': %i" % ( i, dChr2Stats[i].sum )
+            outF.write( "%s\n" % ( string ) )
+            if self._verbose > 0: print string
+        string = "total cumulative length (in bp): %i" % ( totalLength )
+        outF.write( "%s\n" % ( string ) )
+        if self._verbose > 0: print string
+        outF.close()
+        sys.stdout.flush()
+        
+        
+    def start( self ):
+        """
+        Useful commands before running the program.
+        """
+        if self._verbose > 0:
+            print "beginning of %s" % (sys.argv[0].split("/")[-1]); sys.stdout.flush()
+        self.checkAttributes()
+        if self._verbose > 0:
+            print "input file : '%s'" % ( self._inFileName )
+            sys.stdout.flush()
+            
+            
+    def end( self ):
+        """
+        Useful commands before ending the program.
+        """
+        if self._verbose > 0:
+            print "%s finished successfully" % (sys.argv[0].split("/")[-1]); sys.stdout.flush()
+            
+            
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        mergeFileName = self.mergeCoordinates()
+        dChr2Stats = self.getStatsPerChr( mergeFileName )
+        self.saveCumulLength( dChr2Stats )
+        self.end()
+        
+        
+if __name__ == '__main__':
+    i = CalcCoordCumulLength()
+    i.setAttributesFromCmdLine()
+    i.run()
b
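For reference, a minimal sketch of driving the class above from Python rather than from the command line; the module path and the input file name are hypothetical, and it assumes the REPET modules are importable and 'mapOp' is reachable under $REPET_PATH/bin:

    from commons.tools.CalcCoordCumulLength import CalcCoordCumulLength  # assumed module path

    calc = CalcCoordCumulLength()
    calc.setInputFileName("sample.map")  # hypothetical 'map' file
    calc.setVerbose(1)
    calc.run()  # writes 'sample.map.coverage' by default (see checkAttributes)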
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/ChangeSequenceHeaders.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/ChangeSequenceHeaders.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,524 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import getopt\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Match import Match\n+\n+\n+\n+class ChangeSequenceHeaders( object ):\n+    \n+    def __init__( self, name="ChangeSequenceHeaders", inFile="", format="", step=0, prefix="seq", outFile="",linkFile="", whichCluster = "", replace_query=True, replace_subject=True, verbosity=0):\n+        self._name = name\n+        self._inFile = inFile\n+        self._format = format\n+        self._step = step\n+        self._prefix = prefix\n+        self._linkFile = linkFile\n+        self._whichCluster = whichCluster\n+        self._outFile = outFile\n+        self.replace_query = replace_query\n+        self.replace_subject = replace_subject\n+        self._verbose = verbosity\n+        \n+        \n+    def help( self ):\n+        print\n+        print "usage: %s.py [ options ]" % ( self._name )\n+        print "options:"\n+        print "     -h: this help"\n+        print "     -i: name of the input file"\n+        print "     -f: format of the input file (fasta/newick/align/path/tab)"\n+        print "     -s: step (1: shorten headers / 2: retrieve initial headers)"\n+        print "     -p: prefix of new headers (with \'-s 1\', default=\'seq\')"\n+        print "     -l: name of the \'link\' file (with \'-s 2\', format=map)"\n+        print "     -w: header formatting type (A: after LTRharvest, B: for ClusterConsensus, not specified: no change)"\n+        print "     -o: name of the output file (default=inFile+\'.newH\'/\'.initH\')"\n+        print\n+        \n+        \n+    def setAttributesFromCmdLine( self ):\n+        try:\n+            opts, args = getopt.getopt(sys.argv[1:],"hi:f:s:p:l:w:o:v:")\n+        except getopt.GetoptError, err:\n+            sys.stderr.write( "%s\\n" % ( 
str(err) ) )\n+            self.help(); sys.exit(1)\n+        for o,a in opts:\n+            if o == "-h":\n+                self.help(); sys.exit(0)\n+            elif o == "-i":\n+                self.setInputFile( a )\n+            elif o == "-f":\n+                self.setFormat( a )\n+            elif o == "-s":\n+                self.setStep( a )\n+            elif o == "-p":\n+                self.setPrefix( a )\n+            elif o == "-l":\n+                self.setLinkFile( a )\n+            elif o == "-w":\n+                self.setWhichcluste'..b'elems[2], elems[3], elems[4], elems[5], query_seqname, elems[7], elems[8],elems[9],elems[10], elems[11], elems[12],elems[13],elems[14].strip(\'\\n\').strip(\'\\r\')]\n+                    newLine = "\\t".join(modedelems)\n+                    outFileHandler.write("%s\\n" % newLine)\n+                    if self._verbose >0 :\n+                        print("query", query_seqname, "subject", subject_seqname)\n+                        print("Output lastz_line : line %s " % newLine)\n+                except: pass\n+        inFileHandler.close()\n+        outFileHandler.close()\n+        \n+    def retrieveInitialSequenceHeadersForChainFile( self, dNew2Init):\n+        #format: chain score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id \n+        inFileHandler = open( self._inFile, "r" )\n+        outFileHandler = open( self._outFile, "w" )\n+        while True:\n+            try:\n+                line = inFileHandler.next()\n+            except StopIteration:\n+                break\n+            if line == "" or not "seq" in line:\n+                outFileHandler.write(line)\n+            else :\n+                elems = line.split(" ")\n+                try:\n+                    subject_seqname = elems[2]\n+                    if self.replace_subject :\n+                        nameToBeReplaced = elems[2]\n+                        if dNew2Init.has_key( nameToBeReplaced ):\n+                            subject_seqname = dNew2Init[nameToBeReplaced]\n+                            subject_seqname = subject_seqname.strip(\'\\n\').strip(\'\\r\')\n+                            \n+                    query_seqname = elems[7]\n+                    if self.replace_query:\n+                        nameToBeReplaced = elems[7]\n+                        if dNew2Init.has_key( nameToBeReplaced ):\n+                            query_seqname = dNew2Init[nameToBeReplaced]\n+                            query_seqname = query_seqname.strip(\'\\n\').strip(\'\\r\')\n+                    \n+                    modedelems = elems[:]\n+                    modedelems[2] = subject_seqname\n+                    modedelems[7] = query_seqname\n+                    newLine = " ".join(modedelems)\n+                    outFileHandler.write("%s\\n" % newLine)\n+                except: pass\n+                \n+        inFileHandler.close()\n+        outFileHandler.close()   \n+                    \n+        \n+    def run( self ):\n+        self.checkAttributes()\n+        if self._step == 1:\n+            if self._linkFile == "":\n+                self._linkFile = "%s.newHlink" % ( self._inFile )\n+            if self._format == "fasta":\n+                self.shortenSequenceHeadersForFastaFile()\n+        if self._step == 2:\n+            dNew2Init = self.getLinksNewToInitialHeaders()\n+            if self._format == "fasta":\n+                self.retrieveInitialSequenceHeadersForFastaFile( dNew2Init )\n+            elif self._format == "newick":\n+ 
               self.retrieveInitialSequenceHeadersForNewickFile( dNew2Init )\n+            elif self._format == "align":\n+                self.retrieveInitialSequenceHeadersForAlignFile( dNew2Init )\n+            elif self._format == "path":\n+                self.retrieveInitialSequenceHeadersForPathFile( dNew2Init )\n+            elif self._format == "axt":\n+                self.retrieveInitialSequenceHeadersForAxtFile( dNew2Init)\n+            elif self._format == "psl":\n+                self.retrieveInitialSequenceHeadersForPslFile( dNew2Init)\n+            elif self._format == "lastz":\n+                self.retrieveInitialSequenceHeadersForLastZFile(dNew2Init)\n+            elif self._format == "chain":\n+                self.retrieveInitialSequenceHeadersForChainFile(dNew2Init)\n+            elif self._format in [ "tab", "match" ]:\n+                self.retrieveInitialSequenceHeadersForMatchFile( dNew2Init )\n+                \n+                \n+if __name__ == "__main__":\n+    i = ChangeSequenceHeaders()\n+    i.setAttributesFromCmdLine()\n+    i.run()\n\\ No newline at end of file\n'
b
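A hedged sketch of the two-step workflow documented in the help text above (shorten the headers, then restore them downstream); all file names are hypothetical and the keyword arguments are those listed in the constructor:

    from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders  # assumed module path

    # step 1: shorten the FASTA headers; the 'link' file defaults to inFile+'.newHlink'
    csh = ChangeSequenceHeaders(inFile="genome.fa", format="fasta", step=1,
                                prefix="seq", outFile="genome.fa.newH")
    csh.run()

    # step 2: restore the initial headers in a downstream 'path' file via the link file
    csh = ChangeSequenceHeaders(inFile="hits.path", format="path", step=2,
                                linkFile="genome.fa.newHlink", outFile="hits.path.initH")
    csh.run()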
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/CheckMysqlConnect.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/CheckMysqlConnect.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import sys
+import os
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from commons.core.checker.RepetException import RepetException
+from commons.tools.CleanClusterNodesAfterRepet import CleanClusterNodesAfterRepet
+
+REPET_PATH = os.environ.get("REPET_PATH")
+
+def main():
+
+    description = "Check MySQL connection on each node"
+    usage = "CheckMysqlConnect.py [options]"
+    examples = "\nExample 1: check MySQL connection from every node of a SGE cluster using environment variables:\n"
+    examples += "\t$ python CheckMysqlConnect.py"
+    examples += "\n\t"
+    examples += "\nExample 2: check MySQL connection from every node of a SGE cluster using configuration file:\n"
+    examples += "\t$ python CheckMysqlConnect.py -C configFileName"
+    examples += "\n\t"
+    examples += "\nExample 3: check MySQL connection from every node of a SGE cluster using environment variables and using a different python executable than the one in the PATH:\n"
+    examples += "\t$ python CheckMysqlConnect.py -p /path/to/python"
+    examples += "\n\n"
+           
+    parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)
+    parser.add_option( "-p", "--path", dest = "pythonPath", type = "string", help = "path to python executable ('python' included, without finishing '/')", default = "python")
+    parser.add_option( "-C", "--config", dest = "config", type = "string", help = "path to config file", default = "")
+
+    options, args = parser.parse_args()
+    pythonPath = options.pythonPath
+    configFile =  options.config
+    
+    iCleanClusterNodesAfterRepet = CleanClusterNodesAfterRepet()
+    nodesList = iCleanClusterNodesAfterRepet.getAllNodesList()
+    
+    nbNodes = len(nodesList)
+    nodeCount = 0
+    configOption = ""
+    for node in nodesList:
+        try:
+            nodeCount += 1
+            print "Connect to node '%s' (%i/%i)..." % (node, nodeCount, nbNodes)
+            sys.stdout.flush()
+            cmd = "ssh"
+            cmd += " -q %s " % ( node )
+            if configFile != "":
+                configOption = " -C %s" % configFile
+            cmd += "'%s %s/bin/MysqlConnect.py -n %s %s'" % (pythonPath, REPET_PATH, node, configOption)
+            os.system(cmd)
+        except RepetException, e:
+            print e.getMessage()
+
+    iCleanClusterNodesAfterRepet.clean()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/CleanClusterNodesAfterRepet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/CleanClusterNodesAfterRepet.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+
+## @file
+# Clean the cluster nodes after REPET was used.
+#
+# usage: CleanClusterNodesAfterRepet.py [ options ]
+# options:
+#      -h: this help
+#      -n: node to clean (otherwise all nodes will be cleaned)
+#      -t: temporary directory (e.g. '/tmp')
+#      -p: pattern (e.g. 'DmelChr4*')
+#      -v: verbosity level (default=0/1/2)
+
+import os
+import sys
+import getopt
+
+class CleanClusterNodesAfterRepet( object ):
+    """
+    Clean the cluster nodes after REPET was used.
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        self._lNodes = []
+        self._tmpDir = ""
+        self._pattern = ""
+        self._verbose = 0
+        self._lNodesToExcept = ["compute-2-46", "compute-2-47"]
+
+    def help( self ):
+        """
+        Display the help on stdout.
+        """
+        print
+        print "usage: %s.py [ options ]" % ( type(self).__name__ )
+        print "options:"
+        print "     -h: this help"
+        print "     -n: node to clean (otherwise all nodes will be cleaned)"
+        print "     -t: temporary directory (e.g. '/tmp')"
+        print "     -p: pattern (e.g. 'DmelChr4*')"
+        print "     -v: verbosity level (default=0/1/2)"
+        print
+        
+    def setAttributesFromCmdLine( self ):
+        """
+        Set the attributes from the command-line.
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:],"hn:t:p:v:")
+        except getopt.GetoptError, err:
+            print str(err); self.help(); sys.exit(1)
+        for o,a in opts:
+            if o == "-h":
+                self.help(); sys.exit(0)
+            elif o == "-n":
+                self.setLNodes( a.split(" ") )
+            elif o == "-t":
+                self.setTempDirectory( a )
+            elif o == "-p":
+                self.setPattern( a )
+            elif o == "-v":
+                self.setVerbosityLevel( a )
+                
+    def setLNodes( self, a ):
+        self._lNodes = a
+        
+    def setTempDirectory( self, a ):
+        if a[-1] == "/":
+            self._tmpDir = a[:-1]
+        else:
+            self._tmpDir = a
+            
+    def setPattern( self, a ):
+        self._pattern = a
+        
+    def setVerbosityLevel( self, verbose ):
+        self._verbose = int(verbose)
+        
+    def checkAttributes( self ):
+        """
+        Before running, check the required attributes are properly filled.
+        """
+        if self._tmpDir == "":
+            print "ERROR: need a valid temporary directory"
+            self.help(); sys.exit(1)
+            
+    def getAllNodesList( self ):
+        """
+        Return the list of the names of each node.
+        """
+        lNodes = []
+        log = os.system( "qhost > qhost.txt" )
+        if log != 0: print "ERROR with qhost"; sys.exit(1)
+        inF = open( "qhost.txt", "r" )
+        line = inF.readline()
+        line = inF.readline()
+        line = inF.readline()
+        while True:
+            if line == "":
+                break
+            tokens = line.split()
+            if tokens[3] == "-":
+                line = inF.readline()
+                continue
+            lNodes.append( tokens[0] )
+            line = inF.readline()
+        inF.close()
+        # remove the nodes listed in self._lNodesToExcept from the node list
+        for node in self._lNodesToExcept:
+            if node in lNodes:
+                lNodes.remove(node)
+        return lNodes
+    
+    def showNodeList( self, lNodes ):
+        print "nb of nodes: %i" % ( len(lNodes) )
+        for i in range(0,len(lNodes)):
+            print " %i: %s" % ( i+1, lNodes[i] )
+            
+    def cleanNodes( self):
+        """
+        Connect to each job and clean the temporary directory.
+        """
+        nbNodes = len(self._lNodes)
+        nodeCount = 0
+        for node in self._lNodes:
+            nodeCount += 1
+            if self._verbose > 0:
+                print "connect to node '%s' (%i/%i)..." % ( node, nodeCount, nbNodes )
+                sys.stdout.flush()
+            cmd = "ssh"
+            cmd += " -q %s " % ( node )
+            cmd += "'find %s" % ( self._tmpDir )
+            cmd += " -user %s" % ( os.environ["USER"] )
+            if self._pattern != "":
+                cmd += " -name '%s'" % ( self._pattern )
+            cmd += " 2> /dev/null -exec rm -rf {} \; ; exit'"
+            if self._verbose > 0: print cmd; sys.stdout.flush()
+            os.system( cmd )  # warning, even if everything goes right, ssh returns an error code, i.e. different than 0
+    
+    def clean( self ):
+        if os.path.exists( "qhost.txt" ):
+            os.remove( "qhost.txt" )
+            
+    def start( self ):
+        """
+        Useful commands before running the program.
+        """
+        if self._verbose > 0:
+            print "START %s" % ( type(self).__name__ ); sys.stdout.flush()
+        self.checkAttributes()        
+        
+    def end( self ):
+        """
+        Useful commands before ending the program.
+        """
+        self.clean()
+        if self._verbose > 0:
+            print "END %s" % ( type(self).__name__ ); sys.stdout.flush()
+            
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        if self._lNodes == []:
+            self._lNodes = self.getAllNodesList()
+        if self._verbose > 0: self.showNodeList( self._lNodes )
+        self.cleanNodes()
+        self.end()
+
+if __name__ == "__main__":
+    i = CleanClusterNodesAfterRepet()
+    i.setAttributesFromCmdLine()
+    i.run()
b
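A minimal sketch of using the cleaner programmatically, assuming an SGE environment where 'qhost' is available (the node list is derived from its output); the directory and pattern below are hypothetical:

    from commons.tools.CleanClusterNodesAfterRepet import CleanClusterNodesAfterRepet  # assumed module path

    cleaner = CleanClusterNodesAfterRepet()
    cleaner.setTempDirectory("/tmp")     # temporary directory to purge on each node
    cleaner.setPattern("DmelChr4*")      # only remove files matching this pattern
    cleaner.setVerbosityLevel(1)
    cleaner.run()                        # lists the nodes via 'qhost', then ssh'es to each one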
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/CorrelateTEageWithGCcontent.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/CorrelateTEageWithGCcontent.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,204 @@\n+#!/usr/bin/env python\n+\n+import sys\n+import os\n+import getopt\n+import math\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.coord.SetUtils import SetUtils\n+from commons.core.seq.BioseqUtils import BioseqUtils\n+\n+\n+class CorrelateTEageWithGCcontent( object ):\n+    \n+    def __init__( self ):\n+        self._inputCoord = ""\n+        self._inputGenome = ""\n+        self._inputTErefseq = ""\n+        self._configFile = ""\n+        self._outFile = ""\n+        self._verbose = 0\n+        self._db = None\n+        self._tableCoord = ""\n+        self._pathA = TablePathAdaptator()\n+        self._tableGenome = ""\n+        self._seqA = TableSeqAdaptator()\n+        \n+        \n+    def help( self ):\n+        print\n+        print "usage: CorrelateTEageWithGCcontent.py [ options ]"\n+        print "options:"\n+        print "     -h: this help"\n+        print "     -i: input TE coordinates (can be file or table)"\n+        print "         TEs as subjects in \'path\' format"\n+        print "     -g: input genome sequences (can be fasta file or table)"\n+        print "     -r: input TE reference sequences (can be fasta file or table)"\n+        print "     -C: configuration file (if table as input)"\n+        print "     -o: output fasta file (default=inputCoord+\'.gc\')"\n+        print "     -v: verbosity level (default=0/1)"\n+        print\n+        \n+        \n+    def setAttributesFromCmdLine( self ):\n+        try:\n+            opts, args = getopt.getopt(sys.argv[1:],"hi:g:r:C:o:v:")\n+        except getopt.GetoptError, err:\n+            msg = "%s" % str(err)\n+            sys.stderr.write( "%s\\n" % msg )\n+            self.help(); sys.exit(1)\n+        for o,a in opts:\n+            if o == "-h":\n+                self.help(); sys.exit(0)\n+            elif o == "-i":\n+                self._inputCoord = a\n+            elif o == "-g":\n+                self._inputGenome = a\n+            elif o == "-r":\n+                self._inputTErefseq = a\n+            elif o == "-C":\n+                self._configFile = a\n+            elif o =="-o":\n+                self._outFile = a\n+            elif o == "-v":\n+                self._verbose = int(a)\n+                \n+                \n+    def checkAttributes( self ):\n+        if self._inputCoord == "":\n+            msg = "ERROR: missing input TE coordinates (-i)"\n+            sys.stderr.write( "%s\\n" % msg )\n+            self.help()\n+            sys.exit(1)\n+        if not os.path.exists( self._inputCoord ):\n+            if not os.path.exists( self._configFile ):\n+                msg = "ERROR: neither input file \'%s\' nor configuration file \'%s\'" % ( self._inputCoord, self._configFile )\n+                sys.stderr.write( "%s\\n" % msg )\n+                self.help()\n+                sys.exit(1)\n+            if not os.path.exists( self._configFile ):\n+                msg = "ERROR: can\'t find configuration file \'%s\'" % ( self._configFile )\n+                sys.stderr.write( "%s\\n" % msg )\n+                sys.exit(1)\n+            self._db = DbMySql( cfgFileName=self._configFile )\n+            if not self._db.exist( self._inputCoord ):\n+                msg = "ERROR: can\'t find table \'%s\'" % ( self._inputCoord )\n+                sys.stderr.write( "%s\\n" % msg )\n+     
           self.help()\n+                sys.exit(1)\n+            self._tableCoord = self._inputCoord\n+        else:\n+            self._tableCoord = self._inputCoord.replace(".","_")\n+        if self._inputGenome == "":\n+            msg = "ERROR: missing input genome sequences (-g)"\n+            sys.stderr.write( "%s\\n" % msg )\n+            self.help()\n+            sys.exit(1)\n+        if not os.path.exists( self._inputGenome ):\n+            if not self._db.doesTableExist( self._inputGenome ):\n+                msg = "ERROR: can\'t find table \'%s\'" % ('..b'rr.write( "%s\\n" % msg )\n+            self.help()\n+            sys.exit(1)\n+        if not os.path.exists( self._inputTErefseq ):\n+            if not self._db.doesTableExist( self._inputTErefseq ):\n+                msg = "ERROR: can\'t find table \'%s\'" % ( self._inputTErefseq )\n+                sys.stderr.write( "%s\\n" % msg )\n+                self.help()\n+                sys.exit(1)\n+        if self._outFile == "":\n+            self._outFile = "%s.gc" % ( self._inputCoord )\n+            \n+            \n+    def getLengthOfTErefseqs( self ):\n+        if os.path.exists( self._inputTErefseq ):\n+            return BioseqUtils.getLengthPerSeqFromFile( self._inputTErefseq )\n+        else:\n+            dTErefseq2Length = {}\n+            refseqA = TableSeqAdaptator( self._db, self._inputTErefseq )\n+            lAccessions = refseqA.getAccessionsList()\n+            for acc in lAccessions:\n+                dTErefseq2Length[ acc ] = refseqA.getSeqLengthFromAccession( acc )\n+            return dTErefseq2Length\n+        \n+        \n+    def start( self ):\n+        self.checkAttributes()\n+        if self._verbose > 0:\n+            print "START CorrelateTEageWithGCcontent.py"\n+            sys.stdout.flush()\n+        if os.path.exists( self._inputCoord ):\n+            self._db = DbMySql( cfgFileName=self._configFile )\n+            self._db.createTable( self._tableCoord, "path", self._inputCoord, True )\n+        self._pathA = TablePathAdaptator( self._db, self._tableCoord )\n+        if os.path.exists( self._inputGenome ):\n+            self._db.createTable( self._tableGenome, "seq", self._inputGenome, True )\n+        self._seqA = TableSeqAdaptator( self._db, self._tableGenome )\n+        if self._verbose > 0:\n+            print "output fasta file: %s" % self._outFile\n+            \n+            \n+    def end( self ):\n+        if os.path.exists( self._inputCoord ):\n+            self._db.dropTable( self._tableCoord )\n+        if os.path.exists( self._inputGenome ):\n+            self._db.dropTable( self._tableGenome )\n+        self._db.close()\n+        if self._verbose > 0:\n+            print "END CorrelateTEageWithGCcontent.py"\n+            sys.stdout.flush()\n+            \n+            \n+    def run( self ):\n+        self.start()\n+        \n+        dTErefseq2Length = self.getLengthOfTErefseqs()\n+        \n+        outFileHandler = open( self._outFile, "w" )\n+        outFileHandler.write( "copy\\tTE\\tchr\\tlength\\tid\\tGC\\tlengthPerc\\n" )\n+        \n+        lIdentifiers = self._pathA.getIdList()\n+        nbTEcopies = len(lIdentifiers)\n+        if self._verbose > 0:\n+            print "nb of TE copies: %i" % ( nbTEcopies )\n+            sys.stdout.flush()\n+        count = 0\n+        power10 = int( math.floor( math.log10( nbTEcopies ) ) ) - 1\n+        for id in lIdentifiers:\n+            count += 1\n+            if self._verbose > 0 and power10 > 0 and count % 
math.pow(10,power10) == 0:\n+                print "%s / %s" % ( str(count).zfill(power10+2), str(nbTEcopies).zfill(power10+2) )\n+                sys.stdout.flush()\n+            lPaths = self._pathA.getPathListFromId( id )\n+            lSets = PathUtils.getSetListFromQueries( lPaths )\n+            lMergedSets = SetUtils.mergeSetsInList( lSets )\n+            bs = self._seqA.getBioseqFromSetList( lMergedSets )\n+            data = "%i" % id\n+            data += "\\t%s" % ( bs.header.split("::")[0] )\n+            data += "\\t%s" % ( lPaths[0].getQueryName() )\n+            data += "\\t%i" % ( bs.getLength() )\n+            data += "\\t%.2f" % ( PathUtils.getIdentityFromPathList( lPaths ) )\n+            data += "\\t%.2f" % ( bs.getGCpercentage() )\n+            data += "\\t%.2f" % ( 100 * bs.getLength() / float( dTErefseq2Length[ bs.header.split("::")[0] ] ) )\n+            outFileHandler.write( "%s\\n" % data )\n+            \n+        outFileHandler.close()\n+        \n+        self.end()\n+        \n+        \n+if __name__ == "__main__":\n+    i = CorrelateTEageWithGCcontent()\n+    i.setAttributesFromCmdLine()\n+    i.run()\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/FilterAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/FilterAlign.py Tue Apr 30 14:33:21 2013 -0400
[
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+
+import sys
+import getopt
+import os
+
+
+def help():
+    print
+    print "usage: ",sys.argv[0].split("/")[-1],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='align')"
+    print "     -E: maximum E-value (default=100)"
+    print "     -S: minimum score (default=0)"
+    print "     -I: minimum identity (default=0)"
+    print "     -l: minimum length (default=0)"
+    print "     -L: maximum length (default=1000000000)"
+    print "     -o: name of the output file (default=inFileName+'.filtered')"
+    print "     -v: verbose (default=0/1)"
+    print
+    
+    
+def main():
+    """
+    This program filters the output from BLASTER ('align' file recording HSPs).
+    """
+    
+    inFileName = ""
+    outFileName = ""
+    maxEValue = 100
+    minIdentity = 0
+    minLength = 0
+    maxLength = 1000000000
+    minScore = 0
+    verbose = 0
+    
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:E:S:I:l:L:o:v:")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit()
+        elif o == "-i":
+            inFileName = a
+        elif o == "-E":
+            maxEValue = float(a)
+        elif o == "-S":
+            minScore = int(float(a))
+        elif o == "-I":
+            minIdentity = int(float(a))
+        elif o == "-l":
+            minLength = int(a)
+        elif o == "-L":
+            maxLength = int(a)
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+            
+    if inFileName == "":
+        print "ERROR: missing input file name"
+        help()
+        sys.exit(1)
+        
+    if outFileName == "":
+        outFileName = "%s.filtered" % ( inFileName )
+        
+    if os.path.exists( os.environ["REPET_PATH"] + "/bin/filterAlign" ):
+        prg = os.environ["REPET_PATH"] + "/bin/filterAlign"
+        cmd = prg
+        cmd += " -i %s" % ( inFileName )
+        cmd += " -E %g" % ( maxEValue )
+        cmd += " -S %i" % ( minScore )
+        cmd += " -I %f" % ( minIdentity )
+        cmd += " -l %i" % ( minLength )
+        cmd += " -L %i" % ( maxLength )
+        cmd += " -o %s" % ( outFileName )
+        cmd += " -v %i" % ( verbose )
+        return os.system( cmd )
+    
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    inFile = open( inFileName, "r" )
+    outFile = open( outFileName, "w" )
+    
+    nbMatches = 0
+    nbFiltered = 0
+    
+    line = inFile.readline()
+    while True:
+        if line == "":
+            break
+        nbMatches += 1
+        data = line.split("\t")
+        qryName = data[0]
+        qryStart = data[1]
+        qryEnd = data[2]
+        sbjName = data[3]
+        sbjStart = data[4]
+        sbjEnd = data[5]
+        Evalue = data[6]
+        score = data[7]
+        identity = data[8]
+        
+        if int(qryStart) < int(qryEnd):
+            matchLength = int(qryEnd) - int(qryStart) + 1
+        else:
+            # qryStart equals qryEnd: the match covers a single position
+            matchLength = int(qryStart) - int(qryEnd) + 1
+
+        if float(Evalue) <= maxEValue and matchLength >= minLength and \
+               float(identity) >= minIdentity and matchLength <= maxLength and \
+               int(score) >= minScore:
+            string = qryName + "\t" + qryStart + "\t" + qryEnd + "\t" +\
+                     sbjName + "\t" + sbjStart + "\t" + sbjEnd + "\t" +\
+                     Evalue + "\t" + score + "\t" + identity
+            outFile.write( string )
+        else:
+            nbFiltered += 1
+            string = "qry %s (%s-%s) vs subj %s (%s-%s): Eval=%s identity=%s matchLength=%s score=%s" %\
+            ( qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Evalue, identity.split("\n")[0], matchLength, score )
+            if verbose > 1:
+                print string; sys.stdout.flush()
+                
+        line = inFile.readline()
+        
+    inFile.close()
+    outFile.close()
+    
+    if verbose > 0:
+        msg = "total number of matches: %i" % ( nbMatches )
+        msg += "\nnumber of filtered matches: %i" % ( nbFiltered )
+        print msg; sys.stdout.flush()
+        
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    return 0
+
+
+if __name__ == "__main__":
+    main()
b
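A hedged illustration of how main() above is typically exercised, assuming $REPET_PATH is set (the script first looks for the compiled 'filterAlign' binary there); the field order is the one read by the parsing code and the file names are hypothetical:

    import sys
    from commons.tools.FilterAlign import main  # assumed module path

    # each 'align' line holds one HSP with 9 tab-separated fields:
    # qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Evalue, score, identity
    sys.argv = ["FilterAlign.py", "-i", "hits.align", "-E", "1e-10",
                "-I", "80", "-o", "hits.align.filtered", "-v", "1"]
    main()  # keeps HSPs with E-value <= 1e-10 and identity >= 80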
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/GFF3Maker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/GFF3Maker.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,498 @@\n+#!/usr/bin/env python\n+\n+##@file GFF3Maker.py\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+import sys\n+import os\n+\n+## GFF3Maker exports annotations from a \'path\' table into a GFF3 file.\n+#\n+class GFF3Maker(object):\n+\n+    def __init__(self, inFastaName = "", tablesFileName = "", classifTableName = "", isChado = False, isGFF3WithoutAnnotation = False, isWithSequence = False, areMatchPartsCompulsory = False, configFileName = "", verbose = 0, doMergeIdenticalMatches = False, doSplit = False):\n+        self._inFastaName = inFastaName\n+        self._tablesFileName = tablesFileName\n+        self._classifTableName = classifTableName\n+        self._isChado = isChado\n+        self._isGFF3WithoutAnnotation = isGFF3WithoutAnnotation\n+        self._isWithSequence = isWithSequence\n+        self._areMatchPartsCompulsory = areMatchPartsCompulsory\n+        self._configFileName = configFileName\n+        self._doMergeIdenticalMatches = doMergeIdenticalMatches\n+        self._doSplit = doSplit\n+        self._iDB = None\n+        self._verbose = verbose\n+    \n+    def setAttributesFromCmdLine(self):\n+        description = "GFF3Maker exports annotations from \'path\', \'set\' and/or \'classif\' tables into a GFF3 file\\n"\n+        parser = RepetOptionParser(description = description)\n+        parser.add_option("-f", "--inseq",              dest = "inFastaName",               action = "store",       type = "string", help = "\'seq\' table recording the input sequences", default = "")\n+        parser.add_option("-t", "--tablesfile",         dest = "tablesFileName",            action = 
"store",       type = "string", help = "tabulated file of table name to use to create the gff3 files (fields: tier name, format, table name)", default = "")\n+        parser.add_option("-w", "--withSequence",       dest = "isWithSequence",            action = "store_true",                   help = "write the sequence at the end of GFF3 file", default = False)\n+        parser.add_option("-a", "--withoutAnnotation",  dest = "isGFF3WithoutAnnotation",   action = "store_true",                   help = "write GFF3 files even if no a'..b'_iDB, self._inFastaName)\n+        lTuples = iTSA.getAccessionAndLengthList()\n+        for seqName, length in lTuples :\n+            if not self._doSplit:\n+                fileName = "%s.gff3" % seqName\n+                outFile = open(fileName, "w")\n+                outFile.write("##gff-version 3\\n")\n+                outFile.write("##sequence-region %s 1 %s\\n" % (seqName, length))\n+                for line in linesFromAnnotationTablesFile:\n+                    if line[0] == "#":\n+                        continue\n+                    tok = line.split()\n+                    if len(tok) == 0:\n+                        break\n+                    source = tok[0]\n+                    format = tok[1]\n+                    table = tok[2]\n+                    tableseq = ""\n+                    if len(tok) == 4:\n+                        tableseq = tok[3]\n+                    if format == \'path\' :\n+                        annotations = self._getPathFeatures(table, tableseq, seqName, source, feature, frame)\n+                    elif format == \'set\' :\n+                        annotations = self._getSetFeatures(table, seqName, source, feature, frame)\n+                    else:\n+                        raise Exception("Wrong format : %s" % format)\n+                    outFile.write(annotations)\n+                outFile.close()\n+                #TODO: check getNbLinesInSingleFile() to handle big files\n+                if not self._isGFF3WithoutAnnotation and FileUtils.getNbLinesInSingleFile(fileName) == 2:\n+                    os.remove(fileName)\n+                elif self._isWithSequence:\n+                    outFile = open(fileName, "a")\n+                    outFile.write("##FASTA\\n")\n+                    iBioseq = iTSA.getBioseqFromHeader(seqName)\n+                    iBioseq.write(outFile)\n+                    outFile.close()\n+            else:\n+                count = 1\n+                for line in linesFromAnnotationTablesFile:\n+                    if line[0] == "#":\n+                        continue\n+                    tok = line.split()\n+                    if len(tok) == 0:\n+                        break\n+                    source = tok[0]\n+                    format = tok[1]\n+                    table = tok[2]\n+                    tableseq = ""\n+                    if len(tok) == 4:\n+                        tableseq = tok[3]\n+                    fileName = "%s_Annot%i.gff3" % (seqName, count)\n+                    outFile = open(fileName, "w")\n+                    outFile.write("##gff-version 3\\n")\n+                    outFile.write("##sequence-region %s 1 %s\\n" % (seqName, length))\n+                    if format == \'path\' :\n+                        annotations = self._getPathFeatures(table, tableseq, seqName, source, feature, frame)\n+                    elif format == \'set\' :\n+                        annotations = self._getSetFeatures(table, seqName, source, feature, frame)\n+                    
else:\n+                        raise Exception("Wrong format : %s" % format)\n+                    outFile.write(annotations)\n+                    outFile.close()\n+                    #TODO: check getNbLinesInSingleFile() to handle big files\n+                    if not self._isGFF3WithoutAnnotation and FileUtils.getNbLinesInSingleFile(fileName) == 2:\n+                        os.remove(fileName)\n+                    elif self._isWithSequence:\n+                        outFile = open(fileName, "a")\n+                        outFile.write("##FASTA\\n")\n+                        iBioseq = iTSA.getBioseqFromHeader(seqName)\n+                        iBioseq.write(outFile)\n+                        outFile.close()\n+                    count += 1\n+            \n+        self._iDB.close()\n+    \n+        if self._verbose > 0:\n+            print "END GFF3Maker"\n+            sys.stdout.flush()\n+            \n+if __name__ == "__main__":\n+    iGFF3Maker = GFF3Maker()\n+    iGFF3Maker.setAttributesFromCmdLine()\n+    iGFF3Maker.run()\n+    \n\\ No newline at end of file\n'
b
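A hedged sketch of the tabulated 'tables file' read by GFF3Maker's -t option; per the option help and the run() code above, each non-comment line holds the tier name, the format ('path' or 'set'), the table name and, optionally, an associated 'seq' table (the 'tableseq' variable). All names below are hypothetical:

    # tier name    format    table name                 optional 'seq' table
    TEannot        path      MyGenome_chr_allTEs_path   MyGenome_refTEs_seq
    SSRannot       set       MyGenome_chr_SSR_set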
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/GameXmlMaker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/GameXmlMaker.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,384 @@\n+#!/usr/bin/env python\n+\n+##@file GameXmlMaker.py\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import os\n+import glob\n+import sys\n+import xml.dom.minidom\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+from commons.core.sql.TableMapAdaptator import TableMapAdaptator\n+\n+## GameXmlMaker exports .\n+#\n+class GameXmlMaker(object):\n+\n+    def __init__(self, inFastaName = "", tablesFileName = "", configFileName = "", verbose = 0):\n+        self._inFastaName = inFastaName\n+        self._tablesFileName = tablesFileName\n+        self._configFileName = configFileName\n+        self._verbose = verbose\n+        self._gameXMLFileName = ""\n+    \n+    def setAttributesFromCmdLine(self):\n+        description = "GameXmlMaker with -f option <=> step 1 : create gff files (write only the sequence and not the annotation. 
Only one sequence in each file)\\n"\n+        description += "GameXmlMaker with -t option <=> step 2 : add annotations in each file\\n"\n+        parser = RepetOptionParser(description = description)\n+        parser.add_option("-f", "--inseq",       dest = "inFastaName",      action = "store",       type = "string", help = "\'fasta\' file or \'seq\' table recording the input sequences (required to generate new \'.gamexml\' files)", default = "")\n+        parser.add_option("-t", "--tablesfile",  dest = "tablesFileName",   action = "store",       type = "string", help = "tabulated file of table name to use to update the GameXML files (fields: tier name, format, table name)", default = "")\n+        parser.add_option("-g", "--gameXML",     dest = "gameXML",          action = "store",       type = "string", help = "gameXML file to update (if not specified, update all gameXML files in directory", default = "")\n+        parser.add_option("-C", "--config",      dest = "configFileName",   action = "store",       type = "string", help = "configuration file for database connection", default = "")\n+        parser.add_option("-v", "--verbose",     dest = "verbose",          action = "store",       type = "int",    help = "verbosity level (default=0, else 1 or 2)", default = 0)\n+        (options, args) '..b'    resultSetName.appendChild(docXML.createTextNode("%s::%s" %(iSet.getName(),str(iSet.getId()))))\n+        resultSet.appendChild(resultSetName)\n+        return resultSet\n+    \n+    def _addResultSetFromMap(self, docXML, iMap, parent):\n+        computationalAnalysis = parent\n+        resultSet = docXML.createElement(\'result_set\')\n+        resultSet.setAttribute(\'id\', "-1") \n+        computationalAnalysis.appendChild(resultSet)\n+        resultSetName = docXML.createElement(\'name\')\n+        resultSetName.appendChild(docXML.createTextNode("%s::%s" %(iMap.getName(), "-1")))\n+        resultSet.appendChild(resultSetName)\n+        return resultSet\n+\n+    def _updateGameXMLFileFromlTablesFile(self, gameXMLFile):\n+        docXML = xml.dom.minidom.parse(gameXMLFile) \n+        \n+        f = open(self._tablesFileName, "r")\n+        line = f.readline()\n+            \n+        while line: \n+            if not line.startswith("#"):\n+                list = line.split()\n+                programName = list[0]\n+                format = list[1]\n+                table = list[2]\n+\n+                gameXMLFileName = os.path.splitext(gameXMLFile)[0]\n+                computationalAnalysis = self._addComputationalAnalysisTags(docXML,programName)\n+                if format == "path":    \n+                    iDB = DbFactory.createInstance(self._configFileName)\n+                    iTpa = TablePathAdaptator(iDB, table)\n+                    \n+                    lPaths = iTpa.getPathListFromQuery(gameXMLFileName)\n+                    dResultSets = {}\n+\n+                    for iPath in lPaths:\n+                        if dResultSets.get(iPath.getIdentifier()) is None:\n+                            resultSet = self._addResultSetFromPath(docXML, iPath,computationalAnalysis)\n+                            dResultSets[iPath.getIdentifier()] = resultSet\n+                        else:\n+                            resultSet = dResultSets[iPath.getIdentifier()]\n+                        spanInfo = self._parseResultSpanInfo(iPath, "path")\n+                        self._addPathSpan(docXML, spanInfo, resultSet)\n+                        \n+                if format == "set":\n+              
      iDB = DbFactory.createInstance(self._configFileName)\n+                    iTsa = TableSetAdaptator(iDB, table)\n+                    lSet = iTsa.getSetListFromSeqName(gameXMLFileName)\n+                    \n+                    dResultSets = {}\n+\n+                    for iSet in lSet:\n+                        if dResultSets.get(iSet.getId()) is None:\n+                            resultSet = self._addResultSetFromSet(docXML, iSet,computationalAnalysis)\n+                            dResultSets[iSet.getId()] = resultSet\n+                        else:\n+                            resultSet = dResultSets[iSet.getId()]\n+                            \n+                        spanInfo = self._parseResultSpanInfo(iSet, "set")\n+                        self._addPathSpan(docXML, spanInfo, resultSet)\n+                        \n+                if format == "map":\n+                    iDB = DbFactory.createInstance(self._configFileName)\n+                    iTma = TableMapAdaptator(iDB, table)\n+                    lMap = iTma.getMapListFromChr(gameXMLFileName)\n+                    dResultSets = {}\n+                    for iMap in lMap:\n+                        resultSet = self._addResultSetFromMap(docXML, iMap,computationalAnalysis)\n+                        spanInfo = self._parseResultSpanInfo(iMap, "map")\n+                        self._addPathSpan(docXML, spanInfo, resultSet)\n+                        \n+            line = f.readline()\n+            \n+        outputGameXMLFile = open(gameXMLFile, "w")            \n+        xmlstr = docXML.toxml()\n+        outputGameXMLFile.write(xmlstr)\n+        outputGameXMLFile.close()\n+            \n+if __name__ == "__main__":\n+    iGameXmlMaker = GameXmlMaker()\n+    iGameXmlMaker.setAttributesFromCmdLine()\n+    iGameXmlMaker.run()\n+    \n\\ No newline at end of file\n'
b
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py Tue Apr 30 14:33:21 2013 -0400
[
b'@@ -0,0 +1,547 @@\n+#!/usr/bin/env python\n+\n+##@file\n+# For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.\n+# usage: GetMultAlignAndPhylogenyPerTErefSeq.py [ options ]\n+# options:\n+#      -h: this help\n+#      -S: step (0: all steps [default], 1:file generation, 2:multiple alignements, 3:phylogenies)\n+#      -p: table with the annotations (format=path)\n+#      -s: table with the TE reference sequences (format=seq)\n+#      -g: table with the genome sequence (format=seq)\n+#      -r: name or file with TE reference sequence(s) (all by default)\n+#      -m: MSA method (default=Refalign/Map)\n+#      -l: minimum length of copies (default=100)\n+#      -n: number of longest copies to use (default=20)\n+#      -y: minimum  copy proportion compare to references (default=0.5)\n+#      -R: keep the reference sequence (only with Refalign)\n+#      -C: configuration file\n+#      -q: queue name\n+#      -c: clean\n+#      -d: temporary directory\n+#      -v: verbosity level (default=0/1)\n+\n+import os\n+import sys\n+import glob\n+import ConfigParser\n+\n+import pyRepet.launcher.programLauncher\n+\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.core.coord.SetUtils import SetUtils\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.tools.OrientSequences import OrientSequences\n+from ConfigParser import MissingSectionHeaderError\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.launcher import LaunchMap\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+LOG_DEPTH = "repet.tools"\n+\n+## For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.\n+#\n+class GetMultAlignAndPhylogenyPerTErefSeq(object):\n+    \n+    def __init__(self, pathTableName="",refSeqTableName="", genomeSeqTableName="", step=0, mSAmethod="RefAlign",keepRefseq=False, configFileName= "", clean = True, verbosity=3):\n+        """\n+        Constructor.\n+        """\n+        self.step = step\n+        self._pathTable = pathTableName\n+        self._refSeqTable = refSeqTableName\n+        self._genomeSeqTable = genomeSeqTableName\n+        self._TErefseq = ""\n+        self._MSAmethod = mSAmethod\n+        self._minCopyLength = 100\n+        self._nbLongestCopies = 20\n+        self._minPropCopy = 0.5\n+        self._keepRefseq = keepRefseq\n+        self.setConfigFileName(configFileName)\n+        self._queue = ""\n+        self._tmpDir = ""\n+        self._clean = clean\n+        self._verbosity = verbosity\n+        self._db = None\n+        self._tpaAnnot = None\n+        self._tsaRef = None\n+        self._pL = pyRepet.launcher.programLauncher.programLauncher()\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+              \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+    \n+    def setAttributesFromCmdLine(self):\n+        desc = "For each TE reference sequence, it computes a multiple 
alignment and a phylogeny of all its copies.\\n"\n+        #Commented: it\'s not true, Config File is mandatory!\n+#        desc += "Connection to the database parameters are retrieved from the environment"\n+        \n+        #TODO: format options as other scripts (have a look at LaunchTemplate)\n+        parser = RepetOptionParser(description = desc, epilog = "")\n+        parser.add_option("-S", "--step",      dest = "step" ,              action = "store", type = "int", help = "step (0: all steps [default], 1:file generation, 2:mu'..b'          \n+    def filter4phylogenies( self, verbosity=0 ):\n+        """\n+        Filter TE copy alignment for better phylogenies.\n+        """\n+        self._log.info("Filtering MSA")\n+        lInFiles = glob.glob( "*_all.fa.oriented_%s.fa_aln" % ( self._MSAmethod.lower() ) )\n+        count = 0\n+        for inFileName in lInFiles:\n+            count += 1    \n+            self._log.debug("clean MSA %d --> %s" % (count,inFileName))  \n+            alignDB = AlignedBioseqDB()\n+            alignDB.load(inFileName)\n+            alignDB.cleanMSA()\n+            if alignDB.getSize() > 2:                \n+                alignDB.save( inFileName + ".clean" )\n+                self._log.debug("clean!")\n+            else:\n+                self._log.debug("skip!")\n+        self._log.info("MSA cleaned: %d" % count)\n+            \n+\n+    def _createLaunchPhyMLCommands(self, iLauncher, query):\n+#        prg = os.environ["REPET_PATH"] + "/bin/srptPhyML.py"\n+#        cmd = prg\n+#        cmd += " -g %s_PHY_%s" % ( self._refSeqTable, os.getpid() )\n+#        cmd += " -q %s" % ( os.getcwd() )\n+#        cmd += " -S \'*_all.fa.oriented_%s.fa_aln.clean\'" % ( self._MSAmethod.lower() )\n+#        cmd += " -Q %s" % ( self._queue )\n+#        cmd += " -C %s" % ( self._configFileName )\n+\n+        prg = "LaunchPhyML.py"\n+        lArgs = []\n+        lArgs.append("-i %s" % query)\n+        lArgs.append("-o %s.fa_phylo" % query)    \n+        lArgs.append("-v %d" % (self._verbosity-1))\n+        \n+        self._log.debug("Prepared Phyml commands : %s %s" % (prg, " ".join(lArgs)))\n+        return iLauncher.getSystemCommand("%s" % prg, lArgs)\n+   \n+    def makePhylogenies( self ):\n+        """\n+        Launch PhyML on each TE family.\n+        """\n+        self.phyloFileSuffix = "*_all.fa.oriented_%s.fa_aln.clean" % ( self._MSAmethod.lower() )\n+        \n+        queue = self._queue\n+        cDir = os.getcwd()\n+        tmpDir =  self._tmpDir \n+        groupid = "%s_PHY_%s" % ( self._refSeqTable, os.getpid() )\n+        acronym = "Phylo"\n+        iDb = DbFactory.createInstance(configFileName=self._configFileName)\n+        iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")\n+        iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)\n+        lCmdsTuples = self._preparePhyMljobs(iLauncher, cDir)\n+        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._clean) \n+\n+        \n+    def start( self ):\n+        """\n+        Useful commands before running the program.\n+        """\n+        self.checkAttributes()\n+        self._log.info("START GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step)\n+        self.connectSql()\n+            \n+    def end( self ):\n+        """\n+        Useful commands before ending the program.\n+        """\n+        self._db.close()\n+        self._log.info("END GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step)\n+            \n+    def 
run( self ):\n+        """\n+        Run the program.\n+        """\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self.start()\n+        lNamesTErefSeq = self.getNamesOfTErefSeq()\n+        self._log.debug("lNamesTErefSeq: %s" %  " ".join(lNamesTErefSeq))\n+        \n+        if self.step in [0, 1]:\n+            self.getTErefSeqInFastaFiles( lNamesTErefSeq )\n+            self.getCopiesInFastaFilesPerTErefSeq( lNamesTErefSeq )\n+            \n+        if self.step in [0, 2]:\n+            self.filter4Alignments(lNamesTErefSeq)\n+            self.buildInFiles4Launcher(lNamesTErefSeq)\n+            self.launchMultAlignments(lNamesTErefSeq)\n+            \n+        if self.step in [0, 3]:\n+            self.filter4phylogenies(verbosity=self._verbosity)\n+            self.makePhylogenies()\n+        self.end()\n+        \n+if __name__ == "__main__":\n+    iGetMultAlignAndPhylogenyPerTErefSeq = GetMultAlignAndPhylogenyPerTErefSeq()\n+    iGetMultAlignAndPhylogenyPerTErefSeq.setAttributesFromCmdLine()\n+    iGetMultAlignAndPhylogenyPerTErefSeq.run()\n+    \n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/GetSpecificTELibAccordingToAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/GetSpecificTELibAccordingToAnnotation.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,178 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.LoggerFactory import LoggerFactory\n+\n+LOG_DEPTH = "repet.tools"  \n+LOG_FORMAT = "%(message)s"\n+#TODO: use configuration file\n+\n+## Get 3 annotation files, using output from TEannot:\n+#- consensus with one or more full length copy, \n+#- consensus with one or more full length fragment,\n+#- consensus without copy\n+\n+class GetSpecificTELibAccordingToAnnotation(object):\n+    \n+    def __init__(self, inInfoFileName = "", tableName = "", verbose = 0):\n+        self._inInfoFileName = inInfoFileName\n+        self._tableName = tableName\n+        self._verbose = verbose\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose, LOG_FORMAT)\n+    \n+    def setAttributesFromCmdLine(self):\n+        desc = "Splits a GiveInfoTEannot \\"statsPerTE.txt\\" file in 3 subfiles containing consensus which have at least one copy, one full length fragment or one full length copy. "\n+        desc += "A TEs library is built according to each category. 
Connection to the database parameters are retrieved from the environment"\n+        \n+        examples = "\\nExample : with a project called \\"MyTEannotAnalysis\\":\\n"\n+        examples += "\\t$ python GetSpecificTELibAccordingToAnnotation.py -i MyTEannotAnalysis_chr_allTEs_nr_noSSR_join_path_statsPerTE.txt -t MyTEannotAnalysis_refTEs_seq"\n+        examples += "\\n\\t"\n+        examples += "\\n\\n"\n+        \n+        parser = RepetOptionParser(description = desc, epilog = examples)\n+        parser.add_option("-i", "--file",     dest = "inInfoFileName",  action = "store", type = "string", help = "input file (mandatory) = output file from GiveInfoTEannot.py (e.g. <project_name>_chr_allTEs_nr_noSSR_join_path_statsPerTE.txt)",  default = "")\n+        parser.add_option("-t", "--table",    dest = "tableName",       action = "store", type = "string", help = "table name of TEs sequences (mandatory, seq format, e.g. <project_name>_refTEs_seq)", default = "")\n+        parser.add_option("-v", "--verbose",  dest = "verbose",         action = "store", type = "int",    help = "verbosity le'..b'        self._tableName = tableName\n+        \n+    def setInInfoFileName(self, inInfoFileName):\n+        self._inInfoFileName = inInfoFileName\n+        \n+    def setVerbose(self, verbose):\n+        self._verbose = verbose\n+   \n+    def checkOptions(self):\n+        if self._inInfoFileName != "":\n+            if not FileUtils.isRessourceExists(self._inClassifFileName):\n+                self._logAndRaise("ERROR: Input GiveInfoTEannot.txt output file does not exist!")\n+        else:\n+            self._logAndRaise("ERROR: No specified -i option!")\n+        \n+        if self._tableName != "":\n+            iDb = DbMySql()\n+            if not iDb.doesTableExist(self._tableName):\n+                self._logAndRaise("ERROR: table does not exist!")\n+            iDb.close()\n+        else:\n+            self._logAndRaise("ERROR: No specified -t option!")\n+\n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+\n+    def writeFastaFileFromGiveInfoTEAnnot(self, fileName):\n+        fFileHandler = open(fileName,"r")\n+        lineHeader = fFileHandler.readline()\n+        line = fFileHandler.readline()\n+        lConsensusName = []\n+        while line:         \n+            lConsensusName.append(line.split()[0])\n+            line = fFileHandler.readline()\n+\n+        fFileHandler.close()\n+        iDb = DbMySql()\n+        iTSA = TableSeqAdaptator(iDb, self._tableName)\n+        outPutFileName = "%s.fa" % os.path.splitext(fileName)[0]\n+        iTSA.saveAccessionsListInFastaFile(lConsensusName, outPutFileName)\n+        iDb.close()\n+                      \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbose)\n+\n+        outInfoFileNameFullCopy = "%s_FullLengthCopy.txt" % os.path.splitext(os.path.basename(self._inInfoFileName))[0]\n+        outInfoFileNameCopy = "%s_OneCopyAndMore.txt" % os.path.splitext(os.path.basename(self._inInfoFileName))[0]\n+        outInfoFileNameFullFrag = "%s_FullLengthFrag.txt" % os.path.splitext(os.path.basename(self._inInfoFileName))[0]\n+        \n+        outInfoFileFullCopy = open(outInfoFileNameFullCopy, "w")\n+        outInfoFileCopy = open(outInfoFileNameCopy, "w")\n+        outInfoFileFullFrag = open(outInfoFileNameFullFrag, "w")\n+            \n+        self._log.info("START GetSpecificTELibAccordingToAnnotation\\n input info file: %s" % self._inInfoFileName)\n+        \n+ 
       inFileFh = open(self._inInfoFileName, "r")\n+        line = inFileFh.readline()\n+        lHeaders = line.split()\n+        if "fullLgthCopies" not in lHeaders:\n+            self._logAndRaise("ERROR: No headers in %s!" % self._inInfoFileName )\n+        \n+        outInfoFileFullCopy.write(line)\n+        outInfoFileCopy.write(line)\n+        outInfoFileFullFrag.write(line)\n+            \n+        line = inFileFh.readline()\n+        while line:\n+            dTokens = {}\n+            for index, token in enumerate(line.split()):\n+                dTokens[lHeaders[index]] = token\n+                    \n+            if int(dTokens["fullLgthCopies"]) > 0:\n+                outInfoFileFullCopy.write(line)\n+            if int(dTokens["copies"]) > 0:\n+                outInfoFileCopy.write(line)\n+            if int(dTokens["fullLgthFrags"]) > 0:\n+                outInfoFileFullFrag.write(line)\n+            line = inFileFh.readline()\n+                \n+        inFileFh.close()\n+        outInfoFileFullCopy.close()\n+        outInfoFileCopy.close()\n+        outInfoFileFullFrag.close()\n+        \n+        self.writeFastaFileFromGiveInfoTEAnnot(outInfoFileNameFullCopy)\n+        self.writeFastaFileFromGiveInfoTEAnnot(outInfoFileNameCopy)\n+        self.writeFastaFileFromGiveInfoTEAnnot(outInfoFileNameFullFrag)\n+        \n+        self._log.info("END GetSpecificTELibAccordingToAnnotation\\n" )\n+            \n+        return 0\n+\n+if __name__ == \'__main__\':\n+    iGetTELib = GetSpecificTELibAccordingToAnnotation()\n+    iGetTELib.setAttributesFromCmdLine()\n+    iGetTELib.run() \n+    \n\\ No newline at end of file\n'
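The run() method added above routes every data line of the GiveInfoTEannot "statsPerTE.txt" file into up to three output files by testing the "copies", "fullLgthFrags" and "fullLgthCopies" columns independently, then builds one fasta library per category with writeFastaFileFromGiveInfoTEAnnot(). A minimal sketch of that routing, with invented column values (only the three tested columns are shown):

    # Sketch only: dTokens mimics one parsed line of statsPerTE.txt; the values are invented.
    dTokens = {"copies": "3", "fullLgthFrags": "1", "fullLgthCopies": "0"}
    print int(dTokens["copies"]) > 0          # True  -> line written to *_OneCopyAndMore.txt
    print int(dTokens["fullLgthFrags"]) > 0   # True  -> line written to *_FullLengthFrag.txt
    print int(dTokens["fullLgthCopies"]) > 0  # False -> line not written to *_FullLengthCopy.txt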
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+from commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align import HmmpfamOutput2align
+from commons.pyRepetUnit.align.hmmOutputParsing.HmmscanOutput2align import HmmscanOutput2align
+from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat
+from commons.core.utils.FileUtils import FileUtils
+
+#------------------------------------------------------------------------------
+
+def help():
+
+    """
+    Give the command-line parameters.
+    """
+
+    print ""
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='hmmpfam Output' or 'hmmscan Output)"
+    print "     -o: name of the output file (default=inFileName+'.align')"
+    print "     -T: name of the consensus File (To launch the transformation of aa positions in nt positions and Filter positive score, default=no transformation)"
+    print "     -v: verbose (default=0/1/2)"
+    print "     -p: name of program (default=hmmpfam, but you can specify hmmscan too)"
+    print "     -c: clean"
+    print ""
+
+#------------------------------------------------------------------------------
+
+def main():
+
+    inFileName = ""
+    outFileName = ""
+    verbose = 0
+    clean = False
+    consensusFileName = ""
+    program = "hmmpfam"
+
+    try:
+        opts,args=getopt.getopt(sys.argv[1:],"hi:o:T:v:p:c")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-T":
+            consensusFileName = a
+        elif o == "-v":
+            verbose = int(a)
+        elif o == "-p":
+            program = a
+        elif o == "-c":
+            clean = True
+            
+    if inFileName == "":
+        print "*** Error: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "beginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    if outFileName == "":
+        outFileName = "%s.align" % ( inFileName )
+    
+    if program == "hmmpfam":
+        hmmpfamOutput2align = HmmpfamOutput2align( )
+        hmmpfamOutput2align.setInputFile( inFileName )
+        if consensusFileName == "":
+            hmmpfamOutput2align.setOutputFile( outFileName )
+        else:
+            hmmpfamOutput2align.setOutputFile( outFileName + ".tmp" )
+        hmmpfamOutput2align.run( )
+    else:
+        if program == "hmmscan":
+            hmmscanOutput2align = HmmscanOutput2align( )
+            hmmscanOutput2align.setInputFile( inFileName )
+            if consensusFileName == "":
+                hmmscanOutput2align.setOutputFile( outFileName )
+            else:
+                hmmscanOutput2align.setOutputFile( outFileName + ".tmp" )
+            hmmscanOutput2align.run( )
+        else:
+            print "\nWarning: You must specify a valid program (-p option). Only hmmpfam or hmmscan are supported !\n"
+    
+    if consensusFileName != "":
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setInFileName( outFileName + ".tmp" )
+        alignTransformation.setOutFileName( outFileName )
+        alignTransformation.setConsensusFileName( consensusFileName ) 
+        alignTransformation.setIsFiltered(True)
+        alignTransformation.run()
+        os.remove( outFileName + ".tmp" )
+        
+    
+    if clean == True:
+        os.remove( inFileName )
+
+    if verbose > 0:
+        if FileUtils.isRessourceExists( outFileName ) and not(FileUtils.isEmpty( outFileName )):
+            print "%s finished successfully" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+        else:
+            print "warning %s execution failed" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+
+    return 0
+
+#------------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
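The main() function above chains two steps: convert an hmmpfam or hmmscan report into the align format, then, when -T is given, map the amino-acid coordinates back to nucleotide coordinates and filter the scores. A minimal programmatic sketch of the same chain, using the classes imported at the top of the script; the input and consensus file names are hypothetical:

    from commons.pyRepetUnit.align.hmmOutputParsing.HmmpfamOutput2align import HmmpfamOutput2align
    from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat

    inFileName = "consensus_vs_Pfam.hmmpfamOut"   # hypothetical hmmpfam output file
    outFileName = inFileName + ".align"

    parser = HmmpfamOutput2align()
    parser.setInputFile(inFileName)
    parser.setOutputFile(outFileName + ".tmp")
    parser.run()

    transformer = TransformAACoordIntoNtCoordInAlignFormat()
    transformer.setInFileName(outFileName + ".tmp")
    transformer.setOutFileName(outFileName)
    transformer.setConsensusFileName("consensus.fa")   # hypothetical consensus fasta
    transformer.setIsFiltered(True)
    transformer.run()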
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/LaunchBlaster.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/LaunchBlaster.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,204 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+import subprocess\n+\n+LOG_DEPTH = "repet.tools"\n+\n+##Launch BLASTER\n+#\n+class LaunchBlaster(object):\n+    \n+    def __init__(self, queryFileName = "", subjectFileName = "", evalue = 1e-300, identity = 90, length = 100, doAllByall = False, type = "ncbi", nbCPU = 1, program="blastn",extraParams="", doClean = False, verbosity = 0):\n+        self._queryFileName = queryFileName\n+        self.setSubjectFileName(subjectFileName)\n+        self._eValue = evalue\n+        self._identity = identity\n+        self._length = length\n+        self._doAllByall = doAllByall\n+        self._blastType = type\n+        self._program = program\n+        self._extraParams = extraParams\n+        self._nbCPU = nbCPU\n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Launch Blaster."\n+        epilog = "\\nExample 1: launch without verbosity and keep temporary files.\\n"\n+        epilog += "\\t$ python LaunchBlaster.py -q query.fa -v 0"\n+        epilog += "\\n\\t"\n+        epilog += "\\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\\n"\n+        epilog += "\\t$ python LaunchBlaster.py -q query.fa -s nr.fa -c -v 2"\n+        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parser.add_option("-q", "--query",      dest = "query",         action = "store",       type = "string", help = "query fasta file name [compulsory] [format: fasta]",       default = "")\n+        parser.add_option("-s", "--subject",    dest 
= "subject",       action = "store",       type = "string", help = "subject fasta file name [default: query] [format: fasta]", default = "")\n+        parser.add_option("-e", "--evalue",     dest = "evalue",        action = "store",       type = "string", help = "Blast e-value [default: 1e-300]",                           default = "1e-300")\n+        parser.add_option("-d", "--id",         dest = "identity",      action = "store",       type = "int",    help = "Blast identity [default: 90]",                             d'..b':\n+        if subjectFileName == "":\n+            self._subjectFileName = self._queryFileName\n+        else:\n+            self._subjectFileName = subjectFileName\n+        \n+    def setEvalue(self, evalue):\n+        self._eValue = evalue\n+        \n+    def setIdentity(self, identity):\n+        self._identity = identity\n+        \n+    def setLength(self, length):\n+        self._length = length\n+        \n+    def setDoAllByall(self, doAllByall):\n+        self._doAllByall = doAllByall\n+        \n+    def setType(self, blastType):\n+        self._blastType = blastType\n+        \n+    def setProgram(self, program):\n+        self._program = program\n+        \n+    def setExtraParams(self, extraParams):\n+        self._extraParams = extraParams\n+        \n+    def setCPU(self, cpu):\n+        self._nbCPU = cpu\n+        \n+    def setDoClean(self, doClean):\n+        self._doClean = doClean\n+        \n+    def setVerbosity(self, verbosity):\n+        self._verbosity = verbosity\n+        \n+    def _checkOptions(self):\n+        if self._queryFileName == "":\n+            self._logAndRaise("ERROR: Missing input fasta file name")\n+        \n+        lBlastType = ["ncbi", "wu", "blastplus"]    \n+        if self._blastType.lower() not in lBlastType:\n+            self._logAndRaise("ERROR: unknown Blast type \'%s\' - correct values are %s" % (self._blastType, lBlastType))\n+            \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+\n+    def _getBlasterCmd(self):\n+        lArgs = []\n+        lArgs.append("-n %s" % self._program)\n+        lArgs.append("-q %s" % self._queryFileName)\n+        lArgs.append("-s %s" % self._subjectFileName)\n+        lArgs.append("-B %s" % self._queryFileName)\n+        if self._doAllByall:\n+            lArgs.append("-a")\n+        lArgs.append("-E %s" % self._eValue)\n+        lArgs.append("-L %s" % self._length)\n+        lArgs.append("-I %s" % self._identity)\n+        if self._blastType == "ncbi": \n+            lArgs.append("-N")\n+            lArgs.append("-p \'-a %s %s\'" % (self._nbCPU, self._extraParams))\n+        elif self._blastType == "wu":\n+            lArgs.append("-W")\n+            lArgs.append("-p \'-cpus=%s %s\'" % (self._nbCPU, self._extraParams))\n+        elif self._blastType == "blastplus":\n+            lArgs.append("-X")\n+            lArgs.append("-p \'-num_threads %s %s\'" % (self._nbCPU, self._extraParams))\n+# TODO: check the check option at the beginning of step 2 to allow to launch megablast for blast and blast+\n+#    elif config.get(sectionName, "blast") == "mega":\n+#        lArgs.append("-N")\n+#        lArgs.append("-n megablast")\n+#    elif config.get(sectionName, "blast") == "megablastplus":\n+#        lArgs.append("-X")\n+#        lArgs.append("-n megablast")\n+        if self._doClean:\n+            lArgs.append("-c")\n+        lArgs.append("-v %i" % (self._verbosity - 1))\n+        return 
self._getSystemCommand("blaster", lArgs)\n+    \n+    def _getSystemCommand(self, prg, lArgs):\n+        systemCmd = prg \n+        for arg in lArgs:\n+            systemCmd += " " + arg\n+        return systemCmd\n+                    \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self._checkOptions()\n+        self._log.info("START LaunchBlaster")\n+        self._log.debug("Query file name: %s" % self._queryFileName)\n+        self._log.debug("Subject file name: %s" % self._subjectFileName)\n+        if self._doClean:\n+            self._log.warning("Files will be cleaned")\n+        cmd = self._getBlasterCmd()\n+        process = subprocess.Popen(cmd, shell = True)\n+        self._log.debug("Running : %s" % cmd)\n+        process.communicate()\n+        if process.returncode != 0:\n+            self._logAndRaise("ERROR when launching \'%s\'" % cmd)\n+        self._log.info("END LaunchBlaster")\n+\n+if __name__ == "__main__":\n+    iLaunch = LaunchBlaster()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/LaunchBlasterInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/LaunchBlasterInParallel.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,301 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import os\n+import shutil\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.checker.ConfigChecker import ConfigRules, ConfigChecker\n+from commons.tools.MergeMatchsFiles import MergeMatchsFiles\n+\n+LOG_DEPTH = "repet.tools"\n+\n+##Launch BLASTER in parallel\n+#\n+class LaunchBlasterInParallel(object):\n+    \n+    def __init__(self, queryDirectory = "", subjectFilePath = "", outFileName = "", configFileName = "", groupId = "", queryPattern = ".*\\.fa", \\\n+                 doAllByall = False, nbCPU = 1, eValue="1e-300", type = "ncbi", program="blastn", extraParams="", verbosity = 0):\n+        self._queryDirectory = queryDirectory\n+        self._queryPattern = queryPattern\n+        self.setSubjectFilePath(subjectFilePath)\n+        self._outFileName = outFileName\n+        self._configFileName = configFileName\n+        self.setGroupId(groupId)\n+        self._doAllByall = doAllByall\n+        self._blastType = type\n+        self._program = program\n+        self._extraParams = extraParams\n+        self._nbCPU = nbCPU\n+        self._jobSectionName = "jobs"\n+        self._blasterSectionName = "alignment"\n+        self._prepareDataSectionName = "prepare_data"\n+        self._eValue = eValue\n+        \n+        self._doClean = None\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Launch Blaster in parallel."\n+        
epilog = "\\nExample 1: launch without verbosity and keep temporary files.\\n"\n+        epilog += "\\t$ python LaunchBlasterInParallel.py -q query -o query.align -v 0"\n+        epilog += "\\n\\t"\n+        epilog += "\\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\\n"\n+        epilog += "\\t$ python LaunchBlasterInParallel.py -q query -o query.align -s nr.fa -c -v 2"\n+        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parse'..b' lArgs)\n+\n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self._checkConfig()\n+        self._checkOptions()\n+        self._log.info("START LaunchBlasterInParallel")\n+        self._log.debug("Query file name: %s" % self._queryPattern)\n+        self._log.debug("Subject file name: %s" % self._subjectFileName)\n+        \n+        cDir = os.getcwd()\n+        if not self._tmpDir:\n+            self._tmpDir = cDir\n+    \n+        acronym = "Blaster"\n+        iDb = DbFactory.createInstance()\n+        jobdb = TableJobAdaptatorFactory.createInstance(iDb, "jobs")\n+        iLauncher = Launcher(jobdb, os.getcwd(), "", "", cDir, self._tmpDir, "jobs", self._resources, self._groupId, acronym, chooseTemplateWithCopy = self._isCopyOnNode)\n+\n+        lCmdsTuples = []\n+        fileSize = float(os.path.getsize(self._subjectFilePath) + 5000000) / 1000000000\n+        \n+        lCmdSize = []\n+        lCmdCopy = []\n+        if self._isCopyOnNode:\n+            lCmdSize.append("fileSize = %f" % fileSize)       \n+            lCmdCopy.append("shutil.copy(\\"%s\\", \\".\\")" % self._subjectFilePath)\n+            \n+        lFiles = FileUtils.getFileNamesList(self._queryDirectory, self._queryPattern)\n+        for file in lFiles:\n+            lCmds = []\n+            lCmds.append(self._getLaunchBlasterCmd(iLauncher, file))\n+            lCmdStart = []\n+            if self._isCopyOnNode:\n+                lCmdStart.append("os.symlink(\\"../%s\\", \\"%s\\")" % (self._subjectFileName, self._subjectFileName))\n+                lCmdStart.append("shutil.copy(\\"%s/%s\\", \\".\\")" % (self._queryDirectory, file))\n+            else:\n+                lCmdStart.append("os.symlink(\\"%s\\", \\"%s\\")" % (self._subjectFilePath, self._subjectFileName))\n+                lCmdStart.append("os.symlink(\\"%s/%s\\", \\"%s\\")" % (self._queryDirectory, file, file))\n+            lCmdFinish = []\n+            lCmdFinish.append("if os.path.exists(\\"%s.align\\"):" % file)\n+            lCmdFinish.append("\\tshutil.move(\\"%s.align\\", \\"%s/.\\" )" % (file, cDir))\n+            lCmdFinish.append("shutil.move(\\"%s.param\\", \\"%s/.\\" )" % (file, cDir))\n+            lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish, lCmdSize, lCmdCopy))\n+        \n+        iLauncher.runLauncherForMultipleJobs("Blaster", lCmdsTuples, self._doClean, self._isCopyOnNode)\n+\n+        tmpFileName = "tmp_%s.align" % os.getpid()\n+        iMMF = MergeMatchsFiles("align", "tmp_%s" % os.getpid(), allByAll = self._doAllByall, clean = self._doClean)\n+        iMMF.run()\n+\n+        if self._doAllByall:\n+            iDb = DbFactory.createInstance()\n+            jobdb = TableJobAdaptatorFactory.createInstance(iDb, "jobs")\n+            iLauncher = Launcher(jobdb, os.getcwd(), "", "", cDir, self._tmpDir, "jobs", self._resources, "%s_RmvPairAlignInChunkOverlaps" % self._groupId)\n+        \n+            lCmdsTuples = 
[]\n+            lCmds = []\n+            lCmds.append(self._getRmvPairAlignInChunkOverlapsCmd(iLauncher, tmpFileName, self._outFileName))\n+            lCmdStart = [] \n+            lCmdStart.append("os.symlink(\\"%s/%s\\", \\"%s\\")" % (cDir, tmpFileName, tmpFileName))\n+            lCmdFinish = []\n+            lCmdFinish.append("shutil.move(\\"%s\\", \\"%s/.\\")" % (self._outFileName, cDir))\n+            lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))\n+        \n+            iLauncher.runLauncherForMultipleJobs("RmvPairAlignInChunkOverlaps", lCmdsTuples, self._doClean)\n+            if self._doClean:\n+                os.remove(tmpFileName)\n+        else:\n+            shutil.move(tmpFileName, self._outFileName)\n+            \n+        if self._doClean:\n+            FileUtils.removeFilesByPattern("*.param")\n+        \n+        self._log.info("END LaunchBlasterInParallel")\n+\n+if __name__ == "__main__":\n+    iLaunch = LaunchBlasterInParallel()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/LaunchMatcherInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/LaunchMatcherInParallel.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,285 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.launcher.LauncherUtils import LauncherUtils\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.checker.ConfigChecker import ConfigRules, ConfigChecker\n+from commons.core.coord.AlignUtils import AlignUtils\n+import shutil\n+import os\n+\n+LOG_DEPTH = "repet.tools"\n+\n+\n+class LaunchMatcherInParallel(object):\n+    def __init__(self, align="", queryFileName="", subjectFileName="", evalue="1e-10", doJoin=False, keepConflict=False, prefix="", alignPattern = ".*\\.align", \\\n+                 config = "", groupId = "", maxFileSize = 1000000, mergeResults=True, workingDir="tmpMatcher", doClean = False, verbosity = 0):\n+        self._alignFileName = align\n+        self._queryFileName = queryFileName\n+        self.setSubjectFileName(subjectFileName)\n+        self.setOutPrefix(prefix)\n+        self._alignPattern = alignPattern\n+        self._doJoin = doJoin\n+        self._eValue = evalue\n+        self._keepConflict = keepConflict\n+        self._configFileName = config\n+        self.setGroupId(groupId)\n+        self._maxFileSize = maxFileSize\n+        self._mergeResults = mergeResults\n+        self._doClean = doClean\n+        self._workingDir = workingDir\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        self._jobSectionName = "jobs"\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Launch Matcher in parallel."\n+        epilog = "\\nExample 1: 
launch without verbosity and keep temporary files.\\n"\n+        epilog += "\\t$ python LaunchMatcherInParallel.py -a in.align -v 0"\n+        epilog += "\\n\\t"\n+        epilog += "\\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\\n"\n+        epilog += "\\t$ python LaunchMatcherInParallel.py -a in.align -q query.fa -s subject.fa -o query -c -v 2"\n+        parser = RepetOptionParser(description = description, epilog = epilog)\n+        parser.add_option("-'..b'eName, \'w\') as f:\n+            f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+\n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self._checkConfig()\n+        self._checkOptions()\n+        \n+        self._log.info("START LaunchMatcherInParallel")\n+        self._log.debug("Align file name: %s" % self._alignFileName)\n+        self._log.debug("Query file name: %s" % self._queryFileName)\n+        self._log.debug("Subject file name: %s" % self._subjectFileName)\n+        if not os.path.exists(self._workingDir):\n+            os.makedirs(self._workingDir)\n+        else:\n+            self._doClean = False\n+        self._splitAlignFilePerSeq()\n+        os.chdir(self._workingDir)\n+        os.symlink("../%s" % self._queryFileName, self._queryFileName)\n+        if self._queryFileName != self._subjectFileName:\n+            os.symlink("../%s" % self._subjectFileName, self._subjectFileName)\n+            \n+        cDir = os.getcwd()\n+        if not self._tmpDir:\n+            self._tmpDir = cDir\n+        \n+        acronym = "Matcher"\n+        iDb = DbFactory.createInstance()\n+        jobdb = TableJobAdaptatorFactory.createInstance(iDb, "jobs")\n+        iLauncher = Launcher(jobdb, os.getcwd(), "", "", cDir, self._tmpDir, "jobs", self._resources, self._groupId, acronym, chooseTemplateWithCopy = self._isCopyOnNode)\n+\n+        lCmdsTuples = []\n+        lCmdSize = []\n+        lCmdCopy = []\n+\n+        lFiles = FileUtils.getFileNamesList(".", self._alignPattern)\n+        lFileSizeTuples = []\n+        for fileName in lFiles:\n+            fileSize = os.path.getsize(fileName)\n+            lFileSizeTuples.append((fileName, fileSize))\n+        lFileSizeList = LauncherUtils.createHomogeneousSizeList(lFileSizeTuples, self._maxFileSize)\n+        \n+        for lFiles in lFileSizeList:\n+            lCmds = []\n+            lCmdStart = []\n+            lCmdFinish = []\n+            if self._queryFileName:\n+                lCmdStart.append("os.symlink(\\"%s/%s\\", \\"%s\\")" % (cDir, self._queryFileName, self._queryFileName))\n+            if self._subjectFileName and self._subjectFileName != self._queryFileName:\n+                lCmdStart.append("os.symlink(\\"%s/%s\\", \\"%s\\")" % (cDir, self._subjectFileName, self._subjectFileName))\n+            for file in lFiles:\n+                lCmds.append(self._getLaunchMatcherCmd(iLauncher, file))\n+                lCmdStart.append("os.symlink(\\"%s/%s\\", \\"%s\\")" % (cDir, file, file))\n+                lCmdFinish.append("if os.path.exists(\\"%s.match.path\\"):" % file)\n+                lCmdFinish.append("\\tshutil.move(\\"%s.match.path\\", \\"%s/.\\" )" % (file, cDir))\n+                lCmdFinish.append("if os.path.exists(\\"%s.match.tab\\"):" % file)\n+                
lCmdFinish.append("\\tshutil.move(\\"%s.match.tab\\", \\"%s/.\\" )" % (file, cDir))\n+            lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish, lCmdSize, lCmdCopy))\n+        \n+        iLauncher.runLauncherForMultipleJobs("Matcher", lCmdsTuples, self._doClean, self._isCopyOnNode)\n+\n+        if self._mergeResults:\n+            FileUtils.catFilesByPattern("*.match.path", "../%s.match.path" % self._outPrefix)\n+            if self._queryFileName or self._subjectFileName:\n+                outTabFileName = "../%s.match.tab" % self._outPrefix\n+                self._writeTabHeader(outTabFileName)\n+                FileUtils.catFilesByPattern("*.match.tab", outTabFileName, skipHeaders = True)\n+        os.chdir("..")\n+        if self._doClean and self._mergeResults:\n+            self._log.warning("Working directory will be cleaned")\n+            shutil.rmtree(self._workingDir)\n+        self._log.info("END LaunchMatchInParallel")\n+\n+if __name__ == "__main__":\n+    iLaunch = LaunchMatcherInParallel()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/ListAndDropTables.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/ListAndDropTables.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+
+##@file
+# List and drop MySQL tables.
+#
+# usage: ListAndDropTables.py [ options ]
+# options:
+#      -h: this help
+#      -l: tables to list (can be a pattern, '*' for all)
+#      -d: tables to drop (can be a pattern, '*' for all)
+#      -C: configuration file
+#      -v: verbose (default=0/1)
+# it doesn't drop 'info_tables'
+
+
+import sys
+import getopt
+from commons.core.sql.DbMySql import DbMySql
+
+
+class ListAndDropTables( object ):
+    
+    def __init__( self ):
+        self._action = "list"
+        self._tableNames = ""
+        self._configFileName = ""
+        self._verbose = 0
+        self._db = None
+        
+        
+    def help( self ):
+        print
+        print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )
+        print "options:"
+        print "     -h: this help"
+        print "     -l: tables to list (can be a pattern, '*' for all)"
+        print "     -d: tables to drop (can be a pattern, '*' for all)"
+        print "     -C: configuration file (otherwise, use env variables)"
+        print "     -v: verbose (default=0/1)"
+        print "Note: it doesn't drop 'info_tables'."
+        print
+        
+        
+    def setAttributesFromCmdLine( self ):
+        try:
+            opts, args = getopt.getopt(sys.argv[1:],"hl:d:C:v:")
+        except getopt.GetoptError, err:
+            print str(err); self.help(); sys.exit(1)
+        for o,a in opts:
+            if o == "-h":
+                self.help(); sys.exit(0)
+            elif o == "-l":
+                self._action = "list"
+                self._tableNames = a
+            elif o == "-d":
+                self._action = "drop"
+                self._tableNames = a
+            elif o == "-C":
+                self._configFileName = a
+            elif o == "-v":
+                self._verbose = int(a)
+                
+                
+    def checkAttributes( self ):
+        """
+        Before running, check the required attributes are properly filled.
+        """
+        if self._tableNames == "":
+            print "ERROR: missing input table"
+            self.help()
+            sys.exit(1)
+#        if self._configFileName == "":
+#            print "ERROR: missing configuration file"
+#            self.help()
+#            sys.exit(1)
+            
+            
+    def getlistTables( self ):
+        """
+        Return a list with the table names corresponding to the given pattern.
+        """
+        lTables = []
+        if self._tableNames != "*":
+            sql_cmd = "SHOW TABLES like '%%%s%%'" % ( self._tableNames )
+        else:
+            sql_cmd = "SHOW TABLES"
+        self._db.execute( sql_cmd )
+        res = self._db.fetchall()
+        for i in res:
+            lTables.append( i[0] )
+        return lTables
+    
+    
+    def list( self ):
+        """
+        List the tables corresponding to the pattern.
+        """
+        lTables = self.getlistTables()
+        if len(lTables) == 0:
+            print "no table corresponding to '%s'" % ( self._tableNames )
+        else:
+            print "list of tables:"
+            for t in lTables:
+                print t
+            print "%i tables corresponding to '%s'" % ( len(lTables), self._tableNames )
+        sys.stdout.flush()
+        
+        
+    def drop( self ):
+        """
+        Drop the tables corresponding to the pattern.
+        """
+        lTables = self.getlistTables()
+        if len(lTables) == 0:
+            print "no table corresponding to '%s'" % ( self._tableNames )
+        else:
+            print "deleting %i tables corresponding to '%s'" % ( len(lTables), self._tableNames )
+            for t in lTables:
+                if t != "info_tables":
+                    self._db.dropTable( t )
+        sys.stdout.flush()
+        
+        
+    def start( self ):
+        self.checkAttributes()
+        if self._verbose > 0:
+            print "START %s" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+        if self._configFileName != "":
+            self._db = DbMySql( cfgFileName = self._configFileName )
+        else:
+            if self._verbose > 0:
+                print "WARNING: use environment variables to connect to MySQL"
+                sys.stdout.flush()
+            self._db = DbMySql()
+            
+            
+    def end( self ):
+        self._db.close()
+        if self._verbose > 0:
+            print "END %s" % (sys.argv[0].split("/")[-1])
+            sys.stdout.flush()
+            
+            
+    def run( self ):
+        self.start()
+        if i._action == "list":
+            i.list()
+        if i._action == "drop":
+            i.drop()
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = ListAndDropTables()
+    i.setAttributesFromCmdLine()
+    i.run()
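The -l/-d value is turned into a MySQL LIKE pattern by getlistTables(): anything other than '*' is wrapped in '%...%', '*' lists every table, and 'info_tables' is never dropped. A small sketch of the pattern handling, with a hypothetical table-name fragment:

    tableNames = "chr_allTEs"                                  # hypothetical pattern
    if tableNames != "*":
        sql_cmd = "SHOW TABLES like '%%%s%%'" % tableNames     # -> SHOW TABLES like '%chr_allTEs%'
    else:
        sql_cmd = "SHOW TABLES"
    print sql_cmd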
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/MergeMatchsFiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/MergeMatchsFiles.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,84 @@
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.coord.Align import Align
+import shutil
+import os
+import sys
+
+class MergeMatchsFiles(object):
+
+    def __init__(self, fileType, outFileBaseName, allByAll = False, clean = True, verbose = 0):
+        self._fileType = fileType
+        self._outFileBaseName = outFileBaseName
+        self._allByAll = allByAll
+        self._verbose = verbose
+        self._clean = clean
+
+    def _filterRedundantMatches( self, inFile, outFile ):
+        """
+        When a pairwise alignment is launched ~ all-by-all (i.e. one batch against all chunks),
+        one filters the redundant matches. For instance we keep 'chunk3-1-100-chunk7-11-110-...'
+        and we discard 'chunk7-11-110-chunk3-1-100-...'.
+        Also we keep 'chunk5-1-100-chunk5-11-110-...' and we discard
+        'chunk5-11-110-chunk5-1-100-...'.
+        For this, of course, the results need to be sorted by query, on the plus strand,
+        and in ascending coordinates (which is always the case with Blaster).
+        """
+        inFileHandler = open( inFile, "r" )
+        outFileHandler = open( outFile, "w" )
+        iAlign = Align()
+        countMatches = 0
+        tick = 100000
+        while True:
+            line = inFileHandler.readline()
+            if line == "":
+                break
+            countMatches += 1
+            iAlign.setFromString( line )
+            if "chunk" not in iAlign.range_query.seqname \
+                   or "chunk" not in iAlign.range_subject.seqname:
+                print "ERROR: 'chunk' not in seqname"
+                sys.exit(1)
+            if int(iAlign.range_query.seqname.split("chunk")[1]) < int(iAlign.range_subject.seqname.split("chunk")[1]):
+                iAlign.write( outFileHandler )
+            elif int(iAlign.range_query.seqname.split("chunk")[1]) == int(iAlign.range_subject.seqname.split("chunk")[1]):
+                if iAlign.range_query.getMin() < iAlign.range_subject.getMin():
+                    iAlign.write( outFileHandler )
+            if countMatches % tick == 0:   # need to free buffer frequently as file can be big
+                outFileHandler.flush()
+                os.fsync( outFileHandler.fileno() )
+        inFileHandler.close()
+        outFileHandler.close()
+
+    def run(self):
+        if self._verbose > 1:
+            print "concatenate the results of each job"
+            sys.stdout.flush()
+            
+        tmpFileName = "%s.%s_tmp" % (self._outFileBaseName, self._fileType)
+        outFileName = "%s.%s" % (self._outFileBaseName, self._fileType)
+        pattern = "*.%s" % self._fileType
+    
+        if os.path.exists(tmpFileName):
+            os.remove(tmpFileName)
+    
+        FileUtils.catFilesByPattern(pattern, tmpFileName)
+        if self._clean:
+            FileUtils.removeFilesByPattern(pattern)
+    
+        if self._fileType == "align":
+            if self._allByAll:
+                self._filterRedundantMatches(tmpFileName, outFileName)
+            else:
+                shutil.move(tmpFileName, outFileName)
+        else:
+            prg = "%s/bin/%snum2id" % (os.environ["REPET_PATH"], self._fileType)
+            cmd = prg
+            cmd += " -i %s" % tmpFileName
+            cmd += " -o %s" % outFileName
+            cmd += " -v %i" % (self._verbose - 1)
+            log = os.system(cmd)
+            if log != 0:
+                print "*** Error: %s returned %i" % (prg, log)
+                sys.exit(1)
+        if self._clean and FileUtils.isRessourceExists(tmpFileName):
+            os.remove(tmpFileName)
\ No newline at end of file
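The keep/discard rule documented in _filterRedundantMatches() above boils down to: keep a match when its query chunk number is lower than its subject chunk number, or, within the same chunk, when the query interval starts first; the mirrored match is discarded. A small illustration with invented chunk numbers and coordinates:

    # Illustration of the rule applied per line in _filterRedundantMatches(); values are invented.
    def keepMatch(queryChunk, queryMin, subjectChunk, subjectMin):
        if queryChunk < subjectChunk:
            return True
        if queryChunk == subjectChunk and queryMin < subjectMin:
            return True
        return False

    print keepMatch(3, 1, 7, 11)    # True  : chunk3 vs chunk7 is kept
    print keepMatch(7, 11, 3, 1)    # False : the mirror chunk7 vs chunk3 is discarded
    print keepMatch(5, 1, 5, 11)    # True  : same chunk, query interval starts first
    print keepMatch(5, 11, 5, 1)    # False : mirror of the previous match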
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/MysqlConnect.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/MysqlConnect.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,56 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.sql.DbMySql import DbMySql
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+
+def main():
+    description = "Check MySQL connection on a node"
+    usage = "MysqlConnect.py -n hostname"
+           
+    parser = RepetOptionParser(description = description, usage = usage, version = "v1.0")
+    parser.add_option( "-n", "--node", dest = "node", type = "string", help = "node name", default = "")
+    parser.add_option( "-C", "--config", dest = "config", type = "string", help = "path to config file", default = "")
+    
+    options, args = parser.parse_args()
+    
+    node = options.node
+    
+    try:
+        if options.config != "":
+            idbMySQL = DbMySql(cfgFileName = options.config)
+        else:
+            idbMySQL = DbMySql()
+        print "SUCCESS to connect to MySQL database from '%s'" % node
+    except SystemExit:
+        print "FAILED to connect to MySQL database from '%s'" % node
+    
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/OrientSequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/OrientSequences.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,375 @@\n+#!/usr/bin/env python\n+\n+"""\n+Interface to orient sequences before making a multiple alignment.\n+Use hashing or suffix tree to get an idea of the appropriate strand.\n+Use \'orienter\' by default, otherwise use \'mummer\'.\n+"""\n+\n+import sys\n+import os\n+import glob\n+import getopt\n+\n+from commons.core.seq.BioseqDB import BioseqDB\n+import pyRepet.seq.fastaDB\n+from commons.core.checker.CheckerUtils import CheckerUtils\n+\n+class OrientSequences( object ):\n+    """\n+    Interface to orient sequences before making a multiple alignment.\n+    Use hashing or suffix tree to get an idea of the appropriate strand.\n+    Use \'orienter\' by default, otherwise use \'mummer\'.\n+    """\n+    \n+    def __init__(self, inFileName="", minMatchLength=10, prgToOrient = "orienter", outFileName="", clean=False, verbosity=1):\n+        """\n+        Constructor.\n+        """\n+        self._inFileName = inFileName\n+        self._minMatchLength = minMatchLength\n+        self._prgToOrient = prgToOrient\n+        self._outFileName = outFileName\n+        self._clean = clean\n+        self._verbose = verbosity\n+        \n+    def help( self ):\n+        """\n+        Display the help on stdout.\n+        """\n+        print\n+        print "usage:",sys.argv[0].split("/")[-1],"[options]"\n+        print "options:"\n+        print "     -h: this help"\n+        print "     -i: name of the input file (format=\'fasta\')"\n+        print "     -m: minimum match length (default=10)"\n+        print "     -p: program to use first (default=orienter/mummer)"\n+        print "     -o: name of the output file (default=inFileName+\'.oriented\')"\n+        print "     -c: clean"\n+        print "     -v: verbosity level (0/default=1/2)"\n+        print\n+        \n+    def setAttributesFromCmdLine( self ):\n+        """\n+        Set the attributes from the command-line.\n+        """\n+        try:\n+            opts, args = getopt.getopt(sys.argv[1:],"hi:m:p:o:cv:")\n+        except getopt.GetoptError, err:\n+            print str(err); self.help(); sys.exit(1)\n+        for o,a in opts:\n+            if o == "-h":\n+                self.help(); sys.exit(0)\n+            elif o == "-i":\n+                self.setInputFileName( a )\n+            elif o == "-m":\n+                self.setMinMatchLength( a )\n+            elif o == "-p":\n+                self.setPrgToOrient( a )\n+            elif o == "-o":\n+                self.setOutputFileName( a )\n+            elif o == "-c":\n+                self.setClean()\n+            elif o == "-v":\n+                self.setVerbosityLevel( a )\n+                \n+    def setInputFileName( self, inFileName ):\n+        self._inFileName = inFileName\n+        \n+    def setMinMatchLength( self, minMatchLength ):\n+        self._minMatchLength = int(minMatchLength)\n+        \n+    def setPrgToOrient( self, prgToOrient ):\n+        self._prgToOrient = prgToOrient\n+        \n+    def setOutputFileName( self, outFileName ):\n+        self._outFileName = outFileName\n+        \n+    def setClean( self ):\n+        self._clean = True\n+        \n+    def setVerbosityLevel( self, verbose ):\n+        self._verbose = int(verbose)\n+        \n+    def checkAttributes( self ):\n+        """\n+        Check the attributes are valid before running the algorithm.\n+        """\n+        if self._inFileName == "":\n+            print "ERROR: missing input file name"\n+            self.help(); sys.exit(1)\n+        if not os.path.exists( 
self._inFileName ):\n+            print "ERROR: input file \'%s\' doesn\'t exist" % ( self._inFileName )\n+            self.help(); sys.exit(1)\n+        if self._prgToOrient not in [ "orienter", "mummer" ]:\n+            print "ERROR: unknown program \'%s\'" % ( self._prgToOrient )\n+            self.help(); sys.exit(1)\n+        if self._outFileName == "":\n+            self._outFileName = "%s.oriented" % ( self._inFileName )\n+            \n+    def useOrienter( self ):\n+        """\n+        Use \'orienter\'.\n+        @return: exit value of \'orienter\'\n+        """\n+        prg = '..b'tStatus = self.useOrienter()\n+            if exitStatus == 0:\n+                self.end()\n+                sys.exit(0)\n+            if exitStatus != 0:\n+                print "\\nWARNING: \'orienter\' had a problem, switching to \'mummer\'"\n+                sys.stdout.flush()\n+                \n+        lInHeaders = pyRepet.seq.fastaDB.dbHeaders( self._inFileName )\n+        nbInSeq = len( lInHeaders )\n+        if self._verbose > 0:\n+            print "nb of input sequences: %i" % ( nbInSeq )\n+            sys.stdout.flush()\n+            \n+        pyRepet.seq.fastaDB.shortenSeqHeaders( self._inFileName, 1 )\n+        tmpFileName = "%s.shortH" % ( self._inFileName )\n+        lNewHeaders = pyRepet.seq.fastaDB.dbHeaders( tmpFileName )\n+        dNew2Init = pyRepet.seq.fastaDB.retrieveLinksNewInitialHeaders( "%slink" % ( tmpFileName ) )\n+        \n+        pyRepet.seq.fastaDB.dbSplit( tmpFileName, nbSeqPerBatch=1, newDir=True )\n+        os.chdir( "batches" )\n+        self.compareInputSequencesWithMummer( nbInSeq )\n+        dMatrix = self.getCumulativeMatchLengthsOnBothStrandForEachPairwiseComparison( lNewHeaders, nbInSeq )\n+        os.chdir( ".." )\n+        \n+        lNewHeadersToReverse = self.getSequencesToReverseFromMatrix( dMatrix, lNewHeaders )\n+        for newH in lNewHeadersToReverse:\n+            lSequenceHeadersToReverse.append( dNew2Init[ newH ] )\n+        if self._verbose > 0:\n+            print "nb of sequences to reverse: %i" % ( len(lNewHeadersToReverse) )\n+            for initH in lSequenceHeadersToReverse: print " %s" % ( initH )\n+            sys.stdout.flush()\n+            \n+        if self._clean:\n+            os.remove( tmpFileName )\n+            os.remove( "%slink" % ( tmpFileName ) )\n+            \n+        return lSequenceHeadersToReverse\n+    \n+    def orientInputSequences( self, lSequenceHeadersToReverse, tmpFileName="" ):\n+        """\n+        Save input sequences while re-orienting those needing it.\n+        @param lSequenceHeadersToReverse: list of headers corresponding to sequences than need to be re-oriented\n+        @type lSequenceHeadersToReverse: list of strings\n+        @param tmpFileName: name of a fasta file (inFileName by default)\n+        @type tmpFileName: string\n+        """\n+        if self._verbose > 0:\n+            print "saving oriented sequences..."\n+            sys.stdout.flush()\n+        if tmpFileName == "":\n+            tmpFileName = self._inFileName\n+        inDB = BioseqDB( tmpFileName )\n+        outDB = BioseqDB()\n+        for bs in inDB.db:\n+            if bs.header in lSequenceHeadersToReverse:\n+                bs.reverseComplement()\n+                bs.header += " re-oriented"\n+            outDB.add( bs )\n+        outDB.save( self._outFileName )\n+        \n+    def clean( self ):\n+        if os.path.exists( "batches" ):\n+            os.system( "rm -rf batches" )\n+        if os.path.exists( 
"orienter_error.log" ):\n+            os.remove( "orienter_error.log" )\n+        for f in glob.glob( "core.*" ):\n+            os.remove( f )\n+            \n+    def start( self ):\n+        """\n+        Useful commands before running the program.\n+        """\n+        self.checkAttributes()\n+        if self._verbose > 0:\n+            print "START %s" % ( type(self).__name__ )\n+            print "input file: %s" % ( self._inFileName )\n+            sys.stdout.flush()\n+            \n+    def end( self ):\n+        """\n+        Useful commands before ending the program.\n+        """\n+        if self._clean:\n+            self.clean()\n+        if self._verbose > 0:\n+            print "END %s" % ( type(self).__name__ )\n+            sys.stdout.flush()\n+            \n+    def run( self ):\n+        """\n+        Run the program.\n+        """\n+        self.start()\n+        lSequenceHeadersToReverse = self.getSequencesToReverse()\n+        self.orientInputSequences( lSequenceHeadersToReverse )\n+        self.end()\n+        \n+if __name__ == "__main__":\n+    i = OrientSequences()\n+    i.setAttributesFromCmdLine()\n+    i.run()\n'
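For illustration (not part of the changeset): based on the option flags printed by help() above, the orienter wrapper could be invoked as follows; the fasta file name is a placeholder and the script is assumed to be reachable as OrientSequences.py.

    $ python OrientSequences.py -i consensus.fa -m 10 -p mummer -o consensus.fa.oriented -v 1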
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/PostAnalyzeTELib.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/PostAnalyzeTELib.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,301 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.stat.Stat import Stat\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.launcher.LaunchBlastclust import LaunchBlastclust\n+from commons.tools.AnnotationStats import AnnotationStats\n+import os\n+\n+CONSENSUS = "TE"\n+CLUSTER = "Cluster"\n+LOG_DEPTH = "repet.tools"\n+LOG_FORMAT = "%(message)s"\n+\n+class PostAnalyzeTELib(object):\n+    \n+    def __init__(self, analysis = 1, fastaFileName = "", clusterFileName = "", pathTableName="", seqTableName="", genomeSize=0, configFileName = "", doClean = False, verbosity = 3):\n+        self._analysis = analysis\n+        self._fastaFileName = fastaFileName\n+        self._pathTableName = pathTableName\n+        self._seqTableName = seqTableName\n+        self._genomeSize = genomeSize\n+        if self._analysis == 1:\n+            self.setBioseqDB()\n+        self._identity = 0\n+        self._coverage = 80\n+        self._applyCovThresholdOnBothSeq = False\n+        self.setClusterFileName(clusterFileName)\n+        self.setStatPerClusterFileName()\n+        self.setClassifStatPerClusterFileName()\n+        self.setAnnotationStatsPerTEFileName()\n+        self.setAnnotationStatsPerClusterFileName()\n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity, LOG_FORMAT)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "Tool to post-analyze a TE library : clusterize, give stats on cluster, on annotation,...\\n"\n+        epilog = "\\nExample 1: clustering (e.g. 
to detect redundancy)\\n"\n+        epilog += "\\t$ python PostAnalyzeTELib.py -a 1 -i TElib.fa -L 98 -S 95 -b\\n"\n+        epilog += "Example 2: classification stats per cluster\\n"\n+        epilog += "\\t$ python PostAnalyzeTELib.py -a 2 -t TElib.tab\\n"\n+        epilog += "Example 3: annotation stats per consensus\\n"\n+        epilog += "\\t$ python PostAnalyzeTELib.py -a 3 -p project_chr_allTEs_nr_noSSR_join_path -s project_refTEs_seq -g 129919500\\n"\n+        epilog += "Example 4: annotation stats per cluster\\n"\n+        epilog += "\\t$ python PostAnalyzeTELib.py -a 4 -t TElib.tab -p project_chr_al'..b'ce("-comp","")\n+                        if "-chim" in classifInfos:\n+                            nbChim += 1\n+                            classifInfos = classifInfos.replace("-chim","")\n+                        if "noCat" in classifInfos:\n+                            nbNoCat += 1\n+                            classifInfos = classifInfos.replace("noCat","")\n+                            \n+                        classif = classifInfos.split("-")[-1]\n+                        if classif != "":\n+                            if dClassifNb.get(classif, None) is None:\n+                                dClassifNb[classif] = 0\n+                            dClassifNb[classif] +=1\n+                            \n+                    occurences= []\n+                    for classif, occs in dClassifNb.items():\n+                        occurences.append("%s (%d)" % (classif, occs))\n+                    \n+                    f.write("%d\\t%d\\t%d\\t%d\\t%d\\t%s\\n" % (clusterId+1, nbNoCat, nbChim\\\n+                                        , nbComp, nbIncomp,"\\t".join(occurences)))\n+                    \n+    def _giveStatsOnTEClusters(self):\n+        with open(self._clusterFileName) as fCluster:\n+            with open(self._statPerClusterFileName, \'w\') as fStatPerCluster:\n+                fStatPerCluster.write("cluster\\tsequencesNb\\tsizeOfSmallestSeq\\tsizeOfLargestSeq\\taverageSize\\tmedSize\\n")\n+                line = fCluster.readline()\n+                clusterNb = 0\n+                clusterSeqList= line.split()          \n+                minClusterSize = len(clusterSeqList)\n+                maxClusterSize = 0\n+                totalSeqNb = 0\n+                seqNbInBigClusters = 0\n+                dClusterSize2ClusterNb = {1:0, 2:0, 3:0}\n+                while line:\n+                    clusterSeqList= line.split()  \n+                    seqNb = len(clusterSeqList)\n+                    totalSeqNb += seqNb\n+                    if seqNb > 2:\n+                        seqNbInBigClusters += seqNb\n+                        dClusterSize2ClusterNb[3] += 1\n+                    else:\n+                        dClusterSize2ClusterNb[seqNb] += 1\n+                    if seqNb > maxClusterSize:\n+                        maxClusterSize = seqNb\n+                    if seqNb < minClusterSize:\n+                        minClusterSize = seqNb\n+                    line = fCluster.readline()\n+                    clusterNb += 1\n+                    clusterSeqLengths = self._iBioseqDB.getSeqLengthByListOfName(clusterSeqList)\n+                    iStatSeqLengths = Stat(clusterSeqLengths)\n+                    fStatPerCluster.write("%d\\t%d\\t%d\\t%d\\t%d\\t%d\\n" %(clusterNb, seqNb, min(clusterSeqLengths), max(clusterSeqLengths),  iStatSeqLengths.mean(),  iStatSeqLengths.median()))\n+        \n+        with open(self._globalStatPerClusterFileName, \'w\') as fG:\n+         
   fG.write("nb of clusters: %d\\n" % clusterNb)\n+            fG.write("nb of clusters with 1 sequence: %d\\n" % dClusterSize2ClusterNb[1])\n+            fG.write("nb of clusters with 2 sequences: %d\\n" % dClusterSize2ClusterNb[2])\n+            fG.write("nb of clusters with >2 sequences: %d (%d sequences)\\n" % (dClusterSize2ClusterNb[3], seqNbInBigClusters))\n+            fG.write("nb of sequences: %d\\n" % totalSeqNb)\n+            fG.write("nb of sequences in the largest cluster: %d\\n" % maxClusterSize)\n+            fG.write("nb of sequences in the smallest cluster: %d\\n" % minClusterSize)\n+            lSeqSizes = self._iBioseqDB.getListOfSequencesLength()\n+            iStat = Stat(lSeqSizes)\n+            fG.write("size of the smallest sequence: %d\\n" % min(lSeqSizes))\n+            fG.write("size of the largest sequence: %d\\n" % max(lSeqSizes))\n+            fG.write("average sequences size: %d\\n" % iStat.mean())\n+            fG.write("median sequences size: %d\\n" % iStat.median())\n+\n+if __name__ == "__main__":\n+    iLaunch = PostAnalyzeTELib()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()\n\\ No newline at end of file\n'
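For illustration (not part of the changeset): _giveStatsOnTEClusters() above reads the cluster file as one cluster per line, each line being a whitespace-separated list of member sequence names. A stand-alone sketch of the size-class tally written to the global stats file could look like this; the cluster file name is hypothetical.

    # count clusters with 1, 2 and >2 member sequences, as in _giveStatsOnTEClusters()
    dClusterSize2ClusterNb = {1: 0, 2: 0, 3: 0}
    totalSeqNb = 0
    with open("TElib.tab") as fCluster:
        for line in fCluster:
            seqNb = len(line.split())
            if seqNb == 0:
                continue
            totalSeqNb += seqNb
            dClusterSize2ClusterNb[min(seqNb, 3)] += 1
    print("nb of clusters with 1 sequence: %d" % dClusterSize2ClusterNb[1])
    print("nb of clusters with 2 sequences: %d" % dClusterSize2ClusterNb[2])
    print("nb of clusters with >2 sequences: %d" % dClusterSize2ClusterNb[3])
    print("nb of sequences: %d" % totalSeqNb)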
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/PrepareBatches.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/PrepareBatches.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import sys
+from ConfigParser import NoSectionError, NoOptionError
+from commons.core.checker.CheckerUtils import CheckerUtils
+from commons.core.checker.CheckerException import CheckerException
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.seq.FastaUtils import FastaUtils
+
+class PrepareBatches(object):
+    
+    def __init__(self, pipelineName, projectDir, projectName, iConfig, verbose):
+        self._pipelineName = pipelineName
+        self._projectDir = projectDir
+        self._projectName = projectName
+        self._iConfig = iConfig
+        self._verbose = verbose
+        
+    def run(self):
+        if self._verbose > 0:
+            print "beginning of step 1"
+            sys.stdout.flush()
+        if FileUtils.isRessourceExists("%s_db" % self._projectName):
+            print "ERROR: directory '%s_db' already exists" % self._projectName
+            sys.exit(1)
+        
+        os.mkdir("%s_db" % self._projectName)
+        os.chdir("%s_db" % self._projectName)
+        genomeFastaFileName = "%s.fa" % self._projectName
+        os.symlink("../%s" % genomeFastaFileName, genomeFastaFileName)
+        sectionName = "prepare_batches"
+        self._checkConfig(sectionName)  
+        
+        separator = "\n"
+        inGenomeFileHandler = open(genomeFastaFileName, "r")
+        try:
+            CheckerUtils.checkHeaders(inGenomeFileHandler)
+        except CheckerException, e:
+            print "Error in file %s. Wrong headers are:" % genomeFastaFileName
+            print separator.join(e.messages)
+            print "Authorized characters are: a-z A-Z 0-9 - . : _\n"
+            inGenomeFileHandler.close()
+            sys.exit(1)
+        inGenomeFileHandler.close()
+
+        doClean = False
+        if self._iConfig.get(sectionName, "clean") == "yes":
+            doClean = True
+        chunkFilePrefix = "%s_chunks" % self._projectName
+        chunkLength = int(self._iConfig.get(sectionName, "chunk_length"))
+        chunkOverlap = int(self._iConfig.get(sectionName, "chunk_overlap"))
+        FastaUtils.dbChunks(genomeFastaFileName, chunkLength, chunkOverlap, 0, chunkFilePrefix, doClean, self._verbose)
+        
+        nbSeq = int(self._iConfig.get(sectionName, "nb_seq_per_batch"))
+        FastaUtils.splitFastaFileInBatches("%s.fa" % chunkFilePrefix, nbSeq * chunkLength)
+
+        if self._iConfig.get(sectionName, "clean") == "yes":
+            FileUtils.removeFilesByPattern("%s.fa*" % self._projectName)
+            
+        os.chdir( ".." )
+        if self._verbose > 0:
+            print "step 1 finished successfully"
+            sys.stdout.flush()
+
+    def _checkConfig(self, sectionName):
+        try:
+            CheckerUtils.checkSectionInConfigFile(self._iConfig, sectionName)
+        except NoSectionError:
+            print "ERROR: the section %s must be in your configuration file" % sectionName
+            sys.exit(1)
+        try:
+            CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, "chunk_length")
+        except NoOptionError:
+            print "ERROR: the option 'chunk_length' must be defined in %s in your configuration file" % sectionName
+            sys.exit(1)
+        try:
+            CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, "chunk_overlap")
+        except NoOptionError:
+            print "ERROR: the option 'chunk_overlap' must be defined in %s in your configuration file" % sectionName
+            sys.exit(1)
+        try:
+            CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, "nb_seq_per_batch")
+        except NoOptionError:
+            print "ERROR: the option 'nb_seq_per_batch' must be defined in %s in your configuration file" % sectionName
+            sys.exit(1)
+        try:
+            CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, "resources")
+        except NoOptionError:
+            print "ERROR: the option 'resources' must be defined in %s in your configuration file" % sectionName
+            sys.exit(1)
+        try:
+            CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, "tmpDir")
+        except NoOptionError:
+            print "ERROR: the option 'tmpDir' must be defined in %s in your configuration file" % sectionName
+            sys.exit(1)
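For illustration (not part of the changeset): PrepareBatches reads its parameters from a [prepare_batches] section of the project configuration file. The option names below are those checked by _checkConfig() plus the 'clean' flag read in run(); the values are placeholders. With nb_seq_per_batch: 5 and chunk_length: 200000, FastaUtils.splitFastaFileInBatches() is called with a batch size of 5 * 200000 = 1000000 bp.

    [prepare_batches]
    chunk_length: 200000
    chunk_overlap: 10000
    nb_seq_per_batch: 5
    resources: main.q
    tmpDir: /tmp
    clean: yes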
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/RetrieveInitHeaders.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/RetrieveInitHeaders.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,150 @@
+#! /usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from optparse import OptionParser
+from commons.core.sql.DbMySql import DbMySql
+
+class RetrieveInitHeaders(object):
+
+    def __init__(self, inTableName = "", linkFileName = "", outTableName = "", isQueryHeaders = True, clean = False, verbose = 0):
+        self._inTableName = inTableName
+        self._linkFileName = linkFileName
+        if outTableName == "":
+            self._outTableName = self._inTableName
+        else:
+            self._outTableName = outTableName
+        self._isQueryHeaders = isQueryHeaders
+        self._clean = clean
+        self._verbose = verbose
+        self._iDb = None
+        self._tmpTableName = "%s_tmp" % self._inTableName
+
+    #TODO: can handle config file
+    #TODO: description, help...
+    def setAttributesFromCmdLine(self):
+        description = ""
+        parser = OptionParser(description = description)
+        parser.add_option("-i", "--input", dest = "inTableName", type = "string", help = "", default = "")
+        parser.add_option("-l", "--link", dest = "linkFileName", type = "string", help = "", default = "")
+        parser.add_option("-o", "--output", dest = "outTableName", type = "string", help = "(default = input table name)", default = "")
+        parser.add_option("-s", "--subject", dest = "isQueryHeaders", action = "store_false", help = "change subject name and not query name", default = True)
+        parser.add_option("-c", "--clean", dest = "clean", action = "store_true", help = "drop input table", default = False)
+        parser.add_option("-v", "--verbose", dest = "verbose", type = "int", help = "0 or 1", default = 0)
+        options, args = parser.parse_args()
+        self.setAttributesFromOptions(options)
+        
+    def setAttributesFromOptions(self, options):
+        self.setInTableName(options.inTableName)
+        self.setLinkFileName(options.linkFileName)
+        self.setOutTableName(options.outTableName)
+        self.setIsQueryHeaders(options.isQueryHeaders)
+        self.setClean(options.clean)
+        self.setVerbose(options.verbose)
+        
+    def setInTableName(self, inTableName):
+        self._inTableName = inTableName
+        self._tmpTableName = "%s_tmp" % self._inTableName
+        
+    def setLinkFileName(self, linkFileName):
+        self._linkFileName = linkFileName
+        
+    def setOutTableName(self, outTableName):
+        if outTableName == "":
+            self._outTableName = self._inTableName
+        else:
+            self._outTableName = outTableName
+        
+    def setIsQueryHeaders(self, isQueryHeaders):
+        self._isQueryHeaders = isQueryHeaders
+        
+    def setClean(self, clean):
+        self._clean = clean
+        
+    def setVerbose(self, verbose):
+        self._verbose = verbose
+        
+    #TODO: checkOptions
+    def checkOptions(self):
+        pass    
+    
+    def run(self):
+        if self._verbose > 0:
+            print "START RetrieveInitHeaders.py"
+        self.checkOptions()
+        
+        if self._verbose > 0:
+            print "copy '%s' table to '%s' table" % (self._inTableName, self._tmpTableName)
+        self._iDb = DbMySql()
+        self._iDb.copyTable(self._inTableName, self._tmpTableName)
+        
+        if self._verbose > 0:
+            print "read '%s' file" % self._linkFileName
+        f = open(self._linkFileName)
+        line = f.readline()
+        count = 0
+        while line:
+            oldHeader = line.split()[0]
+            newHeader = line.split()[1]
+            if self._isQueryHeaders:
+                self._updateQueryName(oldHeader, newHeader)
+            else:
+                self._updateSubjectName(oldHeader, newHeader)
+            count += 1
+            line = f.readline()
+        f.close()
+        
+        if self._verbose > 0:
+            print "nb of relationships: %i" % count
+        if self._clean:
+            self._iDb.dropTable(self._inTableName)
+            if self._verbose > 0:
+                print "drop '%s' table" % self._inTableName
+        if self._verbose > 0:
+            print "rename '%s' table to '%s' table" % (self._tmpTableName, self._outTableName)
+        self._iDb.renameTable(self._tmpTableName, self._outTableName)
+        self._iDb.close()
+        if self._verbose > 0:
+            print "END RetrieveInitHeaders.py"
+        
+    #TODO: methods must be in TablePathAdaptator ?
+    def _updateQueryName(self, oldH, newH):
+        sqlCmd = "UPDATE %s SET query_name = '%s' WHERE query_name = '%s'" % (self._tmpTableName, newH, oldH)
+        self._iDb.execute(sqlCmd)
+        
+    def _updateSubjectName(self, oldH, newH):
+        sqlCmd = "UPDATE %s SET subject_name = '%s' WHERE subject_name = '%s'" % (self._tmpTableName, newH, oldH)
+        self._iDb.execute(sqlCmd)
+    
+if __name__ == "__main__":
+    iRIH = RetrieveInitHeaders()
+    iRIH.setAttributesFromCmdLine()
+    iRIH.run()
\ No newline at end of file
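For illustration (not part of the changeset): run() expects the link file to contain two whitespace-separated columns per line, the header currently stored in the table followed by the header that should replace it (typically the shortened header, then the initial one). Table and file names below are invented for the example.

    $ python RetrieveInitHeaders.py -i project_align_path -l project.fa.shortHlink -o project_path -c -v 1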
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/RmvPairAlignInChunkOverlaps.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/RmvPairAlignInChunkOverlaps.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,286 @@\n+#!/usr/bin/env python\n+\n+"""\n+Remove hits due to chunk overlaps.\n+"""\n+\n+import os\n+import sys\n+import getopt\n+import exceptions\n+import copy\n+from commons.core.coord.Align import *\n+\n+\n+class RmvPairAlignInChunkOverlaps( object ):\n+    """\n+    Remove hits due to chunk overlaps.\n+    """\n+    \n+    \n+    def __init__( self, inFileName="", chunkLength=200000, chunkOverlap=10000, margin=10, outFileName="", verbose=0 ):\n+        """\n+        Constructor.\n+        """\n+        self._inFileName = inFileName\n+        self._chunkLength = chunkLength\n+        self._chunkOverlap = chunkOverlap\n+        self._margin = margin\n+        self._outFileName = outFileName\n+        self._verbose = verbose\n+\n+    def help( self ):\n+        """\n+        Display the help.\n+        """\n+        print\n+        print "usage: %s [ options ]" % ( sys.argv[0] )\n+        print "options:"\n+        print "     -h: this help"\n+        print "     -i: name of the input file (format=\'align\')"\n+        print "     -l: chunk length (in bp)"\n+        print "     -o: chunk overlap (in bp)"\n+        print "     -m: margin to remove match included into a chunk overlap (default=10)"\n+        print "     -O: name of the output file (default=inFileName+\'.not_over\')"\n+        print "     -v: verbose (default=0/1)"\n+        print\n+\n+    def setAttributesFromCmdLine( self ):\n+        """\n+        Set attributes from the command-line arguments.\n+        """\n+        try:\n+            opts, args = getopt.getopt(sys.argv[1:],"h:i:l:o:m:O:v:")\n+        except getopt.GetoptError, err:\n+            print str(err); self.help(); sys.exit(1)\n+        for o,a in opts:\n+            if o == "-h":\n+                self.help(); sys.exit(0)\n+            elif o == "-i":\n+                self.setInputFileName( a )\n+            elif o == "-l":\n+                self.setChunkLength( a )\n+            elif o == "-o":\n+                self.setChunkOverlap( a )\n+            elif o == "-m":\n+                self.setMargin( a )\n+            elif o == "-O":\n+                self.setOutputFileName( a )\n+            elif o == "-v":\n+                self.setVerbosityLevel( a )\n+                \n+    def setInputFileName( self, inFileName ):\n+        self._inFileName = inFileName\n+        \n+    def setChunkLength( self, chunkLength ):\n+        self._chunkLength = int(chunkLength)\n+        \n+    def setChunkOverlap( self, chunkOverlap ):\n+        self._chunkOverlap = int(chunkOverlap)\n+        \n+    def setMargin( self, margin ):\n+        self._margin = int(margin)\n+        \n+    def setOutputFileName( self, outFileName ):\n+        self._outFileName = outFileName\n+        \n+    def setVerbosityLevel( self, verbose ):\n+        self._verbose = int(verbose)\n+        \n+    def checkAttributes( self ):\n+        """\n+        Before running, check the required attributes are properly filled.\n+        """\n+        if self._inFileName == "":\n+            print "ERROR: missing input file"; self.help(); sys.exit(1)\n+        if not os.path.exists(self._inFileName ):\n+            print "ERROR: input file \'%s\' doesn\'t exist"  %( self._inFileName )\n+        if self._outFileName == "":\n+            self._outFileName = "%s.not_over" % ( self._inFileName )\n+            \n+            \n+    def isPairAlignAChunkOverlap( self, a, chunkQuery, chunkSubject ):\n+        """\n+        Return True if the pairwise alignment exactly corresponds to a 2-chunk 
overlap, False otherwise.\n+        Take into account cases specific to BLASTER or PALS.\n+        """\n+        \n+        if a.range_query.isOnDirectStrand() != a.range_subject.isOnDirectStrand():\n+            if self._verbose > 1: print "on different strand"\n+            return False\n+        \n+        if chunkQuery == chunkSubject + 1:\n+            if self._verbose > 1: print "query > subject"\n+            if a.range_query.start == 1 and a.range_subject.end == self._chunkLength \\\n+                   and ( a.range_query.getLength() == self._chunkOverlap \\\n+  '..b'     \n+        if self._verbose > 1: print "not included"\n+        return False\n+    \n+    \n+    def removeChunkOverlaps( self ):\n+        """\n+        Remove pairwise alignments exactly corresponding to chunk overlaps or those included within such overlaps.\n+        """\n+        totalNbPairAlign = 0\n+        nbChunkOverlaps = 0\n+        d = {}\n+        nbPairAlignWithinChunkOverlaps = 0\n+        \n+        inF = open( self._inFileName, "r" )\n+        outF = open( self._outFileName, "w" )\n+        alignInstance = Align()\n+        \n+        while True:\n+            if not alignInstance.read( inF ): break\n+            totalNbPairAlign += 1\n+            if self._verbose > 1: alignInstance.show()\n+            \n+            if "chunk" not in alignInstance.range_query.seqname or "chunk" not in alignInstance.range_subject.seqname:\n+                print "WARNING: no \'chunk\' in query or subject name"; return False\n+                \n+            chunkQuery = int(alignInstance.range_query.seqname.replace("chunk",""))\n+            chunkSubject = int(alignInstance.range_subject.seqname.replace("chunk",""))\n+            \n+            if abs( chunkSubject - chunkQuery ) > 1:\n+                if self._verbose > 1: print "non contiguous chunks -> keep"\n+                alignInstance.write( outF )\n+                continue\n+            \n+            if alignInstance.range_query.isOnDirectStrand() != alignInstance.range_subject.isOnDirectStrand():\n+                if self._verbose > 1: print "on different strand"\n+                alignInstance.write( outF )\n+                continue\n+            \n+            if abs( chunkSubject - chunkQuery ) == 0:\n+                if alignInstance.range_query.start == 1 \\\n+                       and alignInstance.range_query.end == self._chunkLength \\\n+                       and alignInstance.range_subject.start == 1 \\\n+                       and alignInstance.range_subject.end == self._chunkLength:\n+                    if self._verbose > 1: print "self-alignment on whole chunk -> remove"\n+                    continue\n+                \n+            if self.isPairAlignAChunkOverlap( alignInstance, chunkQuery, chunkSubject ):\n+                if self._verbose > 1: print "chunk overlap -> remove"\n+                nbChunkOverlaps += 1\n+                \n+            elif self.isPairAlignWithinAndDueToAChunkOverlap( alignInstance, chunkQuery, chunkSubject ):\n+                if self._verbose > 1: print "within chunk overlap -> remove"\n+                nbPairAlignWithinChunkOverlaps += 1\n+                \n+            else:\n+                if self._verbose > 1: print "keep"\n+                alignInstance.write( outF )\n+                \n+        inF.close()\n+        if self._verbose > 0: print "nb of pairwise alignments in input file: %i" % ( totalNbPairAlign )\n+        if self._verbose > 0: print "nb of chunk overlaps: %i" % ( 
nbChunkOverlaps )\n+        if self._verbose > 0: print "nb of pairwise alignments within chunk overlaps: %i" % ( nbPairAlignWithinChunkOverlaps )\n+        \n+        for names,lAligns in d.items():\n+            for alignInstance in lAligns:\n+                alignInstance.write( outF )\n+        outF.close()\n+        \n+        \n+    def start( self ):\n+        """\n+        Useful commands before running the program.\n+        """\n+        if self._verbose > 0:\n+            print "START %s" % ( type(self).__name__ ); sys.stdout.flush()\n+        self.checkAttributes()\n+        \n+        \n+    def end( self ):\n+        """\n+        Useful commands before ending the program.\n+        """\n+        if self._verbose > 0:\n+            print "END %s" % ( type(self).__name__ ); sys.stdout.flush()\n+            \n+            \n+    def run( self ):\n+        """\n+        Run the program.\n+        """\n+        self.start()\n+        self.removeChunkOverlaps()\n+        self.end()\n+        \n+        \n+if __name__ == \'__main__\':\n+    i = RmvPairAlignInChunkOverlaps()\n+    i.setAttributesFromCmdLine()\n+    i.run()\n'
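For illustration (not part of the changeset): a possible invocation of the chunk-overlap filter above, using the flags listed in its help(). The chunk length and overlap must match the values used when the chunks were built (here the class defaults, 200000 and 10000 bp); the 'align' file name is a placeholder.

    $ python RmvPairAlignInChunkOverlaps.py -i project.align -l 200000 -o 10000 -m 10 -O project.align.not_over -v 1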
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/SpliceTEsFromGenome.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/SpliceTEsFromGenome.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,193 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import getopt
+
+from commons.core.sql.DbMySql import DbMySql
+from commons.core.seq.FastaUtils import FastaUtils
+from commons.core.coord.MapUtils import MapUtils
+from commons.core.coord.AlignUtils import AlignUtils
+from commons.core.coord.PathUtils import PathUtils
+
+
+class SpliceTEsFromGenome( object ):
+    
+    def __init__( self ):
+        self._inputData = ""
+        self._formatData = ""
+        self._genomeFile = ""
+        self._configFile = ""
+        self._outFile = ""
+        self._verbose = 0
+        self._db = None
+        
+        
+    def help( self ):
+        print
+        print "usage: SpliceTEsFromGenome.py [ options ]"
+        print "options:"
+        print "     -h: this help"
+        print "     -i: input TE coordinates (can be file or table)"
+        print "         TEs as subjects if align or path format"
+        print "     -f: format of the data (map/align/path)"
+        print "     -g: genome file (format=fasta)"
+        print "     -C: configuration file (if table as input)"
+        print "     -o: output fasta file (default=genomeFile+'.splice')"
+        print "     -v: verbosity level (default=0/1)"
+        print
+        
+        
+    def setAttributesFromCmdLine( self ):
+        try:
+            opts, args = getopt.getopt(sys.argv[1:],"hi:f:g:C:o:v:")
+        except getopt.GetoptError, err:
+            msg = "%s" % str(err)
+            sys.stderr.write( "%s\n" % msg )
+            self.help(); sys.exit(1)
+        for o,a in opts:
+            if o == "-h":
+                self.help(); sys.exit(0)
+            elif o == "-i":
+                self._inputData = a
+            elif o == "-f":
+                self._formatData = a
+            elif o == "-g":
+                self._genomeFile = a
+            elif o == "-C":
+                self._configFile = a
+            elif o =="-o":
+                self._outFile = a
+            elif o == "-v":
+                self._verbose = int(a)
+                
+                
+    def checkAttributes( self ):
+        if self._inputData == "":
+            msg = "ERROR: missing input data (-i)"
+            sys.stderr.write( "%s\n" % msg )
+            self.help()
+            sys.exit(1)
+        if not os.path.exists( self._inputData ):
+            if self._configFile == "":
+                msg = "ERROR: input '%s' is not an existing file and no configuration file was given (-C)" % ( self._inputData )
+                sys.stderr.write( "%s\n" % msg )
+                self.help()
+                sys.exit(1)
+            if not os.path.exists( self._configFile ):
+                msg = "ERROR: can't find config file '%s'" % ( self._configFile )
+                sys.stderr.write( "%s\n" % msg )
+                sys.exit(1)
+            self._db = DbMySql( cfgFileName=self._configFile )
+            if not self._db.doesTableExist( self._inputData ):
+                msg = "ERROR: can't find table '%s'" % ( self._inputData )
+                sys.stderr.write( "%s\n" % msg )
+                self.help()
+                sys.exit(1)
+        if self._formatData == "":
+            msg = "ERROR: need to specify the data format (-f)"
+            sys.stderr.write( "%s\n" % msg )
+            self.help()
+            sys.exit(1)
+        if self._formatData not in [ "map", "align", "path" ]:
+            msg = "ERROR: format '%s' not yet supported" % ( self._formatData )
+            sys.stderr.write( "%s\n" % msg )
+            self.help()
+            sys.exit(1)
+        if self._genomeFile == "":
+            msg = "ERROR: missing genome file (-g)"
+            sys.stderr.write( "%s\n" % msg )
+            self.help()
+            sys.exit(1)
+        if not os.path.exists( self._genomeFile ):
+            msg = "ERROR: can't find genome file '%s'" % ( self._genomeFile )
+            sys.stderr.write( "%s\n" % msg )
+            self.help()
+            sys.exit(1)
+        if self._outFile == "":
+            self._outFile = "%s.splice" % ( self._genomeFile )
+            if self._verbose > 0:
+                print "output fasta file: %s" % self._outFile
+                
+                
+    def getCoordsAsMapFile( self ):
+        if self._verbose > 0:
+            print "get TE coordinates as 'Map' file"
+            sys.stdout.flush()
+        if self._db != None:
+            cmd = "srptExportTable.py"
+            cmd += " -i %s" % ( self._inputData )
+            cmd += " -C %s" % ( self._configFile )
+            cmd += " -o %s.%s" % ( self._inputData, self._formatData )
+            returnStatus = os.system( cmd )
+            if returnStatus != 0:
+                msg = "ERROR while exporting data from table"
+                sys.stderr.write( "%s\n" % msg )
+                sys.exit(1)
+            self._inputData += ".%s" % ( self._formatData )
+           
+        if self._formatData == "map":
+            return self._inputData
+        elif self._formatData == "align":
+            mapFile = "%s.map" % ( self._inputData )
+            AlignUtils.convertAlignFileIntoMapFileWithSubjectsOnQueries( self._inputData, mapFile )
+            return mapFile
+        elif self._formatData == "path":
+            mapFile = "%s.map" % ( self._inputData )
+            PathUtils.convertPathFileIntoMapFileWithSubjectsOnQueries( self._inputData, mapFile )
+            return mapFile
+        
+        
+    def mergeCoordsInMapFile( self, mapFile ):
+        if self._verbose > 0:
+            print "merge TE coordinates"
+            sys.stdout.flush()
+        mergeFile = "%s.merge" % ( mapFile )
+        MapUtils.mergeCoordsInFile( mapFile, mergeFile )
+        if self._formatData != "map" or self._db != None:
+            os.remove( mapFile )
+        return mergeFile
+    
+    
+    def spliceFastaFromCoords( self, mergeFile ):
+        if self._verbose > 0:
+            print "splice TE copies from the genome"
+            sys.stdout.flush()
+        FastaUtils.spliceFromCoords( self._genomeFile,
+                                     mergeFile,
+                                     self._outFile )
+    
+        os.remove( mergeFile )
+        
+        
+    def start( self ):
+        self.checkAttributes()
+        if self._verbose > 0:
+            print "START SpliceTEsFromGenome.py"
+            sys.stdout.flush()
+            
+            
+    def end( self ):
+        if self._db != None:
+            self._db.close()
+        if self._verbose > 0:
+            print "END SpliceTEsFromGenome.py"
+            sys.stdout.flush()
+            
+            
+    def run( self ):
+        self.start()
+        
+        mapFile = self.getCoordsAsMapFile()
+
+        mergeFile = self.mergeCoordsInMapFile( mapFile )
+        
+        self.spliceFastaFromCoords( mergeFile )
+        
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = SpliceTEsFromGenome()
+    i.setAttributesFromCmdLine()
+    i.run()
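For illustration (not part of the changeset): two possible calls to the splicer above, one with a 'map' file and one with a 'path' table; the second mirrors the command built by SplicerFromAnnotation below. File, table and configuration names are placeholders.

    $ python SpliceTEsFromGenome.py -i annotations.map -f map -g genome.fa -v 1
    $ python SpliceTEsFromGenome.py -i project_annotationToSplice_path -f path -g genome.fa -o genome.fa.splice -C project.cfg -v 2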
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/SplicerFromAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/SplicerFromAnnotation.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,218 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import ConfigParser\n+\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.parsing.FastaParser import FastaParser\n+from ConfigParser import MissingSectionHeaderError\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.LoggerFactory import LoggerFactory\n+\n+#TODO: use configuration file\n+\n+LOG_DEPTH = "repet.tools"\n+\n+## Get 3 annotation files, using output from TEannot:\n+#- consensus with one or more full length copy, \n+#- consensus with one or more full length fragment,\n+#- consensus without copy\n+\n+class SplicerFromAnnotation(object):\n+    \n+    def __init__(self, inInfoFileName = "", tableName = "", verbose = 0):\n+        self._inInfoFileName = inInfoFileName\n+        self._tableName = tableName\n+        self._verbosity = verbose\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+    \n+    def setAttributesFromCmdLine(self):\n+        desc = "Splice annotations from genome. These annotations are Full Length Copy or Full Length Fragment according to consensus."\n+        desc += "A TEs library and annotation are necessary. 
Connection to the database parameters are retrieved from the environment"\n+        \n+        examples = "\\nExample : with a project called \\"MyTEannotAnalysis\\":\\n"\n+        examples += "\\t$ python SplicerFromAnnotation.py -i inputFastaFileName -C configFileName -t MyTEannotAnalysis_refTEs_seq "\n+        examples += "\\n\\t"\n+        examples += "\\n\\n"\n+        \n+        parser = RepetOptionParser(description = desc, epilog = examples)\n+        parser.add_option("-i", "--file",      dest = "inputFastaFileName",  action = "store", type = "string", help = "input file (mandatory) = output file with .splice)")\n+        parser.add_option("-C", "--config",    dest = "configFileName",     action = "store", type = "string", help = "config file name to set database connection", default = "")\n+        parser.add_option("-t", "--copyType",  dest = "copyType" ,          action = "store"'..b'      self._logAndRaise("Input fasta file does not exist!")\n+        else:\n+            self._logAndRaise("No specified -i option! It is mandatory")\n+       \n+        if self._outputFileName =="":\n+            self._outputFileName = os.path.basename(self._inputFastaFileName)+\'.splice\'\n+        \n+        if self._copyType!=1 or self._copyType!=2:\n+            self._logAndRaise("Copy type must be only 1 or 2!")\n+        if self._configFileName != "":\n+            iDb = DbMySql(cfgFileName = self._configFileName)\n+            iDb.close()\n+        else:\n+            self._logAndRaise("No specified config file name!")\n+\n+                    \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        self.checkOptions()\n+        \n+        msg = "START SplicerFromAnnotation"\n+        msg += "\\n input info file: %s" % self._inputFastaFileName\n+        msg += "\\n Copy type is: %s" % self._copyType\n+        msg += "\\n identity is: %s" % self._identity\n+        msg += "\\n host is: %s" % os.environ["REPET_HOST"]\n+        msg += "\\n user is: %s" % os.environ["REPET_USER"]\n+        msg += "\\n DB is: %s" % os.environ["REPET_DB"] \n+        msg += "\\n port is: %s" % os.environ["REPET_PORT"]\n+        self._log.debug("%s\\n" % msg)\n+        \n+        cmd="PostAnalyzeTELib.py -a 3 -p %s_chr_allTEs_nr_noSSR_join_path -s %s_refTEs_seq -g %s" % (self._projectName,self._projectName,self.genomeSize)       \n+        os.system(cmd)\n+        \n+        cmd="GetSpecificTELibAccordingToAnnotation.py -i %s_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE.tab -t %s_refTEs_seq -v 2" % (self._projectName,self._projectName)\n+        os.system(cmd) \n+\n+        if self._copyType == 1 :\n+            f = open("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_FullLengthCopy.txt", "r")\n+        else :\n+            f = open("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_FullLengthFrag.txt", "r")\n+        \n+        lines=f.readlines()[1:]\n+        if len(lines)>0:\n+            lConsensusHeader_copyType=[i.split(\'\\t\',1)[0] for i in lines] \n+            db = DbFactory.createInstance()\n+            \n+            sql_cmd = "CREATE TABLE %s_annotationIdentitySup%d_path SELECT * FROM %s_chr_allTEs_nr_noSSR_join_path where identity >=%f" % ( self._projectName,int(self._identity),self._projectName,self._identity)\n+            db.execute( sql_cmd )\n+    \n+            iTPA = TablePathAdaptator(db, "%s_annotationIdentitySup%d_path" % (self._projectName, int(self._identity)))\n+            lAllDistinctPath=[]\n+            for 
consensusName in lConsensusHeader_copyType:\n+                lDistinctPath = iTPA.getIdListFromSubject(consensusName)\n+                lAllDistinctPath=lAllDistinctPath+lDistinctPath\n+            \n+            iTPA = TablePathAdaptator(db,"%s_chr_allTEs_nr_noSSR_join_path" % self._projectName)\n+            sql_cmd = "CREATE TABLE  %s_annotationToSplice_path LIKE %s_chr_allTEs_nr_noSSR_join_path" % ( self._projectName, self._projectName )\n+            db.execute( sql_cmd )\n+\n+            for pathId in lAllDistinctPath:\n+                sql_cmd = "INSERT INTO %s_annotationToSplice_path SELECT * FROM %s_chr_allTEs_nr_noSSR_join_path where path =%d" % ( self._projectName, self._projectName, pathId )\n+                db.execute( sql_cmd )           \n+            db.close()\n+            \n+            cmd="SpliceTEsFromGenome.py -i %s_annotationToSplice_path -f path -g %s -o %s -C %s -v 2" % (self._projectName, self._inputFastaFileName, self._outputFileName, self._configFileName)       \n+            os.system(cmd)\n+            \n+        else : \n+            msg = "There is no consensus in this copy type.\\n"\n+            self._log.info(msg)\n+        f.close()     \n+        \n+        self._log.info("END SplicerFromAnnotation")\n+        return 0\n+\n+if __name__ == \'__main__\':\n+    iGetTELib = SplicerFromAnnotation()\n+    iGetTELib.setAttributesFromCmdLine()\n+    iGetTELib.run() \n+    \n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/TEclassifierPE.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/TEclassifierPE.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,193 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import os\n+import sys\n+\n+if not "REPET_PATH" in os.environ.keys():\n+    print "ERROR: no environment variable REPET_PATH"\n+    sys.exit(1)\n+sys.path.append(os.environ["REPET_PATH"])\n+if not "PYTHONPATH" in os.environ.keys():\n+    os.environ["PYTHONPATH"] = os.environ["REPET_PATH"]\n+else:\n+    os.environ["PYTHONPATH"] = "%s:%s" % (os.environ["REPET_PATH"], os.environ["PYTHONPATH"])\n+    \n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.checker.ConfigChecker import ConfigRules\n+from commons.core.checker.ConfigChecker import ConfigChecker\n+from commons.core.seq.FastaUtils import FastaUtils\n+from denovo_pipe.ReverseComplementAccordingToClassif import ReverseComplementAccordingToClassif\n+from denovo_pipe.RenameHeaderClassif import RenameHeaderClassif\n+from denovo_pipe.DetectTEFeatures import DetectTEFeatures\n+from denovo_pipe.LaunchPASTEC import LaunchPASTEC\n+from PASTEC.StatPastec import StatPastec\n+\n+LOG_DEPTH = "repet.tools"\n+#LOG_FORMAT = "%(message)s"\n+\n+####TEclassifier PASTEC Edition\n+#\n+class TEclassifierPE(object):\n+    \n+    def __init__(self, fastaFileName = "", configFileName = "", addWickerCode = False, reverseComp = False, doClean = False, verbosity = 0):\n+        self._fastaFileName = fastaFileName\n+        self._addWickerCode = addWickerCode\n+        self._reverseComp = reverseComp\n+        self._configFileName = configFileName\n+        self._doClean = doClean\n+        self._verbosity = verbosity\n+        self._projectName = ""\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "TE classifier PASTEC Edition.\\n"\n+        description += "Detect TE features 
on consensus and classify them. Give some classification statistics.\\n"\n+        description += "Can rename headers with classification info and Wicker\'s code at the beginning.\\n"\n+        description += "Can reverse-complement consensus if they are detected in reverse strand.\\n"\n+        description += "Warning : it\'s highly advised to use sequences in upper case.\\n"\n+        epilog = "\\n"\n+        epilog += "Example 1: launch and clean temporary files\\n"\n+        epilog += "\\t$ python TEclassifierPE.py -i consensus.fa -'..b'ions.reverseComp)\n+        self.setConfigFileName(options.configFileName)\n+        self.setDoClean(options.doClean)\n+        self.setVerbosity(options.verbosity)\n+\n+    def _checkConfig(self):       \n+        iConfigRules = ConfigRules()\n+        iConfigRules.addRuleOption(section="project", option ="project_name", mandatory=True, type="string")\n+        sectionName = "classif_consensus"\n+        iConfigRules.addRuleOption(section=sectionName, option ="clean", mandatory=True, type="bool")\n+        iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n+        iConfig = iConfigChecker.getConfig()\n+        self._setAttributesFromConfig(iConfig)\n+        \n+    def _setAttributesFromConfig(self, iConfig):\n+        self.setProjectName(iConfig.get("project", "project_name"))\n+        sectionName = "classif_consensus"\n+        self.setDoClean(iConfig.get(sectionName, "clean"))\n+        \n+    def setFastaFileName(self, fastaFileName):\n+        self._fastaFileName = fastaFileName\n+        \n+    def setConfigFileName(self, configFileName):\n+        self._configFileName = configFileName\n+        \n+    def setAddWickerCode(self, addWickerCode):\n+        self._addWickerCode = addWickerCode\n+        \n+    def setReverseComp(self, reverseComp):\n+        self._reverseComp = reverseComp\n+        \n+    def setDoClean(self, doClean):\n+        self._doClean = doClean\n+        \n+    def setVerbosity(self, verbosity):\n+        self._verbosity = verbosity\n+        \n+    def setProjectName(self, projectName):\n+        self._projectName = projectName\n+        \n+    def _checkOptions(self):\n+        if self._fastaFileName == "":\n+            self._logAndRaise("ERROR: Missing input fasta file name")\n+            \n+    def _logAndRaise(self, errorMsg):\n+        self._log.error(errorMsg)\n+        raise Exception(errorMsg)\n+    \n+    def run(self):\n+        LoggerFactory.setLevel(self._log, self._verbosity)\n+        if self._configFileName:\n+            self._checkConfig()\n+        self._checkOptions()\n+        self._log.info("START TEclassifier PASTEC Edition")\n+        self._log.debug("Fasta file name: %s" % self._fastaFileName)\n+        nbSeq = FastaUtils.dbSize(self._fastaFileName)\n+        self._log.debug("Total number of sequences: %i)" % nbSeq)\n+\n+        #TODO: add step => avoid to re-launch DetectTEFeatures, if error with PASTEC (e.g. 
wrong bank format)\n+        #step 1\n+        iDF = DetectTEFeatures(self._fastaFileName, self._projectName, self._configFileName, self._doClean, self._verbosity)\n+        iDF.run()\n+        \n+        #step 2\n+        iLP = LaunchPASTEC(configFileName = self._configFileName, inputFileName = self._fastaFileName, projectName = self._projectName, verbose = self._verbosity)\n+        iLP.run()\n+        \n+        classifFileName = "%s.classif" % self._projectName\n+\n+        iSP = StatPastec(classifFileName)\n+        iSP.run()\n+        \n+        if self._reverseComp:\n+            self._log.info("Reverse complement...")\n+            iRevComplAccording2Classif = ReverseComplementAccordingToClassif()\n+            iRevComplAccording2Classif.setFastaFile(self._fastaFileName)\n+            iRevComplAccording2Classif.setClassifFile(classifFileName)\n+            iRevComplAccording2Classif.run()\n+            tmpFastaFileName = "%s_negStrandReversed.fa" % os.path.splitext(self._fastaFileName)[0]\n+        else:\n+            tmpFastaFileName = self._fastaFileName\n+\n+        if self._addWickerCode:\n+            self._log.info("Rename headers according to Wicker\'s code...")\n+            iRHC = RenameHeaderClassif(classifFileName, tmpFastaFileName, self._projectName)\n+            iRHC.setOutputFileName("")\n+            iRHC.run()\n+            if self._doClean:\n+                os.remove(tmpFastaFileName)\n+        \n+        self._log.info("END TEclassifier PASTEC Edition")\n+\n+if __name__ == "__main__":\n+    iLaunch = TEclassifierPE()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
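For illustration (not part of the changeset): the classifier can also be driven programmatically through the constructor and run() method shown above. It still requires the REPET_PATH environment variable and a configuration file providing [project] project_name and [classif_consensus] clean; file and configuration names below are placeholders.

    from commons.tools.TEclassifierPE import TEclassifierPE

    iLaunch = TEclassifierPE(fastaFileName = "consensus.fa", configFileName = "TEdenovo.cfg",
                             addWickerCode = True, reverseComp = True, doClean = True, verbosity = 2)
    iLaunch.run()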
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/TEclassifierPE_parallelized.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/TEclassifierPE_parallelized.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,261 @@\n+#!/usr/bin/env python\n+\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import os\n+import sys\n+import shutil\n+\n+if not "REPET_PATH" in os.environ.keys():\n+    print "ERROR: no environment variable REPET_PATH"\n+    sys.exit(1)\n+sys.path.append(os.environ["REPET_PATH"])\n+if not "PYTHONPATH" in os.environ.keys():\n+    os.environ["PYTHONPATH"] = os.environ["REPET_PATH"]\n+else:\n+    os.environ["PYTHONPATH"] = "%s:%s" % (os.environ["REPET_PATH"], os.environ["PYTHONPATH"])\n+    \n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.checker.ConfigChecker import ConfigRules\n+from commons.core.checker.ConfigChecker import ConfigChecker\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.launcher.Launcher import Launcher\n+from denovo_pipe.ReverseComplementAccordingToClassif import ReverseComplementAccordingToClassif\n+from denovo_pipe.DetectTEFeatures_parallelized import DetectTEFeatures_parallelized\n+from denovo_pipe.RenameHeaderClassif import RenameHeaderClassif\n+from denovo_pipe.LaunchPASTEC import LaunchPASTEC\n+from PASTEC.StatPastec import StatPastec\n+\n+LOG_DEPTH = "repet.tools"\n+#LOG_FORMAT = "%(message)s"\n+\n+####TEclassifier PASTEC Edition - parallelized\n+#\n+class TEclassifierPE_parallelized(object):\n+    \n+    def __init__(self, fastaFileName = "", configFileName = "", addWickerCode = False, reverseComp = False, doClean = False, verbosity = 0):\n+        self._fastaFileName = fastaFileName\n+        self._addWickerCode = addWickerCode\n+        self._reverseComp = reverseComp\n+        self._configFileName = configFileName\n+        self._doClean = doClean\n+        self._verbosity = 
verbosity\n+        self._projectName = ""\n+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+        \n+    def setAttributesFromCmdLine(self):\n+        description = "TE classifier PASTEC Edition.\\n"\n+        description += "Detect TE features on consensus and classify them. Give some classification statistics.\\n"\n+        description += "Can rename headers with classification info and Wicker\'s code at the beginning.\\n"\n+        description += "Can reverse-comple'..b'f._configFileName:\n+            self._checkConfig()\n+        self._checkOptions()\n+        self._log.info("START TEclassifier PASTEC Edition")\n+        self._log.debug("Fasta file name: %s" % self._fastaFileName)\n+        nbSeq = FastaUtils.dbSize(self._fastaFileName)\n+        self._log.debug("Total number of sequences: %i)" % nbSeq)\n+\n+        self._log.debug("Launch DetectTEFeatures on each batch")\n+        iDF = DetectTEFeatures_parallelized(self._fastaFileName, self._projectName, self._configFileName, self._doClean, self._verbosity)\n+        iDF.run()\n+        \n+        self._log.debug("Insert banks in database")\n+        iLP = LaunchPASTEC(self._configFileName, "1", projectName = self._projectName, verbose = self._verbosity)\n+        iLP.run()\n+        \n+        self._log.info("Split fasta file")\n+        if self._maxJobNb == 0 or nbSeq / self._maxJobNb <= 1.0:\n+            nbSeqPerBatch = nbSeq\n+        else:\n+            nbSeqPerBatch = nbSeq / self._maxJobNb + 1\n+        FastaUtils.dbSplit(self._fastaFileName, nbSeqPerBatch, True, verbose = self._verbosity - 2)\n+        \n+        self._log.info("Launch PASTEC on each batch")\n+        queue = self._resources\n+        cDir = os.getcwd()\n+        if self._tmpDir != "":\n+            tmpDir = self._tmpDir\n+        else:\n+            tmpDir = cDir\n+            \n+        #TODO: allow not to parallelize\n+        groupid = "%s_PASTEC" % self._projectName\n+        acronym = "PASTEC"\n+        iDb = DbFactory.createInstance()\n+        iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")\n+        iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)\n+        lCmdsTuples = []\n+        lFiles = FileUtils.getFileNamesList("%s/batches" % cDir, "batch_")\n+        if len(lFiles) == 0:\n+            self._logAndRaise("ERROR: directory \'batches\' is empty")\n+        classifFileName = "%s.classif" % self._projectName\n+        count = 0\n+        for file in lFiles:\n+            count += 1\n+            lCmds = [self.getPASTECcommand(iLauncher, file)]    \n+            lCmdStart = []\n+            lCmdStart.append("shutil.copy(\\"%s/batches/%s\\", \\".\\")" % (cDir, file))\n+            lCmdStart.append("shutil.copy(\\"%s/%s\\", \\".\\")" % (cDir, self._configFileName))\n+            lCmdFinish = []\n+            lCmdFinish.append("shutil.move(\\"%s\\", \\"%s/%s_%i\\")" % (classifFileName, cDir, classifFileName, count))\n+            lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))\n+        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._doClean)  \n+        \n+        FileUtils.catFilesByPattern("%s_*" % classifFileName, classifFileName)\n+        if self._doClean:\n+            FileUtils.removeFilesByPattern("%s_*" % classifFileName)\n+            shutil.rmtree("batches")\n+\n+        self._log.debug("Compute stats about classification")\n+        iSP = 
StatPastec(classifFileName)\n+        iSP.run()\n+        \n+        if self._reverseComp:\n+            self._log.debug("Reverse complement")\n+            iRevComplAccording2Classif = ReverseComplementAccordingToClassif()\n+            iRevComplAccording2Classif.setFastaFile(self._fastaFileName)\n+            iRevComplAccording2Classif.setClassifFile(classifFileName)\n+            iRevComplAccording2Classif.run()\n+            newFastaFileName = "%s_negStrandReversed.fa" % os.path.splitext(self._fastaFileName)[0]\n+        else:\n+            newFastaFileName = self._fastaFileName\n+\n+        if self._addWickerCode:\n+            self._log.debug("Rename headers according to Wicker\'s code")\n+            iRHC = RenameHeaderClassif(classifFileName, newFastaFileName, self._projectName)\n+            iRHC.setOutputFileName("")\n+            iRHC.run()\n+        \n+        self._log.info("END TEclassifier PASTEC Edition")\n+\n+if __name__ == "__main__":\n+    iLaunch = TEclassifierPE_parallelized()\n+    iLaunch.setAttributesFromCmdLine()\n+    iLaunch.run()        \n\\ No newline at end of file\n'
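
As a reading aid: the run() method of TEclassifierPE_parallelized sizes the PASTEC batches from the number of consensus sequences and the job limit. A standalone restatement of that arithmetic, with hypothetical numbers and Python 2 style integer division as in the original:

    def seq_per_batch(nb_seq, max_job_nb):
        # mirrors run(): a single batch when no job limit is set (or when there
        # are roughly fewer sequences than jobs), otherwise spread the
        # sequences over at most max_job_nb batches
        if max_job_nb == 0 or nb_seq // max_job_nb <= 1:
            return nb_seq
        return nb_seq // max_job_nb + 1

    print(seq_per_batch(1000, 30))   # 34 sequences per batch, i.e. about 30 PASTEC jobs
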
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/blast2align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/blast2align.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+import sys
+import getopt
+
+
+def help():
+    print
+    print "usage: blast2align.py [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: input file name (format=tabulated BLAST)"
+    print "     -o: output file name (format=align, default=inFileName+'.align')"
+    print
+
+
+def blast2align( inFile, outFile ):
+    inFileHandler = open( inFile, "r" )
+    outFileHandler = open( outFile, "w" )
+    while True:
+        line = inFileHandler.readline()
+        if line == "":
+            break
+        if line[0] != "#":
+            data = line.split("\t")
+            qryName = data[0]
+            sbjName = data[1]
+            percId = data[2]
+            qryStart = data[6]
+            qryEnd = data[7]
+            sbjStart = data[8]
+            sbjEnd = data[9]
+            Eval = data[10]
+            bitScore = data[11][:-1]
+            string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Eval, bitScore, percId )
+            outFileHandler.write( string )
+    inFileHandler.close()
+    outFileHandler.close()
+    
+    
+def main():
+    inFileName = ""
+    outFileName = ""
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:o:")
+    except getopt.GetoptError, err:
+        sys.stderr.write( "%s\n" % str(err) )
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+  
+    if  inFileName == "":
+        msg =  "ERROR: missing input file name (-i)"
+        sys.stderr.write( "%s\n" % msg )
+        help()
+        sys.exit(1)
+
+    if outFileName == "":
+        outFileName = inFileName + ".align"
+
+    blast2align( inFileName, outFileName )
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
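
For reference, the column shuffle performed by blast2align() can be shown on a single tabulated BLAST line (the values below are made up; the columns are the standard 12-column BLAST tabular output):

    blast_line = "chr1\tconsensus42\t97.5\t800\t18\t2\t101\t900\t1\t795\t1e-50\t1234"
    f = blast_line.split("\t")
    # query, qry start, qry end, subject, sbj start, sbj end, E-value, bit score, % identity
    align_line = "\t".join([f[0], f[6], f[7], f[1], f[8], f[9], f[10], f[11], f[2]])
    print(align_line)
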
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/dbBestLength.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/dbBestLength.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+## @file
+# This program extracts the n longest sequences from the input fasta file.
+# usage: dbBestLength.py [ options ]
+# options:
+#      -h: this help
+#      -i: name of the input fasta file
+#      -n: maximum number of sequences in the output file (default=20)
+#      -o: name of the output fasta file (default=inFileName+'.best20')
+#      -v: verbose (default=0/1/2)
+
+import os
+import sys
+import getopt
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "*** Error: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+from pyRepet.seq.fastaDB import * 
+
+
+def help():
+    """
+    Give the list of the command-line options.
+    """
+    print
+    print "usage: dbBestLength.py [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input fasta file"
+    print "     -n: maximum number of sequences in the output file (default=20)"
+    print "     -o: name of the output fasta file (default=inFileName+'.best20')"
+    print "     -v: verbose (default=0/1/2)"
+    print
+
+
+def main():
+    """
+    This program extracts the n longest sequences from the input fasta file.
+    """
+
+    inFileName = ""
+    nbSeq = 20
+    outFileName = ""
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:n:o:v:")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-n":
+            nbSeq = int(a)
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+
+    if  inFileName == "":
+        print "ERROR: missing input file (-i)"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "START dbBestLength.py"
+        sys.stdout.flush()
+
+    if outFileName == "":
+        outFileName = "%s.best%s" % ( inFileName, nbSeq )
+
+    log = dbBestLength( nbSeq, inFileName, outFileName, verbose )
+    if log != 0:
+        print "ERROR: dbBestLength() returned %i" % ( log )
+        sys.exit(1)
+
+    if verbose > 0:
+        print "END dbBestLength.py"
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
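
The selection itself is delegated to pyRepet.seq.fastaDB.dbBestLength, which is not part of this changeset. A minimal, dependency-free sketch of the intended behaviour (keep the n longest sequences), with hypothetical file names:

    def best_length(in_fasta, out_fasta, n=20):
        seqs, header = {}, None
        for line in open(in_fasta):
            line = line.rstrip()
            if line.startswith(">"):
                header = line[1:]
                seqs[header] = ""
            elif header is not None:
                seqs[header] += line
        longest = sorted(seqs.items(), key=lambda item: len(item[1]), reverse=True)[:n]
        out = open(out_fasta, "w")
        for h, s in longest:
            out.write(">%s\n%s\n" % (h, s))
        out.close()

    best_length("consensus.fa", "consensus.fa.best20", 20)
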
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/dbConsensus.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/dbConsensus.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import sys
+import getopt
+
+##@file
+# usage: dbConsensus.py [ options ]
+# options:
+#      -h: this help
+#      -i: name of the input file (format=aligned fasta)
+#      -n: minimum number of nucleotides in a column to edit a consensus (default=1)
+#      -p: minimum proportion for the major nucleotide to be used, otherwise add 'N' (default=0.0)
+#      -o: name of the output file (default=inFileName+'.cons')
+#      -v: verbose (default=0/1/2)
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "ERROR: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+import commons.core.seq.AlignedBioseqDB
+
+
+def help():
+    """
+    Give the list of the command-line options.
+    """
+    print
+    print "usage:",sys.argv[0]," [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format=aligned fasta)"
+    print "     -n: minimum number of nucleotides in a column to edit a consensus (default=1)"
+    print "     -p: minimum proportion for the major nucleotide to be used, otherwise add 'N' (default=0.0)"
+    print "     -o: name of the output file (default=inFileName+'.cons')"
+    print "     -H: format the header with pyramid and piles informations (SATannot)"
+    print "     -v: verbose (default=0/1/2)"
+    print
+
+
+def main():
+
+    inFileName = ""
+    minNbNt = 1
+    minPropNt = 0.0
+    outFileName = ""
+    header_SATannot = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:n:p:o:v:H")
+    except getopt.GetoptError, err:
+        print str(err); help(); sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-n":
+            minNbNt = int(a)
+        elif o == "-p":
+            minPropNt = float(a)
+        elif o == "-o":
+            outFileName = a
+        elif o == "-H":
+            header_SATannot = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if inFileName == "":
+        print "ERROR: missing input file name"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    alnDB = commons.core.seq.AlignedBioseqDB.AlignedBioseqDB( inFileName )
+
+    if alnDB.getSize() < minNbNt:
+        print "WARNING: not enough sequences (<%i)" % ( minNbNt )
+
+    else:
+        consensus = alnDB.getConsensus( minNbNt, minPropNt, verbose,  header_SATannot)
+        if consensus != None:
+            consensus.upCase()
+            if outFileName == "":
+                outFileName = "%s.cons" % ( inFileName )
+            outFile = open( outFileName, "w" )
+            consensus.write( outFile )
+            outFile.close()
+
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+if __name__ == "__main__":
+    main ()
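
The consensus itself is computed by AlignedBioseqDB.getConsensus, which is not shown here. A rough per-column sketch of the rule suggested by the -n and -p options, assuming gaps are simply ignored when counting (an assumption of this sketch, not a statement about getConsensus):

    def column_consensus(column, min_nb_nt=1, min_prop_nt=0.0):
        counts = {}
        for nt in column:
            if nt != "-":
                counts[nt] = counts.get(nt, 0) + 1
        total = sum(counts.values())
        if total < min_nb_nt:
            return ""          # too few nucleotides in the column: skipped in this sketch
        nt, best = max(counts.items(), key=lambda item: item[1])
        if float(best) / total < min_prop_nt:
            return "N"         # no sufficiently major nucleotide: ambiguous position
        return nt

    print(column_consensus("AAAT-", min_nb_nt=2, min_prop_nt=0.5))   # 'A'
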
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/dbShuffle.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/dbShuffle.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import os
+import sys
+import getopt
+
+from commons.core.seq.FastaUtils import FastaUtils
+
+
+def help():
+    print
+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )
+    print "options:"
+    print "     -h: this help"
+    print "     INPUT: use '-i' or '-I'"
+    print "        -i: name of the input file (fasta format)"
+    print "        -I: name of the input directory (containing fasta files)"
+    print "     OUTPUT: use '-o' or '-O'"
+    print "         -o: name of the output file (use only with '-i')"
+    print "         -O: name of the output directory (use only with '-I')"
+    print "             output file are: prefix of input fasta file + '_shuffle.fa')"
+    print "     -v: verbose (default=0/1/2)"
+    print
+
+
+def main():
+    inData = ""
+    outData = ""
+    verbose = 0
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hi:I:o:O:v:" )
+    except getopt.GetoptError, err:
+        sys.stderr.write( "%s\n" % str(err) )
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inData = a
+        elif o == "-I":
+            inData = a
+        elif o == "-o":
+            outData = a
+        elif o == "-O":
+            outData = a
+        elif o == "-v":
+            verbose = int(a)
+
+    if inData == "" or ( not os.path.isfile( inData ) \
+                         and not os.path.isdir( inData ) ):
+        msg = "ERROR: missing input file or directory (-i or -I)"
+        sys.stderr.write( "%s\n" % msg )
+        help()
+        sys.exit(1)
+
+    if outData == "":
+        print "ERROR: missing name of output file or directory (-o or -O)"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "START %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+        
+    FastaUtils.dbShuffle( inData, outData, verbose )
+    
+    if verbose > 0:
+        print "END %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
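
FastaUtils.dbShuffle is not included in this changeset. Assuming it writes, for every input sequence, a copy whose residues are randomly shuffled (an assumption; such shuffled databanks are typically used as randomized controls), a minimal equivalent of the per-sequence operation would be:

    import random

    def shuffle_sequence(seq):
        letters = list(seq)
        random.shuffle(letters)       # in-place shuffle of the residues
        return "".join(letters)

    print(shuffle_sequence("ACGTACGTAAAT"))
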
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/dbSplit.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/dbSplit.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+
+##@file
+# Split the input fasta file in several output files
+# usage: dbSplit.py [ options ]
+# options:
+#      -h: this help
+#      -i: name of the input file (format='fasta')
+#      -n: number of sequences per output file (default=1)
+#      -d: record the output fasta files in a directory called 'batches'
+#      -s: use the sequence header if '-n 1' (otherwise 'batch_00X')
+#      -p: use a prefix for the output files (default='batch')
+#      -v: verbose (default=0/1)
+
+
+import sys
+import getopt
+
+from commons.core.seq.FastaUtils import FastaUtils
+
+
+## Give the list of the command-line options
+#
+def help():
+    print
+    print "usage: dbSplit.py [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -n: number of sequences per batch file (default=1)"
+    print "     -d: record the output fasta files in a directory called 'batches'"
+    print "     -s: use the sequence header if '-n 1' (otherwise 'batch_00X')"
+    print "     -p: use a prefix for the output files (default='batch')"
+    print "     -v: verbosity level (default=0/1/2)"
+    print
+
+
+## Split the input fasta file in several output files
+#
+def main():
+    inFile = ""
+    nbSeqPerBatch = 1
+    newDir = False
+    useSeqHeader = False
+    prefix = "batch"
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hi:n:dsp:v:" )
+    except getopt.GetoptError, err:
+        sys.stderr.write( "%s\n" % ( str(err) ) )
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFile = a
+        elif o == "-n":
+            nbSeqPerBatch = int(a)
+        elif o == "-d":
+            newDir = True
+        elif o == "-s":
+            useSeqHeader = True
+        elif o == "-p":
+            prefix = a
+        elif o == "-v":
+            verbose = int(a)
+
+    if inFile == "":
+        msg = "ERROR: missing input file (-i)"
+        sys.stderr.write( "%s\n" % ( msg ) )
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "START %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+
+    FastaUtils.dbSplit( inFile, nbSeqPerBatch, newDir, useSeqHeader, prefix, verbose )
+
+    if verbose > 0:
+        print "END %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
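
FastaUtils.dbSplit is likewise outside this changeset. A dependency-free sketch of the splitting behaviour described by the options above (-n sequences per output file), using a simplified '<prefix>_<k>.fa' naming instead of the tool's own batch naming, on a hypothetical input file:

    def db_split(in_fasta, nb_seq_per_batch=1, prefix="batch"):
        batch, count, out = 0, 0, None
        for line in open(in_fasta):
            if line.startswith(">"):
                if count % nb_seq_per_batch == 0:
                    if out:
                        out.close()
                    batch += 1
                    out = open("%s_%i.fa" % (prefix, batch), "w")
                count += 1
            out.write(line)
        if out:
            out.close()

    db_split("consensus.fa", nb_seq_per_batch=100)
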
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/filterOutMatcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/filterOutMatcher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+import logging
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "ERROR: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+import pyRepet.coord.MatchDB
+import pyRepet.seq.BioseqDB
+
+
+def help():
+    """
+    Give the list of the command-line options.
+    """
+    print
+    print "usage: %s [ options ]" % ( sys.argv[0] )
+    print "options:"
+    print "     -h: this help"
+    print "     -q: fasta filename of the queries"
+    print "     -s: fasta filename of the subjects (same as queries if left blank)"
+    print "     -m: output file from Matcher (format='tab')"
+    print "     -o: name of the output query file (format=fasta, default=qryFileName+'.filtered')"
+    print "     -i: identity threshold (default=0.95)"
+    print "     -l: length threshold (default=0.98)"
+    print "     -L: name of a 'log' file (usually from 'rmvRedundancy.py')"
+    print "     -v: verbose (default=0/1)"
+    print
+
+
+def writeOutQuery( qryDB, outFileName, lQryToKeep ):
+    """
+    Write in a fasta file the queries that haven't been filtered (i.e. they are not included in any subject).
+    """
+    outFile = open( outFileName, "w" )
+    nbRmvSeq = 0
+    for bs in qryDB.db:
+        if bs.header in lQryToKeep:
+            bs.write( outFile )
+        else:
+            nbRmvSeq += 1
+    outFile.close()
+    if verbose > 0:
+        print "%i removed queries out of %i" % ( nbRmvSeq, qryDB.getSize() ); sys.stdout.flush()
+
+
+def main():
+    """
+    This program filters the output from Matcher by removing queries 'included' in subjects.
+    """
+    qryFileName = ""
+    sbjFileName = ""
+    tabFileName = ""
+    outFileName = ""
+    thresIdentity = 0.95   # remove the seq if it is identical to 95% of another seq
+    thresLength = 0.98   # and if its length is 98% of that seq
+    logFileName = ""
+    global verbose
+    verbose = 0
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"h:q:s:m:o:i:l:L:v:")
+    except getopt.GetoptError:
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-q":
+            qryFileName = a 
+        elif o == "-s":
+            sbjFileName = a 
+        elif o == "-m":
+            tabFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-i":
+            thresIdentity = float(a) 
+        elif o == "-l":
+            thresLength = float(a)
+        elif o == "-L":
+            logFileName = a
+        elif o == "-v":
+            verbose = int(a)
+    if qryFileName == "" or tabFileName == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    # prepare the 'log' file
+    handler = logging.FileHandler( logFileName )
+    formatter = logging.Formatter( "%(asctime)s %(levelname)s: %(message)s" )
+    handler.setFormatter( formatter )
+    logging.getLogger('').addHandler( handler )
+    logging.getLogger('').setLevel( logging.DEBUG )
+    logging.info( "use '%s' on '%s'" % ( sys.argv[0].split("/")[-1], tabFileName ) )
+
+    if sbjFileName == "":
+        sbjFileName = qryFileName
+    if outFileName == "":
+        outFileName = "%s.filtered" % ( qryFileName )
+
+    # load the input fasta file corresponding to the queries
+    qryDB = pyRepet.seq.BioseqDB.BioseqDB( qryFileName )
+    if sbjFileName != qryFileName:
+        string = "nb of input sequences (as query only): %i" % ( qryDB.getSize() ); sys.stdout.flush()
+        logging.info( string )
+        if verbose > 0: print string
+    else:
+        string = "nb of input sequences (as query and subject): %i" % ( qryDB.getSize() ); sys.stdout.flush()
+        logging.info( string )
+        if verbose > 0: print string
+
+    # load the input 'tab' file
+    matchDB = pyRepet.coord.MatchDB.MatchDB()
+    tabFile = open( tabFileName, "r" )
+    matchDB.read( tabFile, thresIdentity, thresLength, verbose )
+    tabFile.close()
+    longString = ""
+    string = "nb of matches (id>=%.2f,qlgth>=%.2f): %i" % ( thresIdentity, thresLength, matchDB.getNbMatchesWithThres( thresIdentity, thresLength ) )
+    longString += "\n%s" % ( string )
+    if verbose > 0: print string
+    string = "nb of distinct queries having matches (id>=%.2f,qlgth>=%.2f): %i" % ( thresIdentity, thresLength, matchDB.getNbDistinctQryWithThres( thresIdentity, thresLength ) )
+    longString += "\n%s" % ( string )
+    if verbose > 0: print string
+    logging.info( longString )
+    sys.stdout.flush()
+
+    lQryToKeep = matchDB.filterDiffQrySbj( qryDB, thresIdentity, thresLength, verbose - 1 )
+
+    # here, possibility to save the information about by which match a specific query has been removed
+
+    string = "%i queries to be kept" % ( len(lQryToKeep) ); sys.stdout.flush()
+    logging.info( string )
+    if verbose > 0: print string
+
+    # write the output fasta file without the included queries
+    writeOutQuery( qryDB, outFileName, lQryToKeep )
+
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main ()
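
The decision itself is taken by MatchDB.filterDiffQrySbj, which is not shown. The removal criterion documented by the two thresholds above can be sketched as follows: a query is dropped when a match shows it is essentially included in a subject, i.e. identity of at least 95% over at least 98% of the query length (the match figures below are hypothetical and the exact definition lives in MatchDB):

    def query_is_included(identity, match_length, query_length,
                          thres_identity=0.95, thres_length=0.98):
        return (identity >= thres_identity and
                float(match_length) / query_length >= thres_length)

    print(query_is_included(identity=0.97, match_length=990, query_length=1000))   # True -> filtered out
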
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/getCumulLengthFromTEannot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/getCumulLengthFromTEannot.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+
+##@file
+# usage: getCumulLengthFromTEannot.py [ options ]
+# options:
+#      -h: this help
+#      -i: table with the annotations (format=path)
+#      -r: name of a TE reference sequence (if empty, all subjects are considered)
+#      -g: length of the genome (in bp)
+#      -C: configuration file
+#      -c: clean
+#      -v: verbosity level (default=0/1)
+
+
+import sys
+import os
+import getopt
+from commons.core.sql.DbMySql import DbMySql
+from commons.core.sql.TablePathAdaptator import TablePathAdaptator
+
+
+class getCumulLengthFromTEannot( object ):
+    """
+    Give the cumulative length of TE annotations (subjects mapped on queries).
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        self._tableName = ""
+        self._TErefseq = ""
+        self._genomeLength = 0
+        self._configFileName = ""
+        self._clean = False
+        self._verbose = 0
+        self._db = None
+        self._tpA = None
+        
+        
+    def help( self ):
+        """
+        Display the help on stdout.
+        """
+        print
+        print "usage: getCumulLengthFromTEannot.py [ options ]"
+        print "options:"
+        print "     -h: this help"
+        print "     -i: table with the annotations (format=path)"
+        print "     -r: name of a TE reference sequence (if empty, all subjects are considered)"
+        print "     -g: length of the genome (in bp)"
+        print "     -C: configuration file"
+        print "     -c: clean"
+        print "     -v: verbosity level (default=0/1)"
+        print
+        
+        
+    def setAttributesFromCmdLine( self ):
+        """
+        Set the attributes from the command-line.
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:],"hi:r:g:C:cv:")
+        except getopt.GetoptError, err:
+            print str(err); self.help(); sys.exit(1)
+        for o,a in opts:
+            if o == "-h":
+                self.help(); sys.exit(0)
+            elif o == "-i":
+                self.setInputTable( a )
+            elif o == "-r":
+                self.setTErefseq( a )
+            elif o == "-g":
+                self.setGenomeLength( a )
+            elif o == "-C":
+                self.setConfigFileName( a )
+            elif o == "-c":
+                self.setClean()
+            elif o == "-v":
+                self.setVerbosityLevel( a )
+                
+                
+    def setInputTable( self, inTable ):
+        self._tableName = inTable
+        
+    def setTErefseq( self, a ):
+        self._TErefseq = a
+        
+    def setGenomeLength( self, genomeLength ):
+        self._genomeLength = int(genomeLength)
+        
+    def setConfigFileName( self, configFileName ):
+        self._configFileName = configFileName
+        
+    def setClean( self ):
+        self._clean = True
+        
+    def setVerbosityLevel( self, verbose ):
+        self._verbose = int(verbose)
+        
+    def checkAttributes( self ):
+        """
+        Check the attributes are valid before running the algorithm.
+        """
+        if self._tableName == "":
+            print "ERROR: missing input table"; self.help(); sys.exit(1)
+            
+            
+    def setAdaptatorToTable( self ):
+        self._db = DbMySql( cfgFileName=self._configFileName )
+        self._tpA = TablePathAdaptator( self._db, self._tableName )
+        
+        
+    def getAllSubjectsAsMapOfQueries( self ):
+        mapFileName = "%s.map" % self._tableName
+        mapFile = open( mapFileName, "w" )
+        if self._TErefseq != "":
+            lPathnums = self._tpA.getIdListFromSubject( self._TErefseq )
+        else:
+            lPathnums = self._tpA.getIdList()
+        if self._verbose > 0:
+            print "nb of paths: %i" % ( len(lPathnums) )
+        for pathnum in lPathnums:
+            lPaths = self._tpA.getPathListFromId( pathnum )
+            for path in lPaths:
+                map = path.getSubjectAsMapOfQuery()
+                map.write( mapFile )
+        mapFile.close()
+        return mapFileName
+    
+    
+    def mergeRanges( self, mapFileName ):
+        mergeFileName = "%s.merge" % mapFileName
+        prg = os.environ["REPET_PATH"] + "/bin/mapOp"
+        cmd = prg
+        cmd += " -q %s" % ( mapFileName )
+        cmd += " -m"
+        cmd += " 2>&1 > /dev/null"
+        log = os.system( cmd )
+        if log != 0:
+            print "*** Error: %s returned %i" % ( prg, log )
+            sys.exit(1)
+        if self._clean:
+            os.remove( mapFileName )
+        return mergeFileName
+    
+    
+    def getCumulLength( self, mergeFileName ):
+        mergeFile = open( mergeFileName, "r" )
+        total = 0
+        while True:
+            line = mergeFile.readline()
+            if line == "":
+                break
+            tok = line.split("\t")
+            total += abs( int(tok[3]) - int(tok[2]) ) + 1
+        mergeFile.close()
+        if self._clean:
+            os.remove( mergeFileName )
+        return total
+    
+    
+    def start( self ):
+        """
+        Useful commands before running the program.
+        """
+        self.checkAttributes()
+        if self._verbose > 0:
+            print "START %s" % ( type(self).__name__ ); sys.stdout.flush()
+        self.setAdaptatorToTable()
+        
+        
+    def end( self, mapFileName, mergeFileName ):
+        """
+        Useful commands before ending the program.
+        """
+        self._db.close()
+        if self._verbose > 0:
+            print "END %s" % ( type(self).__name__ ); sys.stdout.flush()
+            
+            
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        
+        mapFileName = self.getAllSubjectsAsMapOfQueries()
+        mergeFileName = self.mergeRanges( mapFileName )
+        total = self.getCumulLength( mergeFileName )
+        print "cumulative length: %i bp" % total
+        if self._genomeLength > 0:
+            print "TE content: %.2f%%" % ( 100 * total / float(self._genomeLength) )
+            
+        self.end( mapFileName, mergeFileName )
+        
+        
+if __name__ == "__main__":
+    i = getCumulLengthFromTEannot()
+    i.setAttributesFromCmdLine()
+    i.run()
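
Once overlapping annotations have been merged by mapOp, getCumulLength() simply sums the merged interval lengths, and the TE content is that total over the genome size. The final report can be reproduced on hypothetical merged intervals:

    merged = [(101, 500), (1200, 1999), (5000, 5049)]     # (start, end), inclusive coordinates
    cumulative = sum(abs(end - start) + 1 for start, end in merged)
    genome_length = 100000
    print("cumulative length: %i bp" % cumulative)                              # 1250 bp
    print("TE content: %.2f%%" % (100 * cumulative / float(genome_length)))     # 1.25%
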
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/pathnum2id.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/pathnum2id.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+
+def setup_env():
+    if "REPET_PATH" in os.environ.keys():
+        sys.path.append( os.environ["REPET_PATH"] )
+    else:
+        print "*** Error: no environment variable REPET_PATH ***"
+        sys.exit(1)
+setup_env()
+
+from commons.core.parsing.PathNum2Id import PathNum2Id
+
+#-----------------------------------------------------------------------------
+
+def help():
+
+    print ""
+    print "usage:",sys.argv[0]," [ options ]"
+    print "option:"
+    print "    -h: this help"
+    print "    -i: input file name (path format)"
+    print "    -o: output file name (path format, default=inFileName+'.path')"
+    print ""
+
+#-----------------------------------------------------------------------------
+
+def main():
+
+    inFileName = ""
+    outFileName = ""
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:o:")
+    except getopt.GetoptError:
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+
+    if inFileName == "":
+        print "*** Error: missing input file name"
+        help()
+        sys.exit(1)
+
+    if outFileName == "":
+        outFileName = inFileName + ".path"
+        
+    pathNum2Id = PathNum2Id()
+    pathNum2Id.setInFileName( inFileName )
+    pathNum2Id.setOutFileName( outFileName )
+    pathNum2Id.run()
+
+    return 0
+
+#-----------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/refalign2fasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/refalign2fasta.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,408 @@\n+#!/usr/bin/env python\n+\n+##@file\n+# Convert the output from Refalign (MSA program) into the \'fasta\' format.\n+# Usually used before subsequent analysis such as the estimation of deletion rate.\n+#\n+# usage: refalign2fasta.py [ options ]\n+# options:\n+#      -h: this help\n+#      -i: name of the input file (output from refalign)\n+#      -r: name of the reference sequence (discard if not provided)\n+#      -g: for the gaps, keep only deletions (\'d\'), only insertions (\'i\') or both (default=\'id\')\n+#      -o: name of the output file (default=inFileName\'.fa_aln\',format=\'fasta\')\n+\n+import os\n+import sys\n+import getopt\n+import exceptions\n+\n+if not os.environ.has_key( "REPET_PATH" ):\n+    print "ERROR: no environment variable REPET_PATH"\n+    sys.exit(1)\n+sys.path.append( os.environ["REPET_PATH"] )\n+\n+import pyRepet.seq.Bioseq\n+\n+\n+def help():\n+    """\n+    Give the list of the command-line options.\n+    """\n+    print\n+    print "usage:",sys.argv[0]," [ options ]"\n+    print "options:"\n+    print "     -h: this help"\n+    print "     -i: name of the input file (output from refalign)"\n+    print "     -r: name of the reference sequence (discard if not provided)"\n+    print "     -g: for the gaps, keep only deletions (\'d\'), only insertions (\'i\') or both (default=\'id\')"\n+    print "     -o: name of the output file (default=inFileName\'.fa_aln\',format=\'fasta\')"\n+    print\n+\n+\n+def getAlignments( inFileName ):\n+    """\n+    Retrieve the alignments from the input file.\n+\n+    @param inFileName: name of the input file\n+    @type: string\n+\n+    @return: list of alignments ( refseq, seq, header of seq )\n+    @rtype: list of 3d-tuples\n+    """\n+\n+    lAlign = []\n+\n+    inFile = open( inFileName, "r" )\n+    line = inFile.readline()\n+    while True:\n+        if line == "":\n+            break\n+        refseq, seq, label = line[:-1].split("\\t")[:3]\n+        lAlign.append( ( refseq, seq, label ) )\n+        line = inFile.readline()\n+    inFile.close()\n+\n+    return lAlign\n+\n+\n+def getGaps( seq ):\n+    """\n+    Get the gaps on a sequence, start coordinate and length. 
The start\n+    coordinate of a gap is the # of the nucleotide after which it starts.\n+\n+    @param seq: sequence to analyse\n+    @type seq: string\n+\n+    @return: list of gaps ( start coordinate, length )\n+    @rtype: list of 2d-tuples\n+    """\n+\n+    prev = "N"\n+    lGapsOnSeq = []\n+    i = 0\n+    lengthGap = 0\n+    for c in seq:\n+        if c == "-" and prev != "-":\n+            startGap = i\n+        if c != "-" and prev == "-":\n+            lGapsOnSeq.append( ( startGap, lengthGap ) )\n+            lengthGap = 0\n+        if c != "-":\n+            i += 1\n+        else:\n+            lengthGap += 1\n+        prev = c\n+\n+     # case with a gap at the end of the sequence\n+    if seq[ len(seq) - 1 ] == "-":\n+        lGapsOnSeq.append( ( startGap, lengthGap ) )\n+\n+    return lGapsOnSeq\n+\n+\n+def getGapsOnRefSeq( lAlign ):\n+    """\n+    Retrieve the gaps on the ref seq in all the alignments.\n+\n+    @param lAlign: list of alignments ( refseq, seq, header of seq )\n+    @type lAlign: list of 3d-tuples\n+\n+    @return: list of gaps per alignment\n+    @rtype: list of lists of 2d-tuples\n+    """\n+\n+    lGapsOnRef = []\n+\n+    for align in lAlign:\n+        refseq = align[0]\n+        lGapsOnRef.append( getGaps( refseq ) )\n+\n+    return lGapsOnRef\n+\n+\n+def insertGap( seq, startGap, lengthGap ):\n+    """\n+    Get a new seq by inserting a gap in the give seq.\n+\n+    @param seq: sequence\n+    @type seq: string\n+\n+    @param startGap:\n+    @type: startGap: integer\n+\n+    @param lengthGap: length of the gap\n+    @type lengthGap: integer\n+\n+    @return: new seq made from the give seq by inserting the gap\n+    @rtype: string\n+    """\n+\n+    new_seq = seq[:startGap] + (lengthGap*\'-\') + seq[startGap:] \n+    return new_seq\n+\n+\n+def insertListGaps( inSeq, lGaps ):\n+    """\n+    Insert all the gaps from the list into the sequence.\n+\n+    @param inSeq: sequence\n+    @type inSeq: string\n+\n+    @param lGaps'..b'ign[j][1]), newlist )\n+        header = lAlign[j][2]\n+        Align_seq.append( ( header, newSeq ) )\n+\n+    return Align_seq\n+\n+\n+def getSeqWithDeletions( lAlign ):\n+    """\n+    Get the sequences by putting gaps only when they correspond to a deletion compare to ref seq.\n+    Used for instance when we want to estimate the deletion rate.\n+\n+    @param lAlign: list of alignments ( refseq, seq, header of seq )\n+    @type lAlign: list of 3d-tuples\n+\n+    @return: list of lists ( header, sequence with gaps )\n+    @rtype: list of 2d-tuples\n+    """\n+\n+    Align_seq = []\n+\n+    for align in lAlign:\n+        refseq = align[0]\n+        seq = align[1]\n+        header = align[2]\n+        newSeq = ""\n+        for i in xrange(0,len(refseq)):\n+            if refseq[i] != "-":\n+                newSeq += seq[i]\n+        Align_seq.append( ( header, newSeq ) )\n+\n+    return Align_seq\n+\n+\n+def saveMSA( outFileName, Align_seq, Align_seqref=None ):\n+    """\n+    Save the results as a multiple sequence alignment (MSA) in the \'fasta\' format.\n+\n+    @param outFileName: name of the output file\n+    @type outFileName: string\n+\n+    @param Align_seqref: sequence of ref seq\n+    @type Align_seqref: string\n+    """\n+\n+    outFile = open( outFileName, "w" )\n+    bs = pyRepet.seq.Bioseq.Bioseq()\n+\n+    # if provided, save the ref seq\n+    if Align_seqref != None:\n+        bs.header = Align_seqref[0]\n+        bs.sequence = Align_seqref[1]\n+        bs.write( outFile )\n+\n+    # save the other sequences\n+    for 
i in Align_seq:\n+        bs.header = i[0]\n+        bs.sequence = i[1]\n+        bs.write( outFile )\n+\n+    outFile.close()\n+    \n+    \n+def saveOnlyWithDeletions( lAlign, refseqName, outFileName ):\n+    Align_seq = getSeqWithDeletions( lAlign )\n+    if refseqName != "":\n+        Align_seqref = ( refseqName, lAlign[0][0].replace("-","") )\n+        saveMSA( outFileName, Align_seq, Align_seqref )\n+    else:\n+        saveMSA( outFileName, Align_seq )\n+        \n+        \n+def main():\n+    \n+    inFileName = ""\n+    refseqName = ""\n+    keepGap = "id"\n+    outFileName = ""\n+    global verbose\n+    verbose = 0\n+    \n+    try:\n+        opts, args = getopt.getopt(sys.argv[1:],"hi:r:g:o:v:")\n+    except getopt.GetoptError, err:\n+        print str(err)\n+        help()\n+        sys.exit(1)\n+    for o,a in opts:\n+        if o == "-h":\n+            help()\n+            sys.exit(0)\n+        elif o == "-i":\n+            inFileName = a\n+        elif o == "-r":\n+            refseqName = a\n+        elif o == "-g":\n+            keepGap = a\n+        elif o == "-o":\n+            outFileName = a\n+        elif o == "-v":\n+            verbose = int(a)\n+\n+    if inFileName == "":\n+        print "ERROR: missing input file name"\n+        help()\n+        sys.exit(1)\n+        \n+    if verbose > 0:\n+        print "START %s" % (sys.argv[0].split("/")[-1])\n+        sys.stdout.flush()\n+        \n+    lAlign = getAlignments( inFileName )\n+    if verbose > 0:\n+        print "nb of alignments: %i" % ( len(lAlign) )\n+        sys.stdout.flush()\n+        \n+    if outFileName == "":\n+        outFileName = "%s.fa_aln" % ( inFileName )\n+    if verbose > 0:\n+        print "output file: \'%s\'" % ( outFileName )\n+        \n+    if keepGap == "id":\n+        lGapsOnRefSeqPerAlign = getGapsOnRefSeq( lAlign )\n+        Align_seq = insertgap_seq( lAlign, lGapsOnRefSeqPerAlign )\n+        if refseqName != "":\n+            Align_seqref = insertGapsInRefSeq( lAlign, lGapsOnRefSeqPerAlign, refseqName )\n+            saveMSA( outFileName, Align_seq, Align_seqref )\n+        else:\n+            saveMSA( outFileName, Align_seq )\n+            \n+    elif keepGap == "d":\n+        saveOnlyWithDeletions( lAlign, refseqName, outFileName )\n+        \n+    elif keepGap == "i":\n+        print "ERROR: \'-g i\' not yet available"\n+        sys.exit(1)\n+        \n+    if verbose > 0:\n+        print "END %s" % (sys.argv[0].split("/")[-1])\n+        sys.stdout.flush()\n+        \n+    return 0\n+\n+\n+if __name__ == "__main__" :\n+    main ()\n'
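
The getGaps() helper above records, for one aligned sequence, after how many nucleotides each gap opens and how long it is. The same scan can be restated compactly on a hypothetical aligned string:

    def get_gaps(seq):
        gaps, nt_seen, gap_len = [], 0, 0
        for c in seq:
            if c == "-":
                gap_len += 1
            else:
                if gap_len:
                    gaps.append((nt_seen, gap_len))   # gap opened after nt_seen nucleotides
                    gap_len = 0
                nt_seen += 1
        if gap_len:                                   # gap running to the end of the sequence
            gaps.append((nt_seen, gap_len))
        return gaps

    print(get_gaps("ACGT--AC-G"))   # [(4, 2), (6, 1)]
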
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/removeDescriptionInFastaHeaderProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/removeDescriptionInFastaHeaderProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+import re
+from commons.pyRepetUnit.components.AbstractProgramLauncher import AbstractProgramLauncher
+
+class removeDescriptionInFastaHeaderProgramLauncher(AbstractProgramLauncher):
+
+    def __init__( self ):
+        AbstractProgramLauncher.__init__( self )
+        self._formatInFile = "fasta"
+                
+    def run( self ):
+        self.checkInput()
+        fastaHandler = open(self.getInputFile(), "r")
+        lines = fastaHandler.readlines()
+        fastaHandler.close()
+        newFastaName = ".".join([self.getInputFile().split(".")[0], "preprocessed", "fasta"])
+        
+        self._writePreprocessedFastaFile(lines, newFastaName)
+        
+    def _writePreprocessedFastaFile(self, lines, newFastaName):
+        newFastaHandler = open(newFastaName, "w")
+        for line in lines:
+            if re.match(">", line):
+                newLine = line.split(" ",1)[0] + "\n"
+                newFastaHandler.write(newLine)
+            else:
+                newFastaHandler.write(line)
+        newFastaHandler.close()
+
+        
+        
+        
+if __name__ == "__main__":
+    i = removeDescriptionInFastaHeaderProgramLauncher()
+    i.checkAttributesFromCmdLine()
+    i.run()
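
The rewrite applied to '>' lines keeps only the text before the first space; for example (made-up header):

    header = ">consensus42 putative LTR retrotransposon, 5.3 kb"
    print(header.split(" ", 1)[0])   # '>consensus42'
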
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/replaceGreaterThanSymbolInFastaHeaderProgramLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/replaceGreaterThanSymbolInFastaHeaderProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+import re
+from commons.pyRepetUnit.components.AbstractProgramLauncher import AbstractProgramLauncher
+
+class replaceGreaterThanSymbolInFastaHeaderProgramLauncher(AbstractProgramLauncher):
+
+    def __init__( self ):
+        AbstractProgramLauncher.__init__( self )
+        self._formatInFile = "fasta"
+                
+    def run( self ):
+        self.checkInput()
+        fastaHandler = open(self.getInputFile(), "r")
+        lines = fastaHandler.readlines()
+        fastaHandler.close()
+        newFastaName = ".".join([self.getInputFile().split(".")[0], "preprocessed", "fasta"])
+        
+        self._writePreprocessedFastaFile(lines, newFastaName)
+        
+    def _writePreprocessedFastaFile(self, lines, newFastaName):
+        newFastaHandler = open(newFastaName, "w")
+        for line in lines:
+            if re.match(">", line):
+                newLine = re.sub("-->|->", " to ", line)
+                newFastaHandler.write(newLine)
+            else:
+                newFastaHandler.write(line)
+        newFastaHandler.close()
+
+        
+        
+        
+if __name__ == "__main__":
+    i = replaceGreaterThanSymbolInFastaHeaderProgramLauncher()
+    i.checkAttributesFromCmdLine()
+    i.run()
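
The substitution only touches '>' lines and rewrites arrow notations ('-->' or '->') as the word 'to'; for example (made-up header):

    import re
    print(re.sub("-->|->", " to ", ">chr1:100->200 candidate"))   # '>chr1:100 to 200 candidate'
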
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/setnum2id.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/setnum2id.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+import exceptions
+
+#-----------------------------------------------------------------------------
+
+def help():
+
+        print "\nusage:",sys.argv[0]," [ options ]"
+        print "option:"
+        print "    -h: this help"
+        print "    -i: input set file"
+        print "output on stdout\n"
+
+#-----------------------------------------------------------------------------
+
+def main():
+
+    inFileName = ""
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:")
+    except getopt.GetoptError:
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        if o == "-i":
+            inFileName = a
+
+    if inFileName == "":
+        print "*** Error: missing input file name"
+        help()
+        sys.exit(1)
+
+    inFile = open( inFileName, "r" )
+    line = inFile.readline()
+
+    dID2count = {}
+    count = 1
+
+    while 1:
+
+        if line == "":
+            break
+
+        line = line.split()
+
+        path = line[0]
+        sbjName = line[1]
+        qryName = line[2]
+        qryStart = line[3]
+        qryEnd = line[4]
+
+        key_id = path + "-" + qryName + "-" + sbjName
+        if key_id not in dID2count.keys():
+            newPath = count
+            count += 1
+            dID2count[ key_id ] = newPath
+        else:
+            newPath = dID2count[ key_id ]
+
+        data = str(newPath) + "\t" + sbjName + "\t" + qryName + "\t"
+        data += qryStart + "\t" + qryEnd
+
+        print data
+        sys.stdout.flush()
+
+        line = inFile.readline()
+
+    inFile.close()
+
+    return 0
+
+#-----------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
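
The renumbering rule above gives every distinct (old path number, query, subject) combination a new sequential identifier, so repeated combinations keep sharing one id. A compact restatement on hypothetical records (same column order as the input set file):

    records = [("12", "TE_A", "chr1", "100", "900"),
               ("12", "TE_A", "chr1", "1500", "2100"),
               ("7",  "TE_B", "chr2", "10", "480")]
    d_id2count, count = {}, 1
    for path, sbj, qry, start, end in records:
        key = "%s-%s-%s" % (path, qry, sbj)
        if key not in d_id2count:
            d_id2count[key] = count
            count += 1
        print("%s\t%s\t%s\t%s\t%s" % (d_id2count[key], sbj, qry, start, end))
    # the first two records share the new id 1, the third gets id 2
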
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/srptBlasterMatcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/srptBlasterMatcher.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,448 @@\n+#!/usr/bin/env python\n+\n+"""\n+This program takes a query directory as input,\n+then launches Blaster and/or Matcher on each file in it,\n+finally results are optionally gathered in a single file.\n+"""\n+\n+import os\n+import sys\n+import getopt\n+import logging\n+import glob\n+import ConfigParser\n+\n+from pyRepet.launcher.programLauncher import programLauncher\n+from pyRepet.launcher import Launcher\n+from pyRepet.sql.RepetJobMySQL import RepetJob\n+from commons.core.coord.Align import Align\n+\n+\n+def help():\n+    print\n+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )\n+    print "options:"\n+    print "     -h: this help"\n+    print "     -g: name of the group identifier (same for all the jobs)"\n+    print "     -q: name of the query directory"\n+    print "     -S: suffix in the query directory (default=\'*.fa\' for Blaster, \'*.align\' for Matcher)"\n+    print "     -s: absolute path to the subject databank"\n+    print "     -Q: resources needed on the cluster)"\n+    print "     -d: absolute path to the temporary directory"\n+    print "     -m: mix of Blaster and/or Matcher"\n+    print "         1: launch Blaster only"\n+    print "         2: launch Matcher only (on \'*.align\' query files)"\n+    print "         3: launch Blaster+Matcher in the same job"\n+    print "     -B: parameters for Blaster (e.g. \\"-a -n tblastx\\")"\n+    print "     -M: parameters for Matcher (e.g. \\"-j\\")"\n+    print "     -Z: collect all the results into a single file"\n+    print "         align (after Blaster)"\n+    print "         path/tab (after Matcher)"\n+    print "     -C: configuration file from TEdenovo or TEannot pipeline"\n+    print "     -t: name of the table recording the jobs (default=jobs)"\n+    print "     -p: absolute path to project directory (if jobs management via files)"\n+    print "     -c: clean (remove job launch files and job stdout)"\n+    print "     -v: verbose (default=0/1/2)"\n+    print\n+\n+\n+def filterRedundantMatches( inFile, outFile ):\n+    """\n+    When a pairwise alignment is launched ~ all-by-all (ie one batch against all chunks),\n+    one filters the redundant matches. 
For instance we keep \'chunk3-1-100-chunk7-11-110-...\'\n+    and we discards \'chunk7-11-110-chunk3-1-100-...\'.\n+    Also we keep \'chunk5-1-100-chunk5-11-110-...\' and we discards\n+    \'chunk5-11-110-chunk5-1-100-...\'.\n+    For this of course the results need to be sorted by query, on plus strand,\n+    and in ascending coordinates (always the case with Blaster).\n+    """\n+    inFileHandler = open( inFile, "r" )\n+    outFileHandler = open( outFile, "w" )\n+    iAlign = Align()\n+    countMatches = 0\n+    tick = 100000\n+    while True:\n+        line = inFileHandler.readline()\n+        if line == "":\n+            break\n+        countMatches += 1\n+        iAlign.setFromString( line )\n+        if "chunk" not in iAlign.range_query.seqname \\\n+               or "chunk" not in iAlign.range_subject.seqname:\n+            print "ERROR: \'chunk\' not in seqname"\n+            sys.exit(1)\n+        if int(iAlign.range_query.seqname.split("chunk")[1]) < int(iAlign.range_subject.seqname.split("chunk")[1]):\n+            iAlign.write( outFileHandler )\n+        elif int(iAlign.range_query.seqname.split("chunk")[1]) == int(iAlign.range_subject.seqname.split("chunk")[1]):\n+            if iAlign.range_query.getMin() < iAlign.range_subject.getMin():\n+                iAlign.write( outFileHandler )\n+        if countMatches % tick == 0:   # need to free buffer frequently as file can be big\n+            outFileHandler.flush()\n+            os.fsync( outFileHandler.fileno() )\n+    inFileHandler.close()\n+    outFileHandler.close()\n+\n+\n+def runCollect( groupid, collect, allByAll ):\n+    """\n+    Gather the results of each job in a single job and adapt path ID if necessary.\n+    """\n+    if verbose > 0:\n+        print "concatenate the results of each job"; sys.stdout.flush()\n+\n+    # retrieve the list of the files\n+    lFiles = glob.glob( "*.%s" % ( collect ) )\n+    lFiles.so'..b'            cleanB += "if os.path.exists( \\"" + prefix + ".raw\\" ):\\n"\n+            cleanB += "\\tos.remove( \\"" + prefix + ".raw\\" )\\n"\n+            cleanB += "if os.path.exists( \\"" + prefix + ".seq_treated\\" ):\\n"\n+            cleanB += "\\tos.remove( \\"" + prefix + ".seq_treated\\" )\\n"\n+            launchM = ""\n+            launchM += os.environ["REPET_PATH"] + "/bin/matcher"\n+            launchM += " -m %s.align" % ( prefix )\n+            launchM += " -q %s" % ( prefix )\n+            launchM += " -s %s" % ( subjectBank )\n+            if paramMatcher != "":\n+                launchM += " %s" % ( paramMatcher )\n+            cleanM = ""\n+            s = ""\n+            if "-a" in paramMatcher:\n+                s = "match"\n+            else:\n+                s = "clean_match"\n+            if collect == "path":\n+                cleanM += "if not os.path.exists( \\"%s/%s.align.%s.path\\" ):\\n" % ( currentDir, prefix, s )\n+                cleanM += "\\tos.system( \\"mv %s.align.%s.path %s\\" )\\n" % ( prefix, s, currentDir )\n+                cleanM += "if os.path.exists( \\"" + prefix + ".align."+s+".tab\\" ):\\n"\n+                cleanM += "\\tos.remove( \\"" + prefix + ".align."+s+".tab\\" )\\n"\n+            elif collect == "tab":\n+                cleanM += "if not os.path.exists( \\"%s/%s.align.%s.tab\\" ):\\n" % ( currentDir, prefix, s )\n+                cleanM += "\\tos.system( \\"mv %s.align.%s.tab %s\\" )\\n" % ( prefix, s, currentDir )\n+                cleanM += "if os.path.exists( \\"" + prefix + ".align."+s+".path\\" ):\\n"\n+                cleanM += 
"\\tos.remove( \\"" + prefix + ".align."+s+".path\\" )\\n"\n+            cleanM += "if not os.path.exists( \\"%s/%s.align.%s.param\\" ):\\n" % ( currentDir, prefix, s )\n+            cleanM += "\\tos.system( \\"mv %s.align.%s.param %s\\" )\\n" % ( prefix, s, currentDir )\n+            if tmpDir != currentDir:\n+                cleanM += "if os.path.exists( \\"%s\\" ):\\n" % ( prefix )\n+                cleanM += "\\tos.remove( \\"%s\\" )\\n" % ( prefix )\n+            if clean == True:\n+                cleanM += "if os.path.exists( \\"" + prefix + ".align\\" ):\\n"\n+                cleanM += "\\tos.remove( \\"" + prefix + ".align\\" )\\n"\n+            else:\n+                cleanM += "if not os.path.exists( \\"%s/%s.align\\" ):\\n" % ( currentDir, prefix )\n+                cleanM += "\\tos.system( \\"mv %s.align %s\\" )\\n" % ( prefix, currentDir )\n+            cleanM += "if os.path.exists( \\"" + prefix + ".align."+s+".fa\\" ):\\n"\n+            cleanM += "\\tos.remove( \\"" + prefix + ".align."+s+".fa\\" )\\n"\n+            cleanM += "if os.path.exists( \\"" + prefix + ".align."+s+".map\\" ):\\n"\n+            cleanM += "\\tos.remove( \\"" + prefix + ".align."+s+".map\\" )\\n"\n+            cmd_start += "print \\"" + launchB + "\\"; sys.stdout.flush()\\n"\n+            cmd_start += "log = os.system( \\"" + launchB + "\\" )\\n"\n+            cmd_start += "if log != 0:\\n"\n+            cmd_start += launcher.cmd_test( launcher.job, "error", loop=1 )\n+            cmd_start += "\\tsys.exit(1)\\n"\n+            cmd_start += cleanB\n+            cmd_start += "print \\"" + launchM + "\\"; sys.stdout.flush()\\n"\n+            cmd_start += "log = os.system( \\"" + launchM + "\\" )\\n"\n+            cmd_start += cleanM\n+            launcher.runSingleJob( cmd_start )\n+        launcher.acronyme = "BlasterMatcher"\n+        launcher._nbJobs = count\n+        launcher.endRun()\n+        if clean == True:\n+            launcher.clean( "BlasterMatcher_*" )\n+\n+    else:\n+        print "ERROR: option \'-m %s\' not recognized" % ( mix )\n+        sys.exit(1)\n+\n+\n+    if collect != "":\n+        if collect in [ "align", "path", "tab" ]:\n+            runCollect( groupid, collect, allByAll )\n+        else:\n+            print "ERROR: collect \'%s\' not implemented" % ( collect )\n+            sys.exit(1)\n+\n+\n+    logging.info( "finished" )\n+\n+    if verbose > 0:\n+        print "END %s" % ( sys.argv[0].split("/")[-1] )\n+        sys.stdout.flush()\n+\n+    return 0\n+\n+\n+if __name__ == "__main__":\n+    main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/srptCreateTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/srptCreateTable.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+import ConfigParser
+
+from commons.core.sql.DbMySql import DbMySql
+
+
+def help():
+    print
+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )
+    print "options:"
+    print "     -h: this help"
+    print "     -f: name of the input file"
+    print "     -n: name of the MySQL table"
+    print "     -t: table type (fasta|align|path|set|match|map|TEclassif|cluster)"
+    print "     -o: overwrite (default=False)"
+    print "     -c: configuration file from TEdenovo or TEannot pipeline"
+    print "     -H: MySQL host (if no configuration file)"
+    print "     -U: MySQL user (if no configuration file)"
+    print "     -P: MySQL password (if no configuration file)"
+    print "     -D: MySQL database (if no configuration file)"
+    print "     -T: MySQL port (if no configuration file, default=3306)"
+    print "     -v: verbose (default=0/1)"
+    print
+
+
+def main():
+    """
+    This program loads data from a file into a MySQL table.
+    """
+    filename = ""
+    tablename = ""
+    filetype = ""
+    overwrite = False
+    configFileName = ""
+    host = ""
+    user = ""
+    passwd = ""
+    dbname = ""
+    port = 0
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hf:t:n:oc:H:U:P:D:T:v:" )
+    except getopt.GetoptError, err:
+        sys.stderr.write( "%s\n" % str(err) )
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-f":
+            filename = a
+        elif o == "-n":
+            tablename = a
+        elif o == "-t":
+            filetype = a
+        elif o == "-o":
+            overwrite = True
+        elif o == "-c":
+            configFileName = a
+        elif o == "-H":
+            host = a
+        elif o == "-U":
+            user = a 
+        elif o == "-P":
+            passwd = a
+        elif o == "-D":
+            dbname = a
+        elif o == "-T":
+            port = int(a)
+        elif o == "-v":
+            verbose = int(a)
+
+    if  filename == "" or tablename == "" or filetype == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if configFileName != "":
+        config = ConfigParser.ConfigParser()
+        config.readfp( open(configFileName) )
+        host = config.get("repet_env","repet_host")
+        user = config.get("repet_env","repet_user")
+        passwd = config.get("repet_env","repet_pw")
+        dbname = config.get("repet_env","repet_db")
+        port = int( config.get("repet_env","repet_port") )
+
+    if host == "" and os.environ.get( "REPET_HOST" ) != "":
+        host = os.environ.get( "REPET_HOST" )
+    if user == "" and os.environ.get( "REPET_USER" ) != "":
+        user = os.environ.get( "REPET_USER" )
+    if passwd == "" and os.environ.get( "REPET_PW" ) != "":
+        passwd = os.environ.get( "REPET_PW" )
+    if dbname == "" and os.environ.get( "REPET_DB" ) != "":
+        dbname = os.environ.get( "REPET_DB" )
+    if port == 0 and os.environ.get( "REPET_PORT" ) != "":
+        port = int( os.environ.get( "REPET_PORT" ) )
+        
+    if host == "":
+        print "ERROR: missing host"
+        sys.exit(1)
+    if user == "":
+        print "ERROR: missing user"
+        sys.exit(1)
+    if passwd == "":
+        print "ERROR: missing password"
+        sys.exit(1)
+    if dbname == "":
+        print "ERROR: missing db name"
+        sys.exit(1)
+    if port == 0:
+        print "ERROR: missing port"
+        sys.exit(1)
+
+    db = DbMySql(user, host, passwd, dbname, port )
+
+    if not os.path.exists( filename ):
+        print "ERROR: input file '%s' doesn't exist" % ( filename )
+        sys.exit(1)
+
+    db.createTable( tablename, filetype, filename, overwrite)
+
+    db.close()
+
+    return 0
+
+
+if __name__ == "__main__":
+    main()
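A hypothetical invocation of srptCreateTable.py, written in the same os.system style used elsewhere in this changeset; the options are those documented in help() above, while the file, table and configuration names are illustrative only:

import os

cmd = "srptCreateTable.py"
cmd += " -f annotations.path"   # input file (hypothetical name)
cmd += " -n chr_path"           # MySQL table to create (hypothetical name)
cmd += " -t path"               # table type: fasta|align|path|set|match|map|TEclassif|cluster
cmd += " -c TEannot.cfg"        # pipeline configuration file (hypothetical name)
cmd += " -v 1"
log = os.system( cmd )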
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/srptExportTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/srptExportTable.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+
+import user, os, sys, getopt, exceptions, ConfigParser
+
+#-----------------------------------------------------------------------------
+
+def help():
+
+    """
+    Give the list of the command-line options.
+    """
+
+    print
+    print "usage:",sys.argv[0]," [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the table to export"
+    print "     -o: name of the output file (default=inTable)"
+    print "     -p: extra parameters to add to the SQL query (e.g. 'ORDER BY path')"
+    print "     -k: keep the first line"
+    print "     -C: configuration file from TEdenovo or TEannot pipeline"
+    print "     -H: MySQL host (if no configuration file)"
+    print "     -U: MySQL user (if no configuration file)"
+    print "     -P: MySQL password (if no configuration file)"
+    print "     -D: MySQL database (if no configuration file)"
+    print "     -v: verbose (default=0/1)"
+    print
+
+#-----------------------------------------------------------------------------
+
+def main():
+
+    """
+    This program exports all the data contained in a MySQL table into a flat file in the current directory.
+    """
+
+    inTable = ""
+    outFileName = ""
+    param = ""
+    keepFirstLine = False
+    configFileName = ""
+    host = ""
+    user = ""
+    passwd = ""
+    dbname = ""
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:o:p:kC:H:U:P:D:v:")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inTable = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-p":
+            param = a
+        elif o == "-k":
+            keepFirstLine = True
+        elif o == "-C":
+            configFileName = a
+        elif o == "-H":
+            host = a
+        elif o == "-U":
+            user = a 
+        elif o == "-P":
+            passwd = a
+        elif o == "-D":
+            dbname = a
+        elif o == "-v":
+            verbose = int(a)
+
+    if inTable == "":
+        print "*** Error: missing input table name"
+        help()
+        sys.exit(1)
+
+    if configFileName != "":
+        config = ConfigParser.ConfigParser()
+        config.readfp( open(configFileName) )
+        host = config.get("repet_env","repet_host")
+        user = config.get("repet_env","repet_user")
+        passwd = config.get("repet_env","repet_pw")
+        dbname = config.get("repet_env","repet_db")
+    if host == "" or user == "" or passwd == "" or dbname == "":
+        if os.environ.get( "REPET_HOST" ) not in [ "", None ]:
+            host = os.environ.get( "REPET_HOST" )
+        if os.environ.get( "REPET_USER" ) not in [ "", None ]:
+            user = os.environ.get( "REPET_USER" )
+        if os.environ.get( "REPET_PW" ) not in [ "", None ]:
+            passwd = os.environ.get( "REPET_PW" )
+        if os.environ.get( "REPET_DB" ) not in [ "", None ]:
+            dbname = os.environ.get( "REPET_DB" )
+    if host == "" or user == "" or passwd == "" or dbname == "":
+        print "*** Error: missing information about MySQL connection"
+        sys.exit(1)
+
+    if outFileName == "":
+        outFileName = inTable
+
+    prg = "mysql"
+    cmd = prg
+    cmd += " -h %s" % ( host )
+    cmd += " -u %s" % ( user )
+    cmd += " -p\"%s\"" % ( passwd )
+    cmd += " --database=%s" % ( dbname )
+    cmd += " -e\"SELECT * FROM %s" % ( inTable )
+    if param != "":
+        cmd += " %s" % ( param )
+    cmd += ";\""
+    cmd += " > "
+    if keepFirstLine == False:
+        cmd += "%s.tmp" % ( outFileName )
+    else:
+        cmd += "%s" % ( outFileName )
+    if verbose > 0: print cmd; sys.stdout.flush()
+    log = os.system( cmd )
+    if log != 0:
+        print "*** Error: %s returned %i" % ( prg, log )
+        sys.exit(1)
+
+    if keepFirstLine == False:
+        tmpFileName = "%s.tmp" % ( outFileName )
+        tmpFile = open( tmpFileName, "r" )
+        outFile = open( outFileName, "w" )
+        i = 0
+        for line in tmpFile:
+            if i > 0:
+                outFile.write( line )
+            i += 1
+        tmpFile.close()
+        outFile.close()
+        os.remove( tmpFileName )
+
+    return 0
+
+#----------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
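To make the command-building block above concrete, this is the string it assembles for a hypothetical call with '-i chr_path -p "ORDER BY path"' and without '-k'; the connection values are placeholders:

cmd = 'mysql -h localhost -u repet -p"secret" --database=repet_db'
cmd += ' -e"SELECT * FROM chr_path ORDER BY path;"'
cmd += ' > chr_path.tmp'
# without '-k', the first line of chr_path.tmp (the column headers returned
# by mysql) is then skipped and the remaining lines are copied to 'chr_path'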
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/srptGameXmlMaker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/srptGameXmlMaker.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,283 @@\n+#!/usr/bin/env python\n+\n+import user, os, sys, getopt, ConfigParser\n+from os import listdir\n+\n+def setup_env():\n+    if "REPET_PATH" in os.environ.keys():\n+        sys.path.append( os.environ["REPET_PATH"] )\n+    else:\n+        print "*** Error: no environment variable REPET_PATH ***"\n+        sys.exit(1)\n+setup_env()\n+\n+from pyRepet.sql.RepetDBMySQL import *\n+from pyRepet.gamexml.Xml_writer import *\n+from pyRepet.gamexml.computational import *\n+\n+#------------------------------------------------------------------------------\n+\n+def help():\n+\n+    print ""\n+    print "usage:",sys.argv[0],"[options]"\n+    print "options:"\n+    print "     -h: this help"\n+    print "     -f: fasta file (required to generate new \'.gamexml\' files)"\n+    print "     -n: annotation tier_name"\n+    print "     -g: gamexml file (for Apollo). If it\'s not mentionned, all \'.gamexml\' files will be updated with the result file"\n+    print "     -l: light gameXML file (without sequence)"\n+    print "     -r: result files (require -n)"\n+    print "     -R: reverse the query and subject of Blaster results"\n+    print "     -s: tier_name of an annotation to remove from a gameXML file"\n+    print "     -t: file of table name to use to create the gamexml files (tier name \'tab\' format \'tab\' table name)"\n+    print "     -c: configuration file from TEdenovo or TEannot pipeline"\n+    print "     -H: MySQL host (if no configuration file)"\n+    print "     -U: MySQL user (if no configuration file)"\n+    print "     -P: MySQL password (if no configuration file)"\n+    print "     -D: MySQL database (if no configuration file)"\n+    print "     -v: verbose (default=0/1/2)"\n+    print ""\n+\n+#------------------------------------------------------------------------------\n+\n+def automatisation( result_file, tier_name, reverse, comput ):\n+\n+    if verbose > 1:\n+        print "Auto update"; sys.stdout.flush()\n+    writer = Xml_writer()\n+    file_liste = []\n+    liste_comp = []\n+    liste_comp = listdir(\'./\')\n+\n+    if result_file != "":\n+        for j in liste_comp:\n+            if writer.file_in_keys( j, comput ):\n+                file_liste = file_liste + [j]\n+\n+        for i in file_liste:\n+            writer.update_gamexml( i, result_file, tier_name, comput )\n+\n+    else:\n+        for j in liste_comp:\n+            if j.find( "gamexml" ) != -1:\n+                writer.parse_gamexml( j )\n+                writer.verif_name_prog( tier_name )\n+                writer.write( j )\n+                if verbose > 1:\n+                    print tier_name + " program from " +j +" removed"\n+\n+#------------------------------------------------------------------------------\n+\n+def main():\n+\n+    f_result = ""\n+    f_gamexml = ""\n+    f_fasta = ""\n+    f_table = ""\n+    tier_name = ""\n+    substract_name = ""\n+    no_seq = 0\n+    configFileName = ""\n+    host = ""\n+    user = ""\n+    passwd = ""\n+    dbname = ""\n+    verbose = 0\n+\n+    try:\n+        options,arguments=getopt.getopt(sys.argv[1:],"hn:f:g:r:s:lRt:c:H:U:P:D:v:",["help","tier_name=","fasta","gamexml","result","substract_program","light","reverse_result","table"])\n+    except getopt.GetoptError:\n+        help()\n+        sys.exit(1)\n+    if options == []:\n+        help()\n+        sys.exit(1)\n+    for o,a in options:\n+        if o == "-h" or o == "--help":\n+            help()\n+            sys.exit(0)\n+        elif o == "-f" or o == "--fasta":\n+            f_fasta = 
a\n+        elif o == "-g" or o == "--gamexml":\n+            f_gamexml = a \n+        elif o == "-n" or o == "--tier_name":\n+            tier_name = a\n+        elif o == "-r" or o == "--result":\n+            f_result = a\n+        elif o == "-s" or o == "--subtract_program":\n+            substract_name = a\n+        elif o == "-l" or o == "--light":\n+            no_seq = 1\n+        elif o == "-R" or o == "--reverse_result":\n+            writer.set_reverse()\n+        elif o == "-t" or o == "--table":\n+            f_table = a\n+        elif o == "-c":\n+        '..b'tdout.flush()\n+\t    writer.parse_gamexml( f_gamexml )\n+\n+\t    if f_gamexml != "":\n+##                key=".".join(f_gamexml.split(".")[:-1])\n+                key = f_gamexml.split(".")[0]\n+\t    else:\n+                key = ""\n+\n+\t    tfile = open( f_table )\n+\t    lines = tfile.readlines()\n+\t    for l in lines:\n+                if l[0] == "#":\n+                    continue\n+                tok = l.split()\n+                #print tok\n+                if len(tok) == 0:\n+                    break\n+                tier_name = tok[0]\n+                format = tok[1]\n+                table = tok[2]\n+                alias = ""\n+                if verbose > 1:\n+                    print "table: " + table + " (format=" + format + ")"\n+                if len(tok) > 3:\n+                    alias = tok[3]\n+                    if verbose > 1:\n+                        print " alias=" + alias\n+\n+                if host == "" or user == "" or passwd == "" or dbname == "":\n+                    print "*** Error: missing information about MySQL connection"\n+                    sys.exit(1)\n+                db = RepetDB( user, host, passwd, dbname )\n+\n+                if format == "path":\n+                    comput.load_dico_path_from_table( db, key, table, alias )\n+                    writer.update_gamexml_comput( tier_name, comput )\n+                elif format == "rpath":\n+                    comput.load_dico_rpath_from_table( db, key, table, alias )\n+                elif format == "ipath":\n+                    comput.load_dico_ipath_from_table( db, key, table, alias )\n+                    writer.update_gamexml_comput( tier_name, comput )\n+                elif format == "align":\n+                    comput.load_dico_align_from_table( db, key, table, alias )\n+                    writer.update_gamexml_comput( tier_name, comput )\n+                elif format == "map":\n+                    comput.load_dico_map_from_table( db, key, table, alias )\n+                    writer.update_gamexml_comput( tier_name, comput )\n+                elif format == "rmap":\n+                    comput.load_dico_rmap_from_table( db, key, table, alias )\n+                    writer.update_gamexml_comput( tier_name, comput )\n+                elif format == "set":\n+                    comput.load_dico_set_from_table( db, key, table, alias )\n+                    writer.update_gamexml_comput( tier_name, comput )\n+                elif format == "annot":\n+                    comput.load_dico_annot_from_table( db, key, table, alias )\n+                    writer.update_gamexml_annot( table, comput )\n+                elif format == "annot_set":\n+                    comput.load_dico_annotset_from_table( db, key, table, alias )\n+                    writer.update_gamexml_annot( table, comput )\n+                else:\n+                    print "*** Error: unknown format \'%s\'" % ( format )\n+                   
 sys.exit(1)\n+            writer.write(f_gamexml)\n+\n+            db.close()\n+\n+    # \n+    if f_gamexml == "" and f_result != "" and f_fasta == "":\n+        automatisation( f_result, tier_name, writer.get_reverse(), comput )\n+\n+    # update a ".gamexml" file (options \'-g\' and \'-t\')\n+    if f_gamexml != "" and f_result != "":\n+        writer.update_gamexml( f_gamexml, f_result, tier_name, comput )\n+\n+    # remove a comput\n+    if substract_name != "" and tier_name == "":\n+        if f_gamexml != "":\n+            writer.parse_gamexml( f_gamexml )\n+            writer.verif_name_prog( substract_name )\n+            writer.write( f_gamexml )\n+            if verbose > 1:\n+                print substract_name + " program from " + f_gamexml +" removed"\n+        else:\n+            automatisation( "", substract_name, 0, None )\n+\n+    if verbose > 0:\n+        print "%s finished successfully\\n" % (sys.argv[0].split("/")[-1])\n+        sys.stdout.flush()\n+\n+    return 0\n+\n+#------------------------------------------------------------------------------\n+\n+if __name__ == \'__main__\':\n+    main()\n'
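The '-t' option above expects a small mapping file. Judging from the help text and the parsing loop in this hunk, each non-comment line carries a tier name, a format and a table name, optionally followed by an alias, all whitespace-separated; '#' starts a comment and an empty line stops the parsing. A hypothetical example (table names are illustrative only):

# tier_name   format   table_name                 alias (optional)
TEannot       path     dummy_chr_TEannot_path     TEs
SSRannot      set      dummy_chr_SSR_set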
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/srptPhyML.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/srptPhyML.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+
+import user, os, sys, getopt, exceptions, logging, ConfigParser
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "*** Error: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+import pyRepet.sql.RepetJobMySQL
+import pyRepet.launcher.Launcher
+
+#-----------------------------------------------------------------------------
+
+def help():
+
+    """
+    Give the list of the command-line options.
+    """
+
+    print
+    print "usage:",sys.argv[0]," [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -g: name of the group identifier (same for all the jobs)"
+    print "     -q: name of the query directory"
+    print "     -S: suffix in the query directory  (default='*.fa')"
+    print "     -Q: name of the queue (on the cluster)"
+    print "     -d: absolute path to the temporary directory"
+    print "     -C: configuration file from TEdenovo or TEannot pipeline"
+    print "     -t: job table name (default=jobs)"
+    print "     -p: absolute path to project directory (if jobs management via files)"
+    print "     -c: clean (remove job launch files and job stdout)"
+    print "     -v: verbose (default=0/1/2)"
+    print
+
+#-----------------------------------------------------------------------------
+
+def main():
+
+    """
+    This program takes a directory as input and launches PhyML on each file in it.
+    """
+
+    groupid = ""
+    queryDir = ""
+    patternSuffix = "*.fa"
+    queue = ""
+    tmpDir = ""
+    configFileName = ""
+    jobTable = "jobs"
+    projectDir = ""
+    clean = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hg:q:S:Q:d:C:t:p:cv:")
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-g":
+            groupid = a
+        elif o == "-q":
+            queryDir = a
+        elif o == "-S":
+            patternSuffix = a
+        elif o == "-Q":
+            queue = a
+        elif o == "-d":
+            tmpDir = a
+        elif o == "-C":
+            configFileName = a
+        elif o == "-t":
+            jobTable = a
+        elif o == "-p":
+            projectDir = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if  groupid == "" or queryDir == "":
+        print "*** Error: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if os.environ["REPET_JOBS"] == "files" and projectDir == "":
+        print "*** Error: missing compulsory options for jobs management via files"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "\nbeginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    #--------------------------------------------------------------------------
+
+    # create the 'log' file
+
+    logFileName = "%s_pid%s.log" % ( groupid, os.getpid() )
+    handler = logging.FileHandler( logFileName )
+    formatter = logging.Formatter( "%(asctime)s %(levelname)s: %(message)s" )
+    handler.setFormatter( formatter )
+    logging.getLogger('').addHandler( handler )
+    logging.getLogger('').setLevel( logging.DEBUG )
+    logging.info( "started" )
+
+
+    # open a connection to the MySQL table
+
+    if configFileName != "":
+        if not os.path.exists( configFileName ):
+            print "*** Error: configuration file '%s' doesn't exist" % ( configFileName )
+            sys.exit(1)
+        config = ConfigParser.ConfigParser()
+        config.readfp( open(configFileName) )
+        host = config.get("repet_env","repet_host")
+        user = config.get("repet_env","repet_user")
+        passwd = config.get("repet_env","repet_pw")
+        dbname = config.get("repet_env","repet_db")
+    else:
+        host = os.environ["REPET_HOST"]
+        user = os.environ["REPET_USER"]
+        passwd = os.environ["REPET_PW"]
+        dbname = os.environ["REPET_DB"]
+
+    if os.environ["REPET_JOBS"] == "files":
+        jobdb = pyRepet.sql.RepetJobMySQL.RepetJob( dbname = projectDir + "/" + os.environ["REPET_DB"] )
+    elif os.environ["REPET_JOBS"] == "MySQL":
+        jobdb = pyRepet.sql.RepetJobMySQL.RepetJob( user, host, passwd, dbname )
+    else:
+        print "*** Error: REPET_JOBS is '%s'" % ( os.environ["REPET_JOBS"] )
+        sys.exit(1)
+
+
+    currentDir = os.getcwd()
+    if tmpDir == "":
+        tmpDir = currentDir
+
+    # launch PhyML on each fasta file in queryDir
+    cL = pyRepet.launcher.Launcher.PhyMlLauncher( jobdb=jobdb, query=queryDir, cdir=currentDir, tmpdir=tmpDir, job_table=jobTable, queue=queue, groupid=groupid, acro="PhyML" )
+    cL.run( patternSuffix )
+
+    # clean
+    if clean == True:
+        cL.clean()
+
+
+    logging.info( "finished" )
+
+    if verbose > 0:
+        print "%s finished successfully\n" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+#----------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    main()
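A hypothetical invocation of srptPhyML.py, using only the options documented in help() above; the group identifier, directory and queue names are illustrative:

import os

cmd = "srptPhyML.py"
cmd += " -g dummyProject_PhyML"    # group identifier shared by all jobs (hypothetical)
cmd += " -q alignedConsensusDir"   # directory containing the '*.fa' query files (hypothetical)
cmd += " -Q long.q"                # cluster queue name (hypothetical)
cmd += " -c"                       # remove job launch files and job stdout afterwards
cmd += " -v 1"
log = os.system( cmd )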
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/srptTableOverlap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/srptTableOverlap.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,319 @@\n+#!/usr/bin/env python\n+\n+import os\n+import sys\n+import getopt\n+import logging\n+import string\n+import ConfigParser\n+\n+from pyRepet.sql.TableAdaptator import *\n+import pyRepet.sql.RepetDBMySQL\n+import pyRepet.coord.Map\n+import pyRepet.coord.Path\n+import pyRepet.coord.Set\n+\n+\n+def help():\n+    print\n+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )\n+    print "options:"\n+    print "     -h: this help"\n+    print "     -q: query table"\n+    print "     -s: subject table"\n+    print "     -p: by path"\n+    print "     -t: table type comparison: qtype/stype where qtype=[map,set,path] and stype=[path,set,map]"\n+    print "     -c: configuration file from TEdenovo or TEannot pipeline"\n+    print "     -H: MySQL host (if no configuration file)"\n+    print "     -U: MySQL user (if no configuration file)"\n+    print "     -P: MySQL password (if no configuration file)"\n+    print "     -D: MySQL database (if no configuration file)"\n+    print\n+    \n+    \n+def pathOverlapByPath( qtable, qtype, stable, stype, db, fout, verbose=0 ):\n+    \n+    if qtype == "path":\n+        db.create_path_index( qtable )\n+        qtablePathAdaptator = TablePathAdaptator( db, qtable )\n+        path_num_list = qtablePathAdaptator.getPath_num()\n+    elif qtype == "set":\n+        db.create_set_index( qtable )\n+        qtableSetAdaptator = TableSetAdaptator( db, qtable )\n+        path_num_list = qtableSetAdaptator.getSet_num()\n+    else:\n+        string = "unknown query table type: %s" % ( qtype )\n+        if verbose > 0:\n+            print string\n+        logging.error( string )\n+        sys.exit(1)\n+    string = "nb of paths in query table: %i" % (len(path_num_list) )\n+    if verbose > 0:\n+        print string\n+    logging.info( string )\n+    \n+    if stype == "path":\n+        stablePathAdaptator = TableBinPathAdaptator( db, stable )\n+#        stablePathAdaptator=TablePathAdaptator(db,stable)\n+    elif stype == "set":\n+        stableSetAdaptator = TableBinSetAdaptator( db, stable )\n+#        stableSetAdaptator=TableSetAdaptator(db,stable)\n+    else:\n+        string = "unknown subject table type: %s" % ( stype )\n+        if verbose > 0:\n+            print string\n+        logging.error( string )\n+        sys.exit(1)\n+        \n+    count = 0\n+    for path_num in path_num_list:\n+        if qtype == "path":\n+            qlist = qtablePathAdaptator.getPathList_from_num( path_num )\n+            qlist = pyRepet.coord.Path.path_list_rangeQ2Set( qlist )\n+        elif qtype == "set":\n+            qlist = qtableSetAdaptator.getSetList_from_num( path_num )\n+            \n+        qlist.sort()\n+        qmin, qmax = pyRepet.coord.Set.set_list_boundaries( qlist )\n+        \n+        qmin = qmin - 1\n+        qmax = qmax + 1\n+        if stype == "path":\n+            slist = stablePathAdaptator.getPathList_from_qcoord(qlist[0].seqname.split()[0],qmin,qmax)\n+            slist = pyRepet.coord.Path.path_list_rangeQ2Set( slist )\n+        elif stype == "set":\n+            slist = stableSetAdaptator.getSetList_from_qcoord(qlist[0].seqname.split()[0],qmin,qmax)\n+            \n+        if len(slist) > 0:\n+            print "----------------------------------------"\n+            print "query:"\n+            pyRepet.coord.Set.set_list_show( qlist )\n+            qlist=pyRepet.coord.Set.set_list_merge( qlist )\n+            qsize=pyRepet.coord.Set.set_list_size( qlist )\n+            print "query size=",qsize\n+            \n+       
     slist_dict = pyRepet.coord.Set.set_list_split( slist )\n+            subj_names = ""\n+            for i,l in slist_dict.items():\n+                if subj_names != "":\n+                    subj_names += "|"\n+                subj_names += "%d:%s" % (i,l[0].name)\n+            subj_count = len(slist_dict.keys())\n+            \n+            print "subject:"\n+            pyRepet.coord.Set.set_list_show( slist )\n+            slist = pyRepet.coord.Set.set_list_merge( slist )\n+            ssize = pyRepet.coord.Set.set_list_size( slist )\n+   '..b'ion of query: %.3f" % ( float(sum_osize)/sum_qsize )\n+    string += "\\nsize of non-overlaps with the subject table: %i nt" % ( sum_non_osize )\n+    string += "\\n proportion of query: %.3f" % ( float(sum_non_osize)/sum_qsize )\n+    if verbose > 0:\n+        print string; sys.stdout.flush()\n+    logging.info( string )\n+    \n+    return sum_osize, sum_non_osize, sum_qsize\n+\n+\n+def main ():\n+    """\n+    This program computes the overlaps between two tables recording spatial coordinates.\n+    """\n+    qtable = ""\n+    stable = ""\n+    type = ""\n+    by_path = False\n+    configFileName = ""\n+    host = ""\n+    user = ""\n+    passwd = ""\n+    db = ""\n+    verbose = 0\n+    try:\n+        opts, args = getopt.getopt( sys.argv[1:], "hq:s:t:pc:H:U:P:D:v:" )\n+    except getopt.GetoptError:\n+        help()\n+        sys.exit(1)\n+    if len(args) != 0:\n+        help()\n+        sys.exit(1)\n+    for o,a in opts:\n+        if o == "-h":\n+            help()\n+            sys.exit(0)\n+        elif o == "-q":\n+            qtable = a\n+        elif o == "-s":\n+            stable = a\n+        elif o == "-t":\n+            type = a\n+        elif o == "-p":\n+            by_path = True\n+        elif o == "-c":\n+            configFileName = a\n+        elif o == "-H":\n+            host = a\n+        elif o == "-U":\n+            user = a \n+        elif o == "-P":\n+            passwd = a\n+        elif o == "-D":\n+            db = a\n+        elif o == "-v":\n+            verbose = int(a)\n+    if qtable=="" or stable=="" or \\\n+    (configFileName== "" and (host=="" or \\\n+                              user=="" or passwd=="" or db=="")):\n+        print "ERROR: missing compulsory options"\n+        help()\n+        sys.exit(1)\n+    if verbose > 0:\n+        print "START %s" % (sys.argv[0].split("/")[-1])\n+        sys.stdout.flush()\n+        \n+    if configFileName != "":\n+        config = ConfigParser.ConfigParser()\n+        config.readfp( open(configFileName) )\n+        host = config.get("repet_env","repet_host")\n+        user = config.get("repet_env","repet_user")\n+        passwd = config.get("repet_env","repet_pw")\n+        dbname = config.get("repet_env","repet_db")\n+        \n+    logfilename = qtable + "-" + stable + "-" + str(os.getpid()) + ".log"\n+    handler = logging.FileHandler( logfilename )\n+    formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")\n+    handler.setFormatter( formatter )\n+    logging.getLogger(\'\').addHandler(handler)\n+    logging.getLogger(\'\').setLevel(logging.DEBUG)\n+    logging.info("started")\n+    \n+    db = pyRepet.sql.RepetDBMySQL.RepetDB( user, host, passwd, dbname )\n+    \n+    qtype, stype = type.split("/")\n+    \n+    if not db.exist( qtable ):\n+        if not os.path.exists( qtable ):\n+            msg = "ERROR: neither table nor file \'%s\'" % ( qtable )\n+            sys.stderr.write( "%s\\n" % msg )\n+            sys.exit(1)\n+        tmp 
= qtable.replace(".","_")\n+        db.create_table( db, tmp, qtable, qtype )\n+        qtable = tmp\n+    if not db.exist( stable ):\n+        if not os.path.exists( stable ):\n+            msg = "ERROR: neither table nor file \'%s\'" % ( stable )\n+            sys.stderr.write( "%s\\n" % msg )\n+            sys.exit(1)\n+        tmp = stable.replace(".","_")\n+        db.create_table( db, tmp, stable, qtype )\n+        stable = tmp\n+        \n+    string = "input tables:"\n+    string += "\\nquery table: %s (\'%s\' format)" % ( qtable, qtype )\n+    string += "\\nsubject table: %s (\'%s\' format)" % ( stable, stype )\n+    logging.info( string )\n+    \n+    if by_path:\n+        fout = open(qtable+"_vs_"+stable+".dat","w")\n+        pathOverlapByPath( qtable, qtype, stable, stype, db, fout, verbose )\n+        fout.close()\n+    else:\n+        getOverlapAllPaths( qtable, qtype, stable, stype, db, verbose )\n+        \n+    logging.info("finished")\n+    \n+    if verbose > 0:\n+        print "END %s" % (sys.argv[0].split("/")[-1])\n+        sys.stdout.flush()\n+        \n+        \n+if __name__ == "__main__":\n+    main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tabFileReader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tabFileReader.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,600 @@\n+#!/usr/bin/env python\n+\n+###@file\n+# Read a file recording matches in the \'tab\' format (output from Matcher) and return the number of matches between queries and subjects being CC, CI, IC and II.\n+# A match is said to be CC (for complete-complete) when both query and subject match over x% of their entire respective length. By default, x=95.\n+#\n+# usage: tabFileReader.py [ options ]\n+# options:\n+#      -h: this help\n+#      -m: name of the file recording the matches (format=\'tab\', output from Matcher)\n+#      -q: name of the fasta file recording the queries\n+#      -s: name of the fasta file recording the subjects\n+#      -t: threshold over which the match is \'complete\', in % of the seq length (default=95)\n+#      -i: identity below which matches are ignored (default=0)\n+#      -l: length below which matches are ignored (default=0)\n+#      -o: overlap on query and subject below which matches are ignored (default=0)\n+#      -v: verbose (default=0/1)\n+\n+import sys\n+import getopt\n+from string import *\n+\n+import pyRepet.seq.BioseqDB\n+import pyRepet.util.Stat\n+\n+#TODO: remove case changes in headers (4 lower() method calls in this script) \n+\n+#----------------------------------------------------------------------------\n+\n+def help():\n+    print\n+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )\n+    print "options:"\n+    print "     -h: this help"\n+    print "     -m: name of the file recording the matches (format=\'tab\', output from Matcher)"\n+    print "     -q: name of the fasta file recording the queries"\n+    print "     -s: name of the fasta file recording the subjects"\n+    print "     -t: coverage threshold over which the match is \'complete\' (in %% of the seq length, default=95)"\n+    print "     -i: identity below which matches are ignored (default=0)"\n+    print "     -l: length below which matches are ignored (default=0)"\n+    print "     -o: overlap on query and subject below which matches are ignored (default=0)"\n+    print "     -I: identity threshold for \'CC\' matches (default=90)"\n+    print "     -E: E-value threshold for \'CC\' matches (default=1e-10)"\n+    print "     -T: coverage threshold for match length on query compare to subject length (default=90)"\n+    print "     -v: verbose (default=0/1)"\n+    print\n+\n+#----------------------------------------------------------------------------\n+\n+#here are the fields of a \'.tab\' file:\n+#[0]: query sequence name\n+#[1]: whole match start coordinate on the query sequence\n+#[2]: whole match end coordinate on the query sequence\n+#[3]: length on the query sequence\n+#[4]: length in percentage of the query sequence\n+#[5]: length on the query relative to the subject length in percentage\n+#[6]: subject sequence name\n+#[7]: whole match start coordinate on the subject sequence\n+#[8]: whole match end coordinate on the subject sequence\n+#[9]: length on the subject sequence\n+#[10]: length in percentage of the subject sequence\n+#[11]: BLAST E-value\n+#[12]: BLAST score\n+#[13]: identity percentage\n+#[14]: path\n+\n+class tabFileReader( object ):\n+\n+    def __init__( self, line ):\n+\n+        columns = line.split("\\t")\n+\n+        self.name_sbj = (columns[6])\n+        self.length_sbj = int(round(int(columns[9])/float(columns[10]),0))  #length of the subject\n+        self.prct_sbj = float(columns[10]) * 100  #prct_sbj = length of the match on the subject divided by the length of the subject * 100\n+        if int(columns[7]) < 
int(columns[8]):\n+            self.start_sbj = int(columns[7])                        #start of the match on the subject\n+            self.end_sbj = int(columns[8])                          #end of the match on the subject\n+        else:\n+            self.start_sbj = int(columns[8])\n+            self.end_sbj = int(columns[7])\n+        self.sbj_dist_ends = int(columns[9])                    #length on the subject that matches with the query\n+\n+        self.name_qry = columns[0]\n+        self.length_qry = int(round(int(columns[3])/float(col'..b'\n+\n+    outFile.write( "\\nNumber of matches with L < %i%% for subject & query: %i\\n" % ( thresholdCoverage, len(ListMatches_inf_sbjqry) ) )\n+    outFile.write( "    Number of different subjects in that case: %s\\n" % (len(Sbj2Qry_inf_sbjqry)))\n+    outFile.write( "        Among them, number of different subjects having exactly one match: %s\\n" % (len(UniqSbj_inf_sbjqry)))\n+    outFile.write( "        Among them, number of different subjects having more than one match: %s\\n" % (len(RedunSbj_inf_sbjqry)))\n+    outFile.write( "    Number of different queries in that case: %s\\n" % (len(Qry2Sbj_inf_sbjqry)))\n+    outFile.write( "        Among them, number of different queries having exactly one match: %s\\n" % (len(UniqQry_inf_sbjqry)))\n+    outFile.write( "        Among them, number of different queries having more than one match: %s\\n" % (len(RedunQry_inf_sbjqry)))\n+    \n+    \n+    # For the elements already counted in the matches with L >= 95% for subject & query, remove them from the other dictionnaries\n+    rmv_Sbj2Qry = remove( Sbj2Qry_all, Sbj2Qry_sup_sbjqry, Sbj2Qry_sup_sbj, Sbj2Qry_sup_qry, Sbj2Qry_inf_sbjqry )\n+    rmv_Qry2Sbj = remove( Qry2Sbj_all, Qry2Sbj_sup_sbjqry, Qry2Sbj_sup_sbj, Qry2Sbj_sup_qry, Qry2Sbj_inf_sbjqry )\n+    \n+    outFile.write("\\n\\nAfter removal of the subjects/queries already counted in the matches with L >= %i%% for them:\\n" % ( thresholdCoverage ) )\n+    \n+    outFile.write( "\\nMatches with L >= %i%% for subject and L < %i%% for query:\\n" % ( thresholdCoverage, thresholdCoverage ) )\n+    outFile.write( "    # Number of different subjects in the \'CI\' case: %s (%.2f%%)\\n" % ( len(rmv_Sbj2Qry[0]), 100*len(rmv_Sbj2Qry[0])/float(nbSbj) ) )\n+    outFile.write( "    # Number of different queries in the \'CI\' case: %s (%.2f%%)\\n" % ( len(rmv_Qry2Sbj[0]), 100*len(rmv_Qry2Sbj[0])/float(nbQry) ) )\n+    \n+    outFile.write( "\\nMatches with L < %i%% for subject and L >= %i%% for query:\\n" % ( thresholdCoverage, thresholdCoverage ) )\n+    outFile.write( "    # Number of different subjects in the \'IC\' case: %s (%.2f%%)\\n" % (len(rmv_Sbj2Qry[1]), 100*len(rmv_Sbj2Qry[1])/float(nbSbj) ) )\n+    outFile.write( "    # Number of different queries in the \'IC\' case: %s (%.2f%%)\\n" % (len(rmv_Qry2Sbj[1]), 100*len(rmv_Qry2Sbj[1])/float(nbQry) ) )\n+    \n+    outFile.write( "\\nMatches with L < %i%% for subject & query:\\n" % ( thresholdCoverage ) )\n+    outFile.write( "    # Number of different subjects in the \'II\' case: %s (%.2f%%)\\n" % (len(rmv_Sbj2Qry[2]), 100*len(rmv_Sbj2Qry[2])/float(nbSbj) ) )\n+    outFile.write( "    # Number of different queries in the \'II\' case: %s (%.2f%%)\\n" % (len(rmv_Qry2Sbj[2]), 100*len(rmv_Qry2Sbj[2])/float(nbQry) ) )\n+    \n+    outFile.write("\\n==========================================================================\\n")\n+    \n+    write_output( outFile, \'CC\', Sbj2Qry_sup_sbjqry, dSbj2Cat, Qry2Sbj_sup_sbjqry, dQry2Cat )\n+    \n+    
outFile.write("\\n==========================================================================\\n")\n+    \n+    write_output( outFile, \'CI\', rmv_Sbj2Qry[0], dSbj2Cat, rmv_Qry2Sbj[0], dQry2Cat )\n+    \n+    outFile.write("\\n==========================================================================\\n")\n+    \n+    write_output( outFile, \'IC\', rmv_Sbj2Qry[1], dSbj2Cat, rmv_Qry2Sbj[1], dQry2Cat )\n+    \n+    outFile.write("\\n==========================================================================\\n")\n+    \n+    write_output( outFile, \'II\', rmv_Sbj2Qry[2], dSbj2Cat, rmv_Qry2Sbj[2], dQry2Cat )\n+    \n+    outFile.write("\\n==========================================================================\\n")\n+    \n+    outFile.close()\n+    \n+    writeSubjectCategory( dSbj2Cat )\n+    writeQueryCategory( dQry2Cat )\n+    \n+    if verbose > 0:\n+        print "END %s" % (sys.argv[0].split("/")[-1])\n+        sys.stdout.flush()\n+        \n+    return 0\n+\n+#-----------------------------------------------------------------------------------------------------\n+\n+if __name__ == "__main__":\n+    main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/MockFastaForReplaceGreaterThanSymbolInFastaHeader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/MockFastaForReplaceGreaterThanSymbolInFastaHeader.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,59 @@
+class MockFastaForReplaceGreaterThanSymbolInFastaHeader(object):
+ def write(self, inFileName):
+ f = open(inFileName, 'w')
+ f.write(">sp|P0A2V0|NDVA_AGRT5 Beta-(1-->2)glucan export ATP-binding/permease protein ndvA OS=Agrobacterium tumefaciens (strain C58 / ATCC 33970) GN=ndvA PE=3 SV=1\n")
+ f.write("MTLFQVYTRALRYLTVHKWRVAVVVIANVILAAITIAEPVLFGRIIDAISSGTNVTPILI\n")
+ f.write("LWAGFGVFNTVAYVAVAREADRLAHGRRATLLTEAFGRIISMPLSWHHLRGTSNALHTLL\n")
+ f.write("RASETLFGLWLEFMRTHLATFVALVLLIPTAMAMDLRLSFVLIGLGIVYWFIGKWVMGRT\n")
+ f.write("KDGQASVEEHYHSVFAHVSDSISNVSVLHSYNRIEAETKALKSFTEKLLSAQYPVLDWWA\n")
+ f.write("FASALNRTASTVSMMIILVIGTVLVKNGELRVGDVIAFIGFANLLIGRLDQMRQFVTQIF\n")
+ f.write("EARAKLEDFFVLEDAVKEREEPGDARELSNVSGTVEFRNINFGFANTKQGVHDVSFTAKA\n")
+ f.write("GETVAIVGPTGAGKTTLINLLQRVYDPDSGQILIDGTDISTVTKNSLRNSIATVFQDAGL\n")
+ f.write("LNRSIRENIRLGRETATDAEVVEAAAAAAATDFIDSRINGYLTQVGERGNRLSGGERQRI\n")
+ f.write("AIARAILKNAPILVLDEATSALDVETEARVKAAVDALRKNRTTFIIAHRLSTVRDADLVL\n")
+ f.write("FLDQGRIIEKGTFDELTQRGGRFTSLLRTSGLLTEDEGQQPRPKAIAS\n")
+ f.write(">sp|Q694C1|ABC3G_GORGO DNA dC->dU-editing enzyme APOBEC-3G OS=Gorilla gorilla gorilla GN=APOBEC3G PE=2 SV=1\n")
+ f.write("MTPQFRNTVERMYRDTFSYNFNNRPILSRRNTVWLCYEVKTKDPSRPPLDAKIFRGQVYS\n")
+ f.write("ELKYHPEMRFFHWFSKWRKLHRDQEYEVTWYISWSPCTKCTRNVATFLAEDPKVTLTIFV\n")
+ f.write("ARLYYFWDQDYQEALRSLCQKRDGPRATMKIMNYDEFQHCWSKFVYSQRELFEPWNNLPK\n")
+ f.write("YYMLLHIMLGEILRHSMDPPTFTSNFNNEHWVRGRHETYLCYEVERLHNDTWVLLNQRRG\n")
+ f.write("FLCNQAPHKHGFLEGRHAELCFLDVIPFWKLDLHQDYRVTCFTSWSPCFSCAQEMAKFIS\n")
+ f.write("NKKHVSLCIFAARIYDDQGRCQEGLRTLAEAGAKISIMTYSEFKHCWDTFVYHQGCPFQP\n")
+ f.write("WDGLEEHSQALSGRLQAILQNQGN\n")
+ f.write(">sp|P0A4M1|ZTOX_ENTFA Zeta-toxin OS=Enterococcus faecalis PE=3 SV=1\n")
+ f.write("MANITDFTEKQFEDRLEKNVERLTKNRLAVESPTAFLLGGQPGSGKTSLRSAISEETQGN\n")
+ f.write("VVIIDNDTFKQQHPNFDELVKLYEKDVVKYVTPYSNRMTEAIISRLRDKGYNLVIEGTGR\n")
+ f.write("TTDVPIQTATMLQAKDYETKMYVMAVPKINSYLGTIERYETMYADDPMTARATPKQAHDI\n")
+ f.write("VVKNLPTNLETLHKTGLFSDIRLYNREGVKLYSSLETPSISPKETLERELNRKVSGKEIQ\n")
+ f.write("PTLERIEQKMVQNQHQETPEFKAIQQKMESLQPPTPPIPKTPKLPGI\n")
+ f.close()
+
+class MockFastaForReplaceGreaterThanSymbolInFastaHeader_withoutGreaterThan(object):
+ def write(self, inFileName):
+ f = open(inFileName, 'w')
+ f.write(">sp|P0A2V0|NDVA_AGRT5 Beta-(1 to 2)glucan export ATP-binding/permease protein ndvA OS=Agrobacterium tumefaciens (strain C58 / ATCC 33970) GN=ndvA PE=3 SV=1\n")
+ f.write("MTLFQVYTRALRYLTVHKWRVAVVVIANVILAAITIAEPVLFGRIIDAISSGTNVTPILI\n")
+ f.write("LWAGFGVFNTVAYVAVAREADRLAHGRRATLLTEAFGRIISMPLSWHHLRGTSNALHTLL\n")
+ f.write("RASETLFGLWLEFMRTHLATFVALVLLIPTAMAMDLRLSFVLIGLGIVYWFIGKWVMGRT\n")
+ f.write("KDGQASVEEHYHSVFAHVSDSISNVSVLHSYNRIEAETKALKSFTEKLLSAQYPVLDWWA\n")
+ f.write("FASALNRTASTVSMMIILVIGTVLVKNGELRVGDVIAFIGFANLLIGRLDQMRQFVTQIF\n")
+ f.write("EARAKLEDFFVLEDAVKEREEPGDARELSNVSGTVEFRNINFGFANTKQGVHDVSFTAKA\n")
+ f.write("GETVAIVGPTGAGKTTLINLLQRVYDPDSGQILIDGTDISTVTKNSLRNSIATVFQDAGL\n")
+ f.write("LNRSIRENIRLGRETATDAEVVEAAAAAAATDFIDSRINGYLTQVGERGNRLSGGERQRI\n")
+ f.write("AIARAILKNAPILVLDEATSALDVETEARVKAAVDALRKNRTTFIIAHRLSTVRDADLVL\n")
+ f.write("FLDQGRIIEKGTFDELTQRGGRFTSLLRTSGLLTEDEGQQPRPKAIAS\n")
+ f.write(">sp|Q694C1|ABC3G_GORGO DNA dC to dU-editing enzyme APOBEC-3G OS=Gorilla gorilla gorilla GN=APOBEC3G PE=2 SV=1\n")
+ f.write("MTPQFRNTVERMYRDTFSYNFNNRPILSRRNTVWLCYEVKTKDPSRPPLDAKIFRGQVYS\n")
+ f.write("ELKYHPEMRFFHWFSKWRKLHRDQEYEVTWYISWSPCTKCTRNVATFLAEDPKVTLTIFV\n")
+ f.write("ARLYYFWDQDYQEALRSLCQKRDGPRATMKIMNYDEFQHCWSKFVYSQRELFEPWNNLPK\n")
+ f.write("YYMLLHIMLGEILRHSMDPPTFTSNFNNEHWVRGRHETYLCYEVERLHNDTWVLLNQRRG\n")
+ f.write("FLCNQAPHKHGFLEGRHAELCFLDVIPFWKLDLHQDYRVTCFTSWSPCFSCAQEMAKFIS\n")
+ f.write("NKKHVSLCIFAARIYDDQGRCQEGLRTLAEAGAKISIMTYSEFKHCWDTFVYHQGCPFQP\n")
+ f.write("WDGLEEHSQALSGRLQAILQNQGN\n")
+ f.write(">sp|P0A4M1|ZTOX_ENTFA Zeta-toxin OS=Enterococcus faecalis PE=3 SV=1\n")
+ f.write("MANITDFTEKQFEDRLEKNVERLTKNRLAVESPTAFLLGGQPGSGKTSLRSAISEETQGN\n")
+ f.write("VVIIDNDTFKQQHPNFDELVKLYEKDVVKYVTPYSNRMTEAIISRLRDKGYNLVIEGTGR\n")
+ f.write("TTDVPIQTATMLQAKDYETKMYVMAVPKINSYLGTIERYETMYADDPMTARATPKQAHDI\n")
+ f.write("VVKNLPTNLETLHKTGLFSDIRLYNREGVKLYSSLETPSISPKETLERELNRKVSGKEIQ\n")
+ f.write("PTLERIEQKMVQNQHQETPEFKAIQQKMESLQPPTPPIPKTPKLPGI\n")
+ f.close()
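The two fixtures above differ only in their description lines: '-->' and '->' in the first become ' to ' in the second. A hedged guess at the rewrite being tested, inferred from the fixtures rather than from the ReplaceGreaterThanSymbolInFastaHeader implementation itself:

import re

def stripGreaterThanFromHeader( header ):
    # keep the leading '>' of the FASTA header, rewrite arrows in the description
    body = header[1:] if header.startswith(">") else header
    body = re.sub( r"-+>", " to ", body )
    return ">" + body

# ">...Beta-(1-->2)glucan..." becomes ">...Beta-(1 to 2)glucan..."
# ">...DNA dC->dU-editing..." becomes ">...DNA dC to dU-editing..."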
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_AlignTEOnGenomeAccordingToAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_AlignTEOnGenomeAccordingToAnnotation.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,304 @@\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.Path import Path\n+import os\n+import unittest\n+from commons.tools.AlignTEOnGenomeAccordingToAnnotation import AlignTEOnGenomeAccordingToAnnotation\n+\n+class Test_AlignTEOnGenomeAccordingToAnnotation(unittest.TestCase):\n+\n+    def test_alignBioseqWithNWalign(self):\n+        iBioseq1 = Bioseq("dmel_chr4", "TAATCGTAGAGGAAAAGGGTATGAGAGATTTGTTTAGAAGTACGTTACTAGTAGAAGAAA")\n+        iBioseq2 = Bioseq("DmelChr4-B-R1-Map4_NoCat", "TTTCTTCTACCTATTACATACTTTTCAACAAATCTAGCATACCCTTTTACTCTACGAGTA")\n+        iAlignedBioseq1 = Bioseq("dmel_chr4 Score=58 Identity=0.7", "TAATCGTAGAGGAAAAGGGTATGAGAGATTTGTT-TAGA-AGTACGTTACTAGTAGAAGAAA----------------------------")\n+        iAlignedBioseq2 = Bioseq("DmelChr4-B-R1-Map4_NoCat Score=58 Identity=0.7", "----------------------------TTTCTTCTACCTATTACAT-ACTTTTC-AACAAATCTAGCATACCCTTTTACTCTACGAGTA")\n+        expiAlignedBioseqDB = AlignedBioseqDB()\n+        expiAlignedBioseqDB.add(iAlignedBioseq1)\n+        expiAlignedBioseqDB.add(iAlignedBioseq2)\n+        \n+        iATEOGATA = AlignTEOnGenomeAccordingToAnnotation()\n+        obsiAlignedBioseqDB = iATEOGATA.alignBioseqWithNWalign(iBioseq1, iBioseq2)\n+        \n+        self.assertEquals(expiAlignedBioseqDB, obsiAlignedBioseqDB)\n+    \n+    def test_alignSeqAccordingToPathAndBuildAlignedSeqTable_one_path(self):\n+        iBioseq1 = Bioseq("dmel_chr4", "TTTCTTCTACTAGTAACGTACTTCTAAACAAATCTCTCATACCCTTTTCCTCTACGATTA")\n+        iBioseq2 = Bioseq("DmelChr4-B-R1-Map4_NoCat", 
"TTATTTGTCTCCGAACTTCTTACTGACAATGGCACCAAAATATGATCAGTGTTTGTTGGTATTAATGGCCTTTCATCTAATGGGGGAGTAGCACATCCGAGTCCGTTTTCCAGAATTTCGGCATCAATGGGGGTGGAATTTCCAAAAACACATCGGTATTTTGCAAGAACAAGAAACTTATTATTAATTACAGCTATGTATCCAGTTACACCAATAAATTTCCGTCGCCGTACTTGTTAGTCTTAATAATACCCGTACTCGTAGAGTAAAAGGGTATGCTAGATTTGTTGAAAAGTATGTAATAGGTAGAAGAAAGCGTTTTCGACCATATCAAGTATATATATATTTTTGATAAGGATTAATAGCCGAGTCGAAGTGGCCATGCCATAGACGCATAGAAGCATAGACGCAGCGTAGAGATCCATGTTTCCCACTATAACGCCCACAAACCGCCCAAAATAAAATCGAAATGTTTCGATTTTTCTTTGTTTTATTGTTTGCCTTGTCAATTTCTATGTATTTTATACCCAAAACACTTTGGCCAAGGCCAAGATGTAAGGAAGCATCTACAACACGCTTTTGAACACATCTTTTAATATCGCAGATAAATGATGAAATTTCTCGTTCACATTTCCACTACCATTCCCCGGCTCAGGGGAAACTCAACCATAGCATTCTCTCTTGCTTTGACTTCAGCATAGTGACGATAAGCACCAAGAGTCTGAATTATAAGTAGACTACCGTATAGGACTCATAAAACTGAAAAGCTGCTTTTATATTGCGATCAAAAAAGAAATTAATATTACGAAAATTGCTGTTGCTGGGGCAGGTGGATCAACATATCCCAACATATATTTAACTGTATTGCATTTTACCCTAAAATACGCTTTAATTAATGGTTTAAAAAAAACAGACTGTTTCTATTTTCTATTTCTATTTATTTTATTCTTGGAAAGTTATTTTTTTAATTTTTTGACAAGAATCTTTGAATTTACTTGCTTGCAATCCCACTAGCTAAGTGACGGGTATATAGAGTTTTCCCTTATTTTAATAATCTAGGGTCGCCAGTTTGTCCGTCAGTTCTTCGCTAACGAAGTTCGCTGTTGGCTCGGTATCGACTGTGTAGTCTAGACTCCCTTCGTTGGTCCTCCTGCTGCGTTTCGTCCTTGCTAAGGGCTTTGCCCTCTGGCTTCGTAGTCTTCCTGGTGTGTAGGGTCTATTTTGTTTCCATCTCTGAGTAAGTATAAATAAATTTGGCTTGATTTTTATACTTTAGAGAGCCGTAACGGATAATGAAATAACGCCCCCATACTCTAGATACAGTTTTATCAACAAAGAATTAACCGTATATTGTAATTTGACAAAAATGTTGTATATTTTTCTTCGTAATAAGATCGATAGGGCATAGTTCTTATAATACAAAGCCGGCTTTTCTTACATACGTATAAGTCCATACATTTTTTAAACTAAAATTTATAATTTTTTTGACAATTGTAAATAAATTGTTATCTTGCTAAAAAATAAAAAAGTTACAAAAATTTTTTAAAAATCATATTGATGTCAGCAATTAGATGTGCAATCACCATCTAGTGTAGCAGCAACATTTCCTAATTTTCGTGAAAGATGCATCTTTCGGCAATTAATATCAGTTTCAAGTGATTCCATAATCTTTAAACAAAGGTAAATAAGTGGATTAGACGAAAAAAATTACGAAAATAGGTTAAATAGGTTACCTGATCCTTATATTTAGTTATATAAATGTATAACTCTTTTAATGCAGAAATAGTATCAAACTCTTCGTTTTGCCTTACTGATAGCTGTTGCATAGCTGTACTCATTTCCTGGTCGCTTATTTGTGGAAGCCTTGATACATCGCGGTAAAACTCTTTGACCATAATTCTATAGTTTGGAATATCCTGTCATATAAACCAGTTAATTAAATATTAATGATTTTATTAAAATATTTATACCATTACTTATAAATATTTAAATTTATTATTTGTGTATAAATTTGATAAAGGTAATAATCTAAGTAATATATACCAATATATATGCATATATGTATTCACATTTAAAAAAATGAAAGGGAAAAGAATTTGAAGTCTAATTGTATAAATTGTTTTTAATTTGTTTTATTCTCATTAGACCTCGATAATAGGTCTTTTGTGTGATTAATTAATTTGATAGACTCCTTACTTACAAGAAAAGAAGAAA'..b'")\n+            f.write("AGTGACGGGTATATAGAGTTTTCCCTTATTTTAATAATCTAGGGTCGCCAGTTTGTCCGT\\n")\n+            f.write("CAGTTCTTCGCTAACGAAGTTCGCTGTTGGCTCGGTATCGACTGTGTAGTCTAGACTCCC\\n")\n+            f.write("TTCGTTGGTCCTCCTGCTGCGTTTCGTCCTTGCTAAGGGCTTTGCCCTCTGGCTTCGTAG\\n")\n+            f.write("TCTTCCTGGTGTGTAGGGTCTATTTTGTTTCCATCTCTGAGTAAGTATAAATAAATTTGG\\n")\n+            f.write("CTTGATTTTTATACTTTAGAGAGCCGTAACGGATAATGAAATAACGCCCCCATACTCTAG\\n")\n+            f.write("ATATAGTTTTATCAACAAAGAATTAACCGTATATTGTAATTTGACAAAAATGTTGTATAT\\n")\n+            f.write("TTTTCTTCGTAATAAGATCGATAGGGCATAGTTCTTATAATACAAAGCCGGCTTCTCTTA\\n")\n+            f.write("CATACGTATAAGTCCATACATTTTTTAAACTAAAATTTATAATTTTTTTGACAATTGTAA\\n")\n+            f.write("ATAAATTGTTATCTTGCTAAAAAATAAAAAAGTTACAAAAATTTTTAAAAAATCATATTG\\n")\n+            f.write("ATGTCAGCAATTAGATGTGCAATCACCATCTAGTGTAGCAGCAACATTTCCTAATTTTCG\\n")\n+            f.write("TGAAAGATGCATCTTTCGGCAATTAATATCAGTTTCAAGTGATTCCATAATCTTTAAACA\\n")\n+            f.write("AAGGTAAATAAGTGGATTAGACGAAAAAAATTACGAAAATAGGTTAAATAGGTTACCTGA\\n")\n+            f.write("TCCTTATATTTAGTTATATAAATGTATAACTCTTTTAATGCAGAAATAGTATCAAACTCT\\n")\n+            f.write("TCGTTTTGCCTTACTGATAGCTGTTGCATAGCTGTACTCATTTCCTGGTCGCTTATTTGT\\n")\n+            
f.write("GGAAGCCTTGATACATCGCGGTAAAACTCTTTGACCATAATTCTATAGTTTGGAATATCC\\n")\n+            f.write("TGTCATATAAACCAGTTAATTAAATATTAATGATTTTATTAAAATATTTATACCATTACT\\n")\n+            f.write("TATAAATATTTAAATTTATTATTTGTGTATAAATTTGATAAAGGTAATAATCTAAGTAAT\\n")\n+            f.write("ATATACCAATATATATGCATATATGTATTCACATTTAAAAAAATGAAAGGGAAAAGAATT\\n")\n+            f.write("TGAAGTCTAATTGTATAAATTGTTTTTAATTTGTTTTATTCTCATTAGACCTCGATAATA\\n")\n+            f.write("GGTCTTTTGTGTGATTAATTAATTTGATAGACTCCTTACTTACAAGAAAAGAAGAAAACA\\n")\n+            f.write("GAAGGTTTACCAGGTGTTCCCTTTATCCGTTACTCCAGTAAAGCGAGGGCGAACTAAATT\\n")\n+            f.write("TTTAAGCGTTGAAGAGGCCGTCTTGGTATTGGATCTTCCATGCTTAATGTCAACGTTCTA\\n")\n+            f.write("TATTTTCATACGGACGGACATGGCCAGATCAAAGACACTAGAATAACAAGATGCGTAACG\\n")\n+            f.write("CCATACGATTTTTTGGCACACAATTTTTTGGCCGTGGCTCTAGAGGTGGCTCCAGGCTCT\\n")\n+            f.write("CTCGAATTTTTGTTAGAGAGCGAGAGAGCGAAGAGCGCTACAGCGAACCGCTCTTTTCTA\\n")\n+            f.write("CGCATACAGTGATAGCAGACAACTTTATGTGCGCACACGTATGCTCATGCATTGTAAATT\\n")\n+            f.write("TGACAAAATATGCCCTTCACCGTAGAAGTTCTTAGACTTTAAATCTATATTATTTTTGAT\\n")\n+            f.write("CAATTGGCACCATGCGAAAAATTCTTGTTTTGCATTGCCTTAACGTTATTATTATTTGAA\\n")\n+            f.write("AATAGATTAGAAATAGCCAAATCTATGTACATAATATCACAAAAATAAATTTCAAAAATG\\n")\n+            f.write("ACTTTATATAAGAATATTTGTCATTAGAGTATTCATCTTGCGGGGTGTGAAAAATGAATA\\n")\n+            f.write("AGGCAATGATTGTTGCTTGTGTCCGTCCGCACTTCGTGCCTCAAGATATGACCAAAACAA\\n")\n+            f.write("AGACACTAGAATAATTCTAGTGTCTTTGATGTGACTTTTGCAATAAACAGTTTTCATATT\\n")\n+            f.write("TTTATTTATTTTACAAATTTTTATTTTCTACTTCGTATTATTTTTATGAAATATTTATTT\\n")\n+            f.write("CTCGATGTAATGTATTCCTTTTGTTATAAGTAGTTATAATAATTTATATTTTACTTCCTT\\n")\n+            f.write("CAATATCACAAAATAAATTTCAAGTATGGCTTTATATTAGAATAATTGTCATTAAATTAT\\n")\n+            f.write("TCAGCTTGGGACGTGGGAAAAATTAGAGTAGACATGTCTAGTAGACATCGATTCTTAGGT\\n")\n+            f.write("GCTTCTGACCGCACGTCGTGCCTCAAGAAATCAATTTCGCATATTTATACCCATTATTTC\\n")\n+            f.write("TAGAGTAAAAGGGTATACTACATTCGTTGAAAAGTATGTAACAGGCAGAAGAAAGCGTTT\\n")\n+            f.write("CCGACTATATAATGTATAAAGATCAGGATCAACAGCCGAGTCGATCTGGCCATGTCCGTC\\n")\n+            f.write("TGTCCGTATGAACGTCGAGGTCTCAGGAACTATAAAAGCAAGAAGGTAGAGATTATGCAT\\n")\n+            f.write("ACATATTATGCATACATATTGCCCGTTGACCCATGTTGTCACGCCCACTCTAACGTCCAC\\n")\n+            f.write("AAACCGCACAAAACTGCCACGCCCACAAATTCAAAAAATGTAAATGTAATTGACCAAATT\\n")\n+            f.write("TAACAAAACTCTAAAAATGTAACTTTGTTTCTTAGATCAAAATTTAGACCAGAAAATCGT\\n")\n+            f.write("CTTCATCTTCTAATGTTTTTACTCACACAAGAAAGCAAATTCTATTTTTAGATTTTAAAG\\n")\n+            f.write("TACTCCATTTTACGCGAGCGGAGAGAGAGAGCAATTTTGGCCGTCACCAAAAAAGTGTCT\\n")\n+\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_CalcCoordCumulLength.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_CalcCoordCumulLength.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,74 @@
+import unittest
+import os
+import time
+from commons.tools.CalcCoordCumulLength import CalcCoordCumulLength
+from commons.core.utils.FileUtils import FileUtils
+from pyRepet.util.Stat import Stat
+
+
+class Test_CalcCoordCumulLength( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = CalcCoordCumulLength()
+        self._uniqId = time.strftime("%Y%m%d%H%M%S")
+        
+        
+    def tearDown( self ):
+        self._i = None
+        self._uniqId = None
+        
+        
+    def test_mergeCoordinates( self ):
+        inFileName = "dummyInFile_%s"  %( self._uniqId )
+        inF = open( inFileName, "w" )
+        inF.write( "TE1\tchr1\t101\t200\n" )
+        inF.write( "TE2\tchr1\t401\t600\n" )
+        inF.write( "TE2\tchr1\t451\t500\n" )
+        inF.close()
+        expFileName = "dummyExpFile_%s"  %( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write( "TE1\tchr1\t101\t200\n" )
+        expF.write( "TE2\tchr1\t401\t600\n" )
+        expF.close()
+        self._i.setInputFileName( inFileName )
+        obsFileName = self._i.mergeCoordinates()
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ inFileName, expFileName, obsFileName ]: os.remove( f )
+        
+        
+    def test_getStatsPerChr( self ):
+        inFileName = "dummyInFile_%s"  %( self._uniqId )
+        inF = open( inFileName, "w" )
+        inF.write( "TE1\tchr1\t101\t200\n" )
+        inF.write( "TE2\tchr1\t401\t600\n" )
+        inF.write( "TE1\tchr2\t1301\t1600\n" )
+        inF.close()
+        dExp = { "chr1": Stat(), "chr2": Stat() }
+        dExp["chr1"].add( 200-101+1 )
+        dExp["chr1"].add( 600-401+1 )
+        dExp["chr2"].add( 1600-1301+1 )
+        dObs = self._i.getStatsPerChr( inFileName )
+        self.assertEqual( dObs, dExp )
+        
+        
+    def test_zRunAsScript( self ):
+        cDir = os.getcwd()
+        inFileName = "dummyInFile_%s"  %( self._uniqId )
+        inF = open( inFileName, "w" )
+        inF.write( "TE1\tchr1\t101\t200\n" )
+        inF.write( "TE2\tchr1\t401\t600\n" )
+        inF.write( "TE2\tchr1\t451\t500\n" )
+        inF.close()
+        outFileName = "dummyOutFile_%s"  %( self._uniqId )
+        cmd = "CalcCoordCumulLength.py"
+        cmd += " -i %s" % ( inFileName )
+        cmd += " -o %s" % ( outFileName )
+        cmd += " -v 0"
+        log = os.system( cmd )
+        self.assertTrue( log == 0 )
+        
+        for f in [ inFileName, outFileName ]: os.remove( f )
+        os.chdir( cDir )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
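test_mergeCoordinates above expects the nested match 'TE2 chr1 451-500' to disappear into 'TE2 chr1 401-600', so the behaviour under test amounts to classic interval merging per TE and chromosome. A sketch of that idea, as an illustration rather than the CalcCoordCumulLength code:

def mergeIntervals( intervals ):
    merged = []
    for start, end in sorted( intervals ):
        if merged and start <= merged[-1][1]:
            # overlapping or nested interval: extend the previous one
            merged[-1] = ( merged[-1][0], max( merged[-1][1], end ) )
        else:
            merged.append( ( start, end ) )
    return merged

# mergeIntervals( [ (401, 600), (451, 500) ] ) gives [ (401, 600) ]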
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_ChangeSequenceHeaders.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_ChangeSequenceHeaders.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,428 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software.  You can  use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and  rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty  and the software\'s author,  the holder of the\n+# economic rights,  and the successive licensors  have only  limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading,  using,  modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean  that it is complicated to manipulate,  and  that  also\n+# therefore means  that it is reserved for developers  and  experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and,  more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders\n+\n+\n+class Test_ChangeSequenceHeaders( unittest.TestCase ):\n+    \n+    def setUp( self ):\n+        self._i = ChangeSequenceHeaders()\n+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+        \n+        \n+    def tearDown( self ):\n+        self._i = None\n+        self._uniqId = None\n+        \n+        \n+    def test_script_no_input_file( self ):\n+        cDir = os.getcwd()\n+        \n+        inFile = "dummyInFaFile_%s" % ( self._uniqId )\n+        \n+        obsFile = "dummyObsFile_%s" % ( self._uniqId )\n+        \n+        cmd = "python ../ChangeSequenceHeaders.py"\n+        cmd += " -i %s" % ( inFile )\n+        cmd += " -f fasta"\n+        cmd += " -s 1"\n+        cmd += " -p TE"\n+        cmd += " -o %s" % ( obsFile )\n+        exitStatus = os.system( cmd )\n+        \n+        self.assertFalse( exitStatus == 0 )\n+        \n+        os.chdir( cDir )\n+        \n+        \n+    def test_shortenSequenceHeadersForFastaFile_fasta_script( self ):\n+        cDir = os.getcwd()\n+        \n+        inFile = "dummyInFaFile_%s" % ( self._uniqId )\n+        inF = open( inFile, "w" )\n+        inF.write( ">DmelChr4-B-G387-MAP16\\nATGTACGATGACGATCAG\\n" )\n+        inF.write( ">consensus524\\nGTGCGGATGGAACAGT\\n" )\n+        inF.close()\n+        \n+        linkFile = "dummyLinkFile_%s" % ( self._uniqId )\n+        \n+        expFile = "dummyExpFile_%s" % ( self._uniqId )\n+        expF = open( expFile, "w" )\n+        expF.write( ">TE1\\nATGTACGATGACGATCAG\\n" )\n+        expF.write( ">TE2\\nGTGCGGATGGAACAGT\\n" )\n+        expF.close()\n+        \n+        obsFile = "dummyObsFile_%s" % ( self._uniqId )\n+        \n+        cmd = "python 
../ChangeSequenceHeaders.py"\n+        cmd += " -i %s" % ( inFile )\n+        cmd += " -f fasta"\n+        cmd += " -s 1"\n+        cmd += " -p TE"\n+        cmd += " -l %s" % ( linkFile )\n+        cmd += " -o %s" % ( obsFile )\n+        exitStatus = os.system( cmd )\n+        \n+        self.assertTrue( exitStatus == 0 )\n+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ inFile, linkFile, expFile, obsFile ]:\n+            os.remove( f )\n+        os.chdir( cDir )\n+        \n+        \n+    def test_retrieveIni'..b' ):\n+        cDir = os.getcwd()\n+        \n+        linkFile = "dummyLinkFile_%s" % ( self._uniqId )\n+        linkF = open( linkFile, "w" )\n+        linkF.write( "seq1\\tname=Dm_Blaster_Piler_30.38_Map_8|category=classI|order=LTR|completeness=comp\\t1\\t1000\\n" )\n+        linkF.write( "seq2\\tname=Dm_Blaster_Recon_34_Map_20|category=classI|order=LTR|completeness=comp\\t1\\t800\\n" )\n+        linkF.close()\n+        \n+        inFile = "dummyAlignFile_%s" % ( self._uniqId )\n+        inFileHandler = open( inFile, "w" )\n+        inFileHandler.write( "seq1\\t1\\t100\\tseq2\\t110\\t11\\t1e-38\\t254\\t98.5\\n" )\n+        inFileHandler.write( "seq2\\t11\\t110\\tseq1\\t100\\t1\\t1e-38\\t254\\t98.5\\n" )\n+        inFileHandler.close()\n+        \n+        expFile = "dummyExpAlignFile_%s" % ( self._uniqId )\n+        expFileHandler = open( expFile, "w" )\n+        expFileHandler.write( "name=Dm_Blaster_Piler_30.38_Map_8|category=classI|order=LTR|completeness=comp\\t1\\t100\\tname=Dm_Blaster_Recon_34_Map_20|category=classI|order=LTR|completeness=comp\\t110\\t11\\t1e-38\\t254\\t98.500000\\n" )\n+        expFileHandler.write( "name=Dm_Blaster_Recon_34_Map_20|category=classI|order=LTR|completeness=comp\\t11\\t110\\tname=Dm_Blaster_Piler_30.38_Map_8|category=classI|order=LTR|completeness=comp\\t100\\t1\\t1e-38\\t254\\t98.500000\\n" )\n+        expFileHandler.close()\n+        \n+        obsFile = "dummyObsAlignFile_%s" % ( self._uniqId )\n+        \n+        cmd = "python ../ChangeSequenceHeaders.py"\n+        cmd += " -i %s" % ( inFile )\n+        cmd += " -f align"\n+        cmd += " -s 2"\n+        cmd += " -l %s" % ( linkFile )\n+        cmd += " -o %s" % ( obsFile )\n+        exitStatus = os.system( cmd )\n+        \n+        self.assertTrue( exitStatus == 0 )\n+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ inFile, linkFile, expFile, obsFile ]:\n+            os.remove( f )\n+        os.chdir( cDir )\n+        \n+        \n+    def test_retrieveInitialSequenceHeadersForPathFile( self ):\n+        cDir = os.getcwd()\n+        \n+        linkFile = "dummyLinkFile_%s" % ( self._uniqId )\n+        linkF = open( linkFile, "w" )\n+        linkF.write( "seq1\\tname=Dm_Blaster_Piler_30.38_Map_8|category=classI|order=LTR|completeness=comp\\t1\\t1000\\n" )\n+        linkF.write( "seq2\\tname=Dm_Blaster_Recon_34_Map_20|category=classI|order=LTR|completeness=comp\\t1\\t800\\n" )\n+        linkF.close()\n+        \n+        inFile = "dummyAlignFile_%s" % ( self._uniqId )\n+        inFileHandler = open( inFile, "w" )\n+        inFileHandler.write( "11\\tseq1\\t1\\t100\\tseq2\\t110\\t11\\t1e-38\\t254\\t98.5\\n" )\n+        inFileHandler.write( "2\\tseq2\\t11\\t110\\tseq1\\t100\\t1\\t1e-38\\t254\\t98.5\\n" )\n+        inFileHandler.close()\n+        \n+        expFile = "dummyExpAlignFile_%s" % ( self._uniqId )\n+        expFileHandler = open( expFile, "w" )\n+        
expFileHandler.write( "11\\tname=Dm_Blaster_Piler_30.38_Map_8|category=classI|order=LTR|completeness=comp\\t1\\t100\\tname=Dm_Blaster_Recon_34_Map_20|category=classI|order=LTR|completeness=comp\\t110\\t11\\t1e-38\\t254\\t98.500000\\n" )\n+        expFileHandler.write( "2\\tname=Dm_Blaster_Recon_34_Map_20|category=classI|order=LTR|completeness=comp\\t11\\t110\\tname=Dm_Blaster_Piler_30.38_Map_8|category=classI|order=LTR|completeness=comp\\t100\\t1\\t1e-38\\t254\\t98.500000\\n" )\n+        expFileHandler.close()\n+        \n+        obsFile = "dummyObsAlignFile_%s" % ( self._uniqId )\n+        \n+        cmd = "python ../ChangeSequenceHeaders.py"\n+        cmd += " -i %s" % ( inFile )\n+        cmd += " -f path"\n+        cmd += " -s 2"\n+        cmd += " -l %s" % ( linkFile )\n+        cmd += " -o %s" % ( obsFile )\n+        exitStatus = os.system( cmd )\n+        \n+        self.assertTrue( exitStatus == 0 )\n+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+        \n+        for f in [ inFile, linkFile, expFile, obsFile ]:\n+            os.remove( f )\n+        os.chdir( cDir )\n+        \n+        \n+if __name__ == "__main__":\n+        unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_CorrelateTEageWithGCcontent.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_CorrelateTEageWithGCcontent.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,60 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_CorrelateTEageWithGCcontent( unittest.TestCase ):
+    
+    def test_zLaunchAsScript( self ):
+        cDir = os.getcwd()
+        
+        coordFile = "dummyPathFile"
+        coordFileHandler = open( coordFile, "w" )
+        coordFileHandler.write( "1\tchr1\t1\t5\tTE1\t1\t5\t0.0\t100\t98.7\n" )
+        coordFileHandler.write( "1\tchr1\t11\t20\tTE1\t6\t15\t0.0\t100\t98.7\n" )
+        coordFileHandler.write( "2\tchr1\t26\t30\tTE1\t1\t5\t0.0\t100\t96.7\n" )
+        coordFileHandler.write( "3\tchr2\t1\t10\tTE2\t1\t10\t0.0\t100\t98.7\n" )
+        coordFileHandler.close()
+        
+        genomeFile = "dummyGenomeFile"
+        genomeFileHandler = open( genomeFile, "w" )
+        genomeFileHandler.write( ">chr1\n" )
+        genomeFileHandler.write( "AGCTGTTTTTAGCAGACGCATTTTTGGAGGTTTT\n" )
+        genomeFileHandler.write( ">chr2\n" )
+        genomeFileHandler.write( "ATATATATGGTTTTTTTTTT\n" )
+        genomeFileHandler.close()
+        
+        refseqFile = "dummyRefseqFile"
+        refseqFileHandler = open( refseqFile, "w" )
+        refseqFileHandler.write( ">TE1\nAGCAGCGACGACGACGACGACTTTT\n" )
+        refseqFileHandler.write( ">TE2\nAGCAGCGACGACGACGACGACTTTT\n" )
+        refseqFileHandler.write( ">TE3\nAGCAGCGACGACGACGACGACTTTT\n" )
+        refseqFileHandler.close()
+        
+        expFile = "dummyExpFile"
+        expFileHandler = open( expFile, "w" )
+        expFileHandler.write( "copy\tTE\tchr\tlength\tid\tGC\tlengthPerc\n" )
+        expFileHandler.write( "1\tTE1\tchr1\t15\t98.70\t%.2f\t%.2f\n" % ( 100 * 9 / 15.0, 100 * 15 / 25.0 ) )
+        expFileHandler.write( "2\tTE1\tchr1\t5\t96.70\t%.2f\t%.2f\n" % ( 100 * 4 / 5.0, 100 * 5 / 25.0 ) )
+        expFileHandler.write( "3\tTE2\tchr2\t10\t98.70\t%.2f\t%.2f\n" % ( 100 * 2 / 10.0, 100 * 10 / 25.0 ) )
+        expFileHandler.close()
+        
+        obsFile = "dummyObsFile"
+        
+        cmd = "CorrelateTEageWithGCcontent.py"
+        cmd += " -i %s" % ( coordFile )
+        cmd += " -g %s" % ( genomeFile )
+        cmd += " -r %s" % ( refseqFile )
+        cmd += " -o %s" % ( obsFile )
+        cmd += " -v %i" % ( 0 )
+        returnStatus = os.system( cmd )
+        
+        self.assertTrue( returnStatus == 0 )
+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )
+        
+        for f in [ coordFile, genomeFile, refseqFile, expFile, obsFile ]:
+            os.remove( f )
+        os.chdir( cDir )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
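The expected values written above come from two ratios: the GC percentage of the genomic bases covered by a copy, and the copy length as a percentage of its 25 bp reference TE. A minimal sketch of that arithmetic (helper names are hypothetical); for copy 2 of the fixture the covered bases are chr1 positions 26-30, i.e. "GGAGG":

    def gcPercent(sequence):
        # Percentage of G or C bases in a nucleotide string.
        sequence = sequence.upper()
        return 100.0 * (sequence.count("G") + sequence.count("C")) / len(sequence)

    def lengthPercent(copyLength, refLength):
        # Copy length as a percentage of the reference TE length.
        return 100.0 * copyLength / refLength

    print("%.2f" % gcPercent("GGAGG"))      # 80.00, matching 100 * 4 / 5.0
    print("%.2f" % lengthPercent(5, 25))    # 20.00, matching 100 * 5 / 25.0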
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_AlignTEOnGenomeAccordingToAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_AlignTEOnGenomeAccordingToAnnotation.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,122 @@
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.utils.FileUtils import FileUtils
+import subprocess
+import os
+import unittest
+from commons.tools.AlignTEOnGenomeAccordingToAnnotation import AlignTEOnGenomeAccordingToAnnotation
+
+class Test_F_AlignTEOnGenomeAccordingToAnnotation(unittest.TestCase):
+
+    def test_run_merge_same_paths(self):
+        pathFileName = "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ["REPET_DATA"]
+        queryFileName = "%s/commons/DmelChr4.fa" % os.environ["REPET_DATA"]
+        subjectFileName = "%s/Tools/DmelChr4_refTEs.fa" % os.environ["REPET_DATA"]
+        pathTableName = "testDmelChr4_chr_allTEs_nr_noSSR_join_path"
+        queryTableName = "testDmelChr4_chr_seq"
+        subjectTableName = "testDmelChr4_refTEs_seq"
+        iDb = DbFactory.createInstance()
+        iDb.createTable(queryTableName, "seq", queryFileName, True)
+        iDb.createTable(subjectTableName, "seq", subjectFileName, True)
+        iDb.createTable(pathTableName, "path", pathFileName, True)
+        expFileName = "%s/Tools/exp%s_merge.alignedSeq" % (os.environ["REPET_DATA"], pathTableName)
+        
+        iATOGATA = AlignTEOnGenomeAccordingToAnnotation(pathTableName, queryTableName, subjectTableName, True)
+        iATOGATA.run()
+        obsFileName = "obs%s_merge.alignedSeq" % pathTableName
+        iDb.exportDataToFile("%s_align" % pathTableName, obsFileName)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(obsFileName)
+        iDb.dropTable(queryTableName)
+        iDb.dropTable(subjectTableName)
+        iDb.dropTable(pathTableName)
+        iDb.dropTable("%s_align" % pathTableName)
+        iDb.close()
+
+    def test_run_as_script_merge_same_paths(self):
+        pathFileName = "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ["REPET_DATA"]
+        queryFileName = "%s/commons/DmelChr4.fa" % os.environ["REPET_DATA"]
+        subjectFileName = "%s/Tools/DmelChr4_refTEs.fa" % os.environ["REPET_DATA"]
+        pathTableName = "testDmelChr4_chr_allTEs_nr_noSSR_join_path"
+        queryTableName = "testDmelChr4_chr_seq"
+        subjectTableName = "testDmelChr4_refTEs_seq"
+        iDb = DbFactory.createInstance()
+        iDb.createTable(queryTableName, "seq", queryFileName, True)
+        iDb.createTable(subjectTableName, "seq", subjectFileName, True)
+        iDb.createTable(pathTableName, "path", pathFileName, True)
+        expFileName = "%s/Tools/exp%s_merge.alignedSeq" % (os.environ["REPET_DATA"], pathTableName)
+        
+        cmd = "AlignTEOnGenomeAccordingToAnnotation.py -p %s -q %s -s %s -m " % (pathTableName, queryTableName, subjectTableName)
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        obsFileName = "obs%s_merge.alignedSeq" % pathTableName
+        iDb.exportDataToFile("%s_align" % pathTableName, obsFileName)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(obsFileName)
+        iDb.dropTable(queryTableName)
+        iDb.dropTable(subjectTableName)
+        iDb.dropTable(pathTableName)
+        iDb.dropTable("%s_align" % pathTableName)
+        iDb.close()
+
+    def test_run_as_script_without_merge(self):
+        pathFileName = "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ["REPET_DATA"]
+        queryFileName = "%s/commons/DmelChr4.fa" % os.environ["REPET_DATA"]
+        subjectFileName = "%s/Tools/DmelChr4_refTEs.fa" % os.environ["REPET_DATA"]
+        pathTableName = "testDmelChr4_chr_allTEs_nr_noSSR_join_path"
+        queryTableName = "testDmelChr4_chr_seq"
+        subjectTableName = "testDmelChr4_refTEs_seq"
+        iDb = DbFactory.createInstance()
+        iDb.createTable(queryTableName, "seq", queryFileName, True)
+        iDb.createTable(subjectTableName, "seq", subjectFileName, True)
+        iDb.createTable(pathTableName, "path", pathFileName, True)
+        expFileName = "%s/Tools/exp%s.alignedSeq" % (os.environ["REPET_DATA"], pathTableName)
+        
+        cmd = "AlignTEOnGenomeAccordingToAnnotation.py -p %s -q %s -s %s" % (pathTableName, queryTableName, subjectTableName)
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        obsFileName = "obs%s.alignedSeq" % pathTableName
+        iDb.exportDataToFile("%s_align" % pathTableName, obsFileName)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(obsFileName)
+        iDb.dropTable(queryTableName)
+        iDb.dropTable(subjectTableName)
+        iDb.dropTable(pathTableName)
+        iDb.dropTable("%s_align" % pathTableName)
+        iDb.close()
+        
+#    def test_run_as_script_merge_same_paths_old(self):
+#        pathFileName = "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ["REPET_DATA"]
+#        queryFileName = "%s/commons/DmelChr4.fa" % os.environ["REPET_DATA"]
+#        subjectFileName = "%s/Tools/DmelChr4_refTEs.fa" % os.environ["REPET_DATA"]
+#        pathTableName = "testDmelChr4_chr_allTEs_nr_noSSR_join_path"
+#        queryTableName = "testDmelChr4_chr_seq"
+#        subjectTableName = "testDmelChr4_refTEs_seq"
+#        iDb = DbFactory.createInstance()
+#        iDb.createTable(queryTableName, "seq", queryFileName, True)
+#        iDb.createTable(subjectTableName, "seq", subjectFileName, True)
+#        iDb.createTable(pathTableName, "path", pathFileName, True)
+#        expFileName = "%s/Tools/exp%s_merge_old.alignedSeq" % (os.environ["REPET_DATA"], pathTableName)
+#        
+#        cmd = "srptAlignPath.py -m %s -q %s -s %s" % (pathTableName, queryTableName, subjectTableName)
+#        process = subprocess.Popen(cmd, shell = True)
+#        process.communicate()
+#        obsFileName = "obs%s.alignedSeq" % pathTableName
+#        iDb.exportDataToFile("%s_align" % pathTableName, obsFileName)
+#        
+#        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+#        
+#        os.remove(obsFileName)
+#        iDb.dropTable(queryTableName)
+#        iDb.dropTable(subjectTableName)
+#        iDb.dropTable(pathTableName)
+#        iDb.dropTable("%s_align" % pathTableName)
+#        iDb.close()
+#        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
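Each of the three active tests above repeats the same pattern: load the path, query and subject files into temporary MySQL tables, run the tool, compare the exported table with an expected file, then drop every table. A minimal sketch factoring that pattern into helpers, reusing only the DbFactory/createTable/dropTable/close calls shown above (the helper names themselves are hypothetical):

    from commons.core.sql.DbFactory import DbFactory

    def createTestTables(dTableToFileAndType):
        # dTableToFileAndType: {tableName: (dataFileName, tableType)}, e.g. {"..._path": ("....path", "path")}
        iDb = DbFactory.createInstance()
        for tableName, (fileName, tableType) in dTableToFileAndType.items():
            iDb.createTable(tableName, tableType, fileName, True)
        return iDb

    def dropTestTables(iDb, lTableNames):
        # Drop every temporary table, then close the connection.
        for tableName in lTableNames:
            iDb.dropTable(tableName)
        iDb.close()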
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_CheckMysqlConnect.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_CheckMysqlConnect.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,64 @@
+import unittest
+import os
+import socket
+
+class Test_F_CheckMysqlConnect(unittest.TestCase):
+    
+    HOST_NAME = "compute-2-46.local"
+    
+    def setUp(self):
+        self._host = socket.gethostname()
+        self._cmd = ""
+
+    def tearDown(self):
+        os.remove("testOutputCheckMysqlConnect.txt")
+    
+    def test_CheckMysqlConnect_as_script_True(self):
+        if self.HOST_NAME in self._host:
+            self._cmd = "CheckMysqlConnect.py -p /share/apps/bin/python > testOutputCheckMysqlConnect.txt"
+        else:
+            self._cmd = "CheckMysqlConnect.py > testOutputCheckMysqlConnect.txt"
+        os.system(self._cmd)
+        
+        self.assertFalse(self._isFailedInOutputFile())
+        
+    def test_CheckMysqlConnect_as_script_False(self):
+        configFile = "%s/repet_tools/tests/config.cfg" % os.environ.get("REPET_PATH")
+        host = "unknownServer"
+        self._writeConfig(configFile, host)
+        
+        if self.HOST_NAME in self._host:
+            self._cmd = "CheckMysqlConnect.py -p /share/apps/bin/python -C %s > testOutputCheckMysqlConnect.txt" % configFile
+        else:
+            self._cmd = "CheckMysqlConnect.py -C %s > testOutputCheckMysqlConnect.txt " % configFile
+        os.system(self._cmd)
+        os.remove(configFile)
+        
+        self.assertTrue(self._isFailedInOutputFile())
+        
+    def _isFailedInOutputFile(self):
+        f = open("testOutputCheckMysqlConnect.txt", "r")
+        line = f.readline()
+        
+        while line:
+            if "FAILED" in line:
+                f.close()
+                return True
+            line = f.readline()
+
+        f.close()
+        return False
+    
+    def _writeConfig(self, configFileName, repetHost = os.environ.get("REPET_HOST")):
+        f = open( configFileName, "w" )
+        f.write("[repet_env]\n")
+        f.write("repet_host: %s\n" % repetHost)
+        f.write("repet_user: %s\n" % os.environ.get("REPET_USER"))
+        f.write("repet_pw: %s\n" % os.environ.get("REPET_PW"))
+        f.write("repet_db: %s\n" % os.environ.get("REPET_DB"))
+        f.write("repet_port: %s\n" % os.environ.get("REPET_PORT"))
+        f.close()
+    
+if __name__ == "__main__":
+    unittest.main()
+    
\ No newline at end of file
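_isFailedInOutputFile() above scans the captured output line by line for the word "FAILED". An equivalent, more compact sketch using only the standard library (the function name is hypothetical):

    def outputContainsFailed(fileName="testOutputCheckMysqlConnect.txt"):
        # True if any line of the captured output mentions "FAILED".
        f = open(fileName)
        found = any("FAILED" in line for line in f)
        f.close()
        return found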
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_FilterAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_FilterAlign.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,168 @@\n+from commons.core.utils.FileUtils import FileUtils\n+import unittest\n+import os\n+\n+class Test_F_FilterAlign(unittest.TestCase):\n+\n+    def test_as_script(self):\n+        alignFileName = "batch.fa.align"\n+        self._writeAlignFile(alignFileName)\n+        expFileName = "exp.filtered"\n+        self._writeExpFile(expFileName)\n+        obsFileName = "batch.fa.align.filtered"\n+        cmd = "FilterAlign.py"\n+        cmd += " -i %s" % alignFileName\n+        cmd += " -S 100"\n+        cmd += " -v 1"\n+        os.system(cmd)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(alignFileName)\n+        os.remove(expFileName)\n+        os.remove(obsFileName)\n+        \n+    def _writeExpFile(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("chunk09914\\t2257\\t2722\\trefTE_747\\t2614\\t2169\\t0\\t249\\t77.5463\\n")\n+        f.write("chunk09914\\t3114\\t4494\\trefTE_747\\t1598\\t228\\t0\\t571\\t78.0453\\n")\n+        f.write("chunk09914\\t2251\\t4816\\trefTE_759\\t173\\t2769\\t0\\t3203\\t90.5966\\n")\n+        f.write("chunk09914\\t3019\\t3698\\trefTE_764\\t49\\t735\\t0\\t343\\t78.3912\\n")\n+        f.write("chunk09914\\t4102\\t4530\\trefTE_764\\t1191\\t1603\\t0\\t154\\t83.0601\\n")\n+        f.write("chunk09914\\t4603\\t4864\\trefTE_779\\t291\\t24\\t6.5e-39\\t135\\t80.9917\\n")\n+        f.write("chunk09914\\t4642\\t4909\\trefTE_787\\t56\\t326\\t0\\t132\\t79.0323\\n")\n+        f.write("chunk09914\\t5086\\t6724\\trefTE_787\\t532\\t2194\\t0\\t218\\t71.4668\\n")\n+        f.write("chunk09914\\t6180\\t6594\\trefTE_812\\t2516\\t2949\\t1.6e-32\\t125\\t72.5441\\n")\n+        f.write("chunk09914\\t2349\\t2723\\trefTE_818\\t1034\\t663\\t0\\t347\\t80.0539\\n")\n+        f.write("chunk09914\\t2412\\t2727\\trefTE_818\\t1169\\t1482\\t0\\t278\\t78.5942\\n")\n+        f.write("chunk09914\\t3853\\t5302\\trefTE_828\\t3133\\t1713\\t0\\t417\\t77.8644\\n")\n+        f.write("chunk09914\\t5486\\t6724\\trefTE_828\\t1715\\t454\\t0\\t584\\t73.8655\\n")\n+        f.write("chunk09914\\t849\\t1203\\trefTE_838\\t83\\t449\\t0\\t493\\t95.7386\\n")\n+        f.write("chunk09914\\t3355\\t4471\\trefTE_861\\t2024\\t3183\\t0\\t260\\t77.1289\\n")\n+        f.write("chunk09914\\t2263\\t4696\\trefTE_866\\t19\\t2476\\t0\\t3136\\t91.1287\\n")\n+        f.write("chunk09914\\t4803\\t5747\\trefTE_866\\t2479\\t3429\\t0\\t1352\\t93.1217\\n")\n+        f.write("chunk09914\\t4134\\t4702\\trefTE_880\\t1685\\t1129\\t7.8e-35\\t126\\t80.7851\\n")\n+        f.write("chunk09914\\t3598\\t4029\\trefTE_904\\t666\\t1122\\t0\\t414\\t84.7418\\n")\n+        f.write("chunk09914\\t3887\\t4480\\trefTE_904\\t1030\\t1635\\t0\\t384\\t82.6087\\n")\n+        f.write("chunk09914\\t2257\\t2722\\trefTE_908\\t1005\\t551\\t0\\t240\\t78.5377\\n")\n+        f.write("chunk09914\\t6180\\t6594\\trefTE_914\\t1420\\t987\\t8.6e-30\\t125\\t72.5441\\n")\n+        f.write("chunk09914\\t4144\\t6724\\trefTE_931\\t3801\\t1202\\t0\\t1057\\t74.7937\\n")\n+        f.write("chunk09914\\t848\\t1204\\trefTE_966\\t370\\t1\\t0\\t522\\t98.0282\\n")\n+        f.write("chunk09914\\t854\\t1145\\trefTE_966\\t5130\\t4829\\t0\\t411\\t95.8763\\n")\n+        f.write("chunk09914\\t4142\\t4487\\trefTE_969\\t455\\t130\\t4.2039e-45\\t161\\t81.1881\\n")\n+        f.write("chunk09914\\t848\\t1218\\trefTE_980\\t1182\\t814\\t0\\t431\\t92.1127\\n")\n+        f.write("chunk09914\\t4093\\t4489\\trefTE_999\\t5850\\t5485\\t3.6e-39\\t142\\t81.0651\\n")\n+        
f.write("chunk09914\\t2257\\t2722\\trefTE_1006\\t2472\\t2025\\t0\\t257\\t77.4193\\n")\n+        f.write("chunk09914\\t3755\\t4491\\trefTE_1006\\t941\\t230\\t0\\t188\\t80.0948\\n")\n+        f.write("chunk09914\\t5060\\t6724\\trefTE_1006\\t7451\\t5776\\t0\\t711\\t72.3792\\n")\n+        f.write("chunk09914\\t2251\\t3361\\trefTE_1015\\t7278\\t8364\\t0\\t1514\\t86.8087\\n")\n+        f.write("chunk09914\\t2780\\t6724\\trefTE_1015\\t1\\t3962\\t0\\t5370\\t91.9552\\n")\n+        f.write("chunk09914\\t3383\\t4672\\trefTE_1015\\t8150\\t9445\\t0\\t1746\\t92.1217\\n")\n+        f.write("chunk09914\\t6228\\t6594\\trefTE_1018\\t5554\\t5920\\t6.2e-22\\t106\\t73.3138\\n")\n+        f.close()\n+        \n+    def _writeAlignFile(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("chunk09914\\t5038\\t5133\\trefTE_745\\t288\\t389\\t1.3e-15\\t38\\t86.747\\n")\n+        '..b'k09914\\t4689\\t4718\\trefTE_914\\t3130\\t3103\\t8.6e-30\\t37\\t92.8571\\n")\n+        f.write("chunk09914\\t6180\\t6594\\trefTE_914\\t1420\\t987\\t8.6e-30\\t125\\t72.5441\\n")\n+        f.write("chunk09914\\t4691\\t4755\\trefTE_930\\t2120\\t2187\\t2.3e-14\\t39\\t86.6667\\n")\n+        f.write("chunk09914\\t6172\\t6506\\trefTE_930\\t3738\\t4087\\t2.3e-14\\t71\\t72.293\\n")\n+        f.write("chunk09914\\t3755\\t3945\\trefTE_931\\t4127\\t3947\\t0\\t45\\t79.3939\\n")\n+        f.write("chunk09914\\t4144\\t6724\\trefTE_931\\t3801\\t1202\\t0\\t1057\\t74.7937\\n")\n+        f.write("chunk09914\\t4692\\t4772\\trefTE_962\\t2382\\t2453\\t1.5e-16\\t39\\t85.0746\\n")\n+        f.write("chunk09914\\t6178\\t6560\\trefTE_962\\t3929\\t4309\\t1.5e-16\\t79\\t70.5882\\n")\n+        f.write("chunk09914\\t848\\t1204\\trefTE_966\\t370\\t1\\t4e-147\\t522\\t98.0282\\n")\n+        f.write("chunk09914\\t854\\t1145\\trefTE_966\\t5130\\t4829\\t1.5e-115\\t411\\t95.8763\\n")\n+        f.write("chunk09914\\t1139\\t1203\\trefTE_966\\t4770\\t4681\\t1.5e-115\\t38\\t87.5\\n")\n+        f.write("chunk09914\\t646\\t734\\trefTE_967\\t4583\\t4671\\t4.8e-16\\t87\\t76.4045\\n")\n+        f.write("chunk09914\\t3565\\t3698\\trefTE_969\\t1106\\t990\\t4.3e-45\\t53\\t79.646\\n")\n+        f.write("chunk09914\\t4142\\t4487\\trefTE_969\\t455\\t130\\t4.3e-45\\t161\\t81.1881\\n")\n+        f.write("chunk09914\\t848\\t1218\\trefTE_980\\t1182\\t814\\t6.8e-120\\t431\\t92.1127\\n")\n+        f.write("chunk09914\\t4691\\t4762\\trefTE_986\\t3059\\t3133\\t3.3e-21\\t36\\t82.0895\\n")\n+        f.write("chunk09914\\t6172\\t6594\\trefTE_986\\t4689\\t5120\\t3.3e-21\\t97\\t71.2121\\n")\n+        f.write("chunk09914\\t149\\t280\\trefTE_987\\t3872\\t3721\\t2.8e-18\\t38\\t79.2\\n")\n+        f.write("chunk09914\\t4689\\t4769\\trefTE_987\\t3186\\t3102\\t2.8e-18\\t37\\t83.5616\\n")\n+        f.write("chunk09914\\t6178\\t6594\\trefTE_987\\t1600\\t1188\\t2.8e-18\\t77\\t72.0317\\n")\n+        f.write("chunk09914\\t4108\\t4241\\trefTE_991\\t2079\\t2237\\t1.2e-12\\t62\\t82.1705\\n")\n+        f.write("chunk09914\\t4602\\t4701\\trefTE_991\\t2535\\t2634\\t1.2e-12\\t43\\t78.022\\n")\n+        f.write("chunk09914\\t3596\\t3757\\trefTE_999\\t6434\\t6307\\t3.6e-39\\t52\\t85.124\\n")\n+        f.write("chunk09914\\t4093\\t4489\\trefTE_999\\t5850\\t5485\\t3.6e-39\\t142\\t81.0651\\n")\n+        f.write("chunk09914\\t4348\\t4702\\trefTE_999\\t5619\\t5295\\t5.9e-13\\t54\\t79.7251\\n")\n+        f.write("chunk09914\\t4541\\t4805\\trefTE_1002\\t3207\\t2973\\t6.8e-27\\t57\\t85.5769\\n")\n+        f.write("chunk09914\\t6178\\t6723\\trefTE_1002\\t1494\\t946\\t6.8e-27\\t96\\t75.7764\\n")\n+        
f.write("chunk09914\\t4085\\t4179\\trefTE_1004\\t3933\\t4033\\t1.5e-17\\t47\\t80\\n")\n+        f.write("chunk09914\\t4690\\t4718\\trefTE_1004\\t4126\\t4153\\t1.5e-17\\t38\\t92.8571\\n")\n+        f.write("chunk09914\\t6262\\t6594\\trefTE_1004\\t5242\\t5574\\t1.5e-17\\t65\\t70.2265\\n")\n+        f.write("chunk09914\\t2257\\t2722\\trefTE_1006\\t2472\\t2025\\t2.5e-134\\t257\\t77.4193\\n")\n+        f.write("chunk09914\\t3134\\t3382\\trefTE_1006\\t1450\\t1199\\t2.9e-118\\t45\\t75.7991\\n")\n+        f.write("chunk09914\\t3331\\t3661\\trefTE_1006\\t1468\\t1146\\t2.5e-134\\t98\\t78.2313\\n")\n+        f.write("chunk09914\\t3755\\t4491\\trefTE_1006\\t941\\t230\\t2.5e-134\\t188\\t80.0948\\n")\n+        f.write("chunk09914\\t5060\\t6724\\trefTE_1006\\t7451\\t5776\\t3.8e-204\\t711\\t72.3792\\n")\n+        f.write("chunk09914\\t2670\\t2723\\trefTE_1009\\t6642\\t6591\\t7.1e-18\\t39\\t86\\n")\n+        f.write("chunk09914\\t3328\\t3647\\trefTE_1009\\t5826\\t5508\\t7.1e-18\\t84\\t75\\n")\n+        f.write("chunk09914\\t6\\t130\\trefTE_1011\\t3884\\t3994\\t1.6e-11\\t38\\t76.6355\\n")\n+        f.write("chunk09914\\t4690\\t4741\\trefTE_1011\\t4997\\t5046\\t1.6e-11\\t37\\t91.1111\\n")\n+        f.write("chunk09914\\t6180\\t6716\\trefTE_1011\\t6676\\t7235\\t1.6e-11\\t55\\t71.2274\\n")\n+        f.write("chunk09914\\t2251\\t3361\\trefTE_1015\\t7278\\t8364\\t0\\t1514\\t86.8087\\n")\n+        f.write("chunk09914\\t2780\\t6724\\trefTE_1015\\t1\\t3962\\t0\\t5370\\t91.9552\\n")\n+        f.write("chunk09914\\t3383\\t4672\\trefTE_1015\\t8150\\t9445\\t0\\t1746\\t92.1217\\n")\n+        f.write("chunk09914\\t6228\\t6594\\trefTE_1018\\t5554\\t5920\\t6.2e-22\\t106\\t73.3138\\n")\n+        f.close()\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_GFF3Maker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_GFF3Maker.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,707 @@\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.tools.GFF3Maker import GFF3Maker\n+\n+class Test_F_GFF3Maker(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._projectName = "projectName"\n+        self._iDb = DbMySql()\n+        self._tablesFileName = "annotation_tables.txt"\n+        self._fastaFileName = "%s_chr.fa" % self._projectName\n+        self._fastaTableName = "%s_chr_seq" % self._projectName\n+        self._writeFastaFile(self._fastaFileName)\n+        self._iDb.createTable(self._fastaTableName, "seq", self._fastaFileName, True)\n+        self._inputFileName = "%s_chr_allTEs_nr_noSSR_join.pathOrSet" % self._projectName\n+        self._expGFFFileName = "explm_SuperContig_29_v2.gff3"\n+        self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"\n+        self._obsGFFEmptyFileName = "lm_SuperContig_30_v2.gff3"\n+        \n+    def tearDown(self):\n+        self._iDb.dropTable(self._fastaTableName)\n+        self._iDb.dropTable(self._inputTableName)\n+        self._iDb.close()\n+        os.remove(self._obsGFFFileName)\n+        os.remove(self._expGFFFileName)\n+        os.remove(self._fastaFileName)\n+        os.remove(self._tablesFileName)\n+        os.remove(self._inputFileName)\n+        \n+    def test_run_as_script_path_with_seq_withAllFiles(self):\n+        self._writeTablesFile("path")\n+        self._writePathFile(self._inputFileName)\n+        self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName\n+        self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)\n+        self._writeExpPathGFFFile(self._expGFFFileName)\n+        expGFFEmptyFileName = "explm_SuperContig_30_v2.gff3"\n+        self._writeExpEmptyPathGFFFileWithSeq(expGFFEmptyFileName)\n+        \n+        cmd = "GFF3Maker.py -t %s -f %s -w -a -p"% (self._tablesFileName, self._fastaTableName) \n+        os.system(cmd)\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))\n+        self.assertTrue(FileUtils.are2FilesIdentical(expGFFEmptyFileName, self._obsGFFEmptyFileName))\n+        \n+        os.remove(expGFFEmptyFileName)\n+        os.remove(self._obsGFFEmptyFileName)\n+       \n+    def test_run_as_script_path_without_seq_withAllFiles(self):\n+        self._writeTablesFile("path")\n+        self._writePathFile(self._inputFileName)\n+        self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName\n+        self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)\n+        self._writeExpPathGFFFile_without_seq(self._expGFFFileName)\n+        expGFFEmptyFileName = "explm_SuperContig_30_v2.gff3"\n+        self._writeExpEmptyPathGFFFile(expGFFEmptyFileName)\n+        \n+        cmd = "GFF3Maker.py -t %s -f %s -a -p" % (self._tablesFileName, self._fastaTableName)\n+        os.system(cmd)\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))\n+        self.assertTrue(FileUtils.are2FilesIdentical(expGFFEmptyFileName, self._obsGFFEmptyFileName))\n+        \n+        os.remove(expGFFEmptyFileName)\n+        os.remove(self._obsGFFEmptyFileName)\n+   \n+    def test_run_without_seq(self):\n+        self._writeTablesFile("path")\n+        self._writePathFile(self._inputFileName)\n+        self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName\n+        
self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)\n+        self._writeExpPathGFFFile_without_seq(self._expGFFFileName)\n+        \n+        iGFF3Maker = GFF3Maker()\n+        iGFF3Maker.setTablesFileName(self._tablesFileName)\n+        iGFF3Maker.setInFastaName(self._fastaTableName)\n+        iGFF3Maker.setAreMatchPartCompulsory(True)\n+        iGFF3Maker.run()\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName))\n+        self.assertFalse(FileUtils.isResso'..b'ch\\t193781\\t194212\\t0.0\\t+\\t.\\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t193781\\t194212\\t0.0\\t+\\t.\\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch\\t192832\\t193704\\t0.0\\t+\\t.\\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t192832\\t193704\\t0.0\\t+\\t.\\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch\\t78031\\t78588\\t0.0\\t+\\t.\\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t78031\\t78080\\t0.0\\t+\\t.\\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t78081\\t78588\\t0.0\\t+\\t.\\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\\n")\n+        f.write("##FASTA\\n")\n+        self._writeSeq1(f)\n+        f.close()\n+        \n+    def _writeExpSetGFFFileReversed(self, inFileName):\n+        f = open(inFileName, \'w\')\n+        f.write("##gff-version 3\\n")\n+        f.write("##sequence-region lm_SuperContig_29_v2 1 120\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch\\t193781\\t194212\\t0.0\\t+\\t.\\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t193781\\t194212\\t0.0\\t+\\t.\\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch\\t192832\\t193704\\t0.0\\t+\\t.\\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t192832\\t193704\\t0.0\\t+\\t.\\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch\\t78031\\t78588\\t0.0\\t-\\t.\\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t78031\\t78080\\t0.0\\t-\\t.\\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\\n")\n+        f.write("lm_SuperContig_29_v2\\tprojectName_REPET_TEs\\tmatch_part\\t78081\\t78588\\t0.0\\t-\\t.\\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\\n")\n+        f.write("##FASTA\\n")\n+        self._writeSeq1(f)\n+        f.close()\n+    \n+    def _writeFastaFile(self, inFileName):\n+        f = 
open(inFileName,\'w\')\n+        self._writeSeq2(f)\n+        self._writeSeq1(f)\n+        f.close()\n+        \n+    def _writeFastaFileExtended(self, inFileName):\n+        f = open(inFileName,\'w\')\n+        self._writeSeq2(f)\n+        self._writeSeq1(f)\n+        f.write(">chr1\\n")\n+        f.write("CTAAGCTGCGCTATGTAG\\n")\n+        f.close()\n+    \n+    def _writeSeq1(self, f):\n+        f.write(\'>lm_SuperContig_29_v2\\n\')\n+        f.write(\'CCTAGACAATTAATTATAATAATTAATAAACTATTAGGCTAGTAGTAGGTAATAATAAAA\\n\')\n+        f.write(\'GGATTACTACTAAGCTGCGCTATGTAGATATTTAAAACATGTGGCTTAGGCAAGAGTATA\\n\')\n+\n+    def _writeSeq2(self, f):\n+        f.write(\'>lm_SuperContig_30_v2\\n\')\n+        f.write(\'TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATG\\n\')\n+        f.write(\'CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\\n\')\n+        \n+    def _writeFastaFile_DmelChr4(self, inFileName):\n+        f = open(inFileName,\'w\')\n+        f.write(">dmel_chr4\\n")\n+        f.write("CTAAGCTGCGCTATGTAG\\n")\n+        f.write(">dmel_chr1\\n")\n+        f.write("CGTAACGCTAGCGCTTATAGTGAGC\\n")\n+        f.close()\n+\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
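The expected GFF3 fixtures above pair each "match" feature with one or more "match_part" children linked through ID/Parent attributes in column 9. A minimal sketch emitting one such pair in the same 9-column layout (the helper and its simplified attributes are hypothetical):

    def writeMatchWithPart(f, seqid, source, start, end, score, strand, matchId, target):
        # One GFF3 'match' line followed by a single 'match_part' child referring to it.
        f.write("%s\t%s\tmatch\t%i\t%i\t%s\t%s\t.\tID=%s;Target=%s\n"
                % (seqid, source, start, end, score, strand, matchId, target))
        f.write("%s\t%s\tmatch_part\t%i\t%i\t%s\t%s\t.\tID=mp_%s;Parent=%s;Target=%s\n"
                % (seqid, source, start, end, score, strand, matchId, matchId, target))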
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_GameXmlMaker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_GameXmlMaker.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,281 @@\n+import unittest\n+import os\n+import glob\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.tools.GameXmlMaker import GameXmlMaker\n+\n+\n+class Test_F_GameXmlMaker(unittest.TestCase):\n+\n+    def setUp(self):\n+        pass\n+\n+\n+    def tearDown(self):\n+        FileUtils.removeFilesByPattern("*gamexml")\n+\n+\n+    def test_run_as_script_step1(self):\n+        fastaFileName = "input.fa"\n+        self._writeFastaFile(fastaFileName)\n+        \n+        cmd = "GameXmlMaker.py -f %s " % fastaFileName \n+        os.system(cmd)\n+       \n+        lObsGameXmlNew = glob.glob("*.gamexml")\n+        self._writeStep1ExpectedXmls()\n+        self.assertTrue(len(lObsGameXmlNew) > 0)\n+        for obsGameXmlNew in lObsGameXmlNew:\n+            expGameXmlNew = "exp_%s" % obsGameXmlNew\n+            self.assertTrue(FileUtils.are2FilesIdentical(expGameXmlNew,obsGameXmlNew))\n+        \n+        os.remove(fastaFileName)\n+    \n+    def test_run_as_script_step2(self):\n+        self._tableFileName = "annotation_tables.txt"\n+        \n+        self._writeStep2InputTableFile()\n+        self._writeStep2InputXmls()\n+        self._writeStep2InputPathFile()\n+        self._writeStep2InputSetFile()\n+        self._writeStep2InputMapFile()\n+        self._createStep2Tables()\n+        \n+        lObsGameXmlNew = glob.glob("*.gamexml")\n+        \n+        cmd = "GameXmlMaker.py -t %s " % self._tableFileName\n+        os.system(cmd)\n+        \n+        self._writeStep2ExpXmls()\n+        \n+        self.assertTrue(len(lObsGameXmlNew) > 0)\n+        for obsGameXmlNew in lObsGameXmlNew:\n+            expGameXmlNew = "exp_%s" % obsGameXmlNew\n+            self.assertTrue(FileUtils.are2FilesIdentical(expGameXmlNew,obsGameXmlNew))\n+            \n+        os.remove("Test_F_GameXmlMaker_path.path")\n+        os.remove("Test_F_GameXmlMaker_set.set")\n+        os.remove("Test_F_GameXmlMaker_chk_map.map")\n+            \n+        os.remove(self._tableFileName)\n+        os.system("ListAndDropTables.py -d Test_F_GameXmlMaker")\n+            \n+    def test_run_step2(self):\n+        self._tableFileName = "annotation_tables.txt"\n+        \n+        self._writeStep2InputTableFile()\n+        self._writeStep2InputXmls()\n+        self._writeStep2InputPathFile()\n+        self._writeStep2InputSetFile()\n+        self._writeStep2InputMapFile()\n+        self._createStep2Tables()\n+        \n+        lObsGameXmlNew = glob.glob("*.gamexml")\n+        \n+        iGameXmlMaker = GameXmlMaker(tablesFileName = self._tableFileName)\n+        iGameXmlMaker.run()\n+        self._writeStep2ExpXmls()\n+        self.assertTrue(len(lObsGameXmlNew) > 0)\n+        for obsGameXmlNew in lObsGameXmlNew:\n+            expGameXmlNew = "exp_%s" % obsGameXmlNew\n+            self.assertTrue(FileUtils.are2FilesIdentical(expGameXmlNew,obsGameXmlNew))\n+            \n+        os.remove("Test_F_GameXmlMaker_path.path")\n+        os.remove("Test_F_GameXmlMaker_set.set")\n+        os.remove("Test_F_GameXmlMaker_chk_map.map")\n+        \n+        os.remove(self._tableFileName)\n+        os.system("ListAndDropTables.py -d Test_F_GameXmlMaker")\n+        \n+    def _writeFastaFile(self, inFileName):\n+        f = open(inFileName,\'w\')\n+        f.write(\'>seq1\\n\')\n+        f.write(\'TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATG\\n\')\n+        f.write(\'CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\\n\')\n+        
f.write(\'CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACA\\n\')\n+        f.write(\'>seq2\\n\')\n+        f.write(\'TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATG\\n\')\n+        f.write(\'CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\\n\')\n+        f.write(\'CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\\n\')\n+        f.write(\'>seq3\\n\')\n+        f.write(\'CCTAGACAATTAATTATAATAATTAATAAACTATTAGGCTAGTAGTAGGTAATAATAAAA\\n\')\n+        f.write(\'GGATTACTACTAAGCTGCGCTATGTAGATATTTAAAACATGTGGCTTAGGCAAGAGTATA\\n\')\n+        f.write(\'>seq4\\n\')\n+     '..b'rt>-1</start><end>-1</end></span></seq_relationship>\')\n+        f.write(\'<score>0</score></result_span></result_set></computational_analysis>\')\n+        f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_chk_map</program><database>db</database></computational_analysis></game>\')\n+        f.close()\n+        \n+        f = open("exp_seq3.gamexml", \'w\')\n+        f.write(\'<?xml version="1.0" ?><game><seq focus="true" id="seq3"><name>seq3</name>\')\n+        f.write(\'<residues>CCTAGACAATTAATTATAATAATTAATAAACTATTAGGCTAGTAGTAGGTAATAATAAAAGGATTACTACTAAGCTGCGCTATGTAGATATTTAAAACATGTGGCTTAGGCAAGAGTATA</residues>\')\n+        f.write(\'</seq><map_position><arm>seq3</arm><span><start>1</start><end>120</end></span></map_position>\')\n+        f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_path</program><database>db</database><result_set id="5"><name>TE2::5</name>\')\n+        f.write(\'<result_span><seq_relationship seq="seq3" type="query"><span><start>1</start><end>120</end></span>\')\n+        f.write(\'</seq_relationship><seq_relationship seq="TE2::5" type="subject"><span><start>450</start><end>550</end></span></seq_relationship>\')\n+        f.write(\'<score>75.7</score></result_span></result_set></computational_analysis>\')\n+        f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_set</program><database>db</database><result_set id="5"><name>seq3::5</name>\')\n+        f.write(\'<result_span><seq_relationship seq="seq3" type="query"><span><start>1</start><end>120</end></span></seq_relationship>\')\n+        f.write(\'<seq_relationship seq="seq3::5" type="subject"><span><start>-1</start><end>-1</end></span></seq_relationship>\')\n+        f.write(\'<score>0</score></result_span></result_set></computational_analysis>\')\n+        f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_chk_map</program><database>db</database>\')\n+        f.write(\'<result_set id="-1"><name>chunk3::-1</name>\')\n+        f.write(\'<result_span><seq_relationship seq="seq3" type="query"><span><start>380001</start><end>580000</end></span></seq_relationship>\')\n+        f.write(\'<seq_relationship seq="chunk3::-1" type="subject"><span><start>-1</start><end>-1</end></span></seq_relationship>\')\n+        f.write(\'<score>0</score></result_span></result_set></computational_analysis></game>\')\n+        f.close()\n+        \n+        f = open("exp_seq4.gamexml", \'w\')\n+        f.write(\'<?xml version="1.0" ?><game><seq focus="true" id="seq4"><name>seq4</name>\')\n+        f.write(\'<residues>TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATGCAGGAATCATGAAGGGTACGACTGTTCG</residues>\')\n+        f.write(\'</seq><map_position><arm>seq4</arm><span><start>1</start><end>88</end></span></map_position>\')\n+        f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_path</program><database>db</database></computational_analysis>\')\n+        
f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_set</program><database>db</database></computational_analysis>\')\n+        f.write(\'<computational_analysis><program>Test_F_GameXmlMaker_chk_map</program><database>db</database>\')\n+        f.write(\'<result_set id="-1"><name>chunk4::-1</name>\')\n+        f.write(\'<result_span><seq_relationship seq="seq4" type="query"><span><start>570001</start><end>770000</end></span></seq_relationship>\')\n+        f.write(\'<seq_relationship seq="chunk4::-1" type="subject"><span><start>-1</start><end>-1</end></span></seq_relationship>\')\n+        f.write(\'<score>0</score></result_span></result_set></computational_analysis></game>\')\n+        f.close()\n+                \n+    def _writeStep2InputTableFile(self):\n+        f = open(self._tableFileName,\'w\')\n+        f.write("Test_F_GameXmlMaker_path\\tpath\\tTest_F_GameXmlMaker_path\\n")\n+        f.write("Test_F_GameXmlMaker_set\\tset\\tTest_F_GameXmlMaker_set\\n")\n+        f.write("Test_F_GameXmlMaker_chk_map\\tmap\\tTest_F_GameXmlMaker_chk_map")\n+        f.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_GetMultiAlignAndPhylogenyPerTErefSeq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_GetMultiAlignAndPhylogenyPerTErefSeq.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,147 @@\n+import subprocess\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.tools.GetMultAlignAndPhylogenyPerTErefSeq import GetMultAlignAndPhylogenyPerTErefSeq\n+from shutil import rmtree\n+from glob import glob\n+\n+class Test_F_GetMultAlignAndPhylogenyPerTErefSeq(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self._verbosity = 3\n+        \n+        self._testPrefix = \'Test_GMAAPperTErefSeq_\'\n+        self._configFileName = "%sConfig.cfg" % self._testPrefix\n+        self._iDb = DbMySql()\n+\n+        self.inPathTableName = "%sDmelChr4_chr_allTEs_nr_noSSR_join_path" % (self._testPrefix)\n+        self.inPathFileName = "%s/Tools/DmelChr4_chr_allTEs_nr_noSSR_join_path.path" % os.environ["REPET_DATA"]\n+        self._iDb.createTable(self.inPathTableName, "path", self.inPathFileName, True)\n+        \n+        self.RefTETableName = "%sDmelChr4_refTEs" % (self._testPrefix)\n+        self.RefTEFileName = "%s/Tools/DmelChr4_refTEs.fa" % os.environ["REPET_DATA"]\n+        self._iDb.createTable(self.RefTETableName, "seq", self.RefTEFileName, True)\n+        \n+        self.genomeTableName = "%sDmelChr4_chr" % (self._testPrefix)\n+        self.genomeFileName = "%s/Tools/DmelChr4.fa" % os.environ["REPET_DATA"]\n+        self._iDb.createTable(self.genomeTableName, "seq", self.genomeFileName, True)\n+\n+        try:\n+            os.makedirs(self._testPrefix)\n+        except:pass\n+        os.chdir(self._testPrefix)\n+        self._writeConfigFile()\n+        \n+#        self._expFileName_FullLengthCopy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthCopy.txt" % os.environ["REPET_DATA"]\n+#        self._expFileName_Copy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithCopy.txt" % os.environ["REPET_DATA"]\n+#        self._expFileName_FullLengthFrag = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthFrag.txt" % os.environ["REPET_DATA"]\n+#        self._expFastaFileName_FullLengthCopy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthCopy.fa" % os.environ["REPET_DATA"]\n+#        self._expFastaFileName_Copy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithCopy.fa" % os.environ["REPET_DATA"]\n+#        self._expFastaFileName_FullLengthFrag = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthFrag.fa" % os.environ["REPET_DATA"]\n+\n+        \n+#        inputFastaFileName = "input_TEannot_refTEs.fa"\n+#        os.symlink(self._expFastaFileName_Copy, inputFastaFileName)\n+        \n+#        self._tableName = "Dummy_Atha_refTEs_seq"\n+#        self._iDb.createTable(self._tableName, "seq", inputFastaFileName, True)\n+#        os.remove(inputFastaFileName)      \n+#        \n+#        self._obsFileName_FullLengthCopy = "%s_FullLengthCopy.txt" % os.path.splitext(os.path.basename(self._inFileName))[0]\n+#        self._obsFileName_Copy = "%s_OneCopyAndMore.txt" % os.path.splitext(os.path.basename(self._inFileName))[0]\n+#        self._obsFileName_FullLengthFrag = "%s_FullLengthFrag.txt" % os.path.splitext(os.path.basename(self._inFileName))[0]\n+#        self._obsFastaFileName_FullLengthCopy = "%s.fa" % os.path.splitext(self._obsFileName_FullLengthCopy)[0]\n+#        self._obsFastaFileName_Copy = "%s.fa" % os.path.splitext(self._obsFileName_Copy)[0]\n+#        self._obsFastaFileName_FullLengthFrag = "%s.fa" % os.path.splitext(self._obsFileName_FullLengthFrag)[0]\n+        \n+    def tearDown(self):\n+#        self._iDb.dropTable(self.inPathTableName)\n+#        
self._iDb.dropTable(self.RefTETableName)\n+#        self._iDb.dropTable(self.genomeTableName)\n+#        self._iDb.close()\n+        \n+        os.chdir("..")\n+#        try:\n+#            rmtree(self._testPrefix)\n+#        except:pass\n+        \n+\n+#    def test_run_step1(self):\n+#        iGMAAPperTErefSeq = GetMultAlignAndPhylogenyPerTErefSeq(pathTableName= self.inPathTableName, refSeqTableName=self.RefTETableName, genomeSeqTableName= self.genomeTableName, configFileName=self._configFileName, step=1, verbosity=self._verbosit'..b'elf._configFileName, step=1, verbosity=self._verbosity)\n+#        iGMAAPperTErefSeq.run()\n+#        iGMAAPperTErefSeq.step = 2\n+#        iGMAAPperTErefSeq.run()\n+#\n+#        self.assertTrue(os.stat("DmelChr4-B-G9-Map3_NoCat_all.fa.oriented_refalign.fa_aln")[6] != 0)\n+\n+    def test_run_step1Step2Step3Map(self):\n+        iGMAAPperTErefSeq = GetMultAlignAndPhylogenyPerTErefSeq(pathTableName= self.inPathTableName, refSeqTableName=self.RefTETableName, genomeSeqTableName= self.genomeTableName, mSAmethod="Map", configFileName=self._configFileName, step=1, verbosity=self._verbosity)\n+        iGMAAPperTErefSeq.run()\n+        iGMAAPperTErefSeq.step = 2\n+        iGMAAPperTErefSeq.run()\n+        iGMAAPperTErefSeq.step = 3\n+        iGMAAPperTErefSeq.run()\n+\n+        self.assertTrue(os.stat("DmelChr4-B-G9-Map3_NoCat_all.fa.oriented_map.fa_aln")[6] != 0)\n+    \n+    def _writeConfigFile(self):\n+        f = open(self._configFileName, "w")\n+        f.write("[repet_env]\\n")\n+        f.write("repet_host: %s\\n" % os.environ["REPET_HOST"])\n+        f.write("repet_user: %s\\n" % os.environ["REPET_USER"])\n+        f.write("repet_pw: %s\\n" % os.environ["REPET_PW"])\n+        f.write("repet_db: %s\\n" % os.environ["REPET_DB"])\n+        f.write("repet_port: %s\\n" % os.environ["REPET_PORT"])\n+        f.write("repet_job_manager: SGE\\n")\n+        f.close()\n+    \n+                    \n+#    def test_run_as_script_step1Step2Map(self):\n+#        #cmd= "GetMultAlignAndPhylogenyPerTErefSeq.py -S 1 -m \'Map\' -p %s -s %s -g %s -C %s" % (self._inPathTableName,self._RefTETableName, self._genomeTableName, self._configFileName)\n+#        cmd= "GetMultAlignAndPhylogenyPerTErefSeq.py -S 1 -p %s -s %s -g %s -C %s" % (self.inPathTableName,self.RefTETableName, self.genomeTableName, self._configFileName)\n+#        subprocess.call(cmd, shell = True)\n+#        \n+#        cmd= "GetMultAlignAndPhylogenyPerTErefSeq.py -S 2 -p %s -s %s -g %s -C %s" % (self.inPathTableName,self.RefTETableName, self.genomeTableName, self._configFileName)\n+#        subprocess.call(cmd, shell = True)\n+#        \n+#        self.assertTrue(os.stat("DmelChr4-B-G9-Map3_NoCat_all.fa.oriented_map.fa_aln")[6] != 0)\n+        \n+        \n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthCopy, self._obsFileName_FullLengthCopy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_Copy, self._obsFileName_Copy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthFrag, self._obsFileName_FullLengthFrag))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthCopy, self._obsFastaFileName_FullLengthCopy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_Copy, self._obsFastaFileName_Copy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthFrag, self._obsFastaFileName_FullLengthFrag))\n+\n+#    def 
test_run_as_script_step2(self):\n+#        cmd= "GetMultAlignAndPhylogenyPerTErefSeq.py -S 1 -p %s -s %s -g %s -C %s" % (self._inPathTableName,self._RefTETableName, self._genomeTableName, self._configFileName)\n+#        print cmd\n+#        subprocess.call(cmd, shell = True)\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthCopy, self._obsFileName_FullLengthCopy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_Copy, self._obsFileName_Copy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthFrag, self._obsFileName_FullLengthFrag))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthCopy, self._obsFastaFileName_FullLengthCopy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_Copy, self._obsFastaFileName_Copy))\n+#        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthFrag, self._obsFastaFileName_FullLengthFrag))\n+    \n+if __name__ == "__main__":\n+    unittest.main()\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_GetSpecificTELibAccordingToAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_GetSpecificTELibAccordingToAnnotation.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,67 @@
+import subprocess
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.sql.DbMySql import DbMySql
+from commons.tools.GetSpecificTELibAccordingToAnnotation import GetSpecificTELibAccordingToAnnotation
+
+class Test_F_GetSpecificTELibAccordingToAnnotation(unittest.TestCase):
+    
+    def setUp(self):
+        self._expFileName_FullLengthCopy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthCopy.txt" % os.environ["REPET_DATA"]
+        self._expFileName_Copy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithCopy.txt" % os.environ["REPET_DATA"]
+        self._expFileName_FullLengthFrag = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthFrag.txt" % os.environ["REPET_DATA"]
+        self._expFastaFileName_FullLengthCopy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthCopy.fa" % os.environ["REPET_DATA"]
+        self._expFastaFileName_Copy = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithCopy.fa" % os.environ["REPET_DATA"]
+        self._expFastaFileName_FullLengthFrag = "%s/Tools/GiveInfoTeAnnotFile_ConsensusWithFullLengthFrag.fa" % os.environ["REPET_DATA"]
+        
+        self._inFileName = "%s/Tools/GiveInfoTeAnnotFile.txt" % os.environ["REPET_DATA"]
+        inputFastaFileName = "input_TEannot_refTEs.fa"
+        os.symlink(self._expFastaFileName_Copy, inputFastaFileName)
+        self._iDb = DbMySql()
+        self._tableName = "Dummy_Atha_refTEs_seq"
+        self._iDb.createTable(self._tableName, "seq", inputFastaFileName, True)
+        os.remove(inputFastaFileName)      
+        
+        self._obsFileName_FullLengthCopy = "%s_FullLengthCopy.txt" % os.path.splitext(os.path.basename(self._inFileName))[0]
+        self._obsFileName_Copy = "%s_OneCopyAndMore.txt" % os.path.splitext(os.path.basename(self._inFileName))[0]
+        self._obsFileName_FullLengthFrag = "%s_FullLengthFrag.txt" % os.path.splitext(os.path.basename(self._inFileName))[0]
+        self._obsFastaFileName_FullLengthCopy = "%s.fa" % os.path.splitext(self._obsFileName_FullLengthCopy)[0]
+        self._obsFastaFileName_Copy = "%s.fa" % os.path.splitext(self._obsFileName_Copy)[0]
+        self._obsFastaFileName_FullLengthFrag = "%s.fa" % os.path.splitext(self._obsFileName_FullLengthFrag)[0]
+        
+    def tearDown(self):
+        self._iDb.dropTable(self._tableName)
+        self._iDb.close()
+        os.remove(self._obsFileName_FullLengthCopy)
+        os.remove(self._obsFileName_Copy)
+        os.remove(self._obsFileName_FullLengthFrag)
+        os.remove(self._obsFastaFileName_FullLengthCopy)
+        os.remove(self._obsFastaFileName_Copy)
+        os.remove(self._obsFastaFileName_FullLengthFrag)
+
+    def test_run(self):
+        iGetTELib = GetSpecificTELibAccordingToAnnotation(self._inFileName)
+        iGetTELib.setTableName(self._tableName)
+        iGetTELib.run()
+
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthCopy, self._obsFileName_FullLengthCopy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_Copy, self._obsFileName_Copy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthFrag, self._obsFileName_FullLengthFrag))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthCopy, self._obsFastaFileName_FullLengthCopy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_Copy, self._obsFastaFileName_Copy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthFrag, self._obsFastaFileName_FullLengthFrag))
+
+    def test_run_as_script(self):
+        cmd = "GetSpecificTELibAccordingToAnnotation.py -i %s -t %s -v 4" % (self._inFileName, self._tableName)
+        subprocess.call(cmd, shell = True)
+
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthCopy, self._obsFileName_FullLengthCopy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_Copy, self._obsFileName_Copy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName_FullLengthFrag, self._obsFileName_FullLengthFrag))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthCopy, self._obsFastaFileName_FullLengthCopy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_Copy, self._obsFastaFileName_Copy))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFastaFileName_FullLengthFrag, self._obsFastaFileName_FullLengthFrag))
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
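All the observed file names built in setUp() above derive from the input file name by stripping its directory and extension and appending a suffix, with a matching ".fa" variant. A minimal sketch of that derivation (the helper name is hypothetical):

    import os

    def deriveOutputNames(inFileName, suffixes=("FullLengthCopy", "OneCopyAndMore", "FullLengthFrag")):
        # Build "<basename>_<suffix>.txt" plus "<basename>_<suffix>.fa" for each suffix.
        base = os.path.splitext(os.path.basename(inFileName))[0]
        lNames = []
        for suffix in suffixes:
            lNames.append("%s_%s.txt" % (base, suffix))
            lNames.append("%s_%s.fa" % (base, suffix))
        return lNames

    # e.g. ".../Tools/GiveInfoTeAnnotFile.txt" -> "GiveInfoTeAnnotFile_FullLengthCopy.txt", ...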
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,95 @@
+import os
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+
+CURRENT_DIR = os.getcwd()
+
+class Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script(unittest.TestCase):
+    
+    def setUp(self):
+        self._inputFile = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output"
+        self._outputFile = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmpfam_output.align"
+        self._prg = "HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py"
+        self._inputFile2 = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/OutputHmmpfamTest"
+        self._consensusFile = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/ConsensusTestFile_nt.fsa"
+        self._inputFileScan =  os.environ["REPET_PATH"] + "/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput"
+        self._outputFileScan = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmscan_output.align"
+        self._expectedFile =  os.environ["REPET_PATH"] + "/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput.align"
+        os.chdir(CURRENT_DIR)
+        
+    def testHmmpfamOuput2align(self):
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile )
+        cmd += " -o %s" % ( self._outputFile )
+        os.system( cmd )              
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))  
+        self.assertTrue(FileUtils.are2FilesIdentical(self._inputFile + ".align", self._outputFile))
+        self._clean()
+        
+    def testHmmpfamOuput2align_clean_option(self):
+        os.system( "cp " + self._inputFile + " " + self._inputFile + "bis" )
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile + "bis" )
+        cmd += " -o %s" % ( self._outputFile )
+        cmd += " -c"
+        os.system( cmd )        
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))  
+        self.assertTrue( FileUtils.are2FilesIdentical( self._inputFile + ".align", self._outputFile) )
+        self.assertFalse( FileUtils.isRessourceExists( self._inputFile + "bis" ) )
+        self._clean()
+        
+    def testHmmpfamOutput2alignAndTransformCoordInNtAndFilterScores(self):
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFile2 )
+        cmd += " -o %s" % ( self._outputFile )
+        cmd += " -T %s" % ( self._consensusFile )
+        os.system( cmd )
+        self._expectedFile = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/PostPostProcessTestFiltered.align"
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFile))              
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedFile, self._outputFile))
+        self._clean()    
+        
+    def testHmmscanOuput2align(self):
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFileScan )
+        cmd += " -o %s" % ( self._outputFileScan )
+        cmd += " -p hmmscan"
+        os.system( cmd )             
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFileScan))   
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedFile, self._outputFileScan))
+        self._cleanScan()
+        
+    def testHmmscanOuput2align_clean_option(self):
+        os.system( "cp " + self._inputFileScan + " " + self._inputFileScan + "bis" )
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFileScan + "bis" )
+        cmd += " -o %s" % ( self._outputFileScan )
+        cmd += " -c -p hmmscan"
+        os.system( cmd )        
+        self.assertTrue( FileUtils.are2FilesIdentical( self._expectedFile, self._outputFileScan) )
+        self.assertFalse( FileUtils.isRessourceExists( self._inputFileScan + "bis" ) )
+        self._cleanScan()
+        
+    def testHmmscanOutput2alignAndTransformCoordInNtAndFilterScores(self):
+        self._inputFileScan2 = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput"
+        self._consensusFileScan = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/repetHmmscan.fa"
+        self._expectedFileScan = os.environ["REPET_PATH"] + "/commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmscanTransformedExpected.align"
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFileScan2 )
+        cmd += " -o %s" % ( self._outputFileScan )
+        cmd += " -T %s" % ( self._consensusFileScan )
+        cmd += " -p hmmscan"
+        os.system( cmd )       
+        self.assertTrue(FileUtils.isRessourceExists(self._outputFileScan))         
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expectedFileScan, self._outputFileScan))
+        self._cleanScan()    
+        
+    def _clean(self):
+        os.system( "rm " + self._outputFile )
+        
+    def _cleanScan(self):
+        os.system( "rm " + self._outputFileScan )
+        
+            
+if __name__ == "__main__":                 
+    unittest.main()
\ No newline at end of file
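These functional tests drive the script through os.system(), which discards the exit status, so a broken run only surfaces later as a failed file comparison. A minimal sketch of the same invocation run through subprocess, so a non-zero exit is reported first, is shown below; inputFile and outputFile stand for the paths set in setUp, and this wrapper is illustrative only, not part of the changeset:

    import subprocess

    # hmmpfam output -> .align, same command as testHmmpfamOuput2align builds
    cmd = "HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py -i %s -o %s" % (inputFile, outputFile)
    returnCode = subprocess.call(cmd, shell=True)
    assert returnCode == 0, "script exited with status %d" % returnCode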
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_LaunchBlaster.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_LaunchBlaster.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,101 @@
+from commons.core.utils.FileUtils import FileUtils
+import unittest
+import os
+import glob
+from commons.tools.LaunchBlaster import LaunchBlaster
+
+class Test_F_LaunchBlaster(unittest.TestCase):
+
+    def setUp(self):
+        self._inFileName = "DmelChr4.fa"
+        inFilePath = "%s/Tools/%s" % (os.environ["REPET_DATA"], self._inFileName)
+        try:
+            os.remove(self._inFileName)
+        except:
+            pass
+        os.symlink(inFilePath, self._inFileName)
+        self._iLaunchBlaster = LaunchBlaster(self._inFileName)
+        self._iLaunchBlaster.setDoAllByall(True)
+        self._iLaunchBlaster.setVerbosity(4)
+
+    def tearDown(self):
+        try:
+            FileUtils.removeFilesByPattern("%s*" % self._inFileName)
+            os.remove("formatdb.log")
+        except:
+            pass
+
+    def test_run_as_class_1_file(self):
+        expFileName = "%s/Tools/DmelChr4.align" % os.environ["REPET_DATA"]
+        obsFileName = "%s.align" % self._inFileName
+        
+        self._iLaunchBlaster.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+    def test_run_as_class_1_file_changed_params(self):
+        expFileName = "DmelChr4.align"
+        with open(expFileName, "w") as fh:
+            fh.write("dmel_chr4\t691910\t692326\tdmel_chr4\t700019\t700435\t0\t827\t100\n")
+        obsFileName = "%s.align" % self._inFileName
+        
+        self._iLaunchBlaster.setIdentity(100)
+        self._iLaunchBlaster.setCPU(4)
+        self._iLaunchBlaster.setDoClean(True)
+        self._iLaunchBlaster.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(expFileName)
+        self.assertTrue(glob.glob("*fa_cut*") == [])
+        self.assertTrue(glob.glob("*Nstretch*") == [])
+        self.assertTrue(glob.glob("*seq_treated*") == [])
+        self.assertTrue(glob.glob("*.log") == [])
+        
+    def test_run_as_script_1bank_1file(self):
+        inputFileName = "chunks.fa"
+        with open(inputFileName, "w") as f:
+            f.write(">chunk1\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+        expFileName = "exp.align"
+        with open(expFileName, "w") as f:
+            f.write("chunk1\t1\t120\tdmel_chr4\t1\t120\t2e-64\t238\t100\n")
+        obsFileName = "%s.align" % inputFileName
+        
+        cmd = "LaunchBlaster.py -q %s -s %s -e 0.1 -a" % (inputFileName, self._inFileName)
+        os.system(cmd)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        FileUtils.removeFilesByPattern("%s*" % inputFileName)
+        os.remove(expFileName)
+        
+    def test_run_as_script_1bank_1file_withoutABA(self):
+        queryFileName = "chunks.fa"
+        with open(queryFileName, "w") as f:
+            f.write(">chunk1\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+        subjectFileName = "genome.fa"
+        with open(subjectFileName, "w") as f:
+            f.write(">chunk1\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+            f.write(">chunk2\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+        expFileName = "exp.align"
+        with open(expFileName, "w") as f:
+            f.write("chunk1\t1\t120\tchunk1\t1\t120\t4e-68\t238\t100\n")
+            f.write("chunk1\t1\t120\tchunk2\t1\t120\t4e-68\t238\t100\n")
+        obsFileName = "%s.align" % queryFileName
+        
+        cmd = "LaunchBlaster.py -q %s -s %s -e 0.1" % (queryFileName, subjectFileName)
+        os.system(cmd)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        FileUtils.removeFilesByPattern("%s*" % queryFileName)
+        FileUtils.removeFilesByPattern("%s*" % subjectFileName)
+        os.remove(expFileName)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
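The class-level API exercised above reduces to the constructor plus a handful of setters; the sketch below simply collects the calls made in test_run_as_class_1_file_changed_params and implies nothing beyond them:

    from commons.tools.LaunchBlaster import LaunchBlaster

    launcher = LaunchBlaster("DmelChr4.fa")   # bank to align
    launcher.setDoAllByall(True)              # all-by-all comparison of the bank against itself
    launcher.setIdentity(100)                 # keep only 100%-identity HSPs
    launcher.setCPU(4)
    launcher.setDoClean(True)                 # drop *fa_cut*, *Nstretch*, *seq_treated* and *.log files
    launcher.setVerbosity(4)
    launcher.run()                            # writes DmelChr4.fa.align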
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_LaunchBlasterInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_LaunchBlasterInParallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,116 @@
+from commons.core.utils.FileUtils import FileUtils
+import unittest
+import os
+import shutil
+from commons.tools.LaunchBlasterInParallel import LaunchBlasterInParallel
+
+class Test_F_LaunchBlasterInParallel(unittest.TestCase):
+
+    CLUSTER_HOST = "compute-2-46.local"
+
+    def setUp(self):
+        self._inFilePath = "%s/Tools/DmelChr4_chunks.fa" % os.environ["REPET_DATA"]
+        self._configFileName = "TE.cfg"
+        self._outputFileName = "out.align.not_over"
+
+    def tearDown(self):
+        try:
+            os.remove(self._outputFileName)
+            os.remove(self._configFileName)
+        except:
+            pass
+
+    def test_run_as_script_1bank_1file_withoutABA(self):
+        self._outputFileName = "out.align"
+        os.mkdir("tmp")
+        os.chdir("tmp")
+        queryFileName = "chunks.fa"
+        with open(queryFileName, "w") as f:
+            f.write(">chunk1\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+        os.chdir("..")
+        subjectFileName = "genome.fa"
+        with open(subjectFileName, "w") as f:
+            f.write(">chunk1\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+            f.write(">chunk2\n")
+            f.write("GAATTCGCGTCCGCTTACCCATGTGCCTGTGGATGCCGAACAGGAGGCGCCGTTGACGGC\n")
+            f.write("GAATGACTTACTCAAGGGAGTAGCCAATCTGTCGGATACGCCCGGATTGGAGCTGCCCAT\n")
+        expFileName = "expected"
+        with open(expFileName, "w") as f:
+            f.write("chunk1\t1\t120\tchunk1\t1\t120\t4e-68\t238\t100\n")
+            f.write("chunk1\t1\t120\tchunk2\t1\t120\t4e-68\t238\t100\n")
+        self._writeConfig(0.1)
+        
+        cmd = "LaunchBlasterInParallel.py -q %s/tmp -s %s/%s -C %s -o %s" % (os.getcwd(), os.getcwd(), subjectFileName, self._configFileName, self._outputFileName)
+        os.system(cmd)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        FileUtils.removeFilesByPattern("*.param")
+        shutil.rmtree("tmp")
+        os.remove(subjectFileName)
+        os.remove(expFileName)
+        
+    def test_run_as_script_1bank_2_batches(self):
+        os.mkdir("tmp")
+        os.chdir("tmp")
+        os.symlink("%s/Tools/batches/batch_1.fa" % os.environ["REPET_DATA"], "batch_1.fa")
+        os.symlink("%s/Tools/batches/batch_2.fa" % os.environ["REPET_DATA"], "batch_2.fa")
+        os.chdir("..")
+        
+        expFileName = "%s/Tools/DmelChr4.align.not_over" % os.environ["REPET_DATA"]
+        self._writeConfig()
+        
+        cmd = "LaunchBlasterInParallel.py -q %s/tmp -s %s -a -o %s -C %s" % (os.getcwd(), self._inFilePath, self._outputFileName, self._configFileName)
+        os.system(cmd)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        FileUtils.removeFilesByPattern("*.param")
+        shutil.rmtree("tmp")
+        
+    def test_run_1bank_1bank_2_batches(self):
+        os.mkdir("tmp")
+        os.chdir("tmp")
+        os.symlink("%s/Tools/batches/batch_1.fa" % os.environ["REPET_DATA"], "batch_1.fa")
+        os.symlink("%s/Tools/batches/batch_2.fa" % os.environ["REPET_DATA"], "batch_2.fa")
+        os.chdir("..")
+        
+        expFileName = "%s/Tools/DmelChr4.align.not_over" % os.environ["REPET_DATA"]
+        self._writeConfig()
+        
+        iLaunchBlaster = LaunchBlasterInParallel(configFileName = self._configFileName, outFileName = self._outputFileName)
+        iLaunchBlaster.setDoAllByall(True)
+        iLaunchBlaster.setVerbosity(4)
+        iLaunchBlaster.setQueryDirectory("%s/tmp" % os.getcwd())
+        iLaunchBlaster.setSubjectFilePath(self._inFilePath)
+        iLaunchBlaster.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, self._outputFileName))
+        FileUtils.removeFilesByPattern("*.param")
+        shutil.rmtree("tmp")
+        
+    def _writeConfig(self, eValue = 1e-300):
+        with open(self._configFileName, "w") as fh:
+            fh.write("[jobs]\n")
+            if os.getenv("HOSTNAME") == self.CLUSTER_HOST:
+                fh.write("resources: test\n")
+            else:
+                fh.write("resources:\n")
+            fh.write("tmpDir:\n")
+            fh.write("copy: no\n") 
+            fh.write("clean: yes\n")
+            fh.write("\n")
+            fh.write("[prepare_data]\n")
+            fh.write("chunk_length: 200000\n")
+            fh.write("chunk_overlap: 10000\n")
+            fh.write("\n")
+            fh.write("[alignment]\n")
+            fh.write("blast: ncbi\n")
+            fh.write("Evalue: %s\n" % eValue)
+            fh.write("length: 100\n")
+            fh.write("identity: 90\n")
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
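For readability, the TE.cfg that _writeConfig() produces with the default eValue on a non-cluster host is reproduced below; it is nothing more than the fh.write() calls above concatenated:

    [jobs]
    resources:
    tmpDir:
    copy: no
    clean: yes

    [prepare_data]
    chunk_length: 200000
    chunk_overlap: 10000

    [alignment]
    blast: ncbi
    Evalue: 1e-300
    length: 100
    identity: 90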
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_LaunchMatcherInParallel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_LaunchMatcherInParallel.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,88 @@
+from commons.core.utils.FileUtils import FileUtils
+import shutil
+import unittest
+import os
+from commons.tools.LaunchMatcherInParallel import LaunchMatcherInParallel
+
+class Test_F_LaunchMatcherInParallel(unittest.TestCase):
+
+    CLUSTER_HOST = "compute-2-46.local"
+
+    def setUp(self):
+        self._configFileName = "TE.cfg"
+        self._writeConfig()
+
+    def tearDown(self):
+        FileUtils.removeFilesByPattern("DmelChr4*")
+        os.remove(self._configFileName)
+
+    def test_run_as_class_1_seq(self):
+        inFileName = "DmelChr4.align"
+        fastaFileName = "DmelChr4.fa"
+        inFilePath = "%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName)
+        inFastaPath = "%s/Tools/%s" % (os.environ["REPET_DATA"], fastaFileName)
+        os.symlink(inFilePath, inFileName)
+        os.symlink(inFastaPath, fastaFileName)
+        expPathFileName = "%s/Tools/DmelChr4.align.match.path" % os.environ["REPET_DATA"]
+        expTabFileName = "%s/Tools/DmelChr4.align.match.tab" % os.environ["REPET_DATA"]
+        
+        obsPathFileName = "%s.match.path" % inFileName
+        obsTabFileName = "%s.match.tab" % inFileName
+        
+        iLaunchMatcherInParallel = LaunchMatcherInParallel(align=inFileName, queryFileName=fastaFileName, subjectFileName=fastaFileName, doJoin=True, evalue="1e-20", keepConflict=True, config=self._configFileName)
+        iLaunchMatcherInParallel.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expPathFileName, obsPathFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expTabFileName, obsTabFileName))
+
+    def test_run_as_class_n_seq_less_jobs_than_n(self):
+        inFileName = "DmelChr4_dummy.align"
+        fastaFileName = "DmelChr4.fa"
+        inFilePath = "%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName)
+        inFastaPath = "%s/Tools/%s" % (os.environ["REPET_DATA"], fastaFileName)
+        os.symlink(inFilePath, inFileName)
+        os.symlink(inFastaPath, fastaFileName)
+        expPathFileName = "%s/Tools/DmelChr4_dummy.align.match.path" % os.environ["REPET_DATA"]
+        expTabFileName = "%s/Tools/DmelChr4_dummy.align.match.tab" % os.environ["REPET_DATA"]
+        
+        obsPathFileName = "%s.match.path" % inFileName
+        obsTabFileName = "%s.match.tab" % inFileName
+        
+        iLaunchMatcherInParallel = LaunchMatcherInParallel(align=inFileName, queryFileName=fastaFileName, subjectFileName=fastaFileName, doJoin=True, evalue="1e-20", keepConflict=True, config=self._configFileName, doClean=True)
+        iLaunchMatcherInParallel.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expPathFileName, obsPathFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(expTabFileName, obsTabFileName))
+        
+    def test_run_as_class_n_seq_less_jobs_than_n_not_merged_results(self):
+        inFileName = "DmelChr4_dummy.align"
+        fastaFileName = "DmelChr4.fa"
+        inFilePath = "%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName)
+        inFastaPath = "%s/Tools/%s" % (os.environ["REPET_DATA"], fastaFileName)
+        os.symlink(inFilePath, inFileName)
+        os.symlink(inFastaPath, fastaFileName)
+        
+        iLaunchMatcherInParallel = LaunchMatcherInParallel(align=inFileName, queryFileName=fastaFileName, subjectFileName=fastaFileName, doJoin=True, evalue="1e-20", keepConflict=True, config=self._configFileName, maxFileSize=100, mergeResults=False)
+        iLaunchMatcherInParallel.run()
+        
+        baseFileName = os.path.splitext(inFileName)[0]
+        lExpMatchFileNames = ["%s_1.align.match.path" % baseFileName, "%s_2.align.match.path" % baseFileName, "%s_3.align.match.path" % baseFileName]
+        lExpTabFileNames = ["%s_1.align.match.tab" % baseFileName, "%s_2.align.match.tab" % baseFileName, "%s_3.align.match.tab" % baseFileName]
+        for i in range(0,3):
+            self.assertTrue(FileUtils.isRessourceExists("tmpMatcher/%s" % lExpMatchFileNames[i]))
+            self.assertTrue(FileUtils.isRessourceExists("tmpMatcher/%s" % lExpTabFileNames[i]))
+        shutil.rmtree("tmpMatcher")
+
+    def _writeConfig(self):
+        with open(self._configFileName, "w") as fh:
+            fh.write("[jobs]\n")
+            if os.getenv("HOSTNAME") == self.CLUSTER_HOST:
+                fh.write("resources: test\n")
+            else:
+                fh.write("resources:\n")
+            fh.write("tmpDir:\n")
+            fh.write("copy: no\n") 
+            fh.write("clean: yes\n")
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
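The keyword interface these tests rely on is summarised below as a sketch; the argument list is copied from test_run_as_class_1_seq and no other options are implied:

    from commons.tools.LaunchMatcherInParallel import LaunchMatcherInParallel

    matcher = LaunchMatcherInParallel(align="DmelChr4.align",
                                      queryFileName="DmelChr4.fa",
                                      subjectFileName="DmelChr4.fa",
                                      doJoin=True,
                                      evalue="1e-20",
                                      keepConflict=True,
                                      config="TE.cfg")
    matcher.run()   # expected to produce DmelChr4.align.match.path and DmelChr4.align.match.tab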
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_MergeMatchsFiles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_MergeMatchsFiles.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,302 @@\n+import os\n+import unittest\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.tools.MergeMatchsFiles import MergeMatchsFiles\n+\n+\n+class Test_F_MergeMatchsFiles(unittest.TestCase):\n+\n+    def test_run_set(self):\n+        inFileName1 = "test1.set"\n+        inFileName2 = "test2.set"\n+        self._writeSetFiles(inFileName1, inFileName2)\n+        iMMF = MergeMatchsFiles("set", "out")\n+        iMMF.run()\n+        obsFileName = "out.set"\n+        expFileName = "exp.set"\n+        self._writeExpSetFile(expFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(expFileName)\n+        os.remove(obsFileName)\n+\n+    def test_run_align(self):\n+        inFileName1 = "test1.align"\n+        inFileName2 = "test2.align"\n+        self._writeAlignFiles(inFileName1, inFileName2)\n+        iMMF = MergeMatchsFiles("align", "out")\n+        iMMF.run()\n+        obsFileName = "out.align"\n+        expFileName = "exp.align"\n+        self._writeExpAlignFile(expFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(expFileName)\n+        os.remove(obsFileName)\n+\n+    def test_run_align_allByAll(self):\n+        inFileName1 = "test1.align"\n+        inFileName2 = "test2.align"\n+        self._writeAlignFiles(inFileName1, inFileName2)\n+        iMMF = MergeMatchsFiles("align", "out", True)\n+        iMMF.run()\n+        obsFileName = "out.align"\n+        expFileName = "exp.align"\n+        self._writeExpAlignFile_allByAll(expFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(expFileName)\n+        os.remove(obsFileName)\n+\n+    def test_run_path(self):\n+        inFileName1 = "test1.path"\n+        inFileName2 = "test2.path"\n+        self._writePathFiles(inFileName1, inFileName2)\n+        iMMF = MergeMatchsFiles("path", "out")\n+        iMMF.run()\n+        obsFileName = "out.path"\n+        expFileName = "exp.path"\n+        self._writeExpPathFile(expFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(expFileName)\n+        os.remove(obsFileName)\n+\n+    def test_run_tab(self):\n+        inFileName1 = "test1.tab"\n+        inFileName2 = "test2.tab"\n+        self._writeTabFiles(inFileName1, inFileName2)\n+        iMMF = MergeMatchsFiles("tab", "out")\n+        iMMF.run()\n+        obsFileName = "out.tab"\n+        expFileName = "exp.tab"\n+        self._writeExpTabFile(expFileName)\n+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+        os.remove(expFileName)\n+        os.remove(obsFileName)\n+        \n+    def _writeSetFiles(self, fileName1, fileName2):\n+        f = open(fileName1, "w")\n+        f.write("1\\t(TCTAT)3\\tchunk006\\t295\\t309\\n")\n+        f.write("2\\t(A)33\\tchunk006\\t679\\t711\\n")\n+        f.write("3\\t(G)16\\tchunk006\\t731\\t746\\n")\n+        f.write("4\\t(GAG)9\\tchunk006\\t903\\t929\\n")\n+        f.write("5\\t(GGAGGG)4\\tchunk006\\t905\\t929\\n")\n+        f.close()\n+        f = open(fileName2, "w")\n+        f.write("1\\t(CCACT)3\\tchunk011\\t101\\t116\\n")\n+        f.write("2\\t(TATATA)7\\tchunk011\\t316\\t357\\n")\n+        f.write("3\\t(AT)22\\tchunk011\\t323\\t366\\n")\n+        f.close()\n+        \n+    def _writeExpSetFile(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("1\\t(TCTAT)3\\tchunk006\\t295\\t309\\n")\n+        
f.write("2\\t(A)33\\tchunk006\\t679\\t711\\n")\n+        f.write("3\\t(G)16\\tchunk006\\t731\\t746\\n")\n+        f.write("4\\t(GAG)9\\tchunk006\\t903\\t929\\n")\n+        f.write("5\\t(GGAGGG)4\\tchunk006\\t905\\t929\\n")\n+        f.write("6\\t(CCACT)3\\tchunk011\\t101\\t116\\n")\n+        f.write("7\\t(TATATA)7\\tchunk011\\t316\\t357\\n")\n+        f.write("8\\t(AT)22\\tchunk011\\t323\\t366\\n")\n+        f.close()\n+\n+    def _writeAlignFiles(self, fileName1, fileName2):\n+        f = open(fileName1, "w")\n+        f.write("chunk1\\t25601\\t27800\\tchunk1\\t52250\\t54450\\t0\\t4244\\t99.36\\n")\n+        f.write("chunk1\\t27791\\t2862'..b'       f.write("13\\tchunk006\\t29180\\t29201\\tAT_rich#Low_complexity\\t163\\t142\\t0.0\\t14\\t68.18\\n")\n+        f.write("14\\tchunk006\\t33773\\t33825\\tAT_rich#Low_complexity\\t142\\t194\\t0.0\\t11\\t22.64\\n")\n+        f.close()\n+        \n+    def _writeTabFiles(self, fileName1, fileName2):\n+        f = open(fileName1, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_1_Map_3\\t2\\t542\\t541\\t0.998155\\t0.32473\\tTC1_DM:ClassII:TIR:Tc1-Mariner\\t1\\t543\\t543\\t0.32593\\t1.4e-93\\t984\\t87.541\\t1\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_2_Map_8\\t2\\t517\\t516\\t0.998066\\t0.115179\\tPROTOP:ClassII:TIR:P\\t530\\t16\\t515\\t0.114955\\t1.2e-79\\t928\\t95.2118\\t2\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_3_Map_20\\t5\\t598\\t594\\t0.978583\\t0.537557\\tPROTOP_A:ClassII:TIR:P\\t572\\t1\\t572\\t0.517647\\t3e-93\\t1048\\t97.6307\\t3\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_4_Map_3\\t5\\t534\\t530\\t0.992509\\t0.479638\\tPROTOP_A:ClassII:TIR:P\\t1105\\t576\\t530\\t0.479638\\t2.9e-87\\t1828\\t98.4848\\t4\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_5_Map_3\\t5\\t704\\t700\\t0.994318\\t0.425791\\tTC1-2_DM:ClassII:TIR:Tc1-Mariner\\t1644\\t945\\t700\\t0.425791\\t7.3e-120\\t1228\\t98.1349\\t5\\n")\n+        f.close()\n+        f = open(fileName2, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_5_Map_3\\t274\\t702\\t429\\t0.609375\\t1.26176\\tTC1-2_DMp:ClassII:TIR:Tc1-Mariner\\t340\\t198\\t143\\t0.420588\\t5e-74\\t265\\t98.6014\\t1\\n")\n+        f.write("DmelChr4_bench_Blaster_Recon_12_Map_3\\t311\\t374\\t64\\t0.028021\\t0.0359349\\tBEL-27_AA-I_1p:ClassI:LTR:Bel-Pao\\t605\\t582\\t24\\t0.0134756\\t6.3e-17\\t11\\t30.8511\\t2\\n")\n+        f.write("DmelChr4_bench_Blaster_Recon_12_Map_3\\t1472\\t1552\\t81\\t0.0354641\\t0.0463918\\tBEL-76_AA-I_1p:ClassI:LTR:Bel-Pao\\t79\\t52\\t28\\t0.0160367\\t1e-13\\t8\\t27.7778\\t3\\n")\n+        f.close()\n+        \n+    def _writeExpTabFile(self, fileName):\n+        f = open(fileName, "w")\n+        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_1_Map_3\\t2\\t542\\t541\\t0.998155\\t0.32473\\tTC1_DM:ClassII:TIR:Tc1-Mariner\\t1\\t543\\t543\\t0.32593\\t1.4e-93\\t984\\t87.541\\t1\\n")\n+        
f.write("DmelChr4_bench_Blaster_Grouper_2_Map_8\\t2\\t517\\t516\\t0.998066\\t0.115179\\tPROTOP:ClassII:TIR:P\\t530\\t16\\t515\\t0.114955\\t1.2e-79\\t928\\t95.2118\\t2\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_3_Map_20\\t5\\t598\\t594\\t0.978583\\t0.537557\\tPROTOP_A:ClassII:TIR:P\\t572\\t1\\t572\\t0.517647\\t3e-93\\t1048\\t97.6307\\t3\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_4_Map_3\\t5\\t534\\t530\\t0.992509\\t0.479638\\tPROTOP_A:ClassII:TIR:P\\t1105\\t576\\t530\\t0.479638\\t2.9e-87\\t1828\\t98.4848\\t4\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_5_Map_3\\t5\\t704\\t700\\t0.994318\\t0.425791\\tTC1-2_DM:ClassII:TIR:Tc1-Mariner\\t1644\\t945\\t700\\t0.425791\\t7.3e-120\\t1228\\t98.1349\\t5\\n")\n+        f.write("DmelChr4_bench_Blaster_Grouper_5_Map_3\\t274\\t702\\t429\\t0.609375\\t1.26176\\tTC1-2_DMp:ClassII:TIR:Tc1-Mariner\\t340\\t198\\t143\\t0.420588\\t5e-74\\t265\\t98.6014\\t6\\n")\n+        f.write("DmelChr4_bench_Blaster_Recon_12_Map_3\\t311\\t374\\t64\\t0.028021\\t0.0359349\\tBEL-27_AA-I_1p:ClassI:LTR:Bel-Pao\\t605\\t582\\t24\\t0.0134756\\t6.3e-17\\t11\\t30.8511\\t7\\n")\n+        f.write("DmelChr4_bench_Blaster_Recon_12_Map_3\\t1472\\t1552\\t81\\t0.0354641\\t0.0463918\\tBEL-76_AA-I_1p:ClassI:LTR:Bel-Pao\\t79\\t52\\t28\\t0.0160367\\t1e-13\\t8\\t27.7778\\t8\\n")\n+        f.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_PostAnalyzeTELib.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_PostAnalyzeTELib.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,294 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.tools.PostAnalyzeTELib import PostAnalyzeTELib\n+import subprocess\n+import unittest\n+import os\n+\n+\n+class Test_F_PostAnalyzeTELib(unittest.TestCase):\n+    \n+    def setUp(self):\n+        self._expStatFileName = "expStats.tab"\n+        self._obsStatFileName = ""\n+        self._genomeSize = 1281640\n+        self._pathTableName = "dummyDmelChr4_chr_allTEs_nr_noSSR_join_path"\n+        self._seqTableName = "dummyDmelChr4_denovoLibTEs_seq"\n+        \n+    def tearDown(self):\n+        try:\n+            os.remove(self._expStatFileName)\n+        except: pass\n+        try:\n+            os.remove(self._obsStatFileName)\n+        except: pass\n+\n+    def test_run_analysis1(self):\n+        libFileName = "TElib.fa"\n+        self._writeInputFasta_analysis1(libFileName)\n+        expClusterFileName = "expClusters.tab"\n+        self._writeExpClusterFile_analysis1(expClusterFileName)\n+        expGlobalStatFileName = "expGlobalStats.txt"\n+        self._writeExpGlobalStats_analysis1(expGlobalStatFileName)\n+        self._writeExpStatsFile_analysis1(self._expStatFileName)\n+        obsClusterFileName = "TElib.tab"\n+        obsGlobalStatFileName = "TElib.globalStatsPerCluster.txt"\n+        self._obsStatFileName = "TElib.statsPerCluster.tab"\n+        \n+        iPATEL = PostAnalyzeTELib(analysis=1, fastaFileName=libFileName, doClean=True)\n+        iPATEL.run()\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))\n+        self.assertTrue(FileUtils.are2FilesIdentical(expGlobalStatFileName, obsGlobalStatFileName))\n+        self.assertTrue(FileUtils.are2FilesIdentical(self._expStatFileName, self._obsStatFileName))\n+        \n+        os.remove(libFileName)\n+        os.remove(expClusterFileName)\n+        os.remove(expGlobalStatFileName)\n+        os.remove(obsClusterFileName)\n+        os.remove(obsGlobalStatFileName)\n+\n+    def test_run_as_script_analysis1(self):\n+        libFileName = "TElib.fa"\n+        with open(libFileName, "w") as f:\n+            f.write(">transib2\\n")\n+            f.write("GGCCAGTCACAATGGGGGTTTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGA\\n")\n+            f.write("ATAAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\\n")\n+            f.write("GATTCGTTTCATTCACCGGATCTCTTGCGTCCGCCTCCGCCGTGCGACCTCCGCATTC\\n")\n+            f.write(">transib3\\n")\n+            f.write("ATAAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\\n")\n+            f.write("TGAAACTCGTCAGCGTCGTTTCCAAGTCCT\\n")\n+            f.write(">transib4\\n")\n+            f.write("GGCCAGTCACAATGGGGGTTTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGA\\n")\n+            f.write("ATAAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\\n")\n+            f.write("GATTCGTTTCATTCACCGGATCTCTTGCGTCCGCCTCCGCCGTGCGACCTCCGCATTCAT\\n")\n+            f.write("AAAAAATGATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGG\\n")\n+        expClusterFileName = "expClusters.tab"\n+        with open(expClusterFileName, "w") as f:\n+            f.write("transib4 \\n")\n+            f.write("transib2 \\n")\n+            f.write("transib3 \\n")\n+        with open(self._expStatFileName, "w") as f:\n+            f.write("cluster\\tsequencesNb\\tsizeOfSmallestSeq\\tsizeOfLargestSeq\\taverageSize\\tmedSize\\n")\n+            f.write("1\\t1\\t238\\t238\\t238\\t238\\n")\n+            
f.write("2\\t1\\t178\\t178\\t178\\t178\\n")\n+            f.write("3\\t1\\t90\\t90\\t90\\t90\\n")\n+        expGlobalStatFileName = "expGlobalStats.txt"\n+        with open(expGlobalStatFileName, "w") as f:\n+            f.write("nb of clusters: 3\\n")\n+            f.write("nb of clusters with 1 sequence: 3\\n")\n+            f.write("nb of clusters with 2 sequences: 0\\n")\n+            f.write("nb of clusters with >2 sequences: 0 (0 sequences)\\n")\n+            f.write("nb of sequences: 3\\n")\n+            f.write("nb of sequences in the largest cluster: 1\\n")\n+            f.write("nb of sequences in the s'..b'f.write("DmelChr4-B-G7-Map3_classII-TIR-incomp\\t1944\\t15212\\t49\\t1\\t42\\t1\\t89.44\\t382.36\\t19.67\\n")\n+            f.write("DmelChr4-B-G9-Map3_NoCat\\t1590\\t11564\\t24\\t0\\t21\\t1\\t92.03\\t550.67\\t34.63\\n")\n+            f.write("DmelChr4-B-P0.0-Map3_classII-TIR-incomp\\t1042\\t4001\\t13\\t3\\t11\\t3\\t85.11\\t366.36\\t35.16\\n")\n+            f.write("DmelChr4-B-R1-Map4_NoCat\\t2367\\t66031\\t484\\t0\\t361\\t0\\t77.84\\t182.91\\t7.73\\n")\n+            f.write("DmelChr4-B-R12-Map3_NoCat\\t2284\\t4938\\t3\\t2\\t3\\t2\\t99.26\\t1646.00\\t72.07\\n")\n+            f.write("DmelChr4-B-R19-Map4_NoCat\\t705\\t3328\\t10\\t3\\t10\\t3\\t88.51\\t332.80\\t47.21\\n")\n+            f.write("DmelChr4-B-R2-Map6_NoCat\\t4638\\t20539\\t34\\t2\\t29\\t3\\t80.93\\t708.24\\t15.27\\n")\n+            f.write("DmelChr4-B-R4-Map5_NoCat\\t1067\\t7292\\t35\\t1\\t28\\t1\\t86.50\\t260.54\\t24.42\\n")\n+            f.write("DmelChr4-B-R9-Map3_NoCat\\t714\\t5453\\t19\\t2\\t16\\t2\\t81.18\\t340.81\\t47.73\\n")\n+        \n+    def _writeExpGlobalStats_analysis3(self, fileName):\n+        with open(fileName, "w") as f:\n+            f.write("nb of sequences: 11\\n")\n+            f.write("nb of matched sequences: 11\\n")\n+            f.write("cumulative coverage: 150275 bp\\n")\n+            f.write("coverage percentage: 11.73%\\n")\n+            f.write("\\n")\n+            f.write("total nb of TE fragments: 710\\n")\n+            f.write("total nb full-length fragments: 18 (2.54%)\\n")\n+            f.write("total nb of TE copies: 553\\n")\n+            f.write("total nb full-length copies: 20 (3.62%)\\n")\n+            f.write("families with full-length fragments: 8 (72.73%)\\n")\n+            f.write(" with only one full-length fragment: 2\\n")\n+            f.write(" with only two full-length fragments: 3\\n")\n+            f.write(" with only three full-length fragments: 2\\n")\n+            f.write(" with more than three full-length fragments: 1\\n")\n+            f.write("families with full-length copies: 9 (81.82%)\\n")\n+            f.write(" with only one full-length copy: 3\\n")\n+            f.write(" with only two full-length copies: 2\\n")\n+            f.write(" with only three full-length copies: 3\\n")\n+            f.write(" with more than three full-length copies: 1\\n")\n+            f.write("mean of median identity of all families: 88.30 +- 8.33\\n")\n+            f.write("mean of median length percentage of all families: 30.83 +- 32.30\\n")\n+            \n+    def _writeClusterFile_analysis4(self, fileName):\n+        with open(fileName, "w") as f:\n+            f.write("1\\tDmelChr4-B-R1-Map4_NoCat\\tDmelChr4-B-R2-Map6_NoCat\\tDmelChr4-B-R4-Map5_NoCat\\n")\n+            f.write("2\\tDmelChr4-B-G7-Map3_classII-TIR-incomp\\tDmelChr4-B-P0.0-Map3_classII-TIR-incomp\\n")\n+\n+    def _writeExpStatsFile_analysis4(self, fileName):\n+        with open(fileName, "w") as f:\n+    
        f.write("Cluster\\tcovg\\tfrags\\tcopies\\n")\n+            f.write("1\\t93862\\t553\\t418\\n")\n+            f.write("2\\t19213\\t62\\t53\\n")\n+            \n+    def _writeConfigFile(self, configFileName):\n+        with open(configFileName, "w") as fHandle:\n+            fHandle.write("[repet_env]\\n")\n+            fHandle.write("repet_host: %s\\n" % os.environ["REPET_HOST"])\n+            fHandle.write("repet_user: %s\\n" % os.environ["REPET_USER"])\n+            fHandle.write("repet_pw: %s\\n" % os.environ["REPET_PW"])\n+            fHandle.write("repet_db: %s\\n" % os.environ["REPET_DB"])\n+            fHandle.write("repet_port: 3306\\n")\n+            fHandle.write("[analysis1]\\n")\n+            fHandle.write("fasta_name: %s\\n" % self._expStatFileName)\n+            \n+            fHandle.write("[analysis2]\\n")\n+            fHandle.write("clusterFileName: %s\\n" % self._expStatFileName)\n+            \n+            fHandle.write("[analysis3]\\n")\n+            fHandle.write("pathTableName: %s\\n" % self._pathTableName)\n+            fHandle.write("seqTableName: %s\\n" % self._seqTableName)\n+            fHandle.write("genomeSize: %s\\n" % self._genomeSize)\n+\n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_ReplaceGreaterThanSymbolInFastaHeader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_ReplaceGreaterThanSymbolInFastaHeader.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,51 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.tools.tests.MockFastaForReplaceGreaterThanSymbolInFastaHeader import MockFastaForReplaceGreaterThanSymbolInFastaHeader_withoutGreaterThan,\
+    MockFastaForReplaceGreaterThanSymbolInFastaHeader
+from commons.tools.replaceGreaterThanSymbolInFastaHeaderProgramLauncher import replaceGreaterThanSymbolInFastaHeaderProgramLauncher
+
+class Test_F_ReplaceGreaterThanSymbolInHeader(unittest.TestCase):
+
+
+    def test_run(self):
+        inputFileName = "dummy_fasta.fsa"
+        mockInput = MockFastaForReplaceGreaterThanSymbolInFastaHeader()
+        mockInput.write(inputFileName)
+        
+        expFileName = "exp_dummy_fasta.preprocessed.fsa"
+        mockInput = MockFastaForReplaceGreaterThanSymbolInFastaHeader_withoutGreaterThan()
+        mockInput.write(expFileName)
+        
+        obsFileName = "dummy_fasta.preprocessed.fasta"
+        rgtspl = replaceGreaterThanSymbolInFastaHeaderProgramLauncher()
+        rgtspl.setInputFile(inputFileName)
+        rgtspl.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(inputFileName)
+        os.remove(expFileName)
+        os.remove(obsFileName)
+
+    def test_run_as_script(self):
+        inputFileName = "dummy_fasta.fsa"
+        mockInput = MockFastaForReplaceGreaterThanSymbolInFastaHeader()
+        mockInput.write(inputFileName)
+        
+        expFileName = "exp_dummy_fasta.preprocessed.fsa"
+        mockInput = MockFastaForReplaceGreaterThanSymbolInFastaHeader_withoutGreaterThan()
+        mockInput.write(expFileName)
+        
+        obsFileName = "dummy_fasta.preprocessed.fasta"
+
+        cmd2Launch = "replaceGreaterThanSymbolInFastaHeaderProgramLauncher.py -i " + inputFileName
+        os.system(cmd2Launch)
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(inputFileName)       
+        os.remove(expFileName)
+        os.remove(obsFileName)    
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_RetrieveInitHeaders.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_RetrieveInitHeaders.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,85 @@
+from commons.core.sql.DbMySql import DbMySql
+from commons.core.sql.TablePathAdaptator import TablePathAdaptator
+from commons.core.coord.PathUtils import PathUtils
+from commons.core.utils.FileUtils import FileUtils
+import os
+import unittest
+
+class Test_F_RetrieveInitHeaders(unittest.TestCase):
+    
+    def setUp(self):
+        self._iDb = DbMySql()
+        
+    def tearDown(self):
+        self._iDb.close()
+
+    def test_run_as_script_rename_subject_and_clean_table(self):
+        shortHLinkFileName = "dummy.shortHlink"
+        self._writeShortHLinkFile(shortHLinkFileName)
+        pathTableName = "dummyInput_path"
+        self._createPathTable(pathTableName)
+        expFileName = "exp.path"
+        self._writeExpFile(expFileName)
+        obsTableName = "dummyOutput_path"
+        
+        cmd = "RetrieveInitHeaders.py -i %s -l %s -o %s -s -c -v 1" % (pathTableName, shortHLinkFileName, obsTableName)
+        os.system(cmd)
+
+        self.assertTrue(self._iDb.doesTableExist(obsTableName))        
+        self.assertFalse(self._iDb.doesTableExist(pathTableName))
+        obsFileName = "obs.path"
+        iTPA = TablePathAdaptator(self._iDb, obsTableName)
+        lPaths = iTPA.getListOfAllPaths()
+        PathUtils.writeListInFile(lPaths, obsFileName)
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        os.remove(shortHLinkFileName)
+        os.remove(expFileName)
+        os.remove(obsFileName)
+        self._iDb.dropTable(obsTableName)
+    
+    def _writeShortHLinkFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("refTE_1\tPaphi_v2-B-R11932-Map16_classI-LTR-comp\t1\t5388\n")
+        f.write("refTE_2\tPaphi_v2-B-R2469-Map20_classI-LTR-comp\t1\t6385\n")
+        f.write("refTE_3\tPaphi_v2-B-R8543-Map9_classI-LTR-comp\t1\t7148\n")
+        f.write("refTE_4\tPaphi_v2-B-R33384-Map4_classI-LTR-incomp\t1\t262\n")
+        f.write("refTE_5\tPaphi_v2-B-P73.1298-Map3_classI-LTR-incomp\t1\t389\n")
+        f.write("refTE_6\tPaphi_v2-B-R36143-Map3_classI-LTR-incomp\t1\t393\n")
+        f.write("refTE_7\tPaphi_v2-B-R35261-Map5_classI-LTR-incomp\t1\t408\n")
+        f.write("refTE_8\tPaphi_v2-B-R12434-Map4_classI-LTR-incomp\t1\t420\n")
+        f.write("refTE_9\tPaphi_v2-B-R20580-Map4_classI-LTR-incomp\t1\t426\n")
+        f.write("refTE_10\tPaphi_v2-B-R5129-Map3_classI-LTR-incomp\t1\t441\n")
+        f.close()
+        
+    def _createPathTable(self, tableName):
+        fileName = "dummy.path"
+        f = open(fileName, "w")
+        f.write("7615\tchunk0030\t188432\t188611\trefTE_1\t5386\t5208\t0\t174\t97.23\n")
+        f.write("86375\tchunk0372\t155816\t157157\trefTE_1\t2213\t3575\t0\t843\t62.89\n")
+        f.write("88018\tchunk0381\t52141\t53500\trefTE_1\t2191\t3575\t0\t860\t63.3\n")
+        f.write("110377\tchunk0473\t117281\t117518\trefTE_1\t3966\t3716\t0\t230\t97.05\n")
+        f.write("39621\tchunk0169\t123563\t124003\trefTE_10\t1\t441\t0\t439\t99.55\n")
+        f.write("544710\tchunk2778\t62387\t62625\trefTE_10\t100\t327\t0\t165\t69.09\n")
+        f.write("601761\tchunk4780\t441\t559\trefTE_10\t5\t123\t0\t119\t100\n")
+        f.write("17589\tchunk0071\t58591\t59763\trefTE_2\t5139\t6384\t0\t1150\t98.12\n")
+        f.write("21050\tchunk0087\t186576\t186812\trefTE_2\t3636\t3891\t0\t232\t98.31\n")
+        f.close()
+        self._iDb.createTable(tableName, "path", fileName, True)
+        os.remove(fileName)
+        
+    def _writeExpFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("7615\tchunk0030\t188432\t188611\tPaphi_v2-B-R11932-Map16_classI-LTR-comp\t5386\t5208\t0\t174\t97.230000\n")
+        f.write("86375\tchunk0372\t155816\t157157\tPaphi_v2-B-R11932-Map16_classI-LTR-comp\t2213\t3575\t0\t843\t62.890000\n")
+        f.write("88018\tchunk0381\t52141\t53500\tPaphi_v2-B-R11932-Map16_classI-LTR-comp\t2191\t3575\t0\t860\t63.300000\n")
+        f.write("110377\tchunk0473\t117281\t117518\tPaphi_v2-B-R11932-Map16_classI-LTR-comp\t3966\t3716\t0\t230\t97.050000\n")
+        f.write("39621\tchunk0169\t123563\t124003\tPaphi_v2-B-R5129-Map3_classI-LTR-incomp\t1\t441\t0\t439\t99.550000\n")
+        f.write("544710\tchunk2778\t62387\t62625\tPaphi_v2-B-R5129-Map3_classI-LTR-incomp\t100\t327\t0\t165\t69.090000\n")
+        f.write("601761\tchunk4780\t441\t559\tPaphi_v2-B-R5129-Map3_classI-LTR-incomp\t5\t123\t0\t119\t100.000000\n")
+        f.write("17589\tchunk0071\t58591\t59763\tPaphi_v2-B-R2469-Map20_classI-LTR-comp\t5139\t6384\t0\t1150\t98.120000\n")
+        f.write("21050\tchunk0087\t186576\t186812\tPaphi_v2-B-R2469-Map20_classI-LTR-comp\t3636\t3891\t0\t232\t98.310000\n")
+        f.close()
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_SplicerFromAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_SplicerFromAnnotation.py Tue Apr 30 14:33:21 2013 -0400
b'@@ -0,0 +1,131 @@\n+import unittest\n+import os\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.utils.FileUtils import FileUtils\n+#from commons.tools.SplicerFromAnnotation import SplicerFromAnnotation\n+\n+class Test_F_SplicerFromAnnotation(unittest.TestCase):\n+\n+    def setUp(self):\n+        self._inFastaFileName = "%s/Tools/Splicer_inputFile.fa" % os.environ["REPET_DATA"]\n+        self._outFastaFileName = "%s/Tools/Splicer_outputSplicedFile.fa.splice" % os.environ["REPET_DATA"]\n+        self._refTEseqFileName = "%s/Tools/Splicer_refTEs_seq.fa" % os.environ["REPET_DATA"]\n+        self.configFileName = "TEdenovo.cfg"\n+        self._writeConfigFile()\n+        self.pathFileName = "annotations_path"\n+        self._writePathFile()\n+        self._iDb = DbMySql(cfgFileName = self.configFileName)\n+        self._tableName = "Splicer_inputFile_chr_allTEs_nr_noSSR_join_path"\n+        self._iDb.createTable(self._tableName, "path", self.pathFileName, True)\n+        self._tableName_refTEseq = "Splicer_inputFile_refTEs_seq"\n+        self._iDb.createTable(self._tableName_refTEseq, "seq", self._refTEseqFileName, True)\n+   \n+    def tearDown(self):\n+        self._iDb.dropTable(self._tableName)\n+        self._iDb.dropTable(self._tableName_refTEseq)\n+        self._iDb.dropTable("Splicer_inputFile_annotationIdentitySup80_path")\n+        self._iDb.dropTable("Splicer_inputFile_annotationToSplice_path")\n+        self._iDb.close()\n+        os.remove("annotations_path")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE.tab")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.globalAnnotStatsPerTE.txt")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_FullLengthCopy.fa")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_FullLengthCopy.txt")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_FullLengthFrag.fa")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_FullLengthFrag.txt")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_OneCopyAndMore.txt")\n+        os.remove("Splicer_inputFile_chr_allTEs_nr_noSSR_join_path.annotStatsPerTE_OneCopyAndMore.fa")\n+        os.remove("Splicer_inputFile_annotationToSplice_path.path")\n+        os.remove(self.configFileName)\n+#        os.remove(self.obsOutSplicedFastaFileName)\n+        \n+    def test_run_as_script_FLC(self):\n+        expOutSplicedFastaFileName = "%s/Tools/Splicer_outputSplicedFile_FLC.fa.splice" % os.environ["REPET_DATA"]\n+        self.obsOutSplicedFastaFileName = "Splicer_inputFile_FLC.fa.splice"\n+        self._writeConfigFile()\n+        self._writePathFile()\n+        cmd = \'SplicerFromAnnotation.py -i %s -C %s -o %s -v 2\' % (self._inFastaFileName,self.configFileName, self.obsOutSplicedFastaFileName)\n+        os.system(cmd)\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(expOutSplicedFastaFileName, self.obsOutSplicedFastaFileName))\n+        \n+    def test_run_as_script_FLF(self):\n+        expOutSplicedFastaFileName = "%s/Tools/Splicer_outputSplicedFile_FLF.fa.splice" % os.environ["REPET_DATA"]\n+        self.obsOutSplicedFastaFileName = "Splicer_inputFile_FLF.fa.splice"\n+        self._writeConfigFile()\n+        self._writePathFile()\n+        cmd = \'SplicerFromAnnotation.py -i %s -t 3 -C %s -o %s -v 2\' % (self._inFastaFileName,self.configFileName, 
self.obsOutSplicedFastaFileName)\n+        os.system(cmd)\n+        \n+        self.assertTrue(FileUtils.are2FilesIdentical(expOutSplicedFastaFileName, self.obsOutSplicedFastaFileName))\n+\n+    def _writeConfigFile(self):\n+        f = open(self.configFileName, "w")\n+        f.write("[repet_env]\\n")\n+        f.write("repet_version: 2.0\\n")\n+        f.write("repet_host: %s\\n" % os.environ["REPET_HOST"])\n+        f.write("repet_user: %s\\n" % os.environ["REPET_USER"])\n+        f.write("repet_pw: %s\\n" % os.environ["REPET_'..b'd\\t5551\\t5526\\t0\\t24\\t96.15\\n")\n+        f.write("40\\tA_contig00033\\t19641\\t20410\\tDTX-incomp-chim_3b_Itr1_v2-L-B171-Map1_reversed\\t5533\\t4749\\t0\\t651\\t84.55\\n")\n+        f.write("41\\tA_contig00033\\t19475\\t19640\\tDTX-incomp-chim_3b_Itr1_v2-L-B151-Map1_reversed\\t9514\\t9708\\t0\\t127\\t76.7025\\n")\n+        f.write("42\\tA_contig00033\\t3244\\t3364\\tDTX-incomp_3b_Itr1_v2-L-B1836-Map1\\t2536\\t2405\\t6.8e-23\\t95\\t78.8462\\n")\n+        f.write("43\\tA_contig00033\\t19376\\t19408\\tRLX-comp_3b_Itr1_v2-L-B1517-Map1\\t705\\t733\\t1.1e-11\\t27\\t80.4348\\n")\n+        f.write("43\\tA_contig00033\\t19456\\t19474\\tRLX-comp_3b_Itr1_v2-L-B1517-Map1\\t775\\t790\\t1.1e-11\\t15\\t80.4348\\n")\n+        f.write("44\\tA_contig00033\\t5494\\t5583\\tRIX-incomp-chim_3b_Itr1_v2-L-B121-Map1\\t7419\\t7507\\t4.9e-11\\t68\\t76.1905\\n")\n+        f.write("45\\tA_contig00033\\t10810\\t11031\\tRLX-comp_3b_Itr1_v2-L-B2073-Map1\\t3079\\t3297\\t1e-17\\t171\\t77.2021\\n")\n+        f.write("46\\tA_contig00033\\t11253\\t11406\\tRLX-comp_3b_Itr1_v2-L-B1616-Map1\\t1726\\t1569\\t0\\t120\\t78.2609\\n")\n+        f.write("47\\tA_contig00033\\t11407\\t11508\\tRLX-comp_3b_Itr1_v2-L-B351-Map1_reversed\\t10133\\t10039\\t0\\t79\\t77.8689\\n")\n+        f.write("48\\tA_contig00033\\t3673\\t3719\\tRLX-comp_3b_Itr1_v2-L-B228-Map1_reversed\\t13011\\t12966\\t0\\t37\\t78.629\\n")\n+        f.write("49\\tA_contig00033\\t3959\\t4055\\tRLX-comp_3b_Itr1_v2-L-B134-Map1_reversed\\t1917\\t2010\\t5.5e-24\\t74\\t77.2727\\n")\n+        f.write("50\\tA_contig00033\\t30505\\t37270\\tRLX-comp_3b_Itr1_v2-L-B124-Map1\\t14149\\t7383\\t0\\t6365\\t94.0818\\n")\n+        f.write("51\\tA_contig00033\\t8859\\t9079\\tRLX-incomp_3b_Itr1_v2-L-B2010-Map1_reversed\\t390\\t603\\t1.9e-11\\t171\\t77.6596\\n")\n+        f.write("52\\tA_contig00033\\t9080\\t9140\\tRLX-incomp_3b_Itr1_v2-L-B1720-Map1_reversed\\t1393\\t1460\\t0\\t49\\t80.8333\\n")\n+        f.write("53\\tA_contig00033\\t12386\\t12435\\tRLX-incomp_3b_Itr1_v2-L-B1706-Map1\\t2678\\t2725\\t1.3e-25\\t40\\t81.1765\\n")\n+        f.write("54\\tA_contig00033\\t5052\\t5096\\tRLX-incomp_3b_Itr1_v2-L-B77-Map2_reversed\\t5445\\t5402\\t6.1e-13\\t37\\t84.507\\n")\n+        f.write("55\\tA_contig00033\\t14485\\t16756\\tRLX-incomp_3b_Itr1_v2-L-B965-Map1\\t4566\\t6848\\t0\\t1881\\t82.8154\\n")\n+        f.write("57\\tA_contig00033\\t10229\\t10370\\tRLX-incomp_3b_Itr1_v2-L-B502-Map1_reversed\\t8598\\t8438\\t0\\t109\\t77.2727\\n")\n+        f.write("59\\tA_contig00033\\t2457\\t2617\\tRLX-incomp_3b_Itr1_v2-L-B405-Map1\\t4742\\t4910\\t0\\t129\\t80.2721\\n")\n+        f.write("60\\tA_contig00033\\t8071\\t8183\\tRLX-incomp_3b_Itr1_v2-L-B109-Map1\\t4413\\t4529\\t0\\t97\\t86.5979\\n")\n+        f.write("61\\tA_contig00033\\t12867\\t14362\\tRXX-LARD-chim_3b_Itr1_v2-L-B1998-Map1_reversed\\t1465\\t3000\\t0\\t1283\\t85.8025\\n")\n+        f.write("62\\tA_contig00033\\t90\\t256\\tRXX-LARD-chim_3b_Itr1_v2-L-B1928-Map1\\t905\\t717\\t2.5e-38\\t128\\t76.875\\n")\n+        
f.write("63\\tA_contig00033\\t5097\\t5249\\tRXX-LARD-chim_3b_Itr1_v2-L-B1928-Map1\\t4158\\t4017\\t4e-36\\t127\\t83.3333\\n")\n+        f.write("64\\tA_contig00033\\t8689\\t8796\\tRXX-LARD-chim_3b_Itr1_v2-L-B1905-Map1\\t5584\\t5469\\t5.04467e-44\\t85\\t79.2079\\n")\n+        f.write("65\\tA_contig00033\\t6800\\t6825\\tRXX-LARD-chim_3b_Itr1_v2-L-B1790-Map1_reversed\\t3426\\t3399\\t0\\t21\\t79.1667\\n")\n+        f.write("66\\tA_contig00033\\t10193\\t10228\\tRLX-comp-chim_3b_Itr1_v2-L-B1760-Map1_reversed\\t1470\\t1504\\t8.7e-23\\t29\\t80.5195\\n")\n+        f.write("67\\tA_contig00033\\t6542\\t6603\\tRXX-LARD-chim_3b_Itr1_v2-L-B1741-Map1_reversed\\t5509\\t5567\\t0\\t48\\t77.9412\\n")\n+        f.write("69\\tA_contig00033\\t12769\\t12816\\tRLX-comp-chim_3b_Itr1_v2-L-B1518-Map1\\t2026\\t1979\\t0\\t39\\t81.25\\n")\n+        f.write("70\\tA_contig00033\\t28383\\t29146\\tRLX-comp-chim_3b_Itr1_v2-L-B1494-Map1_reversed\\t999\\t1761\\t0\\t672\\t88.0282\\n")\n+        f.write("71\\tA_contig00033\\t21712\\t28382\\tRLX-incomp-chim_3b_Itr1_v2-L-B1485-Map1\\t1132\\t7813\\t0\\t6011\\t90.1124\\n")\n+        f.write("71\\tA_contig00033\\t20784\\t21728\\tRLX-incomp-chim_3b_Itr1_v2-L-B1485-Map1\\t159\\t1113\\t0\\t833\\t88.172 \\n")\n+        f.close()\n+        \n+if __name__ == "__main__":\n+    unittest.main()\n\\ No newline at end of file\n'
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_F_TEclassifierPE.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_F_TEclassifierPE.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,87 @@
+import os
+import unittest
+from commons.tools.TEclassifierPE import TEclassifierPE
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.sql.DbFactory import DbFactory
+
+class Test_F_TEclassifierPE(unittest.TestCase):
+
+    def test_run(self):
+        projectName = "dummy"
+        configFileName = "dummy.cfg"
+        self.writeConfigFile(configFileName, projectName)
+        inputFileName = "dummy.fa"
+        with open(inputFileName, "w") as f:
+            f.write(">dummySSR\n")
+            f.write("AGTTACCATGCCCAGCATTAACCCCCCTCAACAACCACCTCCGCCTATGAAGCCCGCCCG\n")
+            f.write("AGTTACCATGCCCAGCATTAACCCCCCTCAACAACCACCTCCGCCTATGAAGCCCGCCCG\n")
+        expFileName = "exp.classif"
+        with open(expFileName, "w") as f:
+            f.write("dummySSR\t120\t.\tok\tNA\tSSR\tNA\tCI=100; struct=(TElength: <150bps; SSRCoverage=0.00)\n")
+        obsFileName = "%s.classif" % projectName
+        statsFileName = "%s.classif_stats.txt" % projectName
+        analysisFileName = "%s.SSR.set" % inputFileName
+            
+        iTEclassifierPE = TEclassifierPE(inputFileName, configFileName, doClean = True)
+        iTEclassifierPE.run()
+        
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        
+        iDb = DbFactory.createInstance()
+        iDb.dropTable("%s_TR_set" % projectName)
+        iDb.dropTable("%s_polyA_set" % projectName)
+        iDb.dropTable("%s_ORF_map" % projectName)
+        iDb.dropTable("%s_SSR_set" % projectName)
+        iDb.close()
+        os.remove(configFileName)
+        os.remove(inputFileName)
+        os.remove(expFileName)
+        os.remove(obsFileName)
+        os.remove(statsFileName)
+        os.remove(analysisFileName)
+        
+    def writeConfigFile(self, fileName, projectName):
+        with open(fileName, "w") as f:
+            f.write("[repet_env]\n")
+            f.write("repet_host: %s\n" % os.environ["REPET_HOST"])
+            f.write("repet_user: %s\n" % os.environ["REPET_USER"])
+            f.write("repet_pw: %s\n" % os.environ["REPET_PW"])
+            f.write("repet_db: %s\n" % os.environ["REPET_DB"])
+            f.write("repet_port: %s\n" % os.environ["REPET_PORT"])
+            f.write("\n")
+            f.write("[project]\n")
+            f.write("project_name: %s\n" % projectName)
+            f.write("project_dir: %s\n" % os.getcwd())
+            f.write("\n")
+            f.write("[detect_features]\n")
+            f.write("clean: yes\n")
+            f.write("blast: ncbi\n")
+            f.write("term_rep: no\n")
+            f.write("polyA: no\n")
+            f.write("tand_rep: yes\n")
+            f.write("orf: no\n")
+            f.write("TE_HMMER: no\n")
+            f.write("TE_BLRtx: no\n")
+            f.write("TE_BLRn: no\n")
+            f.write("TE_BLRx: no\n")
+            f.write("HG_BLRn: no\n")
+            f.write("rDNA_BLRn: no\n") 
+            f.write("tRNA_scan: no\n")
+            f.write("TRFmaxPeriod: 15\n")
+            f.write("\n")
+            f.write("[classif_consensus]\n")
+            f.write("max_profiles_evalue: 1e-3\n")
+            f.write("min_TE_profiles_coverage: 20\n")
+            f.write("min_HG_profiles_coverage: 75\n")
+            f.write("max_helitron_extremities_evalue: 1e-3\n")
+            f.write("min_TE_bank_coverage: 5\n")
+            f.write("min_HG_bank_coverage: 95\n")
+            f.write("min_rDNA_bank_coverage: 95\n")
+            f.write("min_HG_bank_identity: 90\n")
+            f.write("min_rDNA_bank_identity: 90\n")
+            f.write("min_SSR_coverage: 0.75\n")
+            f.write("max_SSR_size: 150\n")
+            f.write("clean: yes\n")
+            
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
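Running the classifier, as exercised in test_run, comes down to the two calls below; this is a sketch built only from that test, with output file names following the project_name and input FASTA set in the config:

    from commons.tools.TEclassifierPE import TEclassifierPE

    classifier = TEclassifierPE("dummy.fa", "dummy.cfg", doClean=True)
    classifier.run()
    # results: dummy.classif, dummy.classif_stats.txt and dummy.fa.SSR.set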
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_GetMultAlignAndPhylogenyPerTErefSeq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_GetMultAlignAndPhylogenyPerTErefSeq.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,54 @@
+import unittest
+import os
+import time
+from commons.core.sql.DbMySql import DbMySql
+from commons.tools.GetMultAlignAndPhylogenyPerTErefSeq import GetMultAlignAndPhylogenyPerTErefSeq
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_GetMultAlignAndPhylogenyPerTErefSeq( unittest.TestCase ):
+    
+    def setUp( self ):
+        self.fileUtils = FileUtils()
+        self._inputTEFile = "dummyTE"
+        self._iGMAP = GetMultAlignAndPhylogenyPerTErefSeq()
+        self._uniqId = "%s_%s" % ( time.strftime("%Y_%m_%d_%H_%M_%S") , os.getpid() )
+        self._configFileName = "dummyConfig_%s" % self._uniqId
+        configFile = open( self._configFileName, "w" )
+        configFile.write("[repet_env]\n")
+        configFile.write( "repet_host: %s\n" % ( os.environ["REPET_HOST"] ) )
+        configFile.write( "repet_user: %s\n" % ( os.environ["REPET_USER"] ) )
+        configFile.write( "repet_pw: %s\n" % ( os.environ["REPET_PW"] ) )
+        configFile.write( "repet_db: %s\n" % ( os.environ["REPET_DB"] ) )
+        configFile.write( "repet_port: %s\n" % ( os.environ["REPET_PORT"] ) )
+        configFile.close()
+        
+    def tearDown( self ):
+        os.remove( self._configFileName )
+        os.remove( self._inputTEFile )
+    
+        
+    def test_getNamesOfTErefSeq_fromFile_ok(self):
+        expTEList =["DHX-incomp-chim_Blc1_DmelChr4-L-B1-Map1_reversed",
+                    "DT-comp_DmelChr4-B-P0.0-Map3", "RLX-incomp_Blc3_DmelChr4-L-B3-Map1"]
+        self._iGMAP._TErefseq = os.path.abspath(self._inputTEFile)
+        self.writeInputTEFile()
+        obsTEList = self._iGMAP.getNamesOfTErefSeq()
+        self.assertEqual(obsTEList, expTEList)
+    
+    def test_getNamesOfTErefSeq_from_empty_file(self):
+        expTEList =[]
+        self._iGMAP._TErefseq = os.path.abspath(self._inputTEFile)
+        with open(self._inputTEFile,"w") as TEFile:
+            TEFile.write("")
+        obsTEList = self._iGMAP.getNamesOfTErefSeq()
+        self.assertEqual(obsTEList, expTEList)
+        
+    def writeInputTEFile(self):
+        with open(self._inputTEFile,"w") as TEFile:
+            TEFile.write("""DHX-incomp-chim_Blc1_DmelChr4-L-B1-Map1_reversed
+RLX-incomp_Blc3_DmelChr4-L-B3-Map1\t reverse
+DT-comp_DmelChr4-B-P0.0-Map3\t""")
+            
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
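
The two getNamesOfTErefSeq tests above imply that the input TE file is parsed as one name per line (the first whitespace-separated token, with an optional second column such as "reverse" being ignored) and that the names come back sorted, since the expected list is not in file order. A hypothetical re-implementation consistent with those fixtures:

def get_names_of_te_ref_seq(file_name):
    # Keep the first whitespace-separated token of each non-empty line,
    # then return the names in sorted order (as the expected list suggests).
    names = []
    with open(file_name) as f:
        for line in f:
            tokens = line.split()
            if tokens:
                names.append(tokens[0])
    return sorted(names)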
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_GetSpecificTELibAccordingToAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_GetSpecificTELibAccordingToAnnotation.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,64 @@
+import os
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.sql.DbMySql import DbMySql
+from commons.tools.GetSpecificTELibAccordingToAnnotation import GetSpecificTELibAccordingToAnnotation
+
+class Test_GetSpecificTELibAccordingToAnnotation(unittest.TestCase):
+    
+    def test_writeFastaFileFromGiveInfoTEAnnot(self):
+        self._iDb = DbMySql()
+        fileName = "GiveInfoTEannot.txt"
+        self._writeGiveInfoTEannotOutput(fileName)
+        tableName = "projectName_refTEs_chr_seq"
+        self._createConsensusTable(tableName)
+        expFileName = "consensusExp.fa"
+        self._writeExpConsensusFile(expFileName)
+        obsFileName = "%s.fa" % os.path.splitext(fileName)[0]
+        iGetTElib = GetSpecificTELibAccordingToAnnotation()
+        iGetTElib.setTableName(tableName)
+        iGetTElib.writeFastaFileFromGiveInfoTEAnnot(fileName)
+        self._iDb.dropTable(tableName)
+        self._iDb.close()
+        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
+        os.remove(fileName)
+        os.remove(expFileName)
+        os.remove(obsFileName)
+        
+    def _writeGiveInfoTEannotOutput(self, fileName):
+        f = open(fileName, "w")
+        f.write("TE\tmaxLength\tmeanLength\tcovg\tfrags\tfullLgthFrags\tcopies\tfullLgthCopies\tmeanId\tsdId\tminId\tq25Id\tmedId\tq75Id\tmaxId\tmeanLgth\tsdLgth\tminLgth\tq25Lgth\tmedLgth\tq75Lgth\tmaxLgth\tmeanLgthPerc\tsdLgthPerc\tminLgthPerc\tq25LgthPerc\tmedLgthPerc\tq75LgthPerc\tmaxLgthPerc\n")
+        f.write("Atha_benchs-B-P66.38-Map20_classII-Helitron-incomp\t2150\t2150\t195657\t158\t49\t152\t52\t84.88\t8.75\t74.29\t77.16\t80.08\t96.40\t97.83\t1287.32\t776.47\t21\t570.00\t1320.50\t2077.00\t2272\t59.88\t36.11\t0.98\t26.51\t61.42\t96.60\t105.67\n")
+        f.write("Atha_benchs-B-R1047-Map6_classI-LINE-comp\t1653\t1653\t25423\t57\t4\t56\t4\t79.09\t8.21\t64.32\t73.63\t79.21\t83.65\t96.65\t453.98\t488.66\t29\t98.00\t244.00\t634.00\t1650\t27.46\t29.56\t1.75\t5.93\t14.76\t38.35\t99.82\n")
+        f.write("Atha_benchs-B-R1276-Map4_classII-Helitron-incomp\t1293\t1293\t24416\t48\t4\t47\t4\t80.31\t5.74\t75.14\t77.00\t78.25\t80.89\t99.46\t519.60\t419.01\t37\t171.00\t334.00\t838.00\t1296\t40.19\t32.41\t2.86\t13.23\t25.83\t64.81\t100.23\n")
+        f.close()
+        
+    def _createConsensusTable(self, tableName):
+        fileName = "consensus.fa"
+        f = open(fileName, "w")
+        f.write(">Atha_benchs-B-P66.38-Map20_classII-Helitron-incomp\n")
+        f.write("ATGCTAGCTAGCT\n")
+        f.write(">Atha_benchs-B-R12-Map5_classII-Helitron-comp\n")
+        f.write("ATGCTAGCTAGCTATGCTAGCTAGCATGCTAGCTAGCTT\n")
+        f.write(">Atha_benchs-B-R1047-Map6_classI-LINE-comp\n")
+        f.write("ATGCTAGCTAGCT\n")
+        f.write(">Atha_benchs-B-G56-Map5_classII-Helitron-comp\n")
+        f.write("ATGCTAGCTAGCTATGCTAGCTAGCT\n")
+        f.write(">Atha_benchs-B-R1276-Map4_classII-Helitron-incomp\n")
+        f.write("ATGCTAGCTAGCATGCTAGCTAGCTATGCTAGCTAGCTATGCTAGCTAGCTT\n")
+        f.close()
+        self._iDb.createTable(tableName, "seq", fileName, True)
+        os.remove(fileName)
+    
+    def _writeExpConsensusFile(self, fileName):
+        f = open(fileName, "w")
+        f.write(">Atha_benchs-B-P66.38-Map20_classII-Helitron-incomp\n")
+        f.write("ATGCTAGCTAGCT\n")
+        f.write(">Atha_benchs-B-R1047-Map6_classI-LINE-comp\n")
+        f.write("ATGCTAGCTAGCT\n")
+        f.write(">Atha_benchs-B-R1276-Map4_classII-Helitron-incomp\n")
+        f.write("ATGCTAGCTAGCATGCTAGCTAGCTATGCTAGCTAGCTATGCTAGCTAGCTT\n")
+        f.close()
+        
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
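
test_writeFastaFileFromGiveInfoTEAnnot expects an output FASTA restricted to the consensus sequences named in the first column of the GiveInfoTEannot report (header line skipped), in the report's order. In the tool itself the sequences come from the projectName_refTEs_chr_seq MySQL table; the sketch below is hypothetical, with a plain dict standing in for that table, and only captures the same filtering:

def write_fasta_subset(annot_file_name, name_to_sequence, out_file_name):
    # Collect the TE names listed in the annotation report, skipping the header.
    with open(annot_file_name) as f:
        next(f)
        wanted = [line.split("\t")[0] for line in f if line.strip()]
    # Write only those consensus sequences, in the order of the report.
    with open(out_file_name, "w") as out:
        for name in wanted:
            out.write(">%s\n%s\n" % (name, name_to_sequence[name]))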
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_OrientSequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_OrientSequences.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,101 @@
+import unittest
+import time
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.tools.OrientSequences import OrientSequences
+
+
+class Test_OrientSequences( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = OrientSequences ()
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )
+        
+        
+    def tearDown( self ):
+        self._i = None
+        self._uniqId = None
+        
+        
+    def test_parseMummerOutput( self ):
+        inFileName = "dummyInFile_%s" % ( self._uniqId )
+        inF = open( inFileName, "w" )
+        inF.write( "> seq1\n" )
+        inF.write( "  seq2      20        1        20\n" )
+        inF.write( "> seq1 Reverse\n" )
+        inF.write( "  seq2      100        1        100\n" )
+        inF.write( "  seq2      450      400        50\n" )
+        inF.close()
+        dExp = { "direct": 1, "reverse": 2 }
+        self._i._verbose = 0
+        dObs = self._i.parseMummerOutput( inFileName, "seq1", "seq2" )
+        self.assertEqual( dObs, dExp )
+        os.remove( inFileName )
+        
+        
+    def test_getSequencesToReverseFromMatrix_2seq( self ):
+        lNewHeaders = [ "seq1", "seq2" ]
+        dMatrix = { "1_vs_2": { "direct": 1, "reverse": 2 } }
+        lExp = [ "seq2" ]
+        self._i._verbose = 0
+        lObs = self._i.getSequencesToReverseFromMatrix( dMatrix, lNewHeaders )
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lObs, lExp )
+        
+        
+    def test_getSequencesToReverseFromMatrix_3seq( self ):
+        lNewHeaders = [ "seq1", "seq2", "seq3" ]
+        dMatrix = { "1_vs_2": { "direct": 1, "reverse": 10 },
+                   "1_vs_3": { "direct": 10, "reverse": 1 },
+                   "2_vs_3": { "direct": 1, "reverse": 10 } }
+        lExp = [ "seq2" ]
+        self._i._verbose = 0
+        lObs = self._i.getSequencesToReverseFromMatrix( dMatrix, lNewHeaders )
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_getSequencesToReverseFromMatrix_4seq( self ):
+        lNewHeaders = [ "seq1", "seq2", "seq3", "seq4" ]
+        dMatrix = { "1_vs_2": { "direct": 10, "reverse": 1 },
+                   "1_vs_3": { "direct": 1, "reverse": 10 },
+                   "1_vs_4": { "direct": 1, "reverse": 10 },
+                   "2_vs_3": { "direct": 1, "reverse": 10 },
+                   "2_vs_4": { "direct": 1, "reverse": 10 },
+                   "3_vs_4": { "direct": 1, "reverse": 2 } }
+        lExp = [ "seq3", "seq4" ]
+        self._i._verbose = 0
+        lObs = self._i.getSequencesToReverseFromMatrix( dMatrix, lNewHeaders )
+        lExp.sort()
+        lObs.sort()
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_orientInputSequences( self ):
+        lSeqToOrient = [ "seq1" ]
+        tmpFileName = "dummyInFile_%s" % ( self._uniqId )
+        tmpF = open( tmpFileName, "w" )
+        tmpF.write( ">seq1\n" )
+        tmpF.write( "ATGCGGTGCATG\n" )
+        tmpF.write( ">seq2\n" )
+        tmpF.write( "GGCGAAGTGAAA\n" )
+        tmpF.close()
+        expFileName = "dummyExpFile_%s" % ( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write( ">seq1 re-oriented\n" )
+        expF.write( "CATGCACCGCAT\n" )
+        expF.write( ">seq2\n" )
+        expF.write( "GGCGAAGTGAAA\n" )
+        expF.close()
+        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
+        self._i._verbose = 0
+        self._i._outFileName = obsFileName
+        self._i.orientInputSequences( lSeqToOrient, tmpFileName )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ tmpFileName, expFileName, obsFileName ]:
+            os.remove( f )
+            
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
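
The getSequencesToReverseFromMatrix tests feed a dictionary keyed by "i_vs_j" with counts of direct and reverse matches and expect back the sequences to flip. One simple strategy consistent with all three fixtures (hypothetical; the real method may use a more global criterion) is to anchor the first sequence and reverse any other sequence whose comparison against it is dominated by reverse matches:

def sequences_to_reverse(match_matrix, headers):
    # headers[0] is kept as the reference orientation; every other sequence is
    # flipped when its "1_vs_i" entry shows more reverse than direct matches.
    to_reverse = []
    for i, name in enumerate(headers[1:], start=2):
        counts = match_matrix.get("1_vs_%i" % i, {"direct": 0, "reverse": 0})
        if counts["reverse"] > counts["direct"]:
            to_reverse.append(name)
    return to_reverse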
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,161 @@
+import unittest
+import os
+import time
+from commons.core.coord.Align import Align
+from commons.core.utils.FileUtils import FileUtils
+from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps
+
+
+class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = RmvPairAlignInChunkOverlaps()
+        self._uniqId = "%s" % ( time.strftime("%Y%m%d%H%M%S") )
+
+
+    def tearDown( self ):
+        self._i = None
+        self._uniqId = None
+        
+        
+    def test_isPairAlignAChunkOverlap( self ):
+        a = Align()
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
+        self._i.setChunkLength( 500 )
+        self._i.setChunkOverlap( 100 )
+        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
+        self.assertTrue( obs )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )  # reverse subject
+        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
+        self.assertTrue( obs )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )   # chunk subject not contiguous
+        obs = self._i.isPairAlignAChunkOverlap( a, 1, 3 )
+        self.assertFalse( obs )
+        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )   # hit longer than chunk overlap
+        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
+        self.assertFalse( obs )
+        a.setFromTuple( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ) )   # chunk overlap returned by PALS (+1), query > subject
+        obs = self._i.isPairAlignAChunkOverlap( a, 2, 1 )
+        self.assertTrue( obs )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ) )   # chunk overlap returned by PALS (+1), query < subject
+        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
+        self.assertTrue( obs )
+        
+        
+    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
+        a = Align()
+        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) )  # included, due to overlap
+        self._i.setChunkLength( 500 )
+        self._i.setChunkOverlap( 100 )
+        self._i._verbose = 0
+        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
+        self.assertTrue( obs )
+        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) )  # reverse subject
+        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
+        self.assertFalse( obs )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )   # chunk subject not contiguous
+        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 )
+        self.assertFalse( obs )
+        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )   # hit longer than chunk overlap
+        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
+        self.assertFalse( obs )
+        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )   # repeat within overlap...
+        self._i._margin = 2   # ... but not due to it
+        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
+        self.assertFalse( obs )
+        self._i._margin = 10   # ... and due to it
+        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
+        self.assertTrue( obs )
+        
+        
+    def test_removeChunkOverlaps( self ):
+        inFileName = "dummyInFile_%s" % ( self._uniqId )
+        inF = open( inFileName, "w" )
+        a = Align()
+        a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
+        a.write( inF )
+        a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0"
[...]
+        self._i.setChunkLength( 500 )
+        self._i.setChunkOverlap( 100 )
+        obsFileName = "dummyObsFile_%s"  %( self._uniqId )
+        self._i.setOutputFileName( obsFileName )
+        d = self._i.removeChunkOverlaps()
+        expFileName = "dummyExpFile_%s" % ( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write( "" )
+        expF.close()
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ inFileName, obsFileName, expFileName ]: os.remove( f )
+        
+        
+    def test_zRunAsScript( self ):
+        cDir = os.getcwd()
+        
+        alignFileName = "dummyInFile_%s" % ( self._uniqId )
+        aF = open( alignFileName, "w" )
+        aF.write( "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n" )  # remove
+        aF.write( "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n" )  # remove
+        aF.write( "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n" )  # keep because non-contiguous chunks
+        aF.write( "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n" )  # keep because non-contiguous chunks
+        aF.write( "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n" )  # keep because within overlap but reverse
+        aF.write( "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n" )  # remove because within and due to overlap
+        aF.write( "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n" )  # keep because within but not due to overlap
+        aF.write( "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n" )  # keep because longer HSP on query
+        aF.close()
+        
+        expFileName = "dummyExpFile_%s" % ( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write( "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n" )
+        expF.write( "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n" )
+        expF.write( "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n" )
+        expF.write( "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n" )
+        expF.write( "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n" )
+        expF.close()
+        
+        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
+        cmd = "RmvPairAlignInChunkOverlaps.py"
+        cmd += " -i %s" % ( alignFileName )
+        cmd += " -l 500"
+        cmd += " -o 100"
+        cmd += " -O %s" % ( obsFileName )
+        cmd += " -v 0"
+        os.system( cmd )
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ alignFileName, expFileName, obsFileName ]: os.remove( f )
+        os.chdir( cDir )
+        
+    def test_zRunAsScript_bug_to_fix(self):
+        cDir = os.getcwd()
+        
+        alignFileName = "dummyInFile_%s" % ( self._uniqId )
+        aF = open( alignFileName, "w" )
+        aF.write("chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n")
+        aF.write("chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n")
+        aF.write("chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n")
+        aF.close()
+        
+        expFileName = "dummyExpFile_%s" % ( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write("chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n")
+        expF.write("chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n")
+        expF.close()
+        
+        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
+        cmd = "RmvPairAlignInChunkOverlaps.py"
+        cmd += " -i %s" % ( alignFileName )
+        cmd += " -l 200000"
+        cmd += " -o 10000"
+        cmd += " -O %s" % ( obsFileName )
+        cmd += " -v 0"
+        os.system( cmd )
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ alignFileName, expFileName, obsFileName ]: os.remove( f )
+        os.chdir( cDir )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
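
The isPairAlignAChunkOverlap cases above (chunk length 500, overlap 100) accept a hit only when the two chunks are consecutive and the match sits inside the shared overlap region, with a one-base tolerance for the extra position PALS can report. A standalone predicate consistent with those cases (hypothetical, working on plain coordinates rather than an Align object):

def is_chunk_overlap_artifact(q_num, q_start, q_end, s_num, s_start, s_end,
                              chunk_length=500, chunk_overlap=100):
    # Only matches between consecutive chunks can be overlap artifacts.
    if abs(q_num - s_num) != 1:
        return False
    # The match on the earlier chunk must lie in its trailing overlap region,
    # the match on the later chunk in its leading one (+1 base of tolerance).
    if q_num < s_num:
        early, late = (q_start, q_end), (s_start, s_end)
    else:
        early, late = (s_start, s_end), (q_start, q_end)
    early_ok = min(early) >= chunk_length - chunk_overlap + 1 and max(early) <= chunk_length
    late_ok = min(late) >= 1 and max(late) <= chunk_overlap + 1
    return early_ok and late_ok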
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_SpliceTEsFromGenome.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_SpliceTEsFromGenome.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,57 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.tools.SpliceTEsFromGenome import SpliceTEsFromGenome
+
+
+class Test_SpliceTEsFromGenome( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = SpliceTEsFromGenome()
+        
+        
+    def test_zLaunchAsScript( self ):
+        cDir = os.getcwd()
+        
+        coordFile = "dummyCoordFile"
+        coordFileHandler = open( coordFile, "w" )
+        coordFileHandler.write( "TE1\tchr1\t2\t5\n" )
+        coordFileHandler.write( "TE2\tchr1\t11\t15\n" )
+        coordFileHandler.write( "TE3\tchr1\t12\t14\n" )
+        coordFileHandler.close()
+        
+        genomeFile = "dummyGenomeFile"
+        genomeFileHandler = open( genomeFile, "w" )
+        genomeFileHandler.write( ">chr1\n" )
+        genomeFileHandler.write( "AGGGGAAAAACCCCCAAAAA\n" )
+        genomeFileHandler.write( ">chr2\n" )
+        genomeFileHandler.write( "TTTTTTTTTT\n" )
+        genomeFileHandler.close()
+        
+        expFile = "dummyExpFile"
+        expFileHandler = open( expFile, "w" )
+        expFileHandler.write( ">chr1\n" )
+        expFileHandler.write( "AAAAAAAAAAA\n" )
+        expFileHandler.write( ">chr2\n" )
+        expFileHandler.write( "TTTTTTTTTT\n" )
+        expFileHandler.close()
+        
+        obsFile = "dummyObsFile"
+        
+        cmd = "SpliceTEsFromGenome.py"
+        cmd += " -i %s" % ( coordFile )
+        cmd += " -f %s" % ( "map" )
+        cmd += " -g %s" % ( genomeFile )
+        cmd += " -o %s" % ( obsFile )
+        cmd += " -v %i" % ( 0 )
+        returnStatus = os.system( cmd )
+        
+        self.assertTrue( returnStatus == 0 )
+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )
+        
+        for f in [ coordFile, genomeFile, expFile, obsFile ]:
+            os.remove( f )
+        os.chdir( cDir )
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
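
The SpliceTEsFromGenome fixture removes the map coordinates 2-5 and 11-15 from the 20-bp chr1 sequence and expects the 11 remaining bases back, while chr2 is untouched. A small helper reproducing that splicing on a single sequence (hypothetical, using 1-based inclusive coordinates as in the map file):

def splice_regions(sequence, regions):
    # Mark every position covered by a region, then keep the rest.
    keep = [True] * len(sequence)
    for start, end in regions:
        for i in range(start - 1, end):
            keep[i] = False
    return "".join(base for base, kept in zip(sequence, keep) if kept)

# splice_regions("AGGGGAAAAACCCCCAAAAA", [(2, 5), (11, 15)]) == "AAAAAAAAAAA"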
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_getCumulLengthFromTEannot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_getCumulLengthFromTEannot.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,71 @@
+import unittest
+import os
+import time
+from commons.core.sql.DbMySql import DbMySql
+from commons.tools.getCumulLengthFromTEannot import getCumulLengthFromTEannot
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_getCumulLengthFromTEannot( unittest.TestCase ):
+    
+    def setUp( self ):
+        self.fileUtils = FileUtils()
+        self._getCumulLengthFromTEannot = None
+        self._uniqId = "%s_%s" % ( time.strftime("%Y_%m_%d_%H_%M_%S") , os.getpid() )
+        self._configFileName = "dummyConfig_%s" % self._uniqId
+        configFile = open( self._configFileName, "w" )
+        configFile.write("[repet_env]\n")
+        configFile.write( "repet_host: %s\n" % ( os.environ["REPET_HOST"] ) )
+        configFile.write( "repet_user: %s\n" % ( os.environ["REPET_USER"] ) )
+        configFile.write( "repet_pw: %s\n" % ( os.environ["REPET_PW"] ) )
+        configFile.write( "repet_db: %s\n" % ( os.environ["REPET_DB"] ) )
+        configFile.write( "repet_port: %s\n" % ( os.environ["REPET_PORT"] ) )
+        configFile.close()
+        
+        
+    def tearDown( self ):
+        os.remove( self._configFileName )
+        
+        
+    def test_getAllSubjectsAsMapOfQueries( self ):
+        inFileName = "dummyInFile_%s" % self._uniqId
+        expFileName = "dummyExp_%s" % self._uniqId
+        for f in [inFileName,expFileName]:
+            if os.path.exists(f): os.remove(f)
+        inFile = open( inFileName, "w" )
+        inFile.write( "1\tchr1\t1501\t2500\tTE1\t1\t500\t0.0\t880\t95.7\n" )
+        inFile.write( "2\tchr1\t3401\t4000\tTE3\t101\t700\t0.0\t950\t97.2\n" )
+        inFile.close()
+        inTable = "dummyPathTable_%s" % self._uniqId
+        db = DbMySql(cfgFileName=self._configFileName)
+        db.createTable(inTable, "path", inFileName)
+        expFile = open( expFileName, "w" )
+        expFile.write( "TE1\tchr1\t1501\t2500\n" )
+        expFile.write( "TE3\tchr1\t3401\t4000\n" )
+        expFile.close()
+        self._getCumulLengthFromTEannot = getCumulLengthFromTEannot()
+        self._getCumulLengthFromTEannot._tableName = inTable
+        self._getCumulLengthFromTEannot._configFileName = self._configFileName
+        self._getCumulLengthFromTEannot.setAdaptatorToTable()
+        obsFileName = self._getCumulLengthFromTEannot.getAllSubjectsAsMapOfQueries()
+        self.assertTrue( self.fileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [inFileName,inTable+".map",expFileName]:
+            if os.path.exists(f): os.remove(f)
+        db.dropTable(inTable)
+        db.close()
+        
+        
+    def test_getCumulLength( self ):
+        mergeFileName = "dummyInFile_%s" % self._uniqId
+        mergeFile = open( mergeFileName, "w" )
+        mergeFile.write( "TE1\tchr1\t1501\t2500\n" )
+        mergeFile.write( "TE3\tchr1\t4000\t3401\n" )
+        mergeFile.close()
+        exp = 1000+600
+        self._getCumulLengthFromTEannot = getCumulLengthFromTEannot()
+        obs = self._getCumulLengthFromTEannot.getCumulLength( mergeFileName )
+        self.assertEqual( obs, exp )
+        os.remove( mergeFileName )
+
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
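
test_getCumulLength expects 1600 for map entries spanning 1501-2500 and 4000-3401, i.e. the span of each entry regardless of coordinate order. A minimal equivalent computation (a hypothetical re-implementation of that sum, not the tool's code):

def get_cumul_length(map_file_name):
    total = 0
    with open(map_file_name) as f:
        for line in f:
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 4:
                continue
            start, end = int(fields[2]), int(fields[3])
            # Reversed coordinates (e.g. 4000..3401) still count their full span.
            total += abs(end - start) + 1
    return total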
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_pathnum2id.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_pathnum2id.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,84 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_pathnum2id( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._inputFileName = "dummyInputPathFile"
+        self._outputFileName = "dummyOutputPathFile"
+        self._expectedFileName = "dummyExpectedPathFile"
+        self._prg = "pathnum2id.py"
+        
+        
+    def test_runWhithoutReturnAtEndOfFile( self ):
+        cDir = os.getcwd()
+        
+        self._createAndFillInputFileWhithoutReturnAtTheEnd()
+        self._createExpectedFile()
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFileName )
+        cmd += " -o %s" % ( self._outputFileName )
+        os.system( cmd )        
+        self.assertTrue( FileUtils.are2FilesIdentical( self._outputFileName, self._expectedFileName ) )
+        
+        os.remove( self._inputFileName )
+        os.remove( self._expectedFileName )
+        if os.path.exists( self._outputFileName ):
+            os.remove( self._outputFileName )
+        os.chdir( cDir )
+        
+        
+    def test_runWhithReturnAtEndOfFile( self ):
+        cDir = os.getcwd()
+        
+        self._createAndFillInputFileWhithReturnAtTheEnd()
+        self._createExpectedFile()
+        cmd = self._prg
+        cmd += " -i %s" % ( self._inputFileName )
+        cmd += " -o %s" % ( self._outputFileName )
+        os.system( cmd )        
+        self.assertTrue( FileUtils.are2FilesIdentical( self._outputFileName, self._expectedFileName ) )
+        
+        os.remove( self._inputFileName )
+        os.remove( self._expectedFileName )
+        if os.path.exists( self._outputFileName ):
+            os.remove( self._outputFileName )
+        os.chdir( cDir )
+        
+        
+    def _createExpectedFile( self ):
+        f = open(self._expectedFileName, "w")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+        f.write("4\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+        f.write("5\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+        f.write("6\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        f.close()
+        
+        
+    def _createAndFillInputFileWhithoutReturnAtTheEnd( self ):
+        f = open(self._inputFileName, "w")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0")
+        f.close()
+        
+        
+    def _createAndFillInputFileWhithReturnAtTheEnd( self ):
+        f = open(self._inputFileName, "w")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+        f.write("1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+        f.write("2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+        f.write("3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        f.close()
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
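
In both pathnum2id fixtures the output differs from the input only in the first column, which goes from the repeated identifiers 1,2,3,1,2,3 to the unique sequence 1..6, and the output always ends with a newline. A sketch matching these particular fixtures (hypothetical; the real script may group rows into paths differently):

def renumber_path_rows(in_file_name, out_file_name):
    with open(in_file_name) as inp, open(out_file_name, "w") as out:
        new_id = 0
        for line in inp:
            if not line.strip():
                continue
            new_id += 1
            fields = line.rstrip("\n").split("\t")
            fields[0] = str(new_id)              # replace the path identifier
            out.write("\t".join(fields) + "\n")  # always terminate with \n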
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_refalign2fasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_refalign2fasta.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,135 @@
+import unittest
+import time
+from commons.tools.refalign2fasta import *
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_refalign2fasta( unittest.TestCase ):
+    
+    
+    def setUp( self ):
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
+        
+        
+    def tearDown( self ):
+        self._uniqId = None
+        
+        
+    def test_getAlignments( self ):
+        inFile = "dummyInFile_%s" % ( self._uniqId )
+        inFileHandler = open( inFile, "w" )
+        inFileHandler.write( "AAAACTTTT-T\tAAAA-TTTGGT\trefseq::1 chr3 1..10\n" )  # 1 insertion and 1 deletion in the copy + 1 mismatch
+        inFileHandler.close()
+        lExp = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "refseq::1 chr3 1..10" ) ]
+        lObs = getAlignments( inFile )
+        self.assertEqual( lExp, lObs )
+        os.remove( inFile )
+        
+        
+    def test_getGaps_OneGap( self ):
+        sequence = "AAAACTTTT-T"
+        lExp = [ ( 9, 1 ) ]
+        lObs = getGaps( sequence )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_getGaps_TwoGaps( self ):
+        sequence = "AAAA--CTTT-TT"
+        lExp = [ ( 4, 2 ), ( 8, 1 ) ]
+        lObs = getGaps( sequence )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_getGapsOnRefSeq_OneSeq( self ):
+        lAlign = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "refseq::1 chr3 1..10" ) ]
+        lExp = [ [ ( 9, 1 ) ] ]
+        lObs = getGapsOnRefSeq( lAlign )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_getGapsOnRefSeq_TwoSeq( self ):
+        lAlign = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "refseq::1 chr3 1..10" ) ]
+        lAlign.append( ( "AAAA--CTTT-TT", "AAAAGGCTTTGTT", "refseq::2 chr5 1..10" ) )
+        lExp = [ [ ( 9, 1 ) ] ]
+        lExp.append( [ ( 4, 2 ), ( 8, 1 ) ] )
+        lObs = getGapsOnRefSeq( lAlign )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_insertGap( self ):
+        sequence = "GGGGAAAGTTG"
+        start = 5
+        length = 3
+        exp = "GGGGA---AAGTTG"
+        obs = insertGap( sequence, start, length )
+        self.assertEqual( exp, obs )
+        
+        
+    def test_insertListGaps( self ):
+        sequence = "GGGGAAAGTTG"
+        lGaps =[ ( 5, 3 ), ( 9, 1 ) ]
+        exp = "GGGGA---AAGT-TG"
+        obs = insertListGaps( sequence, lGaps )
+        self.assertEqual( exp, obs )
+        
+        
+    def test_insertGapsInRefSeq( self ):
+        lAlign = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "refseq::1 chr3 1..10" ) ]
+        lAlign.append( ( "AAAA--CTTT-TT", "AAAAGGCTTTGTT", "refseq::2 chr5 1..10" ) )
+        lGapsOnRefSeqPerAlign = [ [ ( 9, 1 ) ] ]
+        lGapsOnRefSeqPerAlign.append( [ ( 4, 2 ), ( 8, 1 ) ] )
+        refseqName = "reference_sequence"
+        lExp = ( refseqName, "AAAA--CTTT-T-T" )
+        lObs = insertGapsInRefSeq( lAlign, lGapsOnRefSeqPerAlign, refseqName )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_insertgap_seq( self ):
+        # AAAACTTTT-T (refseq)
+        # AAAA-TTTGGT ( copy 1)
+        #
+        # AAAA--CTTT-TT (refseq)
+        # AAAAGGCTTTGTT (copy 2)
+        lAlign = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "refseq::1 chr3 1..10" ) ]
+        lAlign.append( ( "AAAA--CTTT-TT", "AAAAGGCTTTGTT", "refseq::2 chr5 1..10" ) )
+        lGapsOnRefSeqPerAlign = [ [ ( 9, 1 ) ] ]
+        lGapsOnRefSeqPerAlign.append( [ ( 4, 2 ), ( 8, 1 ) ] )
+        lExp = [ ( "refseq::1 chr3 1..10", "AAAA---TTT-GGT" ) ]
+        lExp.append( ( "refseq::2 chr5 1..10", "AAAAGGCTTTGT-T" ) )
+        lObs = insertgap_seq( lAlign, lGapsOnRefSeqPerAlign )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_getSeqWithDeletions( self ):
+        lAlign = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "refseq::1 chr3 1..10" ) ]
+        lExp = [ ( "refseq::1 chr3 1..10", "AAAA-TTTGT" ) ]
+        lObs = getSeqWithDeletions( lAlign )
+        self.assertEqual( lExp, lObs )
+        
+        
+    def test_saveOnlyWithDeletions( self ):
+        refseqName = "Dm-B-G54-Map3"
+        lAlign = [ ( "AAAACTTTT-T", "AAAA-TTTGGT", "Dm-B-G54-Map3::1 chr3 1..10" ) ]
+        lAlign.append( ( "AAAA--CTTT-TT", "AAAAGGCTTTGTT", "Dm-B-G54-Map3::2 chr5 1..10" ) )
+        
+        expFile = "dummyExpFile_%s"  %( self._uniqId )
+        expFileHandler = open( expFile, "w" )
+        expFileHandler.write( ">Dm-B-G54-Map3\n" )
+        expFileHandler.write( "AAAACTTTTT\n" )
+        expFileHandler.write( ">Dm-B-G54-Map3::1 chr3 1..10\n" )
+        expFileHandler.write( "AAAA-TTTGT\n" )
+        expFileHandler.write( ">Dm-B-G54-Map3::2 chr5 1..10\n" )
+        expFileHandler.write( "AAAACTTTTT\n" )
+        expFileHandler.close()
+        
+        obsFile = "dummyObsFile_%s" % ( self._uniqId )
+        
+        saveOnlyWithDeletions( lAlign, refseqName, obsFile )
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )
+        
+        for f in [ expFile, obsFile ]:
+            os.remove( f )
+            
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
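
The insertGap and insertListGaps expectations above pin down the gap arithmetic: a single gap of length 3 after position 5 turns GGGGAAAGTTG into GGGGA---AAGTTG, and when a list of gaps is applied, each later position is shifted by the gaps already inserted (hence GGGGA---AAGT-TG for gaps (5,3) and (9,1)). Hypothetical equivalents of those two helpers:

def insert_gap(sequence, start, length):
    # Insert `length` gap characters after the first `start` residues.
    return sequence[:start] + "-" * length + sequence[start:]

def insert_list_gaps(sequence, gaps):
    # Gap positions refer to the original coordinates, so shift each one by
    # the total length of the gaps already inserted.
    offset = 0
    for start, length in gaps:
        sequence = insert_gap(sequence, start + offset, length)
        offset += length
    return sequence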
diff -r 5677346472b5 -r 0ab839023fe4 commons/tools/tests/Test_srptTableOverlap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_srptTableOverlap.py Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,79 @@
+import unittest
+import os
+import time
+import pyRepet.sql.RepetDBMySQL
+from commons.core.utils.FileUtils import FileUtils
+from commons.tools import srptTableOverlap
+
+
+class Test_srptTableOverlap( unittest.TestCase ):
+    
+    def setUp(self):
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
+        self._db = pyRepet.sql.RepetDBMySQL.RepetDB( os.environ["REPET_USER"],
+                                                     os.environ["REPET_HOST"],
+                                                     os.environ["REPET_PW"],
+                                                     os.environ["REPET_DB"] )
+        
+        
+    def test_getOverlapAllPaths(self):
+        qryFileName = "dummyQryFile_%s" % ( self._uniqId )
+        qryF = open( qryFileName, "w" )
+        qryF.write("1\tchr1\t1301\t2000\tTE2\t11\t17\t1e-20\t30\t90.2\n")
+        qryF.close()
+        qryTable = "dummyQryTable_%s" % ( self._uniqId )
+        self._db.create_table( self._db, qryTable, qryFileName, "path" )
+        sbjFileName = "dummySbjFile_%s" % ( self._uniqId )
+        sbjF = open( sbjFileName, "w" )
+        sbjF.write("1\tchr1\t1001\t1500\tTE1\t11\t17\t1e-20\t30\t90.2\n")   # 200-bp overlap: 1301-1500
+        sbjF.write("2\tchr1\t1401\t1800\tTE2\t11\t17\t1e-20\t30\t90.2\n")   # 300-bp overlap: 1501-1800
+        sbjF.write("3\tchr1\t2001\t2100\tTE2\t11\t17\t1e-20\t30\t90.2\n")
+        sbjF.write("4\tchr2\t1301\t2000\tTE2\t11\t17\t1e-20\t30\t90.2\n")   # different query
+        sbjF.close()
+        sbjTable = "dummySbjTable_%s" % ( self._uniqId )
+        self._db.create_table( self._db, sbjTable, sbjFileName, "path" )
+        exp_sum_osize, exp_sum_non_osize, exp_sum_qsize = 500, 200, 700
+        obs_sum_osize, obs_sum_non_osize, obs_sum_qsize = srptTableOverlap.getOverlapAllPaths( qryTable, "path", sbjTable, "path", self._db )
+        self.assertEqual( obs_sum_osize, exp_sum_osize )
+        self.assertEqual( obs_sum_non_osize, exp_sum_non_osize )
+        self.assertEqual( obs_sum_qsize, exp_sum_qsize )
+        os.remove( qryFileName )
+        os.remove( sbjFileName )
+        self._db.remove_if_exist( qryTable )
+        self._db.remove_if_exist( sbjTable )
+        
+        
+    def test_pathOverlapByPath(self):
+        qryFileName = "dummyQryFile_%s" % ( self._uniqId )
+        qryF = open( qryFileName, "w" )
+        qryF.write("1\tchr1\t1301\t2000\tTE2\t1\t700\t1e-80\t850\t90.2\n")
+        qryF.write("1\tchr1\t2301\t3000\tTE2\t801\t1500\t1e-90\t930\t90.2\n")
+        qryF.close()
+        qryTable = "dummyQryTable_%s" % ( self._uniqId )
+        self._db.create_table( self._db, qryTable, qryFileName, "path" )
+        sbjFileName = "dummySbjFile_%s" % ( self._uniqId )
+        sbjF = open( sbjFileName, "w" )
+        sbjF.write("1\tchr1\t1401\t1800\tTE2\t11\t17\t1e-20\t30\t90.2\n")
+        sbjF.write("1\tchr1\t2101\t250000\tTE2\t11\t17\t1e-20\t30\t90.2\n")
+        sbjF.write("2\tchr1\t3501\t4000\tTE3\t11\t17\t1e-20\t30\t90.2\n")
+        sbjF.close()
+        sbjTable = "dummySbjTable_%s" % ( self._uniqId )
+        self._db.create_table( self._db, sbjTable, sbjFileName, "path" )
+        expFileName = "dummyOutFileExp_%s" % ( self._uniqId )
+        expF = open( expFileName, "w" )
+        expF.write("1\tTE2\t1400\t1:TE2\t1\t248300\t1100\t0.785714\t0.004430\n")
+        expF.close()
+        obsFileName = "dummyOutFileObs_%s" % ( self._uniqId )
+        obsF = open( obsFileName, "w" )
+        srptTableOverlap.pathOverlapByPath( qryTable, "path", sbjTable, "path", self._db, obsF )
+        obsF.close()
+        self.assertTrue(  FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        for f in [ qryFileName, sbjFileName, expFileName, obsFileName ]:
+            os.remove( f )
+        self._db.remove_if_exist( qryTable )
+        self._db.remove_if_exist( sbjTable )
+        self._db.remove_if_exist( sbjTable+"_bin" )
+        
+        
+if __name__ == "__main__":
+        unittest.main()
\ No newline at end of file
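
test_getOverlapAllPaths expects 500 overlapping bases, 200 non-overlapping bases and a 700-bp query for a query path at 1301-2000 compared with subjects at 1001-1500 and 1401-1800 on the same sequence, i.e. the union of the clipped subject intervals. A naive position-set version of that computation (hypothetical; the tool itself works through MySQL path tables):

def overlap_with_subjects(q_start, q_end, subject_intervals):
    # Clip each subject interval to the query and collect the covered positions;
    # the set keeps the union free of double counting. Fine for short test paths.
    covered = set()
    for s_start, s_end in subject_intervals:
        lo = max(q_start, min(s_start, s_end))
        hi = min(q_end, max(s_start, s_end))
        covered.update(range(lo, hi + 1))
    q_size = q_end - q_start + 1
    return len(covered), q_size - len(covered), q_size

# overlap_with_subjects(1301, 2000, [(1001, 1500), (1401, 1800), (2001, 2100)])
# == (500, 200, 700)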