Mercurial > repos > vandelj > giant_factor_generator

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/wrappers/FactorFileGenerator.xml	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,145 @@
+<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.2">
+  <description>Generate factor file used by other GIANT tools</description>
+  <requirements>
+  </requirements>
+  <code file="../../src/General_functions.py"/>
+  <stdio>
+    <regex match="Execution halted"
+           source="both"
+           level="fatal"
+           description="Execution halted, please contact tool developer or administrators." />
+    <regex match="Error in"
+           source="both"
+           level="fatal"
+           description="An error occured during R execution, please contact tool developer." />
+    <exit_code range="1:9" level="fatal" description="Error during factor file generation, see log file for more information." />
+  </stdio>
+  <command>	<![CDATA[
+
+  #import imp
+  #set $general_functions=$imp.load_source('General_functions', $__tool_directory__+'/../../src/General_functions.py')
+
+  #set $ret_code=$general_functions.generateFactorFile($inputCondition['inputData'],$factorsSection['factorList'],$outputData.file_name,$log.file_name)
+
+  if [ $ret_code != 0 ]; then
+    printf "[ERROR]Error during factor file generation\n" >> $log;
+    exit $ret_code;
+  fi;
+
+  printf "[INFO]End of tool script" >> $log;
+	]]>
+  </command>
+  <inputs>
+  <param type="text" name="title" value="ConditionsGenerator_toPersonalize" label="Title for output"/>
+
+  <conditional name="inputCondition">
+      <param name="selection" type="select" label="Input data type for sample names" force_select="true">
+        <option value="normalizedData">Expression tabular file</option>
+        <option value="CELcollection">.CEL files</option>
+      </param>
+      <when value="normalizedData">
+        <param type="data" name="inputData" format="tabular" label="Select file" optional="false" multiple="false"/>
+      </when>
+      <when value="CELcollection">
+        <param type="data" name="inputData" format="cel" label="Select files" optional="false" multiple="true">
+        <validator type="empty_dataset" message="At least one data file should be selected"></validator>
+        </param>
+      </when>
+  </conditional>
+
+  <section name="factorsSection" title="Factor definition" expanded="True">
+     <repeat name="factorList" title="Factor">
+        <param type="text" name="factorName" value="" label="Factor name"/>
+        <repeat name="valueList" title="Value">
+          <param type="text" name="valueName" value="" label="Value name"/>
+          <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value"
+            refresh_on_change="true"  dynamic_options="get_condition_file_names(inputCondition['inputData'])">
+          </param>
+        </repeat>
+    </repeat>
+  </section>
+
+  </inputs>
+
+  <outputs>
+    <data format="tabular" name="outputData" label="${title}_conditionsFile"/>
+
+    <data format="txt" name="log" label="${title}_Log" />
+  </outputs>
+
+ <tests>
+  <test maxseconds="3600">
+    <param name="wfile" value="wiggle.wig" />
+    <param name="bfile" value="bedfile.bed" />
+    <param name="span" value="3000" />
+    <param name="pfres" value="50" />
+    <param name="lowersize" value="1000" />
+    <param name="middlesize" value="2000" />
+    <param name="uppersize" value="3000" />
+    <param name="lowerbisize" value="2500" />
+    <param name="upperbisize" value="5000" />
+    <param name="reldist" value="3000" />
+    <param name="genome" value="hg18" />
+    <param name="imagetype" value="PDF" />
+    <param name="enable" value="no" />
+    <output name="outputData" file="ceas_1/ceas_1.pdf" />
+  </test>
+</tests>
+  <help>
+<![CDATA[
+**What it does ?**
+
+This tool generates factor information file used by other tools of GIANT tool suite.
+
+-----
+
+**Parameters**
+
+\- **Title** to personalize output file names (please avoid special characters and spaces).
+
+\- **Input Data** used only to extract sample names
+
+- **Expression tabular file** with samples as columns and genes as rows (only the header row will be used to extract sample names).
+
+    ::
+
+        Conditions  157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL  155_(HuGene-2_0-st).CEL    154_(HuGene-2_0-st).CEL
+        DDX11L2     4.500872                4.429759                 4.780281                   4.996189
+        MIR1302-2   3.415065                3.520472                 3.471503                   3.567988
+        OR4F5       3.737956                3.011586                 3.424494                   3.497545
+        VWA1        5.189621                5.129595                 4.806793                   5.227014
+
+OR
+
+- **.CEL files** of your study (you should select multiple .CEL files or unique collection file).
+
+\- **Factor definition**
+
+- **Factor name** to discriminate between samples as 'Treatments', 'Year', 'Strain' (please avoid special characters)
+
+- **Value name** of different states for the current factor as 'KO' or 'WT' for 'Strain' factor (please avoid special characters)
+
+- **Select sample** to assign to current value
+
+-----
+
+**Outputs**
+
+- **Factor information tabular file**  with factors as columns and samples as rows (header row contains factor names and first column sample names).
+
+    ::
+
+        Conditions                Sex   Treatment Reaction
+        138_(HuGene-2_0-st).CEL   1     TreatA    Pos
+        148_(HuGene-2_0-st).CEL   0     NoTreat   Pos
+        139_(HuGene-2_0-st).CEL   0     TreatB    Neg
+        149_(HuGene-2_0-st).CEL   0     NoTreat   Neg
+
+- **LOG file** for job log. If you see errors, please attached this in the bug report
+
+]]>  </help>
+
+ <citations>
+ </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ExprPlotsScript.R	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,465 @@
+# A command-line interface to basic plots for use with Galaxy
+# written by Jimmy Vandel
+# one of these arguments is required:
+#
+#
+initial.options <- commandArgs(trailingOnly = FALSE)
+file.arg.name <- "--file="
+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
+script.basename <- dirname(script.name)
+source(file.path(script.basename, "utils.R"))
+source(file.path(script.basename, "getopt.R"))
+
+#addComment("Welcome R!")
+
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+loc <- Sys.setlocale("LC_NUMERIC", "C")
+
+#get starting time
+start.time <- Sys.time()
+
+#get options
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs()
+
+
+# get options, using the spec as defined by the enclosed list.
+# we read the options from the default: commandArgs(TRUE).
+spec <- matrix(c(
+  "dataFile", "i", 1, "character",
+  "factorInfo","t", 1, "character",
+  "dataFileFormat","j",1,"character",
+  "conditionNames","c",1,"character",
+  "format", "f", 1, "character",
+  "quiet", "q", 0, "logical",
+  "log", "l", 1, "character",
+  "histo" , "h", 1, "character",
+  "maPlot" , "a", 1, "character",
+  "boxplot" , "b", 1, "character",
+  "microarray" , "m", 1, "character",
+  "acp" , "p" , 1, "character",
+  "screePlot" , "s" , 1, "character"),
+  byrow=TRUE, ncol=4)
+opt <- getopt(spec)
+
+# enforce the following required arguments
+if (is.null(opt$log)) {
+  addComment("[ERROR]'log file' is required")
+  q( "no", 1, F )
+}
+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)
+if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) {
+  addComment("[ERROR]'dataFile' and it format are required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$format)) {
+  addComment("[ERROR]'output format' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$histo) & is.null(opt$maPlot) & is.null(opt$boxplot) & is.null(opt$microarray) & is.null(opt$acp)){
+  addComment("[ERROR]Select at least one plot to draw",T,opt$log)
+  q( "no", 1, F )
+}
+
+verbose <- if (is.null(opt$quiet)) {
+  TRUE
+}else{
+  FALSE}
+
+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)
+
+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)
+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)
+
+#directory for plots
+dir.create(file.path(getwd(), "plotDir"))
+dir.create(file.path(getwd(), "plotLyDir"))
+
+#silent package loading
+suppressPackageStartupMessages({
+  library("oligo")
+  library("ff")
+  library("ggplot2")
+  library("plotly")
+})
+
+
+#chargement des fichiers en entrée
+#fichier de type CEL
+dataAreFromCel=FALSE
+if(toupper(opt$dataFileFormat)=="CEL"){
+  dataAreFromCel=TRUE
+  celData=read.celfiles(unlist(strsplit(opt$dataFile,",")))
+  #load all expressions
+  dataMatrix=exprs(celData)
+  #select "pm" probes
+  probeInfo=getProbeInfo(celData,probeType = c("pm"),target="probeset")
+  #reduce dataMatrix to log expression matrix for a randomly probe selection
+  dataMatrix=log2(dataMatrix[sample(unique(probeInfo[,1]),min(100000,length(unique(probeInfo[,1])))),])
+  addComment("[INFO]Raw data are log2 transformed",TRUE,opt$log,display=FALSE)
+  remove(probeInfo)
+}else{
+  #fichier deja tabule
+  dataMatrix=read.csv(file=opt$dataFile,header=F,sep="\t",colClasses="character")
+  #remove first row to convert it as colnames (to avoid X before colnames with header=T)
+  colNamesData=dataMatrix[1,-1]
+  dataMatrix=dataMatrix[-1,]
+  #remove first colum to convert it as rownames
+  rowNamesData=dataMatrix[,1]
+  dataMatrix=dataMatrix[,-1]
+  if(is.data.frame(dataMatrix)){
+    dataMatrix=data.matrix(dataMatrix)
+  }else{
+    dataMatrix=data.matrix(as.numeric(dataMatrix))
+  }
+  dimnames(dataMatrix)=list(rowNamesData,colNamesData)
+  if(any(duplicated(rowNamesData)))addComment("[WARNING] several rows share the same probe/gene name, you should merge or rename them to avoid further analysis mistakes",TRUE,opt$log,display=FALSE)
+}
+
+addComment("[INFO]Input data loaded",TRUE,opt$log,display=FALSE)
+addComment(c("[INFO]Dim of data matrix:",dim(dataMatrix)),T,opt$log,display=FALSE)
+
+#get number of conditions
+nbConditions=ncol(dataMatrix)
+
+#get condition names if they are specified
+if(!is.null(opt$conditionNames) && length(opt$conditionNames)==nbConditions){
+  nameConditions=opt$conditionNames
+  colnames(dataMatrix)=nameConditions
+  #rownames(phenoData(celData)@data)=nameConditions
+  #rownames(protocolData(celData)@data)=nameConditions
+}else{
+  nameConditions=colnames(dataMatrix)
+}
+
+#create a correspondance table between plot file names and name displayed in figure legend and html items
+correspondanceNameTable=matrix("",ncol=2,nrow=nbConditions)
+correspondanceNameTable[,1]=paste("Condition",1:nbConditions,sep="")
+correspondanceNameTable[,2]=nameConditions
+rownames(correspondanceNameTable)=correspondanceNameTable[,2]
+
+addComment("[INFO]Retreive condition names",TRUE,opt$log,display=FALSE)
+
+if(!is.null(opt$factorInfo)){
+  #chargement du fichier des facteurs
+  factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=F,sep="\t",colClasses="character")
+  #remove first row to convert it as colnames
+  colnames(factorInfoMatrix)=factorInfoMatrix[1,]
+  factorInfoMatrix=factorInfoMatrix[-1,]
+  #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
+  rownames(factorInfoMatrix)=factorInfoMatrix[,1]
+
+
+  if(length(setdiff(colnames(dataMatrix),rownames(factorInfoMatrix)))!=0){
+    addComment("[ERROR]Missing samples in factor file",T,opt$log)
+    q( "no", 1, F )
+  }
+
+  #order sample as in expression matrix and remove spurious sample
+  factorInfoMatrix=factorInfoMatrix[colnames(dataMatrix),]
+
+  addComment("[INFO]Factors OK",T,opt$log,display=FALSE)
+  addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE)
+
+}
+
+addComment("[INFO]Ready to plot",T,opt$log,display=FALSE)
+
+
+##----------------------
+
+###plot histograms###
+histogramPerFigure=50
+if (!is.null(opt$histo)) {
+  for(iToPlot in 1:(((nbConditions-1)%/%histogramPerFigure)+1)){
+    firstPlot=1+histogramPerFigure*(iToPlot-1)
+    lastPlot=min(nbConditions,histogramPerFigure*iToPlot)
+    dataToPlot=data.frame(x=c(dataMatrix[,firstPlot:lastPlot]),Experiment=rep(colnames(dataMatrix)[firstPlot:lastPlot],each=nrow(dataMatrix)))
+    p <- ggplot(data=dataToPlot, aes(x = x, color=Experiment)) + stat_density(geom="line", size=1, position="identity") +
+      ggtitle("Intensity densities") + theme_bw() + ylab(label="Density") +
+      theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5))
+    if(dataAreFromCel){
+      #original ploting function
+      #hist(celData[,firstPlot:lastPlot],lty=rep(1,nbConditions)[firstPlot:lastPlot],lwd=2,which='pm',target="probeset",transfo=log2,col=rainbow(nbConditions)[firstPlot:lastPlot])
+      p <- p + xlab(label="Log2 intensities")
+    }else{
+      p <- p + xlab(label="Intensities")
+    }
+    if(opt$format=="pdf"){
+      pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))}else{
+        png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse=""))
+      }
+    print(p)
+    dev.off()
+    #save plotly files
+    pp <- ggplotly(p)
+    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,iToPlot,".html"),collapse=""),selfcontained = F)
+  }
+  remove(p,dataToPlot)
+  addComment("[INFO]Histograms drawn",T,opt$log,display=FALSE)
+}
+
+##----------------------
+
+###plot MAplots###
+MAplotPerPage=4
+if (!is.null(opt$maPlot)) {
+  iToPlot=1
+  plotVector=list()
+  toTake=sample(nrow(dataMatrix),min(200000,nrow(dataMatrix)))
+  refMedianColumn=rowMedians(as.matrix(dataMatrix[toTake,]))
+  if(length(toTake)>100000)addComment(c("[INFO]high number of input data rows ",length(toTake),"; the generation of MA plot can take a while, please be patient"),TRUE,opt$log,display=FALSE)
+  for (iCondition in 1:nbConditions){
+    #MAplot(celData,which=i,what=pm,transfo=log2)
+    #smoothScatter(x=xToPlot,y=yToPlot,main=nameConditions[iCondition])
+    dataA=dataMatrix[toTake,iCondition]
+    dataB=refMedianColumn####ATTENTION PAR DEFAUT
+    xToPlot=0.5*(dataA+dataB)
+    yToPlot=dataA-dataB
+    tempX=seq(min(xToPlot),max(xToPlot),0.1)
+    tempY=unlist(lapply(tempX,function(x){median(yToPlot[intersect(which(xToPlot>=(x-0.1/2)),which(xToPlot<(x+0.1/2)))])}))
+
+    dataToPlot=data.frame(x=xToPlot,y=yToPlot)
+    dataMedianToPlot=data.frame(x=tempX,y=tempY)
+    p <- ggplot(data=dataToPlot, aes(x,y)) + stat_density2d(aes(fill = ..density..^0.25), geom = "tile", contour = FALSE, n = 100) +
+      scale_fill_continuous(low = "white", high = "dodgerblue4") + geom_smooth(data=dataMedianToPlot,colour="red", size=0.5, se=FALSE) +
+      ggtitle(correspondanceNameTable[iCondition,2]) + theme_bw() + xlab(label="") + ylab(label="") +
+      theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position = "none")
+    plotVector[[length(plotVector)+1]]=p
+
+    #save plotly files
+    pp <- ggplotly(p)
+    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$maPlot,"_",correspondanceNameTable[iCondition,1],".html"),collapse=""),selfcontained = F)
+
+    if(iCondition==nbConditions || length(plotVector)==MAplotPerPage){
+      #define a new plotting file
+      if(opt$format=="pdf"){
+        pdf(paste(c("./plotDir/",opt$maPlot,iToPlot,".pdf"),collapse=""))}else{
+          png(paste(c("./plotDir/",opt$maPlot,iToPlot,".png"),collapse=""))
+        }
+      multiplot(plotlist=plotVector,cols=2)
+      dev.off()
+      if(iCondition<nbConditions){
+        #prepare for a new plotting file if necessary
+        plotVector=list()
+        iToPlot=iToPlot+1
+      }
+    }
+  }
+  remove(p,dataToPlot,dataA,dataB,toTake,xToPlot,yToPlot)
+  addComment("[INFO]MAplots drawn",T,opt$log,display=FALSE)
+}
+
+##----------------------
+
+###plot boxplots###
+boxplotPerFigure=50
+if (!is.null(opt$boxplot)) {
+  for(iToPlot in 1:(((nbConditions-1)%/%boxplotPerFigure)+1)){
+    firstPlot=1+boxplotPerFigure*(iToPlot-1)
+    lastPlot=min(nbConditions,boxplotPerFigure*iToPlot)
+    dataToPlot=data.frame(intensities=c(dataMatrix[,firstPlot:lastPlot]),Experiment=rep(colnames(dataMatrix)[firstPlot:lastPlot],each=nrow(dataMatrix)))
+    #to make HTML file lighter, sampling will be done amongst outliers
+    #get outliers for each boxplot
+    boxplotsOutliers=apply(dataMatrix[,firstPlot:lastPlot],2,function(x)boxplot.stats(x)$out)
+    #sample amongst them to keep at maximum of 1000 points and include both min and max outliers values
+    boxplotsOutliers=lapply(boxplotsOutliers,function(x)if(length(x)>0)c(sample(c(x),min(length(x),1000)),max(c(x)),min(c(x))))
+    dataOutliers=data.frame(yVal=unlist(boxplotsOutliers),xVal=unlist(lapply(seq_along(boxplotsOutliers),function(x)rep(names(boxplotsOutliers)[x],length(boxplotsOutliers[[x]])))))
+    #plot boxplots without outliers
+    p <- ggplot(data=dataToPlot, aes(y = intensities, x=Experiment ,color=Experiment)) + geom_boxplot(outlier.colour=NA,outlier.shape =NA) +
+      ggtitle("Intensities") + theme_bw() + xlab(label="") +
+      theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 45, hjust = 1),plot.margin=unit(c(10,10,max(unlist(lapply(dataToPlot$Experiment,function(x)nchar(as.character(x))))),15+max(unlist(lapply(dataToPlot$Experiment,function(x)nchar(as.character(x)))))),"mm"))
+    #add to plot sampled outliers
+    p <- p + geom_point(data=dataOutliers,aes(x=xVal,y=yVal,color=xVal),inherit.aes = F)
+    if(dataAreFromCel){
+      #original plotting function
+  	  #boxplot(celData[,firstPlot:lastPlot],which='pm',col=rainbow(nbConditions)[firstPlot:lastPlot],target="probeset",transfo=log2,names=nameConditions[firstPlot:lastPlot],main="Intensities")
+      p <- p + ylab(label="Log2 intensities")
+    }else{
+      p <- p + ylab(label="Intensities")
+    }
+    if(opt$format=="pdf"){
+      pdf(paste(c("./plotDir/",opt$boxplot,iToPlot,".pdf"),collapse=""))}else{
+        png(paste(c("./plotDir/",opt$boxplot,iToPlot,".png"),collapse=""))
+      }
+    print(p)
+    dev.off()
+    #save plotly files
+    pp <- ggplotly(p)
+
+    #modify plotly object to get HTML file not too heavy for loading
+    for(iData in 1:length(pp$x$data)){
+      ##get kept outliers y values
+      #yPointsToKeep=dataOutliers$yVal[which(dataOutliers$xVal==pp$x$data[[iData]]$name)]
+      if(pp$x$data[[iData]]$type=="scatter"){
+        ##scatter plot represent outliers points added to boxplot through geom_point
+        ##nothing to do as outliers have been sampled allready, just have to modify hover text
+        #if(length(yPointsToKeep)>0){
+          #pointsToKeep=which(pp$x$data[[iData]]$y %in% yPointsToKeep)
+          #pp$x$data[[iData]]$x=pp$x$data[[iData]]$x[pointsToKeep]
+          #pp$x$data[[iData]]$y=pp$x$data[[iData]]$y[pointsToKeep]
+          #pp$x$data[[iData]]$text=pp$x$data[[iData]]$text[pointsToKeep]
+        #}else{
+          #pp$x$data[[iData]]$x=NULL
+          #pp$x$data[[iData]]$y=NULL
+          #pp$x$data[[iData]]$marker$opacity=0
+          #pp$x$data[[iData]]$hoverinfo=NULL
+          #pp$x$data[[iData]]$text=NULL
+        #}
+        #modify text to display
+        if(dataAreFromCel){
+          pp$x$data[[iData]]$text=unlist(lapply(seq_along(pp$x$data[[iData]]$y),function(x)return(paste(c("log2(intensity) ",prettyNum(pp$x$data[[iData]]$y[x],digits=4)),collapse = ""))))
+        }else{
+          pp$x$data[[iData]]$text=unlist(lapply(seq_along(pp$x$data[[iData]]$y),function(x)return(paste(c("intensity ",prettyNum(pp$x$data[[iData]]$y[x],digits=4)),collapse = ""))))
+        }
+      }else{
+        ##disable marker plotting to keep only box and whiskers plot (outliers are displayed through scatter plot)
+        pp$x$data[[iData]]$marker$opacity=0
+
+        #sample 50000 points amongst all data to get a lighter html file, sampling size should not be too low to avoid modifying limit of boxplots
+        pp$x$data[[iData]]$y=c(sample(dataMatrix[,pp$x$data[[iData]]$name],min(length(dataMatrix[,pp$x$data[[iData]]$name]),50000)),min(dataMatrix[,pp$x$data[[iData]]$name]),max(dataMatrix[,pp$x$data[[iData]]$name]))
+        pp$x$data[[iData]]$x=rep(pp$x$data[[iData]]$x[1],length(pp$x$data[[iData]]$y))
+
+        ##first remove outliers info
+        #downUpValues=boxplot.stats(dataMatrix[,pp$x$data[[iData]]$name])$stats
+        #if(verbose)addComment(c("filter values for boxplot",pp$x$data[[iData]]$name,"between",min(downUpValues),"and",max(downUpValues)),T,opt$log)
+        #pointsToRemove=which(pp$x$data[[iData]]$y<min(downUpValues))
+        #if(length(pointsToRemove)>0)pp$x$data[[iData]]$y=pp$x$data[[iData]]$y[-pointsToRemove]
+        #pointsToRemove=which(pp$x$data[[iData]]$y>max(downUpValues))
+        #if(length(pointsToRemove)>0)pp$x$data[[iData]]$y=pp$x$data[[iData]]$y[-pointsToRemove]
+        #then add sampled outliers info
+        #pp$x$data[[iData]]$y=c(yPointsToKeep,pp$x$data[[iData]]$y)
+        #pp$x$data[[iData]]$x=rep(pp$x$data[[iData]]$x[1],length(pp$x$data[[iData]]$y))
+      }
+    }
+
+    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$boxplot,iToPlot,".html"),collapse=""),selfcontained = F)
+  }
+  remove(p,dataToPlot)
+  addComment("[INFO]Boxplots drawn",T,opt$log,display=FALSE)
+
+}
+
+##----------------------
+
+###plot microarrays (only for .CEL files)###
+if (!is.null(opt$microarray) && dataAreFromCel) {
+  for (iCondition in 1:nbConditions){
+    if(opt$format=="pdf"){
+      pdf(paste(c("./plotDir/",opt$microarray,"_",correspondanceNameTable[iCondition,1],".pdf"),collapse=""),onefile = F,width = 5,height = 5)}else{
+        png(paste(c("./plotDir/",opt$microarray,"_",correspondanceNameTable[iCondition,1],".png"),collapse=""))
+      }
+    image(celData[,iCondition],main=correspondanceNameTable[iCondition,2])
+    dev.off()
+  }
+  addComment("[INFO]Microarray drawn",T,opt$log,display=FALSE)
+}
+
+##----------------------
+
+###plot PCA plot###
+if (!is.null(opt$acp)){
+  ##to avoid error when nrow is too large, results quite stable with 200k random selected rows
+  randomSelection=sample(nrow(dataMatrix),min(200000,nrow(dataMatrix)))
+  #remove constant variables
+
+  dataFiltered=dataMatrix[randomSelection,]
+  toRemove=which(unlist(apply(dataFiltered,1,var))==0)
+  if(length(toRemove)>0){
+    dataFiltered=dataFiltered[-toRemove,]
+  }
+  ##geom_text(aes(label=Experiments,hjust=1, vjust=1.3), y = PC2+0.01)
+  PACres = prcomp(t(dataFiltered),scale.=TRUE)
+
+  if(!is.null(opt$screePlot)){
+    #scree plot
+    #p <- fviz_eig(PACres)
+    dataToPlot=data.frame(compo=seq(1,length(PACres$sdev)),var=(PACres$sdev^2/sum(PACres$sdev^2))*100)
+    p<-ggplot(data=dataToPlot, aes(x=compo, y=var)) + geom_bar(stat="identity", fill="steelblue") + geom_line() + geom_point() +
+      ggtitle("Scree plot") + theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) +
+      xlab(label="Dimensions") + ylab(label="% explained variances") + scale_x_discrete(limits=dataToPlot$compo)
+    pp <- ggplotly(p)
+
+    if(opt$format=="pdf"){
+        pdf(paste(c("./plotDir/",opt$screePlot,".pdf"),collapse=""))}else{
+        png(paste(c("./plotDir/",opt$screePlot,".png"),collapse=""))
+      }
+    plot(p)
+    dev.off()
+    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$screePlot,".html"),collapse=""),selfcontained = F)
+  }
+
+  #now plot pca plots
+
+  if(!is.null(opt$factorInfo)){
+    fileIdent=""
+    symbolset = c("circle","cross","square","diamond","circle-open","square-open","diamond-open","x")
+
+    #save equivalence between real factor names and generic ones in correspondanceNameTable
+    correspondanceNameTable=rbind(correspondanceNameTable,matrix(c(paste("Factor",1:(ncol(factorInfoMatrix)-1),sep=""),colnames(factorInfoMatrix)[-1]),ncol=2,nrow=ncol(factorInfoMatrix)-1))
+    rownames(correspondanceNameTable)=correspondanceNameTable[,2]
+
+    #first order factors from decreasing groups number
+    orderedFactors=colnames(factorInfoMatrix)[-1][order(unlist(lapply(colnames(factorInfoMatrix)[-1],function(x)length(table(factorInfoMatrix[,x])))),decreasing = T)]
+    allFactorsBigger=length(table(factorInfoMatrix[,orderedFactors[length(orderedFactors)]]))>length(symbolset)
+    if(allFactorsBigger)addComment("[WARNING]All factors are composed of too many groups to display two factors at same time, each PCA plot will display only one factor groups",T,opt$log,display=FALSE)
+    for(iFactor in 1:length(orderedFactors)){
+      #if it is the last factor of the list or if all factor
+      if(iFactor==length(orderedFactors) || allFactorsBigger){
+       if(length(orderedFactors)==1 || allFactorsBigger){
+          dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x), Attribute1=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactor]], hoverLabel=unlist(lapply(rownames(PACres$x),function(x)paste(factorInfoMatrix[x,-1],collapse=","))))
+          p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode="markers", color=~Attribute1,colors=rainbow(length(levels(dataToPlot$Attribute1))+2),hoverinfo = 'text', text = ~paste(Experiments,"\n",hoverLabel),marker=list(size=5))%>%
+            layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),
+                   legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))
+          fileIdent=correspondanceNameTable[orderedFactors[iFactor],1]
+          #add text label to plot
+          ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black'))
+          #save the plotly plot
+          htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F)
+        }
+      }else{
+        for(iFactorBis in (iFactor+1):length(orderedFactors)){
+          if(length(table(factorInfoMatrix[,orderedFactors[iFactorBis]]))<=length(symbolset)){
+            dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x), Attribute1=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactor]], Attribute2=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactorBis]], hoverLabel=unlist(lapply(rownames(PACres$x),function(x)paste(factorInfoMatrix[x,-1],collapse=","))))
+            p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode="markers", color=~Attribute1,colors=rainbow(length(levels(dataToPlot$Attribute1))+2),symbol=~Attribute2,symbols = symbolset,hoverinfo = 'text', text = ~paste(Experiments,"\n",hoverLabel),marker=list(size=5))%>%
+              layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),
+                     legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))
+            fileIdent=paste(correspondanceNameTable[orderedFactors[c(iFactor,iFactorBis)],1],collapse="_AND_")
+            #add text label to plot
+            ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black'))
+            #save the plotly plot
+            htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F)
+          }else{
+            addComment(c("[WARNING]PCA with",orderedFactors[iFactor],"and",orderedFactors[iFactorBis],"groups cannot be displayed, too many groups (max",length(symbolset),")"),T,opt$log,display=FALSE)
+          }
+        }
+      }
+    }
+  }else{
+    dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x))
+    p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode="markers",marker=list(size=5,color="salmon"),hoverinfo = 'text',text = ~paste(Experiments))%>%
+      layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),
+             legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))
+    ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black'))
+
+    #save plotly files
+    htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_plot.html"),collapse=""),selfcontained = F)
+  }
+  remove(p,dataToPlot,dataFiltered)
+  addComment("[INFO]ACP plot drawn",T,opt$log,display=FALSE)
+}
+
+#write correspondances between plot file names and displayed names in figure legends, usefull to define html items in xml file
+write.table(correspondanceNameTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F)
+
+end.time <- Sys.time()
+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)
+
+addComment("[INFO]End of R script",T,opt$log,display=FALSE)
+
+printSessionInfo(opt$log)
+#sessionInfo()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/General_functions.py	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,206 @@
+import re
+import numpy as np
+
+def get_column_names( file_path, toNotConsider=-1, each=1):
+	options=[]
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	cpt=0
+	for i, field_component in enumerate( firstLine ):
+		if i!=toNotConsider:#to squeeze the first column
+			if cpt==0:
+				options.append( ( field_component, field_component, False ) )
+			cpt+=1
+			if cpt==each:
+				cpt=0
+	inputfile.close()
+	return options
+
+def get_column_names_filteredList( file_path, toNotConsider=[], each=1):
+	options=[]
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	cpt=0
+	for i, field_component in enumerate( firstLine ):
+		if i not in toNotConsider:#to squeeze the first columns
+			if cpt==0:
+				options.append( ( field_component, field_component, False ) )
+			cpt+=1
+			if cpt==each:
+				cpt=0
+	inputfile.close()
+	return options
+
+def get_column_names_mergeNumber(file_path, numberToMerge=1, toNotConsider=[]):
+	options=[]
+	inputfile = open(file_path)
+	if int(numberToMerge)>0:
+		iHeader=0
+		for iCurrentLine in inputfile:
+			iHeader=iHeader+1
+			if iHeader>int(numberToMerge):
+				break
+			currentLine=iCurrentLine.strip().split("\t")
+			iOption=-1
+			for i, field_component in enumerate( currentLine ):
+				if i not in toNotConsider:#to squeeze specified columns
+					iOption=iOption+1
+					if iHeader==1:
+						options.append( ( str(field_component), str(field_component), False ) )
+					else:
+						options[iOption]=(options[iOption][0]+"_"+str(field_component),options[iOption][1]+"_"+str(field_component),False)
+	else:
+		currentLine = next(inputfile).strip().split("\t")
+		for i, field_component in enumerate( currentLine ):
+			if i not in toNotConsider:#to squeeze specified columns
+				options.append( ( "Column_"+str(i), "Column_"+str(i), False ) )
+	inputfile.close()
+	return options
+
+def get_row_names( file_path, factorName ):
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	iColumn=-1
+	for i, field_component in enumerate( firstLine ):
+		if field_component==factorName:#to test
+			iColumn=i
+	options=[]
+	if iColumn!=-1:
+		for nextLine in inputfile:
+			nextLine=nextLine.strip().split("\t")
+			if len(nextLine)>1:
+				if (nextLine[iColumn], nextLine[iColumn], False) not in options:
+					options.append( (nextLine[iColumn], nextLine[iColumn], False) )
+	inputfile.close()
+	return options
+
+def get_condition_file_names( file_list, toNotConsider=-1, each=1):
+	options=[]
+	if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names
+		inputfile = open(file_list.file_name)
+		firstLine = next(inputfile).strip().split("\t")
+		cpt=0
+		for i, field_component in enumerate( firstLine ):
+			if i!=toNotConsider:#to squeeze the first column
+				if cpt==0:
+					options.append( ( field_component, field_component, False ) )
+				cpt+=1
+				if cpt==each:
+					cpt=0
+		inputfile.close()
+	else:#if input file is a .cel file list or a collection
+		if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily
+			for i, field_component in enumerate( file_list ):
+				options.append( ( field_component.name, field_component.name, False ) )
+		else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object
+			for i, field_component in enumerate( file_list[0].collection.elements ):
+				options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+	return options
+
+def generateFactorFile( file_list, factor_list, outputFileName, logFile):
+	forbidenCharacters={"*",":",",","|"}
+	outputfile = open(outputFileName, 'w')
+	outputLog = open(logFile, 'w')
+	sampleList=[]
+	if not isinstance(file_list,list):
+		conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
+	else :
+		conditionNames=get_condition_file_names(file_list) #.CEL files
+	for iSample, sample_component in enumerate (conditionNames):
+		sampleList.append(str(sample_component[1]))
+	outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")
+	globalDict=dict()
+	factorNameList=[]
+	firstLine="Conditions"
+	if len(factor_list)==0:#check if there is at least one factor available
+		outputLog.write("[ERROR] no factor was defined !\n")
+		return 1
+	else:
+		for iFactor, factor_component in enumerate( factor_list ):
+			currentSampleList=list(sampleList)
+			currentFactor=str(factor_component['factorName'])
+			#check if factor name contains forbidden characters
+			for specialCharacter in forbidenCharacters:
+				if currentFactor.find(specialCharacter)!=-1:
+					outputLog.write("[ERROR] '"+specialCharacter+"' character is forbidden in factor name : '"+currentFactor+"'\n")
+					return 4
+			#check if factor allready named like that
+			if not globalDict.get(currentFactor) is None:
+				outputLog.write("[ERROR] '"+currentFactor+"' is used several times as factor name\n")
+				return 3
+			globalDict[currentFactor]=dict()
+			firstLine=firstLine+"\t"+currentFactor
+			factorNameList.append(currentFactor)
+			if len(factor_component['valueList'])<=1:#check if there is at least two value available
+				outputLog.write("[ERROR] at least two different values are necessary for '"+currentFactor+"' factor\n")
+				return 1
+			else:
+				for iValue, value_component in enumerate( factor_component['valueList'] ):
+					currentValue=str(value_component['valueName'])
+					#check if factor name contains forbidden characters
+					for specialCharacter in forbidenCharacters:
+						if currentValue.find(specialCharacter)!=-1:
+							outputLog.write("[ERROR] '"+specialCharacter+"' character is forbidden in value name : '"+currentValue+"'\n")
+							return 4
+					currentSample=str(value_component['valueConditions']).split(",")
+					for iSample, sample_component in enumerate (currentSample):
+						if not sample_component in currentSampleList:
+							outputLog.write("[ERROR] sample "+sample_component+" was assigned several times for factor '"+currentFactor+"'\n")
+							return 2
+						currentSampleList.remove(sample_component)
+						globalDict[currentFactor][sample_component]=currentValue
+			if(len(currentSampleList)>0):
+				outputLog.write("[ERROR] for factor '"+currentFactor+"'' sample "+str(currentSampleList)+" are not assigned to any value\n")
+				return 2
+	outputLog.write("[INFO] "+str(len(globalDict))+" factors are detected\n")
+	#start writing the factor file
+	outputfile.write(firstLine+"\n")
+	for iSample, sample_component in enumerate(sampleList):
+		newLine=sample_component
+		for iFactor, factor_component in enumerate(factorNameList):
+			newLine=newLine+"\t"+globalDict[factor_component][sample_component]
+		outputfile.write(newLine+"\n")
+	outputfile.close()
+	outputLog.close()
+	return 0
+
+def selectSubSetTable(file_path,headerLine_number,columnsToAdd,columnNamesToKeep,outputFileName,logFile):
+	outputLog = open(logFile, 'w')
+	outputLog.write("[INFO] header line number : "+ headerLine_number+" lines\n")
+	availableColumnsTuple=get_column_names_mergeNumber(file_path, headerLine_number)
+	#convert tuple list as a simple array
+	availableColumns=[]
+	for iTuple, tuple_content in enumerate (availableColumnsTuple):
+		availableColumns.append(str(tuple_content[0]))
+	if len(availableColumns)==0:
+		outputLog.write("[ERROR] No detected columns in input file\n")
+		return 1
+	selectedColumns=list(columnsToAdd)
+	for iVolcano, volcano_content in enumerate(columnNamesToKeep):
+		selectedColumns.append(availableColumns.index(volcano_content['pvalColumn']))
+		if volcano_content['fdrColumn'] in availableColumns:
+			selectedColumns.append(availableColumns.index(volcano_content['fdrColumn']))
+		else:
+			selectedColumns.append(0)
+		selectedColumns.append(availableColumns.index(volcano_content['fcColumn']))
+	if len(selectedColumns)!=(3*len(columnNamesToKeep)+len(columnsToAdd)):
+		outputLog.write("[ERROR] matching between input file colnames and requested column names failed\n")
+		return 1
+	outputLog.write("[INFO] columns kept : "+str(selectedColumns)+"\n")
+	#start writting formatted file
+	inputfile = open(file_path)
+	outputfile = open(outputFileName, 'w')
+	iLineCpt=-1
+	for iCurrentLine in inputfile:
+		iLineCpt=iLineCpt+1
+		if iLineCpt>=int(headerLine_number):
+			currentLineFields=np.array(iCurrentLine.strip().split("\t"))
+			newLine="\t".join(currentLineFields[selectedColumns])
+			outputfile.write(newLine+"\n")
+	if iLineCpt<int(headerLine_number):
+		outputLog.write("[ERROR] not enough lines in input files ("+(iLineCpt+1)+" lines)\n")
+		return 1
+	inputfile.close()
+	outputfile.close()
+	outputLog.close()
+	return 0
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/LIMMA_options.py	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,330 @@
+import re
+
+def get_column_names( file_path, toNotConsider=None, toNotConsiderBis=None):
+	options=[]
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	for i, field_component in enumerate( firstLine ):
+		if i!=0 and field_component!=toNotConsider and field_component!=toNotConsiderBis:#to squeeze the first column
+			options.append( ( field_component, field_component, False ) )
+	inputfile.close()
+	return options
+
+def get_row_names( file_path, factorName ):
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	iColumn=-1
+	for i, field_component in enumerate( firstLine ):
+		if field_component==factorName:#to test
+			iColumn=i
+	options=[]
+	if iColumn!=-1:
+		for nextLine in inputfile:
+			nextLine=nextLine.strip().split("\t")
+			if len(nextLine)>1:
+				if (nextLine[iColumn], nextLine[iColumn], False) not in options:
+					options.append( (nextLine[iColumn], nextLine[iColumn], False) )
+	inputfile.close()
+	return options
+
+def get_row_names_interaction( file_path, factorNameA, factorNameB ):
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	iColumnA=-1
+	iColumnB=-1
+	for i, field_component in enumerate( firstLine ):
+		if field_component==factorNameA:#to test
+			iColumnA=i
+		if field_component==factorNameB:#to test
+			iColumnB=i
+	possibleValuesA=[]
+	possibleValuesB=[]
+	if iColumnA!=-1 and iColumnB!=-1:
+		for nextLine in inputfile:
+			nextLine=nextLine.strip().split("\t")
+			if len(nextLine)>1:
+				if nextLine[iColumnA] not in possibleValuesA:
+					possibleValuesA.append(nextLine[iColumnA])
+				if nextLine[iColumnB] not in possibleValuesB:
+					possibleValuesB.append(nextLine[iColumnB])
+	inputfile.close()
+	options=[]
+	if len(possibleValuesA)>=1 and len(possibleValuesB)>=1 and possibleValuesA[0]!="None" and possibleValuesB[0]!="None":
+		for counterA in range(len(possibleValuesA)):
+			for counterB in range(len(possibleValuesB)):
+				options.append( (possibleValuesA[counterA]+"*"+possibleValuesB[counterB], possibleValuesA[counterA]+"*"+possibleValuesB[counterB], False) )
+	return options
+
+def get_comparisonsA( factorA, valuesA ):
+	options=[]
+	formatValuesA=re.sub("(^\[u')|('\]$)","", str(valuesA))
+	possibleValues=formatValuesA.split("', u'")
+	if len(possibleValues)>=2:
+		for counter in range(len(possibleValues)-1):
+			for innerCounter in range(counter+1,len(possibleValues)):
+				options.append( (possibleValues[counter]+" - "+possibleValues[innerCounter], possibleValues[counter]+" - "+possibleValues[innerCounter], False) )
+				options.append( (possibleValues[innerCounter]+" - "+possibleValues[counter], possibleValues[innerCounter]+" - "+possibleValues[counter], False) )
+	return options
+
+def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):
+	options=[]
+	formatValuesA=re.sub("(^\[u')|('\]$)","", str(valuesA))
+	possibleValuesA=formatValuesA.split("', u'")
+	formatValuesB=re.sub("(^\[u')|('\]$)","", str(valuesB))
+	possibleValuesB=formatValuesB.split("', u'")
+	if str(interaction)=="False":
+		if len(possibleValuesA)>=2:
+			for counter in range(len(possibleValuesA)-1):
+				for innerCounter in range(counter+1,len(possibleValuesA)):
+					options.append( (possibleValuesA[counter]+" - "+possibleValuesA[innerCounter], possibleValuesA[counter]+" - "+possibleValuesA[innerCounter], False) )
+					options.append( (possibleValuesA[innerCounter]+" - "+possibleValuesA[counter], possibleValuesA[innerCounter]+" - "+possibleValuesA[counter], False) )
+		if len(possibleValuesB)>=2:
+			for counter in range(len(possibleValuesB)-1):
+				for innerCounter in range(counter+1,len(possibleValuesB)):
+					options.append( (possibleValuesB[counter]+" - "+possibleValuesB[innerCounter], possibleValuesB[counter]+" - "+possibleValuesB[innerCounter], False) )
+					options.append( (possibleValuesB[innerCounter]+" - "+possibleValuesB[counter], possibleValuesB[innerCounter]+" - "+possibleValuesB[counter], False) )
+	else:
+		if len(possibleValuesA)>=1 and len(possibleValuesB)>=1 and possibleValuesA[0]!="None" and possibleValuesB[0]!="None":
+			for counterA in range(len(possibleValuesA)):
+				for innerCounterA in range(len(possibleValuesA)):
+					for counterB in range(len(possibleValuesB)):
+						for innerCounterB in range(len(possibleValuesB)):
+							if not(counterA==innerCounterA and counterB==innerCounterB):
+								options.append( ("("+possibleValuesA[counterA]+" * "+possibleValuesB[counterB]+") - ("+possibleValuesA[innerCounterA]+" * "+possibleValuesB[innerCounterB]+")","("+possibleValuesA[counterA]+" * "+possibleValuesB[counterB]+") - ("+possibleValuesA[innerCounterA]+" * "+possibleValuesB[innerCounterB]+")", False) )
+	return options
+
+def get_row_names_allInteractions( file_path, factorSelected):
+	formatFactors=re.sub("(^\[u')|('\]$)","", str(factorSelected))
+	factorsList=formatFactors.split("', u'")
+	iColumn=[None] * len(factorsList)
+	valuesList=[None] * len(factorsList)
+
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	for iField, fieldComponent in enumerate( firstLine ):
+		for iFactor, factorComponent in enumerate(factorsList):
+			if fieldComponent==factorComponent:
+				iColumn[iFactor]=iField
+				valuesList[iFactor]=[]
+
+	for nextLine in inputfile:
+		nextLine=nextLine.strip().split("\t")
+		if len(nextLine)>1:
+			for iFactor, factorComponent in enumerate(factorsList):
+				if nextLine[iColumn[iFactor]] not in valuesList[iFactor]:
+					valuesList[iFactor].append(nextLine[iColumn[iFactor]])
+	inputfile.close()
+
+	allCombinations=[]
+	for iFactor, factorComponent in enumerate(factorsList):
+		if iFactor==0:
+			allCombinations=valuesList[iFactor]
+		else:
+			currentCombinations=allCombinations
+			allCombinations=[]
+			for iValue, valueComponent in enumerate(valuesList[iFactor]):
+				for iCombination, combination in enumerate(currentCombinations):
+					allCombinations.append(combination+"*"+valueComponent)
+
+	options=[]
+	for iCombination, combination in enumerate(allCombinations):
+		options.append((combination,combination,False))
+
+	return options
+
+def get_allrow_names( file_path, factorSelected ):
+	formatFactors=re.sub("(^\[u')|('\]$)","", str(factorSelected))
+	factorsList=formatFactors.split("', u'")
+	iColumn=[None] * len(factorsList)
+	valuesList=[None] * len(factorsList)
+
+	inputfile = open(file_path)
+	firstLine = next(inputfile).strip().split("\t")
+	for iField, fieldComponent in enumerate( firstLine ):
+		for iFactor, factorComponent in enumerate(factorsList):
+			if fieldComponent==factorComponent:
+				iColumn[iFactor]=iField
+				valuesList[iFactor]=[]
+
+	for nextLine in inputfile:
+		nextLine=nextLine.strip().split("\t")
+		if len(nextLine)>1:
+			for iFactor, factorComponent in enumerate(factorsList):
+				if nextLine[iColumn[iFactor]] not in valuesList[iFactor]:
+					valuesList[iFactor].append(nextLine[iColumn[iFactor]])
+	inputfile.close()
+
+	allValues=[]
+	for iFactor, factorComponent in enumerate(factorsList):
+		for iValue, valueComponent in enumerate(valuesList[iFactor]):
+			allValues.append(factorComponent+":"+valueComponent)
+
+	options=[]
+	for iValue, valueComponent in enumerate(allValues):
+		options.append((valueComponent,valueComponent,False))
+
+	return options
+
+def replaceNamesInFiles(expressionFile_name,conditionFile_name,outputExpressionFile,outputConditionFile,ouputDictionnary):
+	dico={}
+	forbidenCharacters={"*",":",",","|"}
+	##start with expression file, read only the first line
+	inputfile = open(expressionFile_name)
+	outputfile = open(outputExpressionFile, 'w')
+	firstLine = next(inputfile).rstrip().split("\t")
+	iCondition=1
+	newFirstLine=""
+	for i, field_component in enumerate( firstLine ):
+		if (i>0):
+			#conditions names should not be redundant with other conditions
+			if(field_component not in dico):
+				dico[field_component]="Condition"+str(iCondition)
+				newFirstLine+="\t"+"Condition"+str(iCondition)
+				iCondition+=1
+			else:
+				raise NameError('condition name allready exists!')
+		else:
+			newFirstLine+=field_component
+	outputfile.write(newFirstLine+"\n")
+	for line in inputfile:
+		outputfile.write(line)
+	outputfile.close()
+	inputfile.close()
+	#then parse condition file, read all lines in this case
+	inputfile = open(conditionFile_name)
+	outputfile = open(outputConditionFile, 'w')
+	firstLine=1
+	iFactor=1
+	iValue=1
+	for line in inputfile:
+		currentLine = line.rstrip().split("\t")
+		newCurrentLine=""
+		for i, field_component in enumerate( currentLine ):
+			#special treatment for the first line
+			if (firstLine==1):
+				if (i==0):
+					newCurrentLine=field_component
+				else:
+					#factor names should not be redundant with other factors or conditions
+					if(field_component not in dico):
+						dico[field_component]="Factor"+str(iFactor)
+						newCurrentLine+="\t"+"Factor"+str(iFactor)
+						iFactor+=1
+					else:
+						raise NameError('factor name allready exists!')
+			else:
+				if (i==0):
+					#check if condition name allready exist and used it if it is, or create a new one if not
+					if(field_component not in dico):
+						dico[field_component]="Condition"+str(iCondition)
+						newCurrentLine="Condition"+str(iCondition)
+						iCondition+=1
+					else:
+						newCurrentLine=dico[field_component]
+				else:
+					if(field_component not in dico):
+						dico[field_component]="Value"+str(iValue)
+						newCurrentLine+="\tValue"+str(iValue)
+						iValue+=1
+					else:
+						newCurrentLine+="\t"+dico[field_component]
+		outputfile.write(newCurrentLine+"\n")
+		firstLine=0
+	outputfile.close()
+	inputfile.close()
+	##check if any entries in dictionnary contains forbiden character
+	for key, value in dico.items():
+		for specialCharacter in forbidenCharacters:
+			if value.startswith("Condition")==False and key.find(specialCharacter)!=-1:
+				return 1
+	##then write dictionnary in a additional file
+	outputfile = open(ouputDictionnary, 'w')
+	for key, value in dico.items():
+		outputfile.write(key+"\t"+value+"\n")
+	outputfile.close()
+	return 0
+
+
+def replaceNamesBlockInFiles(expressionFile_name,conditionFile_name,blockingFile_name,outputExpressionFile,outputConditionFile,outputBlockingFile,ouputDictionnary):
+	dico={}
+	forbidenCharacters={"*",":",",","|"}
+	##start with expression file, read only the first line
+	inputfile = open(expressionFile_name)
+	outputfile = open(outputExpressionFile, 'w')
+	firstLine = next(inputfile).rstrip().split("\t")
+	iCondition=1
+	newFirstLine=""
+	for i, field_component in enumerate( firstLine ):
+		if (i>0):
+			#conditions names should not be redundant with other conditions
+			if(field_component not in dico):
+				dico[field_component]="Condition"+str(iCondition)
+				newFirstLine+="\t"+"Condition"+str(iCondition)
+				iCondition+=1
+			else:
+				raise NameError('condition name allready exists!')
+		else:
+			newFirstLine+=field_component
+	outputfile.write(newFirstLine+"\n")
+	for line in inputfile:
+		outputfile.write(line)
+	outputfile.close()
+	inputfile.close()
+	#then parse condition file, read all lines in this case
+	iFactor=1
+	iValue=1
+	for fileNum in range(2):
+		if fileNum==0:
+			inputfile = open(conditionFile_name)
+			outputfile = open(outputConditionFile, 'w')
+		else:
+			inputfile = open(blockingFile_name)
+			outputfile = open(outputBlockingFile, 'w')
+		firstLine=1
+		for line in inputfile:
+			currentLine = line.rstrip().split("\t")
+			newCurrentLine=""
+			for i, field_component in enumerate( currentLine ):
+				#special treatment for the first line
+				if (firstLine==1):
+					if (i==0):
+						newCurrentLine=field_component
+					else:
+						#factor names should not be redundant with other factors or conditions
+						if(field_component not in dico):
+							dico[field_component]="Factor"+str(iFactor)
+							newCurrentLine+="\t"+"Factor"+str(iFactor)
+							iFactor+=1
+						else:
+							raise NameError('factor name allready exists!')
+				else:
+					if (i==0):
+						#check if condition name allready exist and used it if it is, or create a new one if not
+						if(field_component not in dico):
+							dico[field_component]="Condition"+str(iCondition)
+							newCurrentLine="Condition"+str(iCondition)
+							iCondition+=1
+						else:
+							newCurrentLine=dico[field_component]
+					else:
+						if(field_component not in dico):
+							dico[field_component]="Value"+str(iValue)
+							newCurrentLine+="\tValue"+str(iValue)
+							iValue+=1
+						else:
+							newCurrentLine+="\t"+dico[field_component]
+			outputfile.write(newCurrentLine+"\n")
+			firstLine=0
+		outputfile.close()
+		inputfile.close()
+	##check if any entries in dictionnary contains forbiden character
+	for key, value in dico.items():
+		for specialCharacter in forbidenCharacters:
+			if value.startswith("Condition")==False and key.find(specialCharacter)!=-1:
+				return 1
+	##then write dictionnary in a additional file
+	outputfile = open(ouputDictionnary, 'w')
+	for key, value in dico.items():
+		outputfile.write(key+"\t"+value+"\n")
+	outputfile.close()
+	return 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/LIMMAscriptV4.R	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,1002 @@
+# A command-line interface for LIMMA to use with Galaxy
+# written by Jimmy Vandel
+# one of these arguments is required:
+#
+#
+initial.options <- commandArgs(trailingOnly = FALSE)
+file.arg.name <- "--file="
+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
+script.basename <- dirname(script.name)
+source(file.path(script.basename, "utils.R"))
+source(file.path(script.basename, "getopt.R"))
+
+#addComment("Welcome R!")
+
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+loc <- Sys.setlocale("LC_NUMERIC", "C")
+
+#get starting time
+start.time <- Sys.time()
+
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs()
+
+# get options, using the spec as defined by the enclosed list.
+# we read the options from the default: commandArgs(TRUE).
+spec <- matrix(c(
+  "dataFile", "i", 1, "character",
+  "factorInfo","a", 1, "character",
+  "blockingInfo","b", 1, "character",
+  "dicoRenaming","g",1,"character",
+  "blockingPolicy","u", 1, "character",
+  "fdrThreshold","t", 1, "double",
+  "thresholdFC","d", 1, "double",
+  "format", "f", 1, "character",
+  "histo","h", 1, "character",
+  "volcano","v", 1, "character",
+  "factorsContrast","r", 1, "character",
+  "contrastNames","p", 1, "character",
+  "firstGroupContrast","m", 1, "character",
+  "secondGroupContrast","n", 1, "character",
+  "controlGroups","c", 1, "character",
+  "fratioFile","s",1,"character",
+  "organismID","x",1,"character",
+  "rowNameType","y",1,"character",
+  "quiet", "q", 0, "logical",
+  "log", "l", 1, "character",
+  "outputFile" , "o", 1, "character",
+  "outputDfFile" , "z", 1, "character"),
+  byrow=TRUE, ncol=4)
+opt <- getopt(spec)
+
+# enforce the following required arguments
+if (is.null(opt$log)) {
+  addComment("[ERROR]'log file' is required\n")
+  q( "no", 1, F )
+}
+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)
+if (is.null(opt$dataFile)) {
+  addComment("[ERROR]'dataFile' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy) ) {
+  addComment("[ERROR]blocking policy is missing",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$dicoRenaming)) {
+  addComment("[ERROR]renaming dictionnary is missing",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$factorsContrast)) {
+  addComment("[ERROR]factor informations are missing",T,opt$log)
+  q( "no", 1, F )
+}
+if (length(opt$firstGroupContrast)!=length(opt$secondGroupContrast)) {
+  addComment("[ERROR]some contrast groups seems to be empty",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$factorInfo)) {
+  addComment("[ERROR]factors info is missing",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$format)) {
+  addComment("[ERROR]'output format' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$fdrThreshold)) {
+  addComment("[ERROR]'FDR threshold' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$outputFile) || is.null(opt$outputDfFile)){
+  addComment("[ERROR]'output files' are required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$thresholdFC)){
+  addComment("[ERROR]'FC threshold' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$fratioFile)) {
+  addComment("[ERROR]F-ratio parameter is missing",T,opt$log)
+  q( "no", 1, F )
+}
+
+#demande si le script sera bavard
+verbose <- if (is.null(opt$quiet)) {
+  TRUE
+}else{
+  FALSE
+}
+
+#paramètres internes
+#pour savoir si on remplace les FC calculés par LIMMA par un calcul du LS-MEAN (ie moyenne de moyennes de chaque groupe dans chaque terme du contraste plutôt qu'une moyenne globale dans chaque terme)
+useLSmean=FALSE
+
+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)
+
+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)
+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)
+
+#directory for plots
+dir.create(file.path(getwd(), "plotDir"))
+dir.create(file.path(getwd(), "plotLyDir"))
+
+#charge des packages silencieusement
+suppressPackageStartupMessages({
+  library("methods")
+  library("limma")
+  library("biomaRt")
+  library("ggplot2")
+  library("plotly")
+  library("stringr")
+  library("RColorBrewer")
+})
+
+
+#chargement du fichier dictionnaire de renommage
+renamingDico=read.csv(file=file.path(getwd(), opt$dicoRenaming),header=F,sep="\t",colClasses="character")
+rownames(renamingDico)=renamingDico[,2]
+
+
+#chargement des fichiers en entrée
+expDataMatrix=read.csv(file=file.path(getwd(), opt$dataFile),header=F,sep="\t",colClasses="character")
+#remove first row to convert it as colnames (to avoid X before colnames with header=T)
+colNamesData=expDataMatrix[1,-1]
+expDataMatrix=expDataMatrix[-1,]
+#remove first colum to convert it as rownames
+rowNamesData=expDataMatrix[,1]
+expDataMatrix=expDataMatrix[,-1]
+if(is.data.frame(expDataMatrix)){
+  expDataMatrix=data.matrix(expDataMatrix)
+}else{
+  expDataMatrix=data.matrix(as.numeric(expDataMatrix))
+}
+dimnames(expDataMatrix)=list(rowNamesData,colNamesData)
+
+#test the number of rows that are constant in dataMatrix
+nbConstantRows=length(which(unlist(apply(expDataMatrix,1,var))==0))
+if(nbConstantRows>0){
+  addComment(c("[WARNING]",nbConstantRows,"rows are constant across conditions in input data file"),T,opt$log,display=FALSE)
+}
+
+#test if all condition names are present in dico
+if(!all(colnames(expDataMatrix) %in% rownames(renamingDico))){
+  addComment("[ERROR]Missing condition names in renaming dictionary",T,opt$log)
+  q( "no", 1, F )
+}
+
+addComment("[INFO]Expression data loaded and checked",T,opt$log,display=FALSE)
+addComment(c("[INFO]Dim of expression matrix:",dim(expDataMatrix)),T,opt$log,display=FALSE)
+
+#chargement du fichier des facteurs
+factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=F,sep="\t",colClasses="character")
+#remove first row to convert it as colnames
+colnames(factorInfoMatrix)=factorInfoMatrix[1,]
+factorInfoMatrix=factorInfoMatrix[-1,]
+#use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
+rownames(factorInfoMatrix)=factorInfoMatrix[,1]
+
+if(length(setdiff(colnames(expDataMatrix),rownames(factorInfoMatrix)))!=0){
+  addComment("[ERROR]Missing samples in factor file",T,opt$log)
+  q( "no", 1, F )
+}
+
+#order sample as in expression matrix and remove spurious sample
+factorInfoMatrix=factorInfoMatrix[colnames(expDataMatrix),]
+
+#test if all values names are present in dico
+if(!all(unlist(factorInfoMatrix) %in% rownames(renamingDico))){
+  addComment("[ERROR]Missing factor names in renaming dictionary",T,opt$log)
+  q( "no", 1, F )
+}
+
+addComment("[INFO]Factors OK",T,opt$log,display=FALSE)
+addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE)
+
+##manage blocking factor
+blockingFactor=NULL
+blockinFactorsList=NULL
+if(!is.null(opt$blockingInfo)){
+
+  #chargement du fichier des blocking factors
+  blockingInfoMatrix=read.csv(file=file.path(getwd(), opt$blockingInfo),header=F,sep="\t",colClasses="character")
+  #remove first row to convert it as colnames
+  colnames(blockingInfoMatrix)=blockingInfoMatrix[1,]
+  blockingInfoMatrix=blockingInfoMatrix[-1,]
+  #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
+  rownames(blockingInfoMatrix)=blockingInfoMatrix[,1]
+
+
+  if(length(setdiff(colnames(expDataMatrix),rownames(blockingInfoMatrix)))!=0){
+    addComment("[ERROR]Missing samples in blocking factor file",T,opt$log)
+    q( "no", 1, F )
+  }
+
+  #order sample as in expression matrix
+  blockingInfoMatrix=blockingInfoMatrix[colnames(expDataMatrix),]
+
+  #test if all blocking names are present in dico
+  if(!all(unlist(blockingInfoMatrix) %in% rownames(renamingDico))){
+    addComment("[ERROR]Missing blocking names in renaming dictionary",T,opt$log)
+    q( "no", 1, F )
+  }
+
+  #remove blocking factors allready present as real factors
+  blockingNotInMainFactors=setdiff(colnames(blockingInfoMatrix)[-1],colnames(factorInfoMatrix)[-1])
+
+  if(length(blockingNotInMainFactors)<(ncol(blockingInfoMatrix)-1))addComment("[WARNING]Blocking factors cannot be principal factors",T,opt$log,display=FALSE)
+
+  if(length(blockingNotInMainFactors)>0){
+
+    blockingInfoMatrix=blockingInfoMatrix[,c(colnames(blockingInfoMatrix)[1],blockingNotInMainFactors)]
+
+    groupBlocking=rep("c",ncol(expDataMatrix))
+    #for each blocking factor
+    for(blockingFact in blockingNotInMainFactors){
+      if(opt$blockingPolicy=="correlated"){
+        indNewFact=as.numeric(factor(blockingInfoMatrix[,blockingFact]))
+        groupBlocking=paste(groupBlocking,indNewFact,sep="_")
+      }else{
+        if(is.null(blockinFactorsList))blockinFactorsList=list()
+        blockinFactorsList[[blockingFact]]=factor(unlist(lapply(blockingInfoMatrix[,blockingFact],function(x)paste(c(blockingFact,"_",x),collapse=""))))
+      }
+    }
+    if(opt$blockingPolicy=="correlated"){
+      blockingFactor=factor(groupBlocking)
+      if(length(levels(blockingFactor))==1){
+        addComment("[ERROR]Selected blocking factors seems to be constant",T,opt$log)
+        q( "no", 1, F )
+      }
+    }
+    addComment("[INFO]Blocking info OK",T,opt$log,display=FALSE)
+  }else{
+    addComment("[WARNING]No blocking factors will be considered",T,opt$log,display=FALSE)
+  }
+}
+
+
+##rename different input parameters using renamingDictionary
+opt$factorsContrast=renamingDico[unlist(lapply(unlist(strsplit(opt$factorsContrast,",")),function(x)which(renamingDico[,1]==x))),2]
+
+userDefinedContrasts=FALSE
+if(!is.null(opt$firstGroupContrast) && !is.null(opt$secondGroupContrast)){
+  userDefinedContrasts=TRUE
+  for(iContrast in 1:length(opt$firstGroupContrast)){
+    opt$firstGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",")
+    opt$secondGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",")
+  }
+}
+
+if(!is.null(opt$controlGroups)){
+  renamedGroups=c()
+  for(iGroup in unlist(strsplit(opt$controlGroups,","))){
+    renamedControlGroup=paste(renamingDico[unlist(lapply(unlist(strsplit(iGroup,":")),function(x)which(renamingDico[,1]==x))),2],collapse=":")
+    if(length(renamedControlGroup)==0 || any(which(unlist(gregexpr(text = renamedControlGroup,pattern = ":"))==-1))){
+      addComment("[ERROR]Control groups for interaction seem to mismatch, please check them.",T,opt$log)
+      q( "no", 1, F )
+    }
+    renamedGroups=c(renamedGroups,renamedControlGroup)
+  }
+  opt$controlGroups=renamedGroups
+}
+addComment("[INFO]Contrast variables are renamed to avoid confusion",T,opt$log,display=FALSE)
+##renaming done
+
+#to convert factor as numeric value --> useless now ?
+#expDataMatrix=apply(expDataMatrix,c(1,2),function(x)as.numeric(paste(x)))
+
+#get factors info for LIMMA
+factorsList=list()
+for(iFactor in opt$factorsContrast){
+  if(!(iFactor %in% colnames(factorInfoMatrix))){
+    addComment("[ERROR]Required factors are missing in input file",T,opt$log)
+    q( "no", 1, F )
+  }
+  factorsList[[iFactor]]=factor(unlist(lapply(factorInfoMatrix[,iFactor],function(x)paste(c(iFactor,"_",x),collapse=""))))
+  if(length(levels(factorsList[[iFactor]]))==1){
+    addComment("[ERROR]One selected factor seems to be constant",T,opt$log)
+    q( "no", 1, F )
+  }
+}
+
+#check if there is at least 2 factors to allow interaction computation
+if(!is.null(opt$controlGroups) && length(factorsList)<2){
+  addComment("[ERROR]You cannot ask for interaction with less than 2 factors",T,opt$log)
+  q( "no", 1, F )
+}
+
+#merge all factors as a single one
+factorsMerged=as.character(factorsList[[opt$factorsContrast[1]]])
+for(iFactor in opt$factorsContrast[-1]){
+  factorsMerged=paste(factorsMerged,as.character(factorsList[[iFactor]]),sep=".")
+}
+factorsMerged=factor(factorsMerged)
+
+#checked that coefficient number (ie. factorsMerged levels) is strictly smaller than sample size
+if(length(levels(factorsMerged))>=length(factorsMerged)){
+  addComment(c("[ERROR]No enough samples (",length(factorsMerged),") to estimate ",length(levels(factorsMerged))," coefficients"),T,opt$log)
+  q( "no", 1, F )
+}
+
+#get the sample size of each factor values
+sampleSizeFactor=table(factorsMerged)
+
+
+if(!is.null(blockinFactorsList)){
+  factorString=c("blockinFactorsList[['", names(blockinFactorsList)[1],"']]")
+  for(blockingFact in names(blockinFactorsList)[-1]){
+    factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]")
+  }
+  design = model.matrix(as.formula(paste(c("~ factorsMerged +",factorString," + 0"),collapse="")))
+
+  #rename design columns
+  coeffMeaning = levels(factorsMerged)
+  for(blockingFact in blockinFactorsList){
+    coeffMeaning=c(coeffMeaning,levels(blockingFact)[-1])
+  }
+  colnames(design) = coeffMeaning
+}else{
+  design = model.matrix(as.formula( ~ factorsMerged + 0))
+
+  #rename degin columns
+  coeffMeaning = levels(factorsMerged)
+  colnames(design) = coeffMeaning
+}
+
+addComment(c("[INFO]Available coefficients: ",coeffMeaning),T,opt$log,display=F)
+
+estimableCoeff=which(colSums(design)!=0)
+
+addComment("[INFO]Design done",T,opt$log,display=F)
+
+  #use blocking factor if exists
+if(!is.null(blockingFactor)){
+  corfit <- duplicateCorrelation(expDataMatrix, design, block=blockingFactor)
+
+  addComment(c("[INFO]Correlation within groups: ",corfit$consensus.correlation),T,opt$log,display=F)
+
+  #run linear model  fit
+  data.fit = lmFit(expDataMatrix,design,block = blockingFactor, correlation=corfit$consensus.correlation)
+}else{
+  #run linear model  fit
+  data.fit = lmFit(expDataMatrix,design)
+}
+
+estimatedCoeff=which(!is.na(data.fit$coefficients[1,]))
+
+addComment("[INFO]Lmfit done",T,opt$log,display=F)
+
+#catch situation where some coefficients cannot be estimated, probably due to dependances between design columns
+#if(length(setdiff(estimableCoeff,estimatedCoeff))>0){
+#  addComment("[ERROR]Error in design matrix, check your group definitions",T,opt$log)
+#  q( "no", 1, F )
+#}
+#to strong condition, should return ERROR only when coefficients relative to principal factors cannot be estimated, otherwise, return a simple WARNING
+
+#define requested contrasts
+requiredContrasts=c()
+humanReadingContrasts=c()
+persoContrastName=c()
+if(userDefinedContrasts){
+  for(iContrast in 1:length(opt$firstGroupContrast)){
+    posGroup=unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse=".")))
+    negGroup=unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse=".")))
+    #clear posGroup and negGroup from empty groups
+    emptyPosGroups=which(!(posGroup%in%coeffMeaning))
+    if(length(emptyPosGroups)>0){
+      addComment(c("[WARNING]The group(s)",posGroup[emptyPosGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE)
+      posGroup=posGroup[-emptyPosGroups]
+      currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],","))[-emptyPosGroups],collapse="+")
+    }else{
+      currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),collapse="+")
+    }
+    emptyNegGroups=which(!(negGroup%in%coeffMeaning))
+    if(length(emptyNegGroups)>0){
+      addComment(c("[WARNING]The group(s)",negGroup[emptyNegGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE)
+      negGroup=negGroup[-emptyNegGroups]
+      currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))[-emptyNegGroups]),collapse="-")
+    }else{
+      currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))),collapse="-")
+    }
+    if(length(posGroup)==0 || length(negGroup)==0 ){
+      addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to empty group"),T,opt$log,display=FALSE)
+    }else{
+      if(all(posGroup%in%negGroup) && all(negGroup%in%posGroup)){
+        addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to null contrast"),T,opt$log,display=FALSE)
+      }else{
+        #get coefficients required for first group added as positive
+        positiveCoeffWeights=sampleSizeFactor[posGroup]/sum(sampleSizeFactor[posGroup])
+        #positiveCoeffWeights=rep(1,length(posGroup))
+        #names(positiveCoeffWeights)=names(sampleSizeFactor[posGroup])
+        #get coefficients required for second group added as negative
+        negativeCoeffWeights=sampleSizeFactor[negGroup]/sum(sampleSizeFactor[negGroup])
+        #negativeCoeffWeights=rep(1,length(negGroup))
+        #names(negativeCoeffWeights)=names(sampleSizeFactor[negGroup])
+        #build the resulting contrast
+        currentContrast=paste(paste(positiveCoeffWeights[posGroup],posGroup,sep="*"),collapse="+")
+        currentContrast=paste(c(currentContrast,paste(paste(negativeCoeffWeights[negGroup],negGroup,sep="*"),collapse="-")),collapse="-")
+        requiredContrasts=c(requiredContrasts,currentContrast)
+
+        #build the human reading contrast
+        humanReadingContrasts=c(humanReadingContrasts,currentHumanContrast)
+        if(!is.null(opt$contrastNames) && nchar(opt$contrastNames[iContrast])>0){
+          persoContrastName=c(persoContrastName,opt$contrastNames[iContrast])
+        }else{
+          persoContrastName=c(persoContrastName,"")
+        }
+
+        addComment(c("[INFO]Contrast added : ",currentHumanContrast),T,opt$log,display=F)
+        addComment(c("with complete formula ",currentContrast),T,opt$log,display=F)
+      }
+    }
+  }
+}
+
+
+  #define the true formula with interactions to get interaction coefficients
+  factorString=c("factorsList[['", names(factorsList)[1],"']]")
+  for(iFactor in names(factorsList)[-1]){
+    factorString=c(factorString," * factorsList[['",iFactor,"']]")
+  }
+
+  if(!is.null(blockinFactorsList)){
+    for(blockingFact in names(blockinFactorsList)){
+      factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]")
+    }
+  }
+
+  #should not be null at the end
+  allFtestMeanSquare=NULL
+  #to get the F-test values
+  estimatedInteractions=rownames(anova(lm(as.formula(paste(c("expDataMatrix[1,] ~ ",factorString),collapse="")))))
+  estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x){temp=unlist(strsplit(x,"[ \" | : ]"));paste(temp[seq(2,length(temp),3)],collapse=":")})),estimatedInteractions[length(estimatedInteractions)])
+  #rename estimated interaction terms using renamingDico
+  estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x)paste(renamingDico[unlist(strsplit(x,":")),1],collapse=":"))),estimatedInteractions[length(estimatedInteractions)])
+  t <- unlist(apply(expDataMatrix,1,function(x){temp=anova(lm(as.formula(paste(c("x ~ ",factorString),collapse=""))))$`Mean Sq`;temp/temp[length(temp)]}))
+  allFtestMeanSquare <- t(matrix(t,nrow=length(estimatedInteractions)))
+  #remove from allFtest rows containing NA
+  if(length(which(is.na(allFtestMeanSquare[,1])))>0)allFtestMeanSquare=allFtestMeanSquare[-(which(is.na(allFtestMeanSquare[,1]))),]
+  colnames(allFtestMeanSquare)=estimatedInteractions
+
+  #add contrasts corresponding to interaction terms
+  if(!is.null(opt$controlGroups)){
+    #first load user defined control group for each factor
+    controlGroup=rep(NA,length(factorsList))
+    names(controlGroup)=names(factorsList)
+    for(iGroup in opt$controlGroups){
+      splitGroup=unlist(strsplit(iGroup,":"))
+      splitGroup[2]=paste(splitGroup[1],splitGroup[2],sep = "_")
+      #check if defined control group is really a level of the corresponding factor
+      if(!splitGroup[1]%in%names(controlGroup) || !splitGroup[2]%in%factorsList[[splitGroup[1]]]){
+        addComment(c("[ERROR]The factor name",splitGroup[1],"does not exist or group name",splitGroup[2]),T,opt$log)
+        q( "no", 1, F )
+      }
+      if(!is.na(controlGroup[splitGroup[1]])){
+        addComment("[ERROR]Several control groups are defined for the same factor, please select only one control group for each factor if you want to compute interaction contrasts",T,opt$log)
+        q( "no", 1, F )
+      }
+      controlGroup[splitGroup[1]]=splitGroup[2]
+    }
+
+    #check if all factor have a defined control group
+    if(any(is.na(controlGroup))){
+      addComment("[ERROR]Missing control group for some factors, please check them if you want to compute interaction contrasts",T,opt$log)
+      q( "no", 1, F )
+    }
+
+    nbFactors=length(factorsList)
+    interactionContrasts=c()
+    contrastClass=c()
+    #initialize list for the first level
+    newPreviousLoopContrast=list()
+    for(iFactorA in 1:(nbFactors-1)){
+      nameFactorA=names(factorsList)[iFactorA]
+      compA=c()
+      for(levelA in setdiff(levels(factorsList[[iFactorA]]),controlGroup[nameFactorA])){
+        compA=c(compA,paste(levelA,controlGroup[nameFactorA],sep="-"))
+      }
+      newPreviousLoopContrast[[as.character(iFactorA)]]=compA
+    }
+    #make a loop for growing interaction set
+    for(globalIfactor in 1:(nbFactors-1)){
+      previousLoopContrast=newPreviousLoopContrast
+      newPreviousLoopContrast=list()
+      #factor A reuse contrasts made at previsous loop
+      for(iFactorA in names(previousLoopContrast)){
+        compA=previousLoopContrast[[iFactorA]]
+
+        if(max(as.integer(unlist(strsplit(iFactorA,"\\."))))<nbFactors){
+          #factor B is the new factor to include in intreraction set
+          for(iFactorB in (max(as.integer(unlist(strsplit(iFactorA,"\\."))))+1):nbFactors){
+            nameFactorB=names(factorsList)[iFactorB]
+            #keep contrasts identified trough interacting factors set
+            newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c()
+              for(iCompA in compA){
+                for(levelB in setdiff(levels(factorsList[[iFactorB]]),controlGroup[nameFactorB])){
+                  #decompose the contrast compA to apply the new level of factor B on each term
+                  temp=unlist(strsplit(iCompA,"[ + ]"))
+                  splitCompA=temp[1]
+                  for(iTemp in temp[-1])splitCompA=c(splitCompA,"+",iTemp)
+                  splitCompA=unlist(lapply(splitCompA,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
+                  #apply on each contrast term the new level of factor B
+                  firstTerm=paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,levelB,sep=".")}else{x})),collapse="")
+                  secondTerm=negativeExpression(paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,controlGroup[nameFactorB],sep=".")}else{x})),collapse=""))
+                  currentContrast=paste(c(firstTerm,secondTerm),collapse="")
+
+                  newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c(newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]],currentContrast)
+                }
+              }
+            }
+        }
+      }
+      for(iContrast in names(newPreviousLoopContrast)){
+        contrastClass=c(contrastClass,rep(iContrast,length(newPreviousLoopContrast[[iContrast]])))
+      }
+      interactionContrasts=c(interactionContrasts,unlist(newPreviousLoopContrast))
+    }
+    #make human title for interactions
+    names(interactionContrasts)=contrastClass
+    humanReadingInteraction=unlist(lapply(interactionContrasts,function(x)gsub("\\.",":",unlist(strsplit(x,"[+-]"))[1])))
+
+    contrastToIgnore=c()
+
+    #complete with control groups and order to match to coeffs
+    for(iContrast in 1:length(interactionContrasts)){
+      missingFactor=setdiff(1:nbFactors,as.integer(unlist(strsplit(names(interactionContrasts[iContrast]),"\\."))))
+      #decompose the contrast
+      temp=unlist(strsplit(interactionContrasts[iContrast],"[ + ]"))
+      splitContrast=temp[1]
+      for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp)
+      splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
+      for(iFactor in missingFactor){
+        for(iTerm in 1:length(splitContrast)){
+          if(splitContrast[iTerm]!="+" && splitContrast[iTerm]!="-"){
+            splitTerm=unlist(strsplit(splitContrast[iTerm],"\\."))
+            if(iFactor==1)splitContrast[iTerm]=paste(c(controlGroup[names(factorsList)[iFactor]],splitTerm),collapse=".")
+            if(iFactor==nbFactors)splitContrast[iTerm]=paste(c(splitTerm,controlGroup[names(factorsList)[iFactor]]),collapse=".")
+            if(iFactor>1 && iFactor<nbFactors)splitContrast[iTerm]=paste(c(splitTerm[1:(iFactor-1)],controlGroup[names(factorsList)[iFactor]],splitTerm[iFactor:length(splitTerm)]),collapse=".")
+          }
+        }
+      }
+      interactionContrasts[iContrast]=paste(splitContrast,collapse="")
+      if(all(splitContrast[seq(1,length(splitContrast),2)]%in%coeffMeaning)){
+        addComment(c("[INFO]Interaction contrast added : ",humanReadingInteraction[iContrast]),T,opt$log,display=F)
+        addComment(c("with complete formula ",interactionContrasts[iContrast]),T,opt$log,display=F)
+      }else{
+        contrastToIgnore=c(contrastToIgnore,iContrast)
+        addComment(c("[WARNING]Interaction contrast",humanReadingInteraction[iContrast],"is removed due to empty group"),T,opt$log,display=F)
+      }
+    }
+
+    #add interaction contrasts to global contrast list
+    if(length(contrastToIgnore)>0){
+      requiredContrasts=c(requiredContrasts,interactionContrasts[-contrastToIgnore])
+      humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction[-contrastToIgnore])
+      persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction[-contrastToIgnore])))
+    }else{
+      requiredContrasts=c(requiredContrasts,interactionContrasts)
+      humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction)
+      persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction)))
+    }
+  }#end of intreaction contrasts
+
+
+  #remove from requiredContrasts contrasts that cannot be estimated
+  toRemove=unique(unlist(lapply(setdiff(coeffMeaning,names(estimatedCoeff)),function(x)grep(x,requiredContrasts))))
+  if(length(toRemove)>0){
+    addComment(c("[WARNING]",length(toRemove)," contrasts are removed, due to missing coefficients"),T,opt$log,display=FALSE)
+    requiredContrasts=requiredContrasts[-toRemove]
+    humanReadingContrasts=humanReadingContrasts[-toRemove]
+    persoContrastName=persoContrastName[-toRemove]
+  }
+
+  if(length(requiredContrasts)==0){
+    addComment("[ERROR]No contrast to compute, please check your contrast definition.",T,opt$log)
+    q( "no", 1, F )
+  }
+
+  #compute for each contrast mean of coefficients in posGroup and negGroup for FC computation of log(FC) with LSmean as in Partek
+  meanPosGroup=list()
+  meanNegGroup=list()
+  for(iContrast in 1:length(requiredContrasts)){
+    #define posGroup and negGroup
+    #first split contrast
+    temp=unlist(strsplit(requiredContrasts[iContrast],"[ + ]"))
+    splitContrast=temp[1]
+    for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp)
+    splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
+    #and then put each term in good group
+    posGroup=c()
+    negGroup=c()
+    nextIsPos=TRUE
+    for(iSplit in splitContrast){
+      if(iSplit=="+")nextIsPos=TRUE
+      if(iSplit=="-")nextIsPos=FALSE
+      if(iSplit!="-" && iSplit!="+"){
+        #remove weights of contrast terms
+        iSplitBis=unlist(strsplit(iSplit,"[*]"))
+        iSplitBis=iSplitBis[length(iSplitBis)]
+        if(nextIsPos)posGroup=c(posGroup,iSplitBis)
+        else negGroup=c(negGroup,iSplitBis)
+      }
+    }
+    #compute means for each group
+    meanPosGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,posGroup],ncol=length(posGroup)),1,mean)
+    meanNegGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,negGroup],ncol=length(negGroup)),1,mean)
+  }
+
+
+  contrast.matrix = makeContrasts(contrasts=requiredContrasts,levels=design)
+  data.fit.con = contrasts.fit(data.fit,contrast.matrix)
+
+  addComment("[INFO]Contrast definition done",T,opt$log,T,display=FALSE)
+
+  #compute LIMMA statistics
+  data.fit.eb = eBayes(data.fit.con)
+
+  addComment("[INFO]Estimation done",T,opt$log,T,display=FALSE)
+
+  #adjust p.value through FDR
+  data.fit.eb$adj_p.value=data.fit.eb$p.value
+  for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){
+    data.fit.eb$adj_p.value[,iComparison]=p.adjust(data.fit.eb$p.value[,iComparison],"fdr")
+  }
+
+  #add a new field based on LS-means for each contrast instead of global mean like they were calculated in coefficients field
+  data.fit.eb$coefficientsLS=data.fit.eb$coefficients
+  if(ncol(data.fit.eb$coefficients)!=length(meanPosGroup)){
+    addComment("[ERROR]Estimated contrasts number unexpected",T,opt$log)
+    q( "no", 1, F )
+  }
+  for(iContrast in 1:length(meanPosGroup)){
+    data.fit.eb$coefficientsLS[,iContrast]=meanPosGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]-meanNegGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]
+  }
+
+  #if requested replace coefficient computed as global mean by LS-means values
+  if(useLSmean)data.fit.eb$coefficients=data.fit.eb$coefficientsLS
+
+addComment("[INFO]Core treatment done",T,opt$log,T,display=FALSE)
+
+
+##convert humanReadingContrasts with namingDictionary to create humanReadingContrastsRenamed and keep original humanReadingContrasts names for file names
+humanReadingContrastsRenamed=rep("",length(humanReadingContrasts))
+for(iContrast in 1:length(humanReadingContrasts)){
+  if(persoContrastName[iContrast]==""){
+    #if(verbose)addComment(humanReadingContrasts[iContrast])
+    specialCharacters=str_extract_all(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]]
+    #if(verbose)addComment(specialCharacters)
+    nameConverted=unlist(lapply(strsplit(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]],function(x)renamingDico[x,1]))
+    #if(verbose)addComment(nameConverted)
+    humanReadingContrastsRenamed[iContrast]=paste(nameConverted,specialCharacters,collapse="",sep="")
+    #if(verbose)addComment(humanReadingContrastsRenamed[iContrast])
+    humanReadingContrastsRenamed[iContrast]=substr(humanReadingContrastsRenamed[iContrast],1,nchar(humanReadingContrastsRenamed[iContrast])-1)
+  }else{
+    humanReadingContrastsRenamed[iContrast]=persoContrastName[iContrast]
+  }
+}
+
+#write correspondances between plot file names (humanReadingContrasts) and displayed names in figure legends (humanReadingContrastsRenamed), usefull to define html items in xml file
+correspondanceTable=matrix("",ncol=2,nrow=ncol(data.fit.eb$p.value))
+correspondanceTable[,1]=unlist(lapply(humanReadingContrasts,function(x)gsub(":","_INT_",gsub("\\+","_PLUS_",gsub("\\*","_AND_",x)))))
+correspondanceTable[,2]=humanReadingContrastsRenamed
+rownames(correspondanceTable)=correspondanceTable[,2]
+write.table(correspondanceTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F)
+
+#plot nominal p-val histograms for selected comparisons
+histogramPerPage=6
+if (!is.null(opt$histo)) {
+  iToPlot=1
+  plotVector=list()
+  nbComparisons=ncol(data.fit.eb$p.value)
+  for (iComparison in 1:nbComparisons){
+    dataToPlot=data.frame(pval=data.fit.eb$p.value[,iComparison],id=rownames(data.fit.eb$p.value))
+    p <- ggplot(data=dataToPlot, aes(x=pval)) + geom_histogram(colour="red", fill="salmon") +
+      theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="Frequencies") + xlab(label="Nominal p-val") +
+      theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5))
+    plotVector[[length(plotVector)+1]]=p
+
+    pp <- ggplotly(p)
+    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F)
+
+    if(iComparison==nbComparisons || length(plotVector)==histogramPerPage){
+      #plot and close the actual plot
+      if(opt$format=="pdf"){
+        pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))}else{
+          png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse=""))
+        }
+      multiplot(plotlist=plotVector,cols=2)
+      dev.off()
+      if(iComparison<nbComparisons){
+        #prepare for a new plotting file if necessary
+        plotVector=list()
+        iToPlot=iToPlot+1
+      }
+    }
+  }
+  addComment("[INFO]Histograms drawn",T,opt$log,T,display=FALSE)
+
+}
+
+#plot F-test sum square barplot
+if(!is.null(allFtestMeanSquare)){
+  dataToPlot=data.frame(Fratio=apply(allFtestMeanSquare,2,mean),Factors=factor(colnames(allFtestMeanSquare),levels = colnames(allFtestMeanSquare)))
+
+  p <- ggplot(data=dataToPlot, aes(x=Factors, y=Fratio, fill=Factors)) +
+    geom_bar(stat="identity") + scale_fill_manual(values = colorRampPalette(brewer.pal(9,"Set1"))(ncol(allFtestMeanSquare))[sample(ncol(allFtestMeanSquare))]) + ylab(label="mean F-ratio") +
+    theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) + ggtitle("Source of variation")
+
+   if(opt$format=="pdf"){
+    pdf(paste(c("./plotDir/",opt$fratioFile,".pdf"),collapse=""))}else{
+      png(paste(c("./plotDir/",opt$fratioFile,".png"),collapse=""))
+    }
+  plot(p)
+  dev.off()
+
+  pp <- ggplotly(p)
+  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$fratioFile,".html"),collapse=""),selfcontained = F)
+
+  addComment("[INFO]SumSquareTest drawn",T,opt$log,T,display=FALSE)
+}
+
+#plot VOLCANO plot
+#volcanoplot(data.fit.eb,coef=1,highlight=10)
+volcanoPerPage=1
+logFCthreshold=log2(opt$thresholdFC)
+if (!is.null(opt$volcano)) {
+  iToPlot=1
+  plotVector=list()
+  nbComparisons=ncol(data.fit.eb$adj_p.value)
+  for (iComparison in 1:nbComparisons){
+
+    #define the log10(p-val) threshold corresponding to FDR threshold fixed by user
+    probeWithLowFDR=-log10(data.fit.eb$p.value[which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold),iComparison])
+    pvalThresholdFDR=NULL
+    if(length(probeWithLowFDR)>0)pvalThresholdFDR=min(probeWithLowFDR)
+
+    #get significant points over FC and FDR thresholds
+    significativePoints=intersect(which(abs(data.fit.eb$coefficients[,iComparison])>=logFCthreshold),which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold))
+
+    #to reduce size of html plot, we keep 20000 points maximum sampled amongst genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC)))
+    htmlPointsToRemove=intersect(which(abs(data.fit.eb$coefficients[,iComparison])<=quantile(abs(data.fit.eb$coefficients[,iComparison]),c(0.66))),which(data.fit.eb$p.value[,iComparison]>=quantile(abs(data.fit.eb$p.value[,iComparison]),c(0.33))))
+    if(length(htmlPointsToRemove)>20000){
+      htmlPointsToRemove=setdiff(htmlPointsToRemove,sample(htmlPointsToRemove,20000))
+    }else{
+      htmlPointsToRemove=c()
+    }
+
+      xMinLimPlot=min(data.fit.eb$coefficients[,iComparison])-0.2
+      xMaxLimPlot=max(data.fit.eb$coefficients[,iComparison])+0.2
+      yMaxLimPlot= max(-log10(data.fit.eb$p.value[,iComparison]))+0.2
+
+      if(length(significativePoints)>0){
+        dataSignifToPlot=data.frame(pval=-log10(data.fit.eb$p.value[significativePoints,iComparison]),FC=data.fit.eb$coefficients[significativePoints,iComparison],description=paste(names(data.fit.eb$coefficients[significativePoints,iComparison]),"\n","FC: " , round(2^data.fit.eb$coefficients[significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[significativePoints,iComparison],digits=4), sep=""))
+        #to test if remains any normal points to draw
+        if(length(significativePoints)<nrow(data.fit.eb$p.value)){
+          dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-significativePoints,iComparison]),FC=data.fit.eb$coefficients[-significativePoints,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[-significativePoints,iComparison],digits=4), sep=""))
+        }else{
+          dataToPlot=data.frame(pval=0,FC=0,description="null")
+        }
+      }else{
+        dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[,iComparison]),FC=data.fit.eb$coefficients[,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[,iComparison],digits=4), sep=""))
+      }
+
+      ##traditional plot
+      p <- ggplot(data=dataToPlot, aes(x=FC, y=pval)) + geom_point() +
+        theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="-log10(p-val)") + xlab(label="Log2 Fold Change") +
+        theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position="none")
+      if(logFCthreshold!=0) p <- p + geom_vline(xintercept=-logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_vline(xintercept=logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_text(data.frame(text=c(paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),paste(c("log2(FC=",opt$thresholdFC,")"),collapse="")),x=c(-logFCthreshold,logFCthreshold),y=c(0,0)),mapping=aes(x=x, y=y, label=text), size=4, angle=90, vjust=-0.4, hjust=0, color="salmon")
+      if(!is.null(pvalThresholdFDR)) p <- p + geom_hline(yintercept=pvalThresholdFDR, color="skyblue1",linetype="dotted", size=0.5) + geom_text(data.frame(text=c(paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse="")),x=c(xMinLimPlot),y=c(pvalThresholdFDR)),mapping=aes(x=x, y=y, label=text), size=4, vjust=0, hjust=0, color="skyblue3")
+      if(length(significativePoints)>0)p <- p + geom_point(data=dataSignifToPlot,aes(colour=description))
+
+      ##interactive plot
+      if(length(htmlPointsToRemove)>0){
+        pointToRemove=union(htmlPointsToRemove,significativePoints)
+        #to test if remains any normal points to draw
+        if(length(pointToRemove)<nrow(data.fit.eb$p.value)){
+          dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-pointToRemove,iComparison]),FC=data.fit.eb$coefficients[-pointToRemove,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-pointToRemove,iComparison],2) , " | FDR p-val: ", prettyNum(data.fit.eb$adj_p.value[-pointToRemove,iComparison],digits=4), sep=""))
+        }else{
+          dataToPlot=data.frame(pval=0,FC=0,description="null")
+        }
+      }
+
+      if((nrow(dataToPlot)+nrow(dataSignifToPlot))>40000)addComment(c("[WARNING]For",humanReadingContrastsRenamed[iComparison],"volcano, numerous points to plot(",nrow(dataToPlot)+nrow(dataSignifToPlot),"), resulting volcano could be heavy, using more stringent thresholds could be helpful."),T,opt$log)
+
+      phtml <- plot_ly(data=dataToPlot, x=~FC, y=~pval,type="scatter", mode="markers",showlegend = FALSE, marker = list(color="gray",opacity=0.5), text=~description, hoverinfo="text") %>%
+        layout(title = humanReadingContrastsRenamed[iComparison],xaxis=list(title="Log2 Fold Change",showgrid=TRUE, zeroline=FALSE),yaxis=list(title="-log10(p-val)", showgrid=TRUE, zeroline=FALSE))
+      if(length(significativePoints)>0) phtml=add_markers(phtml,data=dataSignifToPlot, x=~FC, y=~pval, mode="markers" , marker=list( color=log10(abs(dataSignifToPlot$FC)*dataSignifToPlot$pval),colorscale='Rainbow'), text=~description, hoverinfo="text", inherit = FALSE) %>% hide_colorbar()
+      if(logFCthreshold!=0){
+       phtml=add_trace(phtml,x=c(-logFCthreshold,-logFCthreshold), y=c(0,yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
+       phtml=add_annotations(phtml,x=-logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
+       phtml=add_trace(phtml,x=c(logFCthreshold,logFCthreshold), y=c(0, yMaxLimPlot), type="scatter",  mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
+       phtml=add_annotations(phtml,x=logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
+      }
+      if(!is.null(pvalThresholdFDR)){
+        phtml=add_trace(phtml,x=c(xMinLimPlot,xMaxLimPlot), y=c(pvalThresholdFDR,pvalThresholdFDR), type="scatter",  mode = "lines", line=list(color="cornflowerblue",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
+        phtml=add_annotations(phtml,x=xMinLimPlot,y=pvalThresholdFDR+0.1,xref = "x",yref = "y",text = paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse=""),xanchor = 'left',showarrow = F,font=list(color="cornflowerblue"))
+      }
+    plotVector[[length(plotVector)+1]]=p
+
+    #save plotly files
+    pp <- ggplotly(phtml)
+    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F)
+
+
+    if(iComparison==nbComparisons || length(plotVector)==volcanoPerPage){
+      #plot and close the actual plot
+      if(opt$format=="pdf"){
+        pdf(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".pdf"),collapse=""))}else{
+          png(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".png"),collapse=""))
+        }
+      multiplot(plotlist=plotVector,cols=1)
+      dev.off()
+      if(iComparison<nbComparisons){
+        #prepare for a new ploting file if necessary
+        plotVector=list()
+        iToPlot=iToPlot+1
+      }
+    }
+  }
+  remove(dataToPlot,dataSignifToPlot)
+  addComment("[INFO]Volcanos drawn",T,opt$log,T,display=FALSE)
+}
+
+rowItemInfo=NULL
+if(!is.null(opt$rowNameType) && !is.null(opt$organismID)){
+##get gene information from BioMart
+#if(!require("biomaRt")){
+#    source("https://bioconductor.org/biocLite.R")
+#    biocLite("biomaRt")
+#}
+
+ensembl_hs_mart <- useMart(biomart="ensembl", dataset=opt$organismID)
+ensembl_df <- getBM(attributes=c(opt$rowNameType,"description"),mart=ensembl_hs_mart)
+rowItemInfo=ensembl_df[which(ensembl_df[,1]!=""),2]
+rowItemInfo=unlist(lapply(rowItemInfo,function(x)substr(unlist(strsplit(x," \\[Source"))[1],1,30)))
+names(rowItemInfo)=ensembl_df[which(ensembl_df[,1]!=""),1]
+}
+
+#write(unlist(dimnames(data.fit.eb$adj_p.value)),opt$log,append = T)
+
+#prepare additional output containing df informations
+dfMatrix=matrix(0,ncol=3,nrow = nrow(data.fit.eb$coefficients),dimnames = list(rownames(data.fit.eb$coefficients),c("df.residual","df.prior","df.total")))
+dfMatrix[,"df.residual"]=data.fit.eb$df.residual
+dfMatrix[,"df.prior"]=data.fit.eb$df.prior
+dfMatrix[,"df.total"]=data.fit.eb$df.total
+
+#filter out genes with higher p-values for all comparisons
+genesToKeep=names(which(apply(data.fit.eb$adj_p.value,1,function(x)length(which(x<=opt$fdrThreshold))>0)))
+#filter out genes with lower FC for all comparisons
+genesToKeep=intersect(genesToKeep,names(which(apply(data.fit.eb$coefficients,1,function(x)length(which(abs(x)>=logFCthreshold))>0))))
+
+if(length(genesToKeep)>0){
+  data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[genesToKeep,],ncol=ncol(data.fit.eb$adj_p.value))
+  rownames(data.fit.eb$adj_p.value)=genesToKeep
+  colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value)
+
+  data.fit.eb$p.value=matrix(data.fit.eb$p.value[genesToKeep,],ncol=ncol(data.fit.eb$p.value))
+  rownames(data.fit.eb$p.value)=genesToKeep
+  colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value)
+
+  data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[genesToKeep,],ncol=ncol(data.fit.eb$coefficients))
+  rownames(data.fit.eb$coefficients)=genesToKeep
+  colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value)
+
+  data.fit.eb$t=matrix(data.fit.eb$t[genesToKeep,],ncol=ncol(data.fit.eb$t))
+  rownames(data.fit.eb$t)=genesToKeep
+  colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value)
+
+  dfMatrix=dfMatrix[genesToKeep,,drop=FALSE]
+
+}else{
+  addComment(c("[WARNING]No significative genes considering the given FDR threshold : ",opt$fdrThreshold),T,opt$log,display=FALSE)
+}
+
+addComment("[INFO]Significant genes filtering done",T,opt$log,T,display=FALSE)
+
+
+#plot VennDiagramm for genes below threshold between comparisons
+#t=apply(data.fit.eb$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold)))
+#get.venn.partitions(t)
+#vennCounts(data.fit.eb$adj_p.value[,1:4]<=opt$threshold)
+
+#make a simple sort genes based only on the first comparison
+#newOrder=order(data.fit.eb$adj_p.value[,1])
+#data.fit.eb$adj_p.value=data.fit.eb$adj_p.value[newOrder,]
+
+#alternative sorting strategy based on the mean gene rank over all comparisons
+if(length(genesToKeep)>1){
+  currentRank=rep(0,nrow(data.fit.eb$adj_p.value))
+  for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){
+    currentRank=currentRank+rank(data.fit.eb$adj_p.value[,iComparison])
+  }
+  currentRank=currentRank/ncol(data.fit.eb$adj_p.value)
+  newOrder=order(currentRank)
+
+  data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[newOrder,],ncol=ncol(data.fit.eb$adj_p.value))
+  rownames(data.fit.eb$adj_p.value)=rownames(data.fit.eb$p.value)[newOrder]
+  colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value)
+
+  data.fit.eb$p.value=matrix(data.fit.eb$p.value[newOrder,],ncol=ncol(data.fit.eb$p.value))
+  rownames(data.fit.eb$p.value)=rownames(data.fit.eb$adj_p.value)
+  colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value)
+
+  data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[newOrder,],ncol=ncol(data.fit.eb$coefficients))
+  rownames(data.fit.eb$coefficients)=rownames(data.fit.eb$adj_p.value)
+  colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value)
+
+  data.fit.eb$t=matrix(data.fit.eb$t[newOrder,],ncol=ncol(data.fit.eb$t))
+  rownames(data.fit.eb$t)=rownames(data.fit.eb$adj_p.value)
+  colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value)
+
+  dfMatrix=dfMatrix[newOrder,,drop=FALSE]
+}
+
+
+#formating output matrices depending on genes to keep
+if(length(genesToKeep)==0){
+  outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)
+  outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))
+  outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))
+  outputData[,1]=c("LIMMA","Gene","noGene")
+  outputData[,2]=c("Comparison","Info","noInfo")
+
+  outputDfData=matrix(0,ncol=3+1,nrow=2)
+  outputDfData[1,]=c("X","df.residual","df.prior","df.total")
+  outputDfData[,1]=c("Statistics","noGene")
+}else{
+  if(length(genesToKeep)==1){
+    outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)
+    outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))
+    outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))
+    outputData[,1]=c("LIMMA","Gene",genesToKeep)
+    outputData[,2]=c("Comparison","Info","na")
+    if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]
+    outputData[3,seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)
+    outputData[3,seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)
+    outputData[3,seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)
+    outputData[3,seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)
+    outputData[3,seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)
+
+    outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))
+    outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")
+    outputDfData[2,]=c(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual","df.prior","df.total")],digits=4))
+  }else{
+    #format matrix to be correctly read by galaxy (move headers in first column and row)
+    outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=nrow(data.fit.eb$adj_p.value)+2)
+    outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))
+    outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))
+    outputData[,1]=c("LIMMA","Gene",rownames(data.fit.eb$adj_p.value))
+    outputData[,2]=c("Comparison","Info",rep("na",nrow(data.fit.eb$adj_p.value)))
+    if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(data.fit.eb$adj_p.value)]
+    outputData[3:nrow(outputData),seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)
+    outputData[3:nrow(outputData),seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)
+    outputData[3:nrow(outputData),seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)
+    outputData[3:nrow(outputData),seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)
+    outputData[3:nrow(outputData),seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)
+
+    outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))
+    outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")
+    outputDfData[2:(1+nrow(dfMatrix)),]=cbind(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual")],digits=4),prettyNum(dfMatrix[,c("df.prior")],digits=4),prettyNum(dfMatrix[,c("df.total")],digits=4))
+  }
+}
+addComment("[INFO]Formated output",T,opt$log,display=FALSE)
+
+#write output results
+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F)
+
+#write df info file
+write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\t",col.names = F,row.names = F)
+
+end.time <- Sys.time()
+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)
+
+addComment("[INFO]End of R script",T,opt$log,display=FALSE)
+
+printSessionInfo(opt$log)
+#sessionInfo()
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/VolcanoPlotsScript.R	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,426 @@
+# R script to plot volcanos through Galaxy based GIANT tool
+# written by Jimmy Vandel
+#
+#
+initial.options <- commandArgs(trailingOnly = FALSE)
+file.arg.name <- "--file="
+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
+script.basename <- dirname(script.name)
+source(file.path(script.basename, "utils.R"))
+source(file.path(script.basename, "getopt.R"))
+
+#addComment("Welcome R!")
+
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+loc <- Sys.setlocale("LC_NUMERIC", "C")
+
+#get starting time
+start.time <- Sys.time()
+
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs()
+
+# get options, using the spec as defined by the enclosed list.
+# we read the options from the default: commandArgs(TRUE).
+spec <- matrix(c(
+  "statisticsFile", "i", 1, "character",
+  "volcanoName" , "n", 1, "character",
+  "pvalColumnName" , "p", 1, "character",
+  "fdrColumnName" , "m", 1, "character",
+  "fcColumnName" , "c", 1, "character",
+  "fcKind","d", 1, "character",
+  "fdrThreshold","s", 1, "double",
+  "fcThreshold","e", 1, "double",
+  "organismID","x",1,"character",
+  "rowNameType","y",1,"character",
+  "log", "l", 1, "character",
+  "outputFile" , "o", 1, "character",
+  "format", "f", 1, "character",
+  "quiet", "q", 0, "logical"),
+  byrow=TRUE, ncol=4)
+opt <- getopt(spec)
+
+# enforce the following required arguments
+if (is.null(opt$log)) {
+  addComment("[ERROR]'log file' is required\n")
+  q( "no", 1, F )
+}
+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)
+if (is.null(opt$statisticsFile)) {
+  addComment("[ERROR]'statisticsFile' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (length(opt$pvalColumnName)==0 || length(opt$fdrColumnName)==0  || length(opt$fcColumnName)==0) {
+  addComment("[ERROR]no selected columns",T,opt$log)
+  q( "no", 1, F )
+}
+if (length(opt$pvalColumnName)!=length(opt$fcColumnName) || length(opt$pvalColumnName)!=length(opt$fdrColumnName)) {
+  addComment("[ERROR]different number of selected columns between p.val, adj-p.val and FC ",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$fcKind)) {
+  addComment("[ERROR]'fcKind' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$fdrThreshold)) {
+  addComment("[ERROR]'FDR threshold' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$fcThreshold)) {
+  addComment("[ERROR]'FC threshold' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$outputFile)) {
+  addComment("[ERROR]'output file' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$format)) {
+  addComment("[ERROR]'output format' is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+#demande si le script sera bavard
+verbose <- if (is.null(opt$quiet)) {
+  TRUE
+}else{
+  FALSE
+}
+
+#paramètres internes
+addComment("[INFO]Parameters checked test mode !",T,opt$log,display=FALSE)
+
+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)
+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)
+
+#directory for plots
+dir.create(file.path(getwd(), "plotDir"))
+dir.create(file.path(getwd(), "plotLyDir"))
+
+#charge des packages silencieusement
+suppressPackageStartupMessages({
+  library("methods")
+  library("biomaRt")
+  library("ggplot2")
+  library("plotly")
+  library("stringr")
+})
+
+#define some usefull variable
+nbVolcanosToPlot=length(opt$pvalColumnName)
+
+#load input file
+statDataMatrix=read.csv(file=file.path(getwd(), opt$statisticsFile),header=F,sep="\t",colClasses="character")
+#remove first colum to convert it as rownames
+rownames(statDataMatrix)=statDataMatrix[,1]
+statDataMatrix=statDataMatrix[,-1]
+
+#identify lines without adjusted p-value info (should contain the same content as rownames) and replace them with NA values
+FDRinfo=rep(TRUE,nbVolcanosToPlot)
+for(iVolcano in 1:nbVolcanosToPlot){
+  #input parameter should be None when adjusted p-val are not available
+  if(opt$fdrColumnName[iVolcano]=="None"){
+    #content of the corresponding column should also be the same as rownames
+    if(!all(statDataMatrix[,(iVolcano-1)*3+2]==rownames(statDataMatrix))){
+      addComment(c("[ERROR]It seems that input stat matrix contains adjusted p-values for volcano",iVolcano,"whereas input parameter indicates that not."),T,opt$log)
+      q( "no", 1, F )
+    }
+    FDRinfo[iVolcano]=FALSE
+    statDataMatrix[,(iVolcano-1)*3+2]=NA
+  }
+}
+
+if(is.data.frame(statDataMatrix)){
+  statDataMatrix=data.matrix(statDataMatrix)
+}else{
+  statDataMatrix=data.matrix(as.numeric(statDataMatrix))
+}
+
+#check if available column number match with volcano requested number
+if(ncol(statDataMatrix)!=3*nbVolcanosToPlot){
+  addComment("[ERROR]Input file column number is different from requested volcano number",T,opt$log)
+  q( "no", 1, F )
+}
+
+#build global dataFrame with data and fill with p.val and log2(FC) and FDR
+dataFrame=data.frame(row.names = rownames(statDataMatrix))
+#start with p-value
+dataFrame$p.value=statDataMatrix[,seq(1,nbVolcanosToPlot*3,3),drop=FALSE]
+#compute FDR if needed or just get available info
+dataFrame$adj_p.value=dataFrame$p.value
+for(iVolcano in 1:nbVolcanosToPlot){
+  #adjusted p-value are already computed
+  if(FDRinfo[iVolcano]){
+    dataFrame$adj_p.value[,iVolcano]=statDataMatrix[,(iVolcano-1)*3+2,drop=FALSE]
+  }else{
+    #adjusted p-value should be computed based on p-val using FDR
+    dataFrame$adj_p.value[,iVolcano]=p.adjust(dataFrame$p.value[,iVolcano,drop=FALSE],"fdr")
+    addComment(c("[INFO]Adjusted p-values are not available in input for volcano",iVolcano,", FDR approach will be used on available raw p-values"),T,opt$log)
+  }
+}
+if(opt$fcKind=="FC"){
+  #we should transform as Log2FC
+  dataFrame$coefficients=log2(statDataMatrix[,seq(3,nbVolcanosToPlot*3,3),drop=FALSE])
+  addComment(c("[INFO]FC are converted in log2(FC) for plotting"),T,opt$log)
+}else{
+  dataFrame$coefficients=statDataMatrix[,seq(3,nbVolcanosToPlot*3,3),drop=FALSE]
+}
+
+addComment(c("[INFO]Input data available for",nbVolcanosToPlot,"volcano(s) with",nrow(statDataMatrix),"rows"),T,opt$log)
+
+
+#plot VOLCANOs
+volcanoPerPage=1
+logFCthreshold=log2(opt$fcThreshold)
+iToPlot=1
+plotVector=list()
+volcanoNameList=c()
+for (iVolcano in 1:nbVolcanosToPlot){
+
+  if(nchar(opt$volcanoName[iVolcano])>0){
+    curentVolcanoName=opt$volcanoName[iVolcano]
+  }else{
+    curentVolcanoName=paste(iVolcano,opt$pvalColumnName[iVolcano],sep="_")
+  }
+
+  #keep only rows without NA for p-val, adjusted p-val and coeff
+  pValToPlot=dataFrame$p.value[,iVolcano]
+  fdrToPlot=dataFrame$adj_p.value[,iVolcano]
+  coeffToPlot=dataFrame$coefficients[,iVolcano]
+
+  rowToRemove=unique(c(which(is.na(pValToPlot)),which(is.na(fdrToPlot)),which(is.na(coeffToPlot))))
+  if(length(rowToRemove)>0){
+    pValToPlot=pValToPlot[-rowToRemove]
+    fdrToPlot=fdrToPlot[-rowToRemove]
+    coeffToPlot=coeffToPlot[-rowToRemove]
+  }
+  addComment(c("[INFO]For",curentVolcanoName,"volcano,",length(rowToRemove),"rows are discarded due to NA values,",length(pValToPlot),"remaining rows."),T,opt$log)
+
+  #save volcano name
+  volcanoNameList=c(volcanoNameList,curentVolcanoName)
+
+  #remove characters possibly troubling
+  volcanoFileName=iVolcano
+
+  #define the log10(p-val) threshold corresponding to FDR threshold fixed by user
+  probeWithLowFDR=-log10(pValToPlot[which(fdrToPlot<=opt$fdrThreshold)])
+  pvalThresholdFDR=NULL
+  if(length(probeWithLowFDR)>0)pvalThresholdFDR=min(probeWithLowFDR)
+
+  #get significant points over FC and FDR thresholds
+  significativePoints=intersect(which(abs(coeffToPlot)>=logFCthreshold),which(fdrToPlot<=opt$fdrThreshold))
+
+  #to reduce size of html plot, we keep 20000 points maximum sampled amongst genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC)))
+  htmlPointsToRemove=intersect(which(abs(coeffToPlot)<=quantile(abs(coeffToPlot),c(0.66))),which(pValToPlot>=quantile(abs(pValToPlot),c(0.33))))
+  if(length(htmlPointsToRemove)>20000){
+    htmlPointsToRemove=setdiff(htmlPointsToRemove,sample(htmlPointsToRemove,20000))
+  }else{
+    htmlPointsToRemove=c()
+  }
+
+  xMinLimPlot=min(coeffToPlot)-0.2
+  xMaxLimPlot=max(coeffToPlot)+0.2
+  yMaxLimPlot= max(-log10(pValToPlot))+0.2
+
+  if(length(significativePoints)>0){
+    dataSignifToPlot=data.frame(pval=-log10(pValToPlot[significativePoints]),FC=coeffToPlot[significativePoints],description=paste(names(coeffToPlot[significativePoints]),"\n","FC: " , round(2^coeffToPlot[significativePoints],2) , " | Adjusted p-val: ",prettyNum(fdrToPlot[significativePoints],digits=4), sep=""))
+    #to test if remains any normal points to draw
+    if(length(significativePoints)<length(pValToPlot)){
+      dataToPlot=data.frame(pval=-log10(pValToPlot[-significativePoints]),FC=coeffToPlot[-significativePoints],description=paste("FC: " , round(2^coeffToPlot[-significativePoints],2) , " | Adjusted p-val: ",prettyNum(fdrToPlot[-significativePoints],digits=4), sep=""))
+    }else{
+      dataToPlot=data.frame(pval=0,FC=0,description="null")
+    }
+  }else{
+    dataToPlot=data.frame(pval=-log10(pValToPlot),FC=coeffToPlot,description=paste("FC: " , round(2^coeffToPlot,2) , " | Adjusted p-val: ",prettyNum(fdrToPlot,digits=4), sep=""))
+  }
+
+  ##traditional plot
+
+  p <- ggplot(data=dataToPlot, aes(x=FC, y=pval)) + geom_point() +
+    theme_bw() + ggtitle(curentVolcanoName) + ylab(label="-Log10(p-val)") + xlab(label="Log2 Fold Change") +
+    theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position="none")
+  if(logFCthreshold!=0) p <- p + geom_vline(xintercept=-logFCthreshold, color="salmon",linetype="dotted", size=1) +  geom_vline(xintercept=logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_text(data.frame(text=c(paste(c("log2(1/FC=",opt$fcThreshold,")"),collapse=""),paste(c("log2(FC=",opt$fcThreshold,")"),collapse="")),x=c(-logFCthreshold,logFCthreshold),y=c(0,0)),mapping=aes(x=x, y=y, label=text), size=4, angle=90, vjust=-0.4, hjust=0, color="salmon")
+  if(!is.null(pvalThresholdFDR)) p <- p + geom_hline(yintercept=pvalThresholdFDR, color="skyblue1",linetype="dotted", size=0.5) + geom_text(data.frame(text=c(paste(c("Adjusted pval limit(",opt$fdrThreshold,")"),collapse="")),x=c(xMinLimPlot),y=c(pvalThresholdFDR)),mapping=aes(x=x, y=y, label=text), size=4, vjust=0, hjust=0, color="skyblue3")
+  if(length(significativePoints)>0)p <- p + geom_point(data=dataSignifToPlot,aes(colour=description))
+
+  ##interactive plot
+
+  if(length(htmlPointsToRemove)>0){
+    pointToRemove=union(htmlPointsToRemove,significativePoints)
+    #to test if it remains any normal points to draw
+    if(length(pointToRemove)<length(pValToPlot)){
+      dataToPlot=data.frame(pval=-log10(pValToPlot[-pointToRemove]),FC=coeffToPlot[-pointToRemove],description=paste("FC: " , round(2^coeffToPlot[-pointToRemove],2) , " | Adjusted p-val: ", prettyNum(fdrToPlot[-pointToRemove],digits=4), sep=""))
+    }else{
+      dataToPlot=data.frame(pval=0,FC=0,description="null")
+    }
+  }
+
+  if((nrow(dataToPlot)+length(significativePoints))>40000)addComment(c("[WARNING]For",curentVolcanoName,"volcano, numerous points to plot(",nrow(dataToPlot)+nrow(dataSignifToPlot),"), resulting volcano could be heavy, using more stringent thresholds could be helpful."),T,opt$log)
+
+  phtml <- plot_ly(data=dataToPlot, x=~FC, y=~pval,type="scatter", mode="markers",showlegend = FALSE, marker = list(color="gray",opacity=0.5), text=~description, hoverinfo="text") %>%
+    layout(title = curentVolcanoName[iVolcano],xaxis=list(title="Log2 Fold Change",showgrid=TRUE, zeroline=FALSE),yaxis=list(title="-Log10(p-val)", showgrid=TRUE, zeroline=FALSE))
+  if(length(significativePoints)>0) phtml=add_markers(phtml,data=dataSignifToPlot, x=~FC, y=~pval, mode="markers" , marker=list( color=log10(abs(dataSignifToPlot$FC)*dataSignifToPlot$pval),colorscale='Rainbow'), text=~description, hoverinfo="text", inherit = FALSE) %>% hide_colorbar()
+  if(logFCthreshold!=0){
+    phtml=add_trace(phtml,x=c(-logFCthreshold,-logFCthreshold), y=c(0,yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
+    phtml=add_annotations(phtml,x=-logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(1/FC=",opt$fcThreshold,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
+    phtml=add_trace(phtml,x=c(logFCthreshold,logFCthreshold), y=c(0, yMaxLimPlot), type="scatter",  mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
+    phtml=add_annotations(phtml,x=logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(FC=",opt$fcThreshold,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
+  }
+  if(!is.null(pvalThresholdFDR)){
+    phtml=add_trace(phtml,x=c(xMinLimPlot,xMaxLimPlot), y=c(pvalThresholdFDR,pvalThresholdFDR), type="scatter",  mode = "lines", line=list(color="cornflowerblue",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
+    phtml=add_annotations(phtml,x=xMinLimPlot,y=pvalThresholdFDR+0.1,xref = "x",yref = "y",text = paste(c("Adjusted pval limit(",opt$fdrThreshold,")"),collapse=""),xanchor = 'left',showarrow = F,font=list(color="cornflowerblue"))
+  }
+  plotVector[[length(plotVector)+1]]=p
+
+  #save plotly files
+  pp <- ggplotly(phtml)
+  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Volcanos_",volcanoFileName,".html"),collapse=""),selfcontained = F)
+
+
+  if(iVolcano==nbVolcanosToPlot || length(plotVector)==volcanoPerPage){
+    #plot and close the actual plot
+    if(opt$format=="pdf"){
+      pdf(paste(c("./plotDir/Volcanos_",volcanoFileName,".pdf"),collapse=""))}else{
+        png(paste(c("./plotDir/Volcanos_",volcanoFileName,".png"),collapse=""))
+      }
+    multiplot(plotlist=plotVector,cols=1)
+    dev.off()
+    if(iVolcano<nbVolcanosToPlot){
+      #prepare for a new ploting file if necessary
+      plotVector=list()
+      iToPlot=iToPlot+1
+    }
+  }
+}
+remove(dataToPlot,dataSignifToPlot)
+addComment("[INFO]Volcanos drawn",T,opt$log,T,display=FALSE)
+
+
+#now add anotation infos about genes
+
+rowItemInfo=NULL
+if(!is.null(opt$rowNameType) && !is.null(opt$organismID)){
+  ##get gene information from BioMart
+  #if(!require("biomaRt")){
+  #    source("https://bioconductor.org/biocLite.R")
+  #    biocLite("biomaRt")
+  #}
+
+  ensembl_hs_mart <- useMart(biomart="ensembl", dataset=opt$organismID)
+  ensembl_df <- getBM(attributes=c(opt$rowNameType,"description"),mart=ensembl_hs_mart)
+  rowItemInfo=ensembl_df[which(ensembl_df[,1]!=""),2]
+  rowItemInfo=unlist(lapply(rowItemInfo,function(x)substr(unlist(strsplit(x," \\[Source"))[1],1,30)))
+  names(rowItemInfo)=ensembl_df[which(ensembl_df[,1]!=""),1]
+}
+
+#filter out genes with higher p-values for all comparisons
+genesToKeep=names(which(apply(dataFrame$adj_p.value,1,function(x)length(which(x<=opt$fdrThreshold))>0)))
+#filter out genes with lower FC for all comparisons
+genesToKeep=intersect(genesToKeep,names(which(apply(dataFrame$coefficients,1,function(x)length(which(abs(x)>=logFCthreshold))>0))))
+
+if(length(genesToKeep)>0){
+  dataFrameNew=data.frame(row.names=genesToKeep)
+
+  dataFrameNew$adj_p.value=matrix(dataFrame$adj_p.value[genesToKeep,,drop=FALSE],ncol=ncol(dataFrame$adj_p.value))
+  rownames(dataFrameNew$adj_p.value)=genesToKeep
+  colnames(dataFrameNew$adj_p.value)=colnames(dataFrame$p.value)
+
+  dataFrameNew$p.value=matrix(dataFrame$p.value[genesToKeep,,drop=FALSE],ncol=ncol(dataFrame$p.value))
+  rownames(dataFrameNew$p.value)=genesToKeep
+  colnames(dataFrameNew$p.value)=colnames(dataFrame$adj_p.value)
+
+  dataFrameNew$coefficients=matrix(dataFrame$coefficients[genesToKeep,,drop=FALSE],ncol=ncol(dataFrame$coefficients))
+  rownames(dataFrameNew$coefficients)=genesToKeep
+  colnames(dataFrameNew$coefficients)=colnames(dataFrame$adj_p.value)
+
+  dataFrame=dataFrameNew
+  rm(dataFrameNew)
+}else{
+  addComment("[WARNING]No significative genes",T,opt$log,display=FALSE)
+}
+
+addComment("[INFO]Significant genes filtering done",T,opt$log,T,display=FALSE)
+
+
+#plot VennDiagramm for genes below threshold between comparisons
+#t=apply(dataFrame$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold)))
+#get.venn.partitions(t)
+#vennCounts(dataFrame$adj_p.value[,1:4]<=opt$threshold)
+
+#make a simple sort genes based only on the first comparison
+#newOrder=order(dataFrame$adj_p.value[,1])
+#dataFrame$adj_p.value=dataFrame$adj_p.value[newOrder,]
+
+#alternative sorting strategy based on the mean gene rank over all comparisons
+if(length(genesToKeep)>1){
+  currentRank=rep(0,nrow(dataFrame$adj_p.value))
+  for(iVolcano in 1:ncol(dataFrame$adj_p.value)){
+    currentRank=currentRank+rank(dataFrame$adj_p.value[,iVolcano])
+  }
+  currentRank=currentRank/ncol(dataFrame$adj_p.value)
+  newOrder=order(currentRank)
+  rownames(dataFrame)=rownames(dataFrame)[newOrder]
+
+  dataFrame$adj_p.value=matrix(dataFrame$adj_p.value[newOrder,],ncol=ncol(dataFrame$adj_p.value))
+  rownames(dataFrame$adj_p.value)=rownames(dataFrame$p.value)[newOrder]
+  colnames(dataFrame$adj_p.value)=colnames(dataFrame$p.value)
+
+  dataFrame$p.value=matrix(dataFrame$p.value[newOrder,],ncol=ncol(dataFrame$p.value))
+  rownames(dataFrame$p.value)=rownames(dataFrame$adj_p.value)
+  colnames(dataFrame$p.value)=colnames(dataFrame$adj_p.value)
+
+  dataFrame$coefficients=matrix(dataFrame$coefficients[newOrder,],ncol=ncol(dataFrame$coefficients))
+  rownames(dataFrame$coefficients)=rownames(dataFrame$adj_p.value)
+  colnames(dataFrame$coefficients)=colnames(dataFrame$adj_p.value)
+}
+
+#formating output matrix depending on genes to keep
+if(length(genesToKeep)==0){
+  outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=3)
+  outputData[1,]=c("X","X",rep(volcanoNameList,each=4))
+  outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))
+  outputData[,1]=c("Volcano","Gene","noGene")
+  outputData[,2]=c("Comparison","Info","noInfo")
+}else{
+  if(length(genesToKeep)==1){
+    outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=3)
+    outputData[1,]=c("X","X",rep(volcanoNameList,each=4))
+    outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))
+    outputData[,1]=c("Volcano","Gene",genesToKeep)
+    outputData[,2]=c("Comparison","Info","na")
+    if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]
+    outputData[3,seq(3,ncol(outputData),4)]=prettyNum(dataFrame$p.value,digits=4)
+    outputData[3,seq(4,ncol(outputData),4)]=prettyNum(dataFrame$adj_p.value,digits=4)
+    outputData[3,seq(5,ncol(outputData),4)]=prettyNum(2^dataFrame$coefficients,digits=4)
+    outputData[3,seq(6,ncol(outputData),4)]=prettyNum(dataFrame$coefficients,digits=4)
+  }else{
+    #format matrix to be correctly read by galaxy (move headers in first column and row)
+    outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=nrow(dataFrame$adj_p.value)+2)
+    outputData[1,]=c("X","X",rep(volcanoNameList,each=4))
+    outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))
+    outputData[,1]=c("Volcano","Gene",rownames(dataFrame$adj_p.value))
+    outputData[,2]=c("Comparison","Info",rep("na",nrow(dataFrame$adj_p.value)))
+    if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(dataFrame$adj_p.value)]
+    outputData[3:nrow(outputData),seq(3,ncol(outputData),4)]=prettyNum(dataFrame$p.value,digits=4)
+    outputData[3:nrow(outputData),seq(4,ncol(outputData),4)]=prettyNum(dataFrame$adj_p.value,digits=4)
+    outputData[3:nrow(outputData),seq(5,ncol(outputData),4)]=prettyNum(2^dataFrame$coefficients,digits=4)
+    outputData[3:nrow(outputData),seq(6,ncol(outputData),4)]=prettyNum(dataFrame$coefficients,digits=4)
+  }
+}
+addComment("[INFO]Formated output",T,opt$log,display=FALSE)
+
+#write output results
+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F)
+
+
+end.time <- Sys.time()
+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)
+
+addComment("[INFO]End of R script",T,opt$log,display=FALSE)
+
+printSessionInfo(opt$log)
+
+#sessionInfo()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/getopt.R	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,773 @@
+# Copyright (c) 2008-2010 Allen Day
+# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu>
+#
+# Modified by J.Vandel 2017 to consider situation of multiple identical flag
+# and concatenate as a vector the set of parameter for the same flag instead of
+# keeping only the last value as done by the previous version.
+#
+#  This file is free software: you may copy, redistribute and/or modify it
+#  under the terms of the GNU General Public License as published by the
+#  Free Software Foundation, either version 2 of the License, or (at your
+#  option) any later version.
+#
+#  This file is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#' C-like getopt behavior
+#'
+#' getopt is primarily intended to be used with ``\link{Rscript}''.  It
+#' facilitates writing ``\#!'' shebang scripts that accept short and long
+#' flags/options.  It can also be used from ``R'' directly, but is probably less
+#' useful in this context.
+#'
+#' getopt() returns a \link{list} data structure containing \link{names} of the
+#' flags that were present in the \link{character} \link{vector} passed in under
+#' the \emph{opt} argument.  Each value of the \link{list} is coerced to the
+#' data type specified according to the value of the \emph{spec} argument.  See
+#' below for details.
+#'
+#' Notes on naming convention:
+#'
+#' 1. An \emph{option} is one of the shell-split input strings.
+#'
+#' 2. A \emph{flag} is a type of \emph{option}.  a \emph{flag} can be defined as
+#' having no \emph{argument} (defined below), a required \emph{argument}, or an
+#' optional \emph{argument}.
+#'
+#' 3. An \emph{argument} is a type of \emph{option}, and is the value associated
+#' with a flag.
+#'
+#' 4. A \emph{long flag} is a type of \emph{flag}, and begins with the string
+#' ``--''.  If the \emph{long flag} has an associated \emph{argument}, it may be
+#' delimited from the \emph{long flag} by either a trailing \emph{=}, or may be
+#' the subsequent \emph{option}.
+#'
+#' 5. A \emph{short flag} is a type of \emph{flag}, and begins with the string
+#' ``-''.  If a \emph{short flag} has an associated \emph{argument}, it is the
+#' subsequent \emph{option}.  \emph{short flags} may be bundled together,
+#' sharing a single leading ``-'', but only the final \emph{short flag} is able
+#' to have a corresponding \emph{argument}.
+#'
+#' Many users wonder whether they should use the getopt package, optparse package,
+#' or argparse package.
+#' Here is some of the major differences:
+#'
+#' Features available in \code{getopt} unavailable in \code{optparse}
+#'
+#' 1. As well as allowing one to specify options that take either
+#'      no argument or a required argument like \code{optparse},
+#'    \code{getopt} also allows one to specify option with an optional argument.
+#'
+#' Some features implemented in \code{optparse} package unavailable in \code{getopt}
+#'
+#' 1. Limited support for capturing positional arguments after the optional arguments
+#' when \code{positional_arguments} set to TRUE in \code{parse_args}
+#'
+#' 2. Automatic generation of an help option and printing of help text when encounters an "-h"
+#'
+#' 3. Option to specify default arguments for options as well the
+#'    variable name to store option values
+#'
+#' There is also new package \code{argparse} introduced in 2012 which contains
+#' all the features of both getopt and optparse but which has a dependency on
+#' Python 2.7 or 3.2+ and has not been used in production since 2008 or 2009
+#' like the getopt and optparse packages.
+#'
+#' Some Features unlikely to be implemented in \code{getopt}:
+#'
+#' 1. Support for multiple, identical flags, e.g. for "-m 3 -v 5 -v", the
+#' trailing "-v" overrides the preceding "-v 5", result is v=TRUE (or equivalent
+#' typecast).
+#'
+#' 2. Support for multi-valued flags, e.g. "--libpath=/usr/local/lib
+#' --libpath=/tmp/foo".
+#'
+#' 3. Support for lists, e.g. "--define os=linux --define os=redhat" would
+#' set result$os$linux=TRUE and result$os$redhat=TRUE.
+#'
+#' 4. Support for incremental, argument-less flags, e.g. "/path/to/script
+#' -vvv" should set v=3.
+#'
+#' 5. Support partial-but-unique string match on options, e.g. "--verb" and
+#' "--verbose" both match long flag "--verbose".
+#'
+#' 6. No support for mixing in positional arguments or extra arguments that
+#' don't match any options.  For example, you can't do "my.R --arg1 1 foo bar
+#' baz" and recover "foo", "bar", "baz" as a list.  Likewise for "my.R foo
+#' --arg1 1 bar baz".
+#'
+#' @aliases getopt getopt-package
+#' @param spec The getopt specification, or spec of what options are considered
+#' valid.  The specification must be either a 4-5 column \link{matrix}, or a
+#' \link{character} \link{vector} coercible into a 4 column \link{matrix} using
+#' \link{matrix}(x,ncol=4,byrow=TRUE) command.  The \link{matrix}/\link{vector}
+#' contains:
+#'
+#' Column 1: the \emph{long flag} name.  A multi-\link{character} string.
+#'
+#' Column 2: \emph{short flag} alias of Column 1.  A single-\link{character}
+#' string.
+#'
+#' Column 3: \emph{Argument} mask of the \emph{flag}.  An \link{integer}.
+#' Possible values: 0=no argument, 1=required argument, 2=optional argument.
+#'
+#' Column 4: Data type to which the \emph{flag}'s argument shall be cast using
+#' \link{storage.mode}.  A multi-\link{character} string.  This only considered
+#' for same-row Column 3 values of 1,2.  Possible values: \link{logical},
+#' \link{integer}, \link{double}, \link{complex}, \link{character}.
+#' If \link{numeric} is encountered then it will be converted to double.
+#'
+#' Column 5 (optional): A brief description of the purpose of the option.
+#'
+#' The terms \emph{option}, \emph{flag}, \emph{long flag}, \emph{short flag},
+#' and \emph{argument} have very specific meanings in the context of this
+#' document.  Read the ``Description'' section for definitions.
+#' @param opt This defaults to the return value of \link{commandArgs}(TRUE).
+#'
+#' If R was invoked directly via the ``R'' command, this corresponds to all
+#' arguments passed to R after the ``--args'' flag.
+#'
+#' If R was invoked via the ``\link{Rscript}'' command, this corresponds to all
+#' arguments after the name of the R script file.
+#'
+#' Read about \link{commandArgs} and \link{Rscript} to learn more.
+#' @param command The string to use in the usage message as the name of the
+#' script.  See argument \emph{usage}.
+#' @param usage If TRUE, argument \emph{opt} will be ignored and a usage
+#' statement (character string) will be generated and returned from \emph{spec}.
+#' @param debug This is used internally to debug the getopt() function itself.
+#' @author Allen Day
+#' @seealso \code{\link{getopt}}
+#' @keywords data
+#' @export
+#' @examples
+#'
+#' #!/path/to/Rscript
+#' library('getopt');
+#' #get options, using the spec as defined by the enclosed list.
+#' #we read the options from the default: commandArgs(TRUE).
+#' spec = matrix(c(
+#'   'verbose', 'v', 2, "integer",
+#'   'help'   , 'h', 0, "logical",
+#'   'count'  , 'c', 1, "integer",
+#'   'mean'   , 'm', 1, "double",
+#'   'sd'     , 's', 1, "double"
+#' ), byrow=TRUE, ncol=4);
+#' opt = getopt(spec);
+#'
+#' # if help was asked for print a friendly message
+#' # and exit with a non-zero error code
+#' if ( !is.null(opt$help) ) {
+#'   cat(getopt(spec, usage=TRUE));
+#'   q(status=1);
+#' }
+#'
+#' #set some reasonable defaults for the options that are needed,
+#' #but were not specified.
+#' if ( is.null(opt$mean    ) ) { opt$mean    = 0     }
+#' if ( is.null(opt$sd      ) ) { opt$sd      = 1     }
+#' if ( is.null(opt$count   ) ) { opt$count   = 10    }
+#' if ( is.null(opt$verbose ) ) { opt$verbose = FALSE }
+#'
+#' #print some progress messages to stderr, if requested.
+#' if ( opt$verbose ) { write("writing...",stderr()); }
+#'
+#' #do some operation based on user input.
+#' cat(paste(rnorm(opt$count,mean=opt$mean,sd=opt$sd),collapse="\n"));
+#' cat("\n");
+#'
+#' #signal success and exit.
+#' #q(status=0);
+getopt = function (spec=NULL,opt=commandArgs(TRUE),command=get_Rscript_filename(),usage=FALSE,debug=FALSE) {
+
+  # littler compatibility - map argv vector to opt
+  if (exists("argv", where = .GlobalEnv, inherits = FALSE)) {
+    opt = get("argv", envir = .GlobalEnv);
+  }
+
+  ncol=4;
+  maxcol=6;
+  col.long.name    = 1;
+  col.short.name   = 2;
+  col.has.argument = 3;
+  col.mode         = 4;
+  col.description  = 5;
+
+  flag.no.argument = 0;
+  flag.required.argument = 1;
+  flag.optional.argument = 2;
+
+  result = list();
+  result$ARGS = vector(mode="character");
+
+  #no spec.  fail.
+  if ( is.null(spec) ) {
+    stop('argument "spec" must be non-null.');
+
+  #spec is not a matrix.  attempt to coerce, if possible.  issue a warning.
+  } else if ( !is.matrix(spec) ) {
+    if ( length(spec)/4 == as.integer(length(spec)/4) ) {
+      warning('argument "spec" was coerced to a 4-column (row-major) matrix.  use a matrix to prevent the coercion');
+      spec = matrix( spec, ncol=ncol, byrow=TRUE );
+    } else {
+      stop('argument "spec" must be a matrix, or a character vector with length divisible by 4, rtfm.');
+    }
+
+  #spec is a matrix, but it has too few columns.
+  } else if ( dim(spec)[2] < ncol ) {
+    stop(paste('"spec" should have at least ",ncol," columns.',sep=''));
+
+  #spec is a matrix, but it has too many columns.
+  } else if ( dim(spec)[2] > maxcol ) {
+    stop(paste('"spec" should have no more than ",maxcol," columns.',sep=''));
+
+  #spec is a matrix, and it has some optional columns.
+  } else if ( dim(spec)[2] != ncol ) {
+    ncol = dim(spec)[2];
+  }
+
+  #sanity check.  make sure long names are unique, and short names are unique.
+  if ( length(unique(spec[,col.long.name])) != length(spec[,col.long.name]) ) {
+    stop(paste('redundant long names for flags (column ',col.long.name,').',sep=''));
+  }
+  if ( length(na.omit(unique(spec[,col.short.name]))) != length(na.omit(spec[,col.short.name])) ) {
+    stop(paste('redundant short names for flags (column ',col.short.name,').',sep=''));
+  }
+  # convert numeric type to double type
+  spec[,4] <- gsub("numeric", "double", spec[,4])
+
+  # if usage=TRUE, don't process opt, but generate a usage string from the data in spec
+  if ( usage ) {
+    ret = '';
+    ret = paste(ret,"Usage: ",command,sep='');
+    for ( j in 1:(dim(spec))[1] ) {
+      ret = paste(ret,' [-[-',spec[j,col.long.name],'|',spec[j,col.short.name],']',sep='');
+      if (spec[j,col.has.argument] == flag.no.argument) {
+        ret = paste(ret,']',sep='');
+      } else if (spec[j,col.has.argument] == flag.required.argument) {
+        ret = paste(ret,' <',spec[j,col.mode],'>]',sep='');
+      } else if (spec[j,col.has.argument] == flag.optional.argument) {
+        ret = paste(ret,' [<',spec[j,col.mode],'>]]',sep='');
+      }
+    }
+    # include usage strings
+    if ( ncol >= 5 ) {
+      max.long = max(apply(cbind(spec[,col.long.name]),1,function(x)length(strsplit(x,'')[[1]])));
+      ret = paste(ret,"\n",sep='');
+      for (j in 1:(dim(spec))[1] ) {
+        ret = paste(ret,sprintf(paste("    -%s|--%-",max.long,"s    %s\n",sep=''),
+          spec[j,col.short.name],spec[j,col.long.name],spec[j,col.description]
+        ),sep='');
+      }
+    }
+    else {
+      ret = paste(ret,"\n",sep='');
+    }
+    return(ret);
+  }
+
+  #XXX check spec validity here.  e.g. column three should be convertible to integer
+
+  i = 1;
+
+  while ( i <= length(opt) ) {
+    if ( debug ) print(paste("processing",opt[i]));
+
+    current.flag = 0; #XXX use NA
+    optstring = opt[i];
+
+
+    #long flag
+    if ( substr(optstring, 1, 2) == '--' ) {
+      if ( debug ) print(paste("  long option:",opt[i]));
+
+      optstring = substring(optstring,3);
+
+      this.flag = NA;
+      this.argument = NA;
+      kv = strsplit(optstring, '=')[[1]];
+      if ( !is.na(kv[2]) ) {
+        this.flag = kv[1];
+        this.argument = paste(kv[-1], collapse="=");
+      } else {
+        this.flag = optstring;
+      }
+
+      rowmatch = grep( this.flag, spec[,col.long.name],fixed=TRUE );
+
+      #long flag is invalid, matches no options
+      if ( length(rowmatch) == 0 ) {
+        stop(paste('long flag "', this.flag, '" is invalid', sep=''));
+
+      #long flag is ambiguous, matches too many options
+      } else if ( length(rowmatch) > 1 ) {
+        # check if there is an exact match and use that
+        rowmatch = which(this.flag == spec[,col.long.name])
+        if(length(rowmatch) == 0) {
+          stop(paste('long flag "', this.flag, '" is ambiguous', sep=''));
+        }
+      }
+
+      #if we have an argument
+      if ( !is.na(this.argument) ) {
+        #if we can't accept the argument, bail out
+        if ( spec[rowmatch, col.has.argument] == flag.no.argument ) {
+          stop(paste('long flag "', this.flag, '" accepts no arguments', sep=''));
+
+        #otherwise assign the argument to the flag
+        } else {
+          storage.mode(this.argument) = spec[rowmatch, col.mode];
+          #don't need here to remove the last value of the vector as argument is in the same string as
+          #the flag name "--flag=argument" so no spurious TRUE was added
+          result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],this.argument);
+	  i = i + 1;
+	  next;
+        }
+
+      #otherwise, we don't have an argument
+      } else {
+        #if we require an argument, bail out
+        ###if ( spec[rowmatch, col.has.argument] == flag.required.argument ) {
+        ###  stop(paste('long flag "', this.flag, '" requires an argument', sep=''));
+
+        #long flag has no attached argument. set flag as present.  set current.flag so we can peek ahead later and consume the argument if it's there
+        ###} else {
+          result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],TRUE);
+          current.flag = rowmatch;
+        ###}
+      }
+
+    #short flag(s)
+    } else if ( substr(optstring, 1, 1) == '-' ) {
+      if ( debug ) print(paste("  short option:",opt[i]));
+
+      these.flags = strsplit(optstring,'')[[1]];
+
+      done = FALSE;
+      for ( j in 2:length(these.flags) ) {
+        this.flag = these.flags[j];
+        rowmatch = grep( this.flag, spec[,col.short.name],fixed=TRUE );
+
+        #short flag is invalid, matches no options
+        if ( length(rowmatch) == 0 ) {
+          stop(paste('short flag "', this.flag, '" is invalid', sep=''));
+
+        #short flag is ambiguous, matches too many options
+        } else if ( length(rowmatch) > 1 ) {
+          stop(paste('short flag "', this.flag, '" is ambiguous', sep=''));
+
+        #short flag has an argument, but is not the last in a compound flag string
+        } else if ( j < length(these.flags) & spec[rowmatch,col.has.argument] == flag.required.argument ) {
+          stop(paste('short flag "', this.flag, '" requires an argument, but has none', sep=''));
+
+        #short flag has no argument, flag it as present
+        } else if ( spec[rowmatch,col.has.argument] == flag.no.argument ) {
+          result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],TRUE);
+	  done = TRUE;
+
+        #can't definitively process this flag yet, need to see if next option is an argument or not
+        } else {
+          result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],TRUE);
+          current.flag = rowmatch;
+          done = FALSE;
+        }
+      }
+      if ( done ) {
+        i = i + 1;
+        next;
+      }
+    }
+
+    #invalid opt
+    if ( current.flag == 0 ) {
+      stop(paste('"', optstring, '" is not a valid option, or does not support an argument', sep=''));
+      #TBD support for positional args
+      #if ( debug ) print(paste('"', optstring, '" not a valid option.  It is appended to getopt(...)$ARGS', sep=''));
+      #result$ARGS = append(result$ARGS, optstring);
+
+    # some dangling flag, handle it
+    } else if ( current.flag > 0 ) {
+      if ( debug ) print('    dangling flag');
+      if ( length(opt) > i ) {
+        peek.optstring = opt[i + 1];
+        if ( debug ) print(paste('      peeking ahead at: "',peek.optstring,'"',sep=''));
+
+        #got an argument.  attach it, increment the index, and move on to the next option.  we don't allow arguments beginning with '-' UNLESS
+	#specfile indicates the value is an "integer" or "double", in which case we allow a leading dash (and verify trailing digits/decimals).
+        if ( substr(peek.optstring, 1, 1) != '-' |
+	  #match negative double
+	  ( substr(peek.optstring, 1, 1) == '-'
+	  & regexpr('^-[0123456789]*\\.?[0123456789]+$',peek.optstring) > 0
+	  & spec[current.flag, col.mode]== 'double'
+	  ) |
+	  #match negative integer
+	  ( substr(peek.optstring, 1, 1) == '-'
+	  & regexpr('^-[0123456789]+$',peek.optstring) > 0
+	  & spec[current.flag, col.mode]== 'integer'
+	  )
+	) {
+          if ( debug ) print(paste('        consuming argument *',peek.optstring,'*',sep=''));
+          storage.mode(peek.optstring) = spec[current.flag, col.mode];
+          #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones
+          result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);
+          i = i + 1;
+
+	#a lone dash
+	} else if ( substr(peek.optstring, 1, 1) == '-' & length(strsplit(peek.optstring,'')[[1]]) == 1 ) {
+          if ( debug ) print('        consuming "lone dash" argument');
+          storage.mode(peek.optstring) = spec[current.flag, col.mode];
+          #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones
+          result[[spec[current.flag, col.long.name]]] =c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);
+          i = i + 1;
+
+        #no argument
+        } else {
+          if ( debug ) print('        no argument!');
+
+          #if we require an argument, bail out
+          if ( spec[current.flag, col.has.argument] == flag.required.argument ) {
+            stop(paste('flag "', this.flag, '" requires an argument', sep=''));
+
+          #otherwise set flag as present.
+          } else if (
+	    spec[current.flag, col.has.argument] == flag.optional.argument |
+	    spec[current.flag, col.has.argument] == flag.no.argument
+	  ) {
+  	    x = TRUE;
+  	    storage.mode(x) = spec[current.flag, col.mode];
+            result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);
+          } else {
+            stop(paste("This should never happen.",
+              "Is your spec argument correct?  Maybe you forgot to set",
+              "ncol=4, byrow=TRUE in your matrix call?"));
+	  }
+        }
+      #trailing flag without required argument
+      } else if ( spec[current.flag, col.has.argument] == flag.required.argument ) {
+        stop(paste('flag "', this.flag, '" requires an argument', sep=''));
+
+      #trailing flag without optional argument
+      } else if ( spec[current.flag, col.has.argument] == flag.optional.argument ) {
+        x = TRUE;
+        storage.mode(x) = spec[current.flag, col.mode];
+        result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);
+
+      #trailing flag without argument
+      } else if ( spec[current.flag, col.has.argument] == flag.no.argument ) {
+        x = TRUE;
+        storage.mode(x) = spec[current.flag, col.mode];
+        result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);
+      } else {
+        stop("this should never happen (2).  please inform the author.");
+      }
+    #no dangling flag, nothing to do.
+    } else {
+    }
+
+    i = i+1;
+  }
+  return(result);
+}
+
+
+
+#########################
+#set a modified version using only long named parameters
+
+getoptLong = function (spec=NULL,opt=commandArgs(TRUE),command=get_Rscript_filename(),usage=FALSE,debug=FALSE) {
+
+  # littler compatibility - map argv vector to opt
+  if (exists("argv", where = .GlobalEnv, inherits = FALSE)) {
+    opt = get("argv", envir = .GlobalEnv);
+  }
+
+  ncol=4;
+  maxcol=6;
+  col.long.name    = 1;
+  #col.short.name   = 2;
+  col.has.argument = 3;
+  col.mode         = 4;
+  col.description  = 5;
+
+  flag.no.argument = 0;
+  flag.required.argument = 1;
+  flag.optional.argument = 2;
+
+  result = list();
+  result$ARGS = vector(mode="character");
+
+  #no spec.  fail.
+  if ( is.null(spec) ) {
+    stop('argument "spec" must be non-null.');
+
+    #spec is not a matrix.  attempt to coerce, if possible.  issue a warning.
+  } else if ( !is.matrix(spec) ) {
+    if ( length(spec)/4 == as.integer(length(spec)/4) ) {
+      warning('argument "spec" was coerced to a 4-column (row-major) matrix.  use a matrix to prevent the coercion');
+      spec = matrix( spec, ncol=ncol, byrow=TRUE );
+    } else {
+      stop('argument "spec" must be a matrix, or a character vector with length divisible by 4, rtfm.');
+    }
+
+    #spec is a matrix, but it has too few columns.
+  } else if ( dim(spec)[2] < ncol ) {
+    stop(paste('"spec" should have at least ",ncol," columns.',sep=''));
+
+    #spec is a matrix, but it has too many columns.
+  } else if ( dim(spec)[2] > maxcol ) {
+    stop(paste('"spec" should have no more than ",maxcol," columns.',sep=''));
+
+    #spec is a matrix, and it has some optional columns.
+  } else if ( dim(spec)[2] != ncol ) {
+    ncol = dim(spec)[2];
+  }
+
+  #sanity check.  make sure long names are unique, and short names are unique.
+  if ( length(unique(spec[,col.long.name])) != length(spec[,col.long.name]) ) {
+    stop(paste('redundant long names for flags (column ',col.long.name,').',sep=''));
+  }
+  # if ( length(na.omit(unique(spec[,col.short.name]))) != length(na.omit(spec[,col.short.name])) ) {
+  #   stop(paste('redundant short names for flags (column ',col.short.name,').',sep=''));
+  # }
+  # convert numeric type to double type
+  spec[,4] <- gsub("numeric", "double", spec[,4])
+
+  # if usage=TRUE, don't process opt, but generate a usage string from the data in spec
+  if ( usage ) {
+    ret = '';
+    ret = paste(ret,"Usage: ",command,sep='');
+    for ( j in 1:(dim(spec))[1] ) {
+      ret = paste(ret,' [-[-',spec[j,col.long.name],']',sep='');
+      if (spec[j,col.has.argument] == flag.no.argument) {
+        ret = paste(ret,']',sep='');
+      } else if (spec[j,col.has.argument] == flag.required.argument) {
+        ret = paste(ret,' <',spec[j,col.mode],'>]',sep='');
+      } else if (spec[j,col.has.argument] == flag.optional.argument) {
+        ret = paste(ret,' [<',spec[j,col.mode],'>]]',sep='');
+      }
+    }
+    # include usage strings
+    if ( ncol >= 5 ) {
+      max.long = max(apply(cbind(spec[,col.long.name]),1,function(x)length(strsplit(x,'')[[1]])));
+      ret = paste(ret,"\n",sep='');
+      for (j in 1:(dim(spec))[1] ) {
+        ret = paste(ret,sprintf(paste("--%-",max.long,"s    %s\n",sep='')
+                                ,spec[j,col.long.name],spec[j,col.description]
+        ),sep='');
+      }
+    }
+    else {
+      ret = paste(ret,"\n",sep='');
+    }
+    return(ret);
+  }
+
+  #XXX check spec validity here.  e.g. column three should be convertible to integer
+
+  i = 1;
+
+  while ( i <= length(opt) ) {
+    if ( debug ) print(paste("processing",opt[i]));
+
+    current.flag = 0; #XXX use NA
+    optstring = opt[i];
+
+
+    #long flag
+    if ( substr(optstring, 1, 2) == '--' ) {
+      if ( debug ) print(paste("  long option:",opt[i]));
+
+      optstring = substring(optstring,3);
+
+      this.flag = NA;
+      this.argument = NA;
+      kv = strsplit(optstring, '=')[[1]];
+      if ( !is.na(kv[2]) ) {
+        this.flag = kv[1];
+        this.argument = paste(kv[-1], collapse="=");
+      } else {
+        this.flag = optstring;
+      }
+
+      rowmatch = grep( this.flag, spec[,col.long.name],fixed=TRUE );
+
+      #long flag is invalid, matches no options
+      if ( length(rowmatch) == 0 ) {
+        stop(paste('long flag "', this.flag, '" is invalid', sep=''));
+
+        #long flag is ambiguous, matches too many options
+      } else if ( length(rowmatch) > 1 ) {
+        # check if there is an exact match and use that
+        rowmatch = which(this.flag == spec[,col.long.name])
+        if(length(rowmatch) == 0) {
+          stop(paste('long flag "', this.flag, '" is ambiguous', sep=''));
+        }
+      }
+
+      #if we have an argument
+      if ( !is.na(this.argument) ) {
+        #if we can't accept the argument, bail out
+        if ( spec[rowmatch, col.has.argument] == flag.no.argument ) {
+          stop(paste('long flag "', this.flag, '" accepts no arguments', sep=''));
+
+          #otherwise assign the argument to the flag
+        } else {
+          storage.mode(this.argument) = spec[rowmatch, col.mode];
+          #don't need here to remove the last value of the vector as argument is in the same string as
+          #the flag name "--flag=argument" so no spurious TRUE was added
+          result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],this.argument);
+          i = i + 1;
+          next;
+        }
+
+        #otherwise, we don't have an argument
+      } else {
+        #if we require an argument, bail out
+        ###if ( spec[rowmatch, col.has.argument] == flag.required.argument ) {
+        ###  stop(paste('long flag "', this.flag, '" requires an argument', sep=''));
+
+        #long flag has no attached argument. set flag as present.  set current.flag so we can peek ahead later and consume the argument if it's there
+        ###} else {
+        result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],TRUE);
+        current.flag = rowmatch;
+        ###}
+      }
+
+      #short flag(s)
+    }
+    #else if ( substr(optstring, 1, 1) == '-' ) {
+    #   if ( debug ) print(paste("  short option:",opt[i]));
+    #
+    #   these.flags = strsplit(optstring,'')[[1]];
+    #
+    #   done = FALSE;
+    #   for ( j in 2:length(these.flags) ) {
+    #     this.flag = these.flags[j];
+    #     rowmatch = grep( this.flag, spec[,col.short.name],fixed=TRUE );
+    #
+    #     #short flag is invalid, matches no options
+    #     if ( length(rowmatch) == 0 ) {
+    #       stop(paste('short flag "', this.flag, '" is invalid', sep=''));
+    #
+    #       #short flag is ambiguous, matches too many options
+    #     } else if ( length(rowmatch) > 1 ) {
+    #       stop(paste('short flag "', this.flag, '" is ambiguous', sep=''));
+    #
+    #       #short flag has an argument, but is not the last in a compound flag string
+    #     } else if ( j < length(these.flags) & spec[rowmatch,col.has.argument] == flag.required.argument ) {
+    #       stop(paste('short flag "', this.flag, '" requires an argument, but has none', sep=''));
+    #
+    #       #short flag has no argument, flag it as present
+    #     } else if ( spec[rowmatch,col.has.argument] == flag.no.argument ) {
+    #       result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],TRUE);
+    #       done = TRUE;
+    #
+    #       #can't definitively process this flag yet, need to see if next option is an argument or not
+    #     } else {
+    #       result[[spec[rowmatch, col.long.name]]] = c(result[[spec[rowmatch, col.long.name]]],TRUE);
+    #       current.flag = rowmatch;
+    #       done = FALSE;
+    #     }
+    #   }
+    #   if ( done ) {
+    #     i = i + 1;
+    #     next;
+    #   }
+    # }
+
+    #invalid opt
+    if ( current.flag == 0 ) {
+      stop(paste('"', optstring, '" is not a valid option, or does not support an argument', sep=''));
+      #TBD support for positional args
+      #if ( debug ) print(paste('"', optstring, '" not a valid option.  It is appended to getopt(...)$ARGS', sep=''));
+      #result$ARGS = append(result$ARGS, optstring);
+
+      # some dangling flag, handle it
+    } else if ( current.flag > 0 ) {
+      if ( debug ) print('    dangling flag');
+      if ( length(opt) > i ) {
+        peek.optstring = opt[i + 1];
+        if ( debug ) print(paste('      peeking ahead at: "',peek.optstring,'"',sep=''));
+
+        #got an argument.  attach it, increment the index, and move on to the next option.  we don't allow arguments beginning with '-' UNLESS
+        #specfile indicates the value is an "integer" or "double", in which case we allow a leading dash (and verify trailing digits/decimals).
+        if ( substr(peek.optstring, 1, 1) != '-' |
+             #match negative double
+             ( substr(peek.optstring, 1, 1) == '-'
+               & regexpr('^-[0123456789]*\\.?[0123456789]+$',peek.optstring) > 0
+               & spec[current.flag, col.mode]== 'double'
+             ) |
+             #match negative integer
+             ( substr(peek.optstring, 1, 1) == '-'
+               & regexpr('^-[0123456789]+$',peek.optstring) > 0
+               & spec[current.flag, col.mode]== 'integer'
+             )
+        ) {
+          if ( debug ) print(paste('        consuming argument *',peek.optstring,'*',sep=''));
+          storage.mode(peek.optstring) = spec[current.flag, col.mode];
+          #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones
+          result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);
+          i = i + 1;
+
+          #a lone dash
+        } else if ( substr(peek.optstring, 1, 1) == '-' & length(strsplit(peek.optstring,'')[[1]]) == 1 ) {
+          if ( debug ) print('        consuming "lone dash" argument');
+          storage.mode(peek.optstring) = spec[current.flag, col.mode];
+          #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones
+          result[[spec[current.flag, col.long.name]]] =c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);
+          i = i + 1;
+
+          #no argument
+        } else {
+          if ( debug ) print('        no argument!');
+
+          #if we require an argument, bail out
+          if ( spec[current.flag, col.has.argument] == flag.required.argument ) {
+            stop(paste('flag "', this.flag, '" requires an argument', sep=''));
+
+            #otherwise set flag as present.
+          } else if (
+            spec[current.flag, col.has.argument] == flag.optional.argument |
+            spec[current.flag, col.has.argument] == flag.no.argument
+          ) {
+            x = TRUE;
+            storage.mode(x) = spec[current.flag, col.mode];
+            result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);
+          } else {
+            stop(paste("This should never happen.",
+                       "Is your spec argument correct?  Maybe you forgot to set",
+                       "ncol=4, byrow=TRUE in your matrix call?"));
+          }
+        }
+        #trailing flag without required argument
+      } else if ( spec[current.flag, col.has.argument] == flag.required.argument ) {
+        stop(paste('flag "', this.flag, '" requires an argument', sep=''));
+
+        #trailing flag without optional argument
+      } else if ( spec[current.flag, col.has.argument] == flag.optional.argument ) {
+        x = TRUE;
+        storage.mode(x) = spec[current.flag, col.mode];
+        result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);
+
+        #trailing flag without argument
+      } else if ( spec[current.flag, col.has.argument] == flag.no.argument ) {
+        x = TRUE;
+        storage.mode(x) = spec[current.flag, col.mode];
+        result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);
+      } else {
+        stop("this should never happen (2).  please inform the author.");
+      }
+      #no dangling flag, nothing to do.
+    } else {
+    }
+
+    i = i+1;
+  }
+  return(result);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/heatMapClustering.R	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,896 @@
+# A command-line interface to plot heatmap based on expression or diff. exp. analysis
+# written by Jimmy Vandel
+# one of these arguments is required:
+#
+#
+initial.options <- commandArgs(trailingOnly = FALSE)
+file.arg.name <- "--file="
+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
+script.basename <- dirname(script.name)
+source(file.path(script.basename, "utils.R"))
+source(file.path(script.basename, "getopt.R"))
+
+#addComment("Welcome R!")
+
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+loc <- Sys.setlocale("LC_NUMERIC", "C")
+
+#get starting time
+start.time <- Sys.time()
+
+
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE, OutDec=".")
+
+#get options
+args <- commandArgs()
+
+# get options, using the spec as defined by the enclosed list.
+# we read the options   from the default: commandArgs(TRUE).
+spec <- matrix(c(
+  "expressionFile", "x", 1, "character",
+  "diffAnalyseFile", "x", 1, "character",
+  "factorInfo","x", 1, "character",
+  "genericData","x", 0, "logical",
+  "comparisonName","x",1,"character",
+  "comparisonNameLow","x",1,"character",
+  "comparisonNameHigh","x",1,"character",
+  "filterInputOutput","x", 1, "character",
+  "FCthreshold","x", 1, "double",
+  "pvalThreshold","x", 1, "double",
+  "geneListFiltering","x",1,"character",
+  "clusterNumber","x",1,"integer",
+  "maxRows","x",1,"integer",
+  "sampleClusterNumber","x",1,"integer",
+  "dataTransformation","x",1,"character",
+  "distanceMeasure","x",1,"character",
+  "aggloMethod","x",1,"character",
+  "personalColors","x",1,"character",
+  "sideBarColorPalette","x",1,"character",
+  "format", "x", 1, "character",
+  "quiet", "x", 0, "logical",
+  "log", "x", 1, "character",
+  "outputFile" , "x", 1, "character"),
+  byrow=TRUE, ncol=4)
+opt <- getoptLong(spec)
+
+# enforce the following required arguments
+if (is.null(opt$log)) {
+  addComment("[ERROR]'log file' is required")
+  q( "no", 1, F )
+}
+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)
+if (is.null(opt$format)) {
+  addComment("[ERROR]'output format' is required",T,opt$log)
+  q( "no", 1, F )
+}
+if (is.null(opt$outputFile)) {
+  addComment("[ERROR]'output file' is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if(is.null(opt$expressionFile) && !is.null(opt$genericData)){
+  addComment("[ERROR]generic data clustering is based on expression clustering",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$clusterNumber) || opt$clusterNumber<2) {
+  addComment("[ERROR]valid genes clusters number is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$sampleClusterNumber) || opt$sampleClusterNumber<1) {
+  addComment("[ERROR]valid samples clusters number is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$dataTransformation)) {
+  addComment("[ERROR]data transformation option is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$distanceMeasure)) {
+  addComment("[ERROR]distance measure option is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$aggloMethod)) {
+  addComment("[ERROR]agglomeration method option is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$maxRows) || opt$maxRows<2) {
+  addComment("[ERROR]valid plotted row number is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (!is.null(opt[["comparisonName"]]) && nchar(opt[["comparisonName"]])==0){
+  addComment("[ERROR]you have to specify comparison",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (!is.null(opt$comparisonNameLow) && nchar(opt$comparisonNameLow)==0){
+  addComment("[ERROR]you have to specify comparisonLow",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (!is.null(opt$comparisonNameHigh) && nchar(opt$comparisonNameHigh)==0){
+  addComment("[ERROR]you have to specify comparisonHigh",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (is.null(opt$genericData) && (!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh))){
+  addComment("[ERROR]comparisonLow and comparisonHigh can be specified only with generic data",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (!is.null(opt$genericData) && !is.null(opt[["comparisonName"]])){
+  addComment("[ERROR]basic comparison cannot be specified for generic data",T,opt$log)
+  q( "no", 1, F )
+}
+
+if ((!is.null(opt[["comparisonName"]]) || !is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)) && is.null(opt$diffAnalyseFile)) {
+  addComment("[ERROR]'diff. exp. analysis file' is required",T,opt$log)
+  q( "no", 1, F )
+}
+
+if (!is.null(opt$genericData) && !is.null(opt$diffAnalyseFile) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh)){
+  addComment("[ERROR]Missing comparison information for filtering",T,opt$log)
+  q( "no", 1, F )
+}
+
+if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && (is.null(opt[["comparisonName"]]) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh))) {
+  addComment("[ERROR]'comparisons' are missing for filtering",T,opt$log)
+  q( "no", 1, F )
+}
+
+if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && !is.null(opt$geneListFiltering)) {
+  addComment("[ERROR]Cannot have two filtering strategies",T,opt$log)
+  q( "no", 1, F )
+}
+
+verbose <- if (is.null(opt$quiet)) {
+  TRUE
+}else{
+  FALSE}
+
+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)
+
+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)
+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)
+
+#directory for plots and HTML
+dir.create(file.path(getwd(), "plotDir"))
+dir.create(file.path(getwd(), "plotLyDir"))
+
+#silent package loading
+suppressPackageStartupMessages({
+  library("plotly")
+  library("dendextend")
+  #library("ggdendro")
+  #library("plyr")
+  library("ggplot2")
+  library("heatmaply")
+  library("circlize")
+  #library("RColorBrewer")
+  #source("https://bioconductor.org/biocLite.R")
+  #biocLite("ComplexHeatmap")
+  library("ComplexHeatmap")
+  #library("processx")
+})
+
+expressionToCluster=!is.null(opt$expressionFile)
+
+#load input data files
+if(expressionToCluster){
+  #first expression data
+  expressionMatrix=read.csv(file=opt$expressionFile,header=F,sep="\t",colClasses="character")
+  #remove first row to convert it as colnames (to avoid X before colnames with header=T)
+  colNamesData=expressionMatrix[1,-1]
+  expressionMatrix=expressionMatrix[-1,]
+  #remove first colum to convert it as rownames
+  rowNamesData=expressionMatrix[,1]
+  expressionMatrix=expressionMatrix[,-1]
+  if(is.data.frame(expressionMatrix)){
+    expressionMatrix=data.matrix(expressionMatrix)
+  }else{
+    expressionMatrix=data.matrix(as.numeric(expressionMatrix))
+  }
+  dimnames(expressionMatrix)=list(rowNamesData,colNamesData)
+
+  #check input files
+  if (!is.numeric(expressionMatrix)) {
+    addComment("[ERROR]Expression data is not fully numeric!",T,opt$log,display=FALSE)
+    q( "no", 1, F )
+  }
+
+  addComment("[INFO]Expression data loaded and checked")
+  addComment(c("[INFO]Dim of expression matrix:",dim(expressionMatrix)),T,opt$log,display=FALSE)
+}
+
+nbComparisons=0
+nbColPerContrast=5
+comparisonMatrix=NULL
+comparisonMatrixInfoGene=NULL
+#if available comparisons
+if(!is.null(opt[["comparisonName"]])){
+    #load results from differential expression analysis
+    #consider first row contains column names
+    comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t")
+    colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,]))
+    #remove the second line also as it's information line (p-val,FDR.p-val,FC,logFC)
+    comparisonMatrix=comparisonMatrix[-c(1,2),]
+    #remove first and second colums, convert the first one as rownames
+    rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1]))
+    #and save second column content that contain geneInfo
+    comparisonMatrixInfoGene=as.character(unlist(comparisonMatrix[,2]))
+    names(comparisonMatrixInfoGene)=as.character(unlist(comparisonMatrix[,1]))
+    comparisonMatrix=comparisonMatrix[,-c(1,2)]
+
+    comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix))
+
+    if (ncol(comparisonMatrix)%%nbColPerContrast != 0) {
+      addComment("[ERROR]Diff. exp. data does not contain good number of columns per contrast, should contains in this order:p-val,FDR.p-val,FC,log2(FC) and t-stat",T,opt$log,display=FALSE)
+      q( "no", 1, F )
+    }
+
+    if(max(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])>1 || min(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])<0){
+      addComment("[ERROR]Seem that diff. exp. data does not contain correct values for p-val and FDR.p-val columns, should be including in [0,1] interval",T,opt$log,display=FALSE)
+      q( "no", 1, F )
+    }
+
+    if (!is.numeric(comparisonMatrix)) {
+      addComment("[ERROR]Diff. exp. data is not fully numeric!",T,opt$log,display=FALSE)
+      q( "no", 1, F )
+    }
+
+    if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){
+      addComment("[WARNING]All genes from diff. exp. file are not included in expression file",T,opt$log,display=FALSE)
+    }
+
+    if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){
+      addComment("[WARNING]All genes from expression file are not included in diff. exp. file",T,opt$log,display=FALSE)
+    }
+
+    addComment("[INFO]Diff. exp. analysis loaded and checked",T,opt$log,display=FALSE)
+    addComment(c("[INFO]Dim of original comparison matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE)
+
+    #restrict to user specified comparisons
+    restrictedComparisons=unlist(strsplit(opt[["comparisonName"]],","))
+    #should be improved to avoid selection of column names starting too similarly
+    colToKeep=which(unlist(lapply(colnames(comparisonMatrix),function(x)any(startsWith(x,restrictedComparisons)))))
+    comparisonMatrix=matrix(comparisonMatrix[,colToKeep],ncol=length(colToKeep),dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[colToKeep]))
+
+    #get number of required comparisons
+    nbComparisons=ncol(comparisonMatrix)/nbColPerContrast
+
+    addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE)
+}
+
+#should be only the case with generic data
+if(!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)){
+    #load generic data used for filtering
+    nbColPerContrast=1
+    #consider first row contains column names
+    comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t")
+    colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,]))
+    #remove first colum, convert the first one as rownames
+    rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1]))
+    comparisonMatrix=comparisonMatrix[-1,-1]
+
+    comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix))
+
+    if (!is.numeric(comparisonMatrix)) {
+      addComment("[ERROR]Filtering matrix is not fully numeric!",T,opt$log,display=FALSE)
+      q( "no", 1, F )
+    }
+
+    if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){
+      addComment("[WARNING]All genes from filtering file are not included in expression file",T,opt$log,display=FALSE)
+    }
+
+    if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){
+      addComment("[WARNING]All genes from expression file are not included in filtering file",T,opt$log,display=FALSE)
+    }
+
+    addComment("[INFO]Filtering file loaded and checked",T,opt$log,display=FALSE)
+    addComment(c("[INFO]Dim of original filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE)
+
+    #restrict to user specified comparisons
+    restrictedComparisons=c()
+    if(!is.null(opt$comparisonNameLow))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameLow,","))))
+    if(!is.null(opt$comparisonNameHigh))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameHigh,","))))
+
+    if (!all(restrictedComparisons%in%colnames(comparisonMatrix))){
+      addComment("[ERROR]Selected columns in filtering file are not present in filtering matrix!",T,opt$log,display=FALSE)
+      q( "no", 1, F )
+    }
+    comparisonMatrix=matrix(comparisonMatrix[,restrictedComparisons],ncol=length(restrictedComparisons),dimnames = list(rownames(comparisonMatrix),restrictedComparisons))
+
+    #get number of required comparisons
+    nbComparisons=ncol(comparisonMatrix)
+
+    addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE)
+}
+
+
+
+factorInfoMatrix=NULL
+if(!is.null(opt$factorInfo)){
+  #get group information
+  #load factors file
+  factorInfoMatrix=read.csv(file=opt$factorInfo,header=F,sep="\t",colClasses="character")
+  #remove first row to convert it as colnames
+  colnames(factorInfoMatrix)=factorInfoMatrix[1,]
+  factorInfoMatrix=factorInfoMatrix[-1,]
+  #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
+  rownames(factorInfoMatrix)=factorInfoMatrix[,1]
+
+  factorBarColor=colnames(factorInfoMatrix)[2]
+
+  if(ncol(factorInfoMatrix)>2){
+    addComment("[ERROR]Factors file should not contain more than 2 columns",T,opt$log,display=FALSE)
+    q( "no", 1, F )
+  }
+
+  #factor file is used for color band on heatmap, so all expression matrix column should be in the factor file
+  if(expressionToCluster && length(setdiff(colnames(expressionMatrix),rownames(factorInfoMatrix)))!=0){
+    addComment("[ERROR]Missing samples in factor file",T,opt$log,display=FALSE)
+    q( "no", 1, F )
+  }
+
+  #factor file is used for color band on heatmap, so all comparison matrix column should be in the factor file
+  if(!expressionToCluster && length(setdiff(colnames(comparisonMatrix),rownames(factorInfoMatrix)))!=0){
+    addComment("[ERROR]Missing differential contrasts in factor file",T,opt$log,display=FALSE)
+    q( "no", 1, F )
+  }
+
+  addComment("[INFO]Factors OK",T,opt$log,display=FALSE)
+  addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE)
+}
+
+if(!is.null(opt$personalColors)){
+ ##parse personal colors
+  personalColors=unlist(strsplit(opt$personalColors,","))
+  if(length(personalColors)==2){
+    ##add medium color between two to get three colors
+    personalColors=c(personalColors[1],paste(c("#",as.character(as.hexmode(floor(apply(col2rgb(personalColors),1,mean))))),collapse=""),personalColors[2])
+  }
+  if(length(personalColors)!=3){
+    addComment("[ERROR]Personalized colors doesn't contain enough colors",T,opt$log,display=FALSE)
+    q( "no", 1, F )
+  }
+
+}
+
+
+if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="input"){
+  #filter input data
+
+    if(is.null(opt$geneListFiltering)){
+      #filtering using stat thresholds
+      #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold)))
+      if(is.null(opt$genericData)){
+        #diff. expression matrix
+        rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>log2(opt$FCthreshold))))!=0))))
+      }else{
+        #generic filtering matrix
+        rowToKeep=rownames(comparisonMatrix)
+        if(!is.null(opt$comparisonNameLow)){
+          restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,","))
+          rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0)))))
+        }
+        if(!is.null(opt$comparisonNameHigh)){
+          restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,","))
+          rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0)))))
+        }
+      }
+    }else{
+      #filtering using user gene list
+      geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F)
+      rowToKeep=unlist(c(geneListFiltering))
+    }
+
+    if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){
+      #should arrive only with user gene list filtering with diff.exp. results clustering
+      addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log)
+      rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix))
+    }
+
+    if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){
+      addComment("[WARNING] some genes selected by the input filter are not in the expression file",T,opt$log)
+      rowToKeep=intersect(rowToKeep,rownames(expressionMatrix))
+    }
+
+    if(length(rowToKeep)==0){
+      addComment("[ERROR]No gene survived to the input filtering thresholds, execution will be aborted.
+                 Please consider to change threshold values and re-run the tool.",T,opt$log)
+      q( "no", 1, F )
+    }
+
+    #filter comparison matrix
+    if(!is.null(comparisonMatrix)){
+      comparisonMatrix=matrix(comparisonMatrix[rowToKeep,],ncol=ncol(comparisonMatrix),dimnames = list(rowToKeep,colnames(comparisonMatrix)))
+      if(!is.null(comparisonMatrixInfoGene))comparisonMatrixInfoGene=comparisonMatrixInfoGene[rowToKeep]
+    }
+    #then expression matrix
+    if(expressionToCluster)expressionMatrix=matrix(expressionMatrix[rowToKeep,],ncol=ncol(expressionMatrix),dimnames = list(rowToKeep,colnames(expressionMatrix)))
+
+    if(!is.null(comparisonMatrix) && expressionToCluster && nrow(comparisonMatrix)!=nrow(expressionMatrix)){
+      addComment("[ERROR]Problem during input filtering, please check code",T,opt$log,display=FALSE)
+      q( "no", 1, F )
+    }
+
+    addComment("[INFO]Filtering step done",T,opt$log,display=FALSE)
+    addComment(c("[INFO]Input filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE)
+}
+
+
+addComment("[INFO]Ready to plot",T,opt$log,display=FALSE)
+
+##---------------------
+
+#plot heatmap
+if(expressionToCluster){
+  #will make clustering based on expression value or generic value
+  dataToHeatMap=expressionMatrix
+  valueMeaning="Intensity"
+  if(!is.null(opt$genericData))valueMeaning="Value"
+}else{
+  #will make clustering on log2(FC) values
+  dataToHeatMap=matrix(comparisonMatrix[,seq(4,ncol(comparisonMatrix),nbColPerContrast)],ncol=nbComparisons,dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)]))
+  valueMeaning="Log2(FC)"
+}
+addComment(c("[INFO]Dim of heatmap matrix:",dim(dataToHeatMap)),T,opt$log,display=FALSE)
+
+if(nrow(dataToHeatMap)==1 && ncol(dataToHeatMap)==1){
+  addComment("[ERROR]Cannot make clustering with unique cell tab",T,opt$log,display=FALSE)
+  q( "no", 1, F )
+}
+
+
+#apply data transformation if needed
+if(opt$dataTransformation=="log"){
+  dataToHeatMap=log(dataToHeatMap)
+  valueMeaning=paste(c("log(",valueMeaning,")"),collapse="")
+  addComment("[INFO]Data to cluster and to display in the heatmap are log transformed",T,opt$log,display=FALSE)
+}
+if(opt$dataTransformation=="log2"){
+  dataToHeatMap=log2(dataToHeatMap)
+  valueMeaning=paste(c("log2(",valueMeaning,")"),collapse="")
+  addComment("[INFO]Data to cluster and to display in the heatmap are log2 transformed",T,opt$log,display=FALSE)
+}
+
+maxRowsToDisplay=opt$maxRows
+
+nbClusters=opt$clusterNumber
+if(nbClusters>nrow(dataToHeatMap)){
+  #correct number of clusters if needed
+  nbClusters=nrow(dataToHeatMap)
+  addComment(c("[WARNING]Not enough rows to reach required clusters number, it is reduced to number of rows:",nbClusters),T,opt$log,display=FALSE)
+}
+
+nbSampleClusters=opt$sampleClusterNumber
+if(nbSampleClusters>ncol(dataToHeatMap)){
+  #correct number of clusters if needed
+  nbSampleClusters=ncol(dataToHeatMap)
+  addComment(c("[WARNING]Not enough columns to reach required conditions clusters number, it is reduced to number of columns:",nbSampleClusters),T,opt$log,display=FALSE)
+}
+
+colClust=FALSE
+rowClust=FALSE
+effectiveRowClust=FALSE
+
+#make appropriate clustering if needed
+if(nrow(dataToHeatMap)>1 && nbClusters>1)rowClust=hclust(distExtended(dataToHeatMap,method = opt$distanceMeasure),method = opt$aggloMethod)
+if(ncol(dataToHeatMap)>1 && nbSampleClusters>1)colClust=hclust(distExtended(t(dataToHeatMap),method = opt$distanceMeasure),method = opt$aggloMethod)
+
+if(nrow(dataToHeatMap)>maxRowsToDisplay){
+  #make subsampling based on preliminary global clustering
+  #clusteringResults=cutree(rowClust,nbClusters)
+  #heatMapGenesToKeep=unlist(lapply(seq(1,nbClusters),function(x)sample(which(clusteringResults==x),min(length(which(clusteringResults==x)),round(maxRowsToDisplay/nbClusters)))))
+  ##OR
+  #basic subsampling
+  heatMapGenesToKeep=sample(rownames(dataToHeatMap),maxRowsToDisplay)
+  effectiveDataToHeatMap=matrix(dataToHeatMap[heatMapGenesToKeep,],ncol=ncol(dataToHeatMap),dimnames=list(heatMapGenesToKeep,colnames(dataToHeatMap)))
+  effectiveNbClusters=min(nbClusters,maxRowsToDisplay)
+  if(nrow(effectiveDataToHeatMap)>1 && effectiveNbClusters>1)effectiveRowClust=hclust(distExtended(effectiveDataToHeatMap, method = opt$distanceMeasure),method = opt$aggloMethod)
+  addComment(c("[WARNING]Too many rows for efficient heatmap drawing",maxRowsToDisplay,"subsampling is done for vizualization only"),T,opt$log,display=FALSE)
+  rm(heatMapGenesToKeep)
+}else{
+  effectiveDataToHeatMap=dataToHeatMap
+  effectiveRowClust=rowClust
+  effectiveNbClusters=nbClusters
+}
+
+addComment(c("[INFO]Dim of plotted heatmap matrix:",dim(effectiveDataToHeatMap)),T,opt$log,display=FALSE)
+
+personalized_hoverinfo=matrix("",ncol = ncol(effectiveDataToHeatMap),nrow = nrow(effectiveDataToHeatMap),dimnames = dimnames(effectiveDataToHeatMap))
+if(expressionToCluster){
+  for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\n",valueMeaning,": ",effectiveDataToHeatMap[iRow,iCol]),collapse="")}}
+}else{
+  for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\nFC: ",round(2^effectiveDataToHeatMap[iRow,iCol],2)),collapse="")}}
+}
+
+#trying to overcome limitation of heatmaply package to modify xtick and ytick label, using directly plotly functions, but for now plotly do not permit to have personalized color for each x/y tick separately
+test=FALSE
+if(test==TRUE){
+
+  #define dendogram shapes
+  dd.row <- as.dendrogram(effectiveRowClust)
+  dd.col <- as.dendrogram(colClust)
+
+  #and color them
+  dd.row=color_branches(dd.row, k = effectiveNbClusters, groupLabels = T)
+  dd.col=color_branches(dd.col, k = nbSampleClusters, groupLabels = T)
+
+  #generating function for dendogram from segment list
+  ggdend <- function(df) {
+    ggplot() +
+      geom_segment(data = df, aes(x=x, y=y, xend=xend, yend=yend)) +
+      labs(x = "", y = "") + theme_minimal() +
+      theme(axis.text = element_blank(), axis.ticks = element_blank(),
+            panel.grid = element_blank())
+  }
+
+  # generate x/y dendogram plots
+  px <- ggdend(dendro_data(dd.col)$segments)
+  py <- ggdend(dendro_data(dd.row)$segments) + coord_flip()
+
+  # reshape data matrix
+  col.ord <- order.dendrogram(dd.col)
+  row.ord <- order.dendrogram(dd.row)
+  xx <- effectiveDataToHeatMap[row.ord, col.ord]
+  # and also personalized_hoverinfo
+  personalized_hoverinfo=personalized_hoverinfo[row.ord, col.ord]
+
+  # hide axis ticks and grid lines
+  eaxis <- list(
+    showticklabels = FALSE,
+    showgrid = FALSE,
+    zeroline = FALSE
+  )
+
+  #make the empty plot
+  p_empty <- plot_ly() %>%
+    layout(margin = list(l = 200),
+           xaxis = eaxis,
+           yaxis = eaxis)
+
+  heatmap.plotly <- plot_ly(
+    z = xx, x = 1:ncol(xx), y = 1:nrow(xx), colors = viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno"),
+    type = "heatmap", showlegend = FALSE, text = personalized_hoverinfo, hoverinfo = "text",
+    colorbar = list(
+      # Capitalise first letter
+      title = valueMeaning,
+      tickmode = "array",
+      len = 0.3
+    )
+  ) %>%
+    layout(
+      xaxis = list(
+        tickfont = list(size = 10,color=get_leaves_branches_col(dd.row)),
+        tickangle = 45,
+        tickvals = 1:ncol(xx), ticktext = colnames(xx),
+        linecolor = "#ffffff",
+        range = c(0.5, ncol(xx) + 0.5),
+        showticklabels = TRUE
+      ),
+      yaxis = list(
+        tickfont = list(size = 10, color=get_leaves_branches_col(dd.col)),
+        tickangle = 0,
+        tickvals = 1:nrow(xx), ticktext = rownames(xx),
+        linecolor = "#ffffff",
+        range = c(0.5, nrow(xx) + 0.5),
+        showticklabels = TRUE
+      )
+    )
+
+  #generate plotly
+  pp <- subplot(px, p_empty, heatmap.plotly, py, nrows = 2, margin = 0,widths = c(0.8,0.2),heights = c(0.2,0.8), shareX = TRUE,
+                shareY = TRUE)
+
+  #save image file
+  export(pp, file =  paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse=""))
+  #rise a bug due to token stuf
+  #orca(pp, file =  paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse=""))
+
+
+  #save plotLy file
+  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F)
+
+  #htmlwidgets::saveWidget(as_widget(pp),"~/Bureau/test.html",selfcontained = F)
+
+}else{ #test
+  label_names=c("Probe","Condition",valueMeaning)
+
+  # #color hclust objects
+  # dd.row=color_branches(effectiveRowClust, k = effectiveNbClusters)
+  # #rowColors=get_leaves_branches_col(dd.row)
+  # #rowColors[order.dendrogram(dd.row)]=rowColors
+  # rowGroup=cutree(effectiveRowClust, k = effectiveNbClusters)
+  #
+  # #get order of class as they will be displayed on the dendogram
+  # rowGroupRenamed=data.frame(cluster=mapvalues(rowGroup, unique(rowGroup[order.dendrogram(dd.row)[nleaves(dd.row):1]]), 1:effectiveNbClusters))
+  #
+  #  dd.col=color_branches(colClust, k = nbSampleClusters)
+  #  #colColors=get_leaves_branches_col(dd.col)
+  #  #colColors[order.dendrogram(dd.col)]=colColors
+  #  colGroup=cutree(colClust, k = nbSampleClusters)
+  #
+  # # #get order of class as they will be displayed on the dendogram
+  #  colGroupRenamed=data.frame(sampleCluster=mapvalues(colGroup, unique(colGroup[order.dendrogram(dd.col)[nleaves(dd.col):1]]), 1:nbSampleClusters))
+
+
+  #while option is not correctly managed by heatmap apply, put personalized_hoverinfo to NULL
+  personalized_hoverinfo=NULL
+
+  if(is.null(opt$personalColors)){
+    heatmapColors=viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno")
+  }else{
+    heatmapColors=personalColors
+  }
+
+  colGroupRenamed=NULL
+  if(!is.null(factorInfoMatrix)){
+    colGroupRenamed=eval(parse(text=(paste("data.frame(",factorBarColor,"=factorInfoMatrix[colnames(effectiveDataToHeatMap),2])",sep=""))))
+    sideBarGroupNb=length(table(factorInfoMatrix[colnames(effectiveDataToHeatMap),2]))
+    sideBarColorPaletteName="Spectral"
+    if(!is.null(opt$sideBarColorPalette) && opt$sideBarColorPalette%in%rownames(RColorBrewer::brewer.pal.info)){
+      sideBarColorPaletteName=opt$sideBarColorPalette
+    }
+    sideBarColorPalette=setNames(colorRampPalette(RColorBrewer::brewer.pal(RColorBrewer::brewer.pal.info[sideBarColorPaletteName,"maxcolors"], sideBarColorPaletteName))(sideBarGroupNb),unique(factorInfoMatrix[colnames(effectiveDataToHeatMap),2]))
+  }
+
+  if(!is.null(colGroupRenamed)){
+    pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,col_side_colors=colGroupRenamed,col_side_palette=sideBarColorPalette,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors)
+  }else{
+    pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors)
+  }
+
+
+  #save image file
+  export(pp, file =  paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse=""))
+  #rise a bug due to token stuf
+  #orca(pp, file =  paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse=""))
+
+
+  #save plotLy file
+  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F)
+
+}
+addComment("[INFO]Heatmap drawn",T,opt$log,display=FALSE)
+
+
+#plot circular heatmap
+if(!class(effectiveRowClust)=="logical"){
+  dendo=as.dendrogram(effectiveRowClust)
+
+  if(is.null(opt$personalColors)){
+    col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.01)), viridis(101,option = "inferno"))
+  }else{
+    col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.5)), personalColors)
+  }
+
+  if(opt$format=="pdf"){
+    pdf(paste(c("./plotDir/circularPlot.pdf"),collapse=""))}else{
+      png(paste(c("./plotDir/circularPlot.png"),collapse=""))
+    }
+
+  circos.par(cell.padding = c(0, 0, 0, 0), gap.degree = 5)
+  circos.initialize(c(rep("a",nrow(effectiveDataToHeatMap)),"b"),xlim=cbind(c(0,0),c(nrow(effectiveDataToHeatMap),5)))
+  circos.track(ylim = c(0, 1), bg.border = NA, panel.fun = function(x, y) {
+    if(CELL_META$sector.index=="a"){
+      nr = ncol(effectiveDataToHeatMap)
+      nc = nrow(effectiveDataToHeatMap)
+      circos.text(1:nc- 0.5, rep(0,nc), adj = c(0, 0),
+                  rownames(effectiveDataToHeatMap)[order.dendrogram(dendo)], facing = "clockwise", niceFacing = TRUE, cex = 0.3)
+    }
+  })
+
+  circos.track(ylim = c(0, ncol(effectiveDataToHeatMap)), bg.border = NA, panel.fun = function(x, y) {
+
+    m = t(matrix(effectiveDataToHeatMap[order.dendrogram(dendo),],ncol=ncol(effectiveDataToHeatMap)))
+    col_mat = col_fun(m)
+    nr = nrow(m)
+    nc = ncol(m)
+    if(CELL_META$sector.index=="a"){
+      for(i in 1:nr) {
+        circos.rect(1:nc - 1, rep(nr - i, nc),
+                    1:nc, rep(nr - i + 1, nc),
+                    border = col_mat[i, ], col = col_mat[i, ])
+      }
+    }else{
+      circos.text(rep(1,nr), seq(nr,1,-1) , colnames(effectiveDataToHeatMap),cex = 0.3)
+    }
+  })
+
+  #dendo = color_branches(dendo, k = effectiveNbClusters, col = colorRampPalette(brewer.pal(12,"Set3"))(effectiveNbClusters))
+  dendo = color_branches(dendo, k = effectiveNbClusters, col = rev(colorspace::rainbow_hcl(effectiveNbClusters)))
+
+
+  circos.track(ylim = c(0, attributes(dendo)$height), bg.border = NA, track.height = 0.25,
+               panel.fun = function(x, y) {
+                 if(CELL_META$sector.index=="a")circos.dendrogram(dendo)} )
+
+  circos.clear()
+  ##add legend
+  lgd_links = Legend(at = seq(ceiling(min(effectiveDataToHeatMap)),floor(max(effectiveDataToHeatMap)),ceiling((floor(max(effectiveDataToHeatMap))-ceiling(min(effectiveDataToHeatMap)))/4)), col_fun = col_fun,
+                     title_position = "topleft", grid_width = unit(5, "mm") ,title = valueMeaning)
+
+  pushViewport(viewport(x = 0.85, y = 0.80,
+                        width = 0.1,
+                        height = 0.1,
+                        just = c("left", "bottom")))
+  grid.draw(lgd_links)
+  upViewport()
+
+
+  dev.off()
+
+  addComment("[INFO]Circular heatmap drawn",T,opt$log,display=FALSE)
+  loc <- Sys.setlocale("LC_NUMERIC","C")
+}else{
+  addComment(c("[WARNING]Circular plot will not be plotted considering row or cluster number < 2"),T,opt$log,display=FALSE)
+}
+rm(effectiveDataToHeatMap,effectiveRowClust,effectiveNbClusters)
+
+#plot screeplot
+if(class(rowClust)!="logical" && nrow(dataToHeatMap)>2){
+  screePlotData=c()
+  for(iNbClusters in 2:(nbClusters+min(10,max(0,nrow(dataToHeatMap)-nbClusters)))){
+    clusteringResults=cutree(rowClust,iNbClusters)
+    #clusteringResults=kmeans(dataToHeatMap,iNbClusters)$cluster
+
+    #compute variance between each intra-class points amongst themselves (need at least 3 points by cluster)
+    #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>2){var(dist(dataToHeatMap[temp,]))}else{0}}))) )
+    #compute variance between each intra-class points and fictive mean point (need at least 2 points by cluster)
+    #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){   var(dist(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]))[1:length(temp)]) }else{0}}))) )
+    if(ncol(dataToHeatMap)>1)screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){   sum((distExtended(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]),method = opt$distanceMeasure)[1:length(temp)])^2) }else{0}}))) )
+    else screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){   sum((dataToHeatMap[temp,]-mean(dataToHeatMap[temp,]))^2) }else{0}}))) )
+  }
+
+  dataToPlot=data.frame(clusterNb=seq(2,length(screePlotData)+1),wcss=screePlotData)
+  p <- ggplot(data=dataToPlot, aes(clusterNb,wcss)) + geom_point(colour="#EE4444") + geom_line(colour="#DD9999") +
+    ggtitle("Scree plot") + theme_bw() + xlab(label="Cluster number") + ylab(label="Within cluster sum of squares") +
+    theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position = "none") +
+    scale_x_continuous(breaks=seq(min(dataToPlot$clusterNb), max(dataToPlot$clusterNb), 1))
+
+  #save plotly files
+  pp <- ggplotly(p)
+
+  if(opt$format=="pdf"){
+    pdf(paste(c("./plotDir/screePlot.pdf"),collapse=""))}else{
+      png(paste(c("./plotDir/screePlot.png"),collapse=""))
+    }
+  plot(p)
+  dev.off()
+
+  #save plotly files
+  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/screePlot.html"),collapse=""),selfcontained = F)
+
+  addComment("[INFO]Scree plot drawn",T,opt$log,display=FALSE)
+}else{
+  addComment(c("[WARNING]Scree plot will not be plotted considering row number <= 2"),T,opt$log,display=FALSE)
+}
+
+##----------------------
+
+#filter output based on parameters
+
+rowToKeep=rownames(dataToHeatMap)
+if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="output"){
+  #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold)))
+  if(is.null(opt$geneListFiltering)){
+    if(is.null(opt$genericData)){
+      #diff. expression matrix
+      rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<=opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>=log2(opt$FCthreshold))))!=0))))
+    }else{
+      #generic filtering matrix
+      rowToKeep=rownames(comparisonMatrix)
+      if(!is.null(opt$comparisonNameLow)){
+        restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,","))
+        rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0)))))
+      }
+      if(!is.null(opt$comparisonNameHigh)){
+        restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,","))
+        rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0)))))
+      }
+    }
+  }else{
+    geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F)
+    rowToKeep=unlist(c(geneListFiltering))
+  }
+  if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){
+    #should arrive only with user gene list filtering with diff.exp. results clustering
+    addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log)
+    rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix))
+  }
+
+  if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){
+    addComment("[WARNING] some genes selected by the output filter are not in the expression file",T,opt$log)
+    rowToKeep=intersect(rowToKeep,rownames(expressionMatrix))
+  }
+  addComment(c("[INFO]Output filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE)
+}
+
+#we add differential analysis info in output if it was directly used for clustering or when it was used for filtering with expression
+
+#in case of expression or generic data clustering without filtering based on external stats
+if(expressionToCluster && is.null(comparisonMatrix)){
+  if(length(rowToKeep)==0){
+    addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)
+    outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE)
+  }else{
+      outputData=matrix(0,ncol=2,nrow=length(rowToKeep)+1)
+      outputData[1,]=c("Gene","Cluster")
+      outputData[2:(length(rowToKeep)+1),1]=rowToKeep
+      if(class(rowClust)!="logical" ){
+        outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep]
+      }else{
+        outputData[2:(length(rowToKeep)+1),2]=0
+      }
+  }
+}
+
+#in case of generic data clustering with filtering based on generic external data
+if(!is.null(opt$genericData) && !is.null(comparisonMatrix)){
+  if(length(rowToKeep)==0){
+    addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)
+    outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE)
+  }else{
+    outputData=matrix(0,ncol=2+nbComparisons,nrow=length(rowToKeep)+1)
+    outputData[1,]=c("Gene","Cluster",colnames(comparisonMatrix))
+    outputData[2:(length(rowToKeep)+1),1]=rowToKeep
+    if(class(rowClust)!="logical" ){
+      outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep]
+    }else{
+      outputData[2:(length(rowToKeep)+1),2]=0
+    }
+    outputData[2:(length(rowToKeep)+1),3:(ncol(comparisonMatrix)+2)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4)
+  }
+}
+
+#in case of expression data clustering with filtering based on diff. exp. results or diff. exp. results clustering
+if(is.null(opt$genericData) && !is.null(comparisonMatrix)){
+  if(length(rowToKeep)==0){
+    addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)
+    outputData=matrix(0,ncol=3,nrow=3)
+    outputData[1,]=c("","","Comparison")
+    outputData[2,]=c("Gene","Info","Cluster")
+    outputData[3,]=c("noGene","noInfo","noClustering")
+  }else{
+      outputData=matrix(0,ncol=3+nbComparisons*nbColPerContrast,nrow=length(rowToKeep)+2)
+      outputData[1,]=c("","","Comparison",rep(colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)],each=nbColPerContrast))
+      outputData[2,]=c("Gene","Info","Cluster",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbComparisons))
+      outputData[3:(length(rowToKeep)+2),1]=rowToKeep
+      outputData[3:(length(rowToKeep)+2),2]=comparisonMatrixInfoGene[rowToKeep]
+      if(class(rowClust)!="logical" ){
+        outputData[3:(length(rowToKeep)+2),3]=cutree(rowClust,nbClusters)[rowToKeep]
+      }else{
+        outputData[3:(length(rowToKeep)+2),3]=0
+      }
+      outputData[3:(length(rowToKeep)+2),4:(ncol(comparisonMatrix)+3)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4)
+  }
+}
+
+addComment("[INFO]Formated output",T,opt$log,display=FALSE)
+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F)
+
+##----------------------
+
+end.time <- Sys.time()
+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)
+
+
+addComment("[INFO]End of R script",T,opt$log,display=FALSE)
+
+printSessionInfo(opt$log)
+
+#sessionInfo()
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/utils.R	Fri Jun 26 09:51:15 2020 -0400
@@ -0,0 +1,143 @@
+# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu>
+#
+#  This file is free software: you may copy, redistribute and/or modify it
+#  under the terms of the GNU General Public License as published by the
+#  Free Software Foundation, either version 2 of the License, or (at your
+#  option) any later version.
+#
+#  This file is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+#extendedDist function to correlation measure
+distExtended <- function(x,method) {
+  if(method %in% c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"))return(dist(x,method = method))
+  if(method %in% c("pearson", "spearman", "kendall"))return(as.dist(1-cor(t(x),method=method))/2)
+  if(method %in% c("absPearson", "absSpearman", "absKendall"))return(as.dist(1-abs(cor(t(x),method=method))))
+  return(NULL)
+}
+
+##comment function to display message and optionnaly add it to log file
+
+addComment <- function(text,addToFile=FALSE,fileName=NULL,append=TRUE,display=TRUE){
+  if(display)cat(paste(c(text,"\n"),collapse = " "))
+  if(addToFile)write(paste(text,collapse = " "),fileName,append=append)
+}
+
+printSessionInfo <- function(fileName=NULL,append=TRUE){
+  addComment("[INFO]R session info :",T,fileName,display=FALSE)
+  tempInfo=sessionInfo()
+  write(paste(tempInfo$R.version$version.string),fileName,append=append)
+  write(paste("Platform",tempInfo$platform,sep = " : "),fileName,append=append)
+  write(paste("Running under",tempInfo$running,sep = " : "),fileName,append=append)
+  write(paste("Local variables",tempInfo$locale,sep = " : "),fileName,append=append)
+  write(paste("Attached base packages",paste(tempInfo$basePkgs,collapse = "; "),sep = " : "),fileName,append=append)
+  if(length(tempInfo$otherPkgs)>0){
+    lineToPrint=""
+    for(iPack in tempInfo$otherPkgs){
+      lineToPrint=paste(lineToPrint,iPack$Package," ",iPack$Version,"; ",sep = "")
+    }
+    write(paste("Other attached packages",lineToPrint,sep = " : "),fileName,append=append)
+  }
+  if(length(tempInfo$loadedOnly)>0){
+    lineToPrint=""
+    for(iPack in tempInfo$loadedOnly){
+      lineToPrint=paste(lineToPrint,iPack$Package," ",iPack$Version,"; ",sep = "")
+    }
+    write(paste("Loaded packages",lineToPrint,sep = " : "),fileName,append=append)
+  }
+}
+
+##negative of a mathematical expression
+negativeExpression <- function(expression){
+  expression=gsub("\\+","_toMinus_",expression)
+  expression=gsub("\\-","+",expression)
+  expression=gsub("_toMinus_","-",expression)
+  if(substr(expression,1,1)!="-" && substr(expression,1,1)!="+"){
+    expression=paste(c("-",expression),collapse="")
+  }
+
+  return(expression)
+}
+
+#' Returns file name of calling Rscript
+#'
+#' \code{get_Rscript_filename} returns the file name of calling Rscript
+#' @return A string with the filename of the calling script.
+#'      If not found (i.e. you are in a interactive session) returns NA.
+#'
+#' @export
+get_Rscript_filename <- function() {
+    prog <- sub("--file=", "", grep("--file=", commandArgs(), value=TRUE)[1])
+    if( .Platform$OS.type == "windows") {
+        prog <- gsub("\\\\", "\\\\\\\\", prog)
+    }
+    prog
+}
+
+#' Recursively sorts a list
+#'
+#' \code{sort_list} returns a sorted list
+#' @param unsorted_list A list.
+#' @return A sorted list.
+#' @export
+sort_list <- function(unsorted_list) {
+    for(ii in seq(along=unsorted_list)) {
+        if(is.list(unsorted_list[[ii]])) {
+            unsorted_list[[ii]] <- sort_list(unsorted_list[[ii]])
+        }
+    }
+    unsorted_list[sort(names(unsorted_list))]
+}
+
+
+# Multiple plot function
+#
+# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
+# - cols:   Number of columns in layout
+# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
+#
+# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
+# then plot 1 will go in the upper left, 2 will go in the upper right, and
+# 3 will go all the way across the bottom.
+#
+multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
+  library(grid)
+
+  # Make a list from the ... arguments and plotlist
+  plots <- c(list(...), plotlist)
+
+  numPlots = length(plots)
+
+  # If layout is NULL, then use 'cols' to determine layout
+  if (is.null(layout)) {
+    # Make the panel
+    # ncol: Number of columns of plots
+    # nrow: Number of rows needed, calculated from # of cols
+    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
+                     ncol = cols, nrow = ceiling(numPlots/cols))
+  }
+
+  if (numPlots==1) {
+    print(plots[[1]])
+
+  } else {
+    # Set up the page
+    grid.newpage()
+    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
+
+    # Make each plot, in the correct location
+    for (i in 1:numPlots) {
+      # Get the i,j matrix positions of the regions that contain this subplot
+      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
+
+      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
+                                      layout.pos.col = matchidx$col))
+    }
+  }
+}