Next changeset 1:0b09345fa632 (2020-09-14) |
Commit message:
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df" |
added:
galaxy/wrappers/ExprHeatmapClustering.xml src/ExprPlotsScript.R src/General_functions.py src/LIMMA_options.py src/LIMMAscriptV4.R src/VolcanoPlotsScript.R src/getopt.R src/heatMapClustering.R src/utils.R |
b |
diff -r 000000000000 -r 14045c80a222 galaxy/wrappers/ExprHeatmapClustering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/ExprHeatmapClustering.xml Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,928 @@\n+<tool name="GIANT-Heatmap and Hierarchical clustering" id="giant_hierarchical_clustering" version="0.5.1">\r\n+ <description>Run hierarchical clustering and plot heatmap from expression data and/or differential expression analysis</description>\r\n+ <requirements>\r\n+ <requirement type="package" version="4.8.0">r-plotly</requirement>\r\n+ <requirement type="package" version="1.12.0">r-dendextend</requirement>\r\n+ <requirement type="package" version="0.1_20">r-ggdendro</requirement>\r\n+ <requirement type="package" version="3.2.1">r-ggplot2</requirement>\r\n+ <requirement type="package" version="0.16.0">r-heatmaply</requirement>\r\n+ <requirement type="package" version="0.4.8">r-circlize</requirement>\r\n+ <requirement type="package" version="1.18.1">bioconductor-complexheatmap</requirement>\r\n+ <requirement type="package" version="2.2.2">pandoc</requirement>\r\n+ </requirements>\r\n+ <code file=\'../../src/General_functions.py\'/>\r\n+ <stdio>\r\n+ <regex match="Execution halted"\r\n+ source="both"\r\n+ level="fatal"\r\n+ description="Execution halted, please contact tool developer or administrators." />\r\n+ <regex match="Error in"\r\n+ source="both"\r\n+ level="fatal"\r\n+ description="An error occured during R execution, please contact tool developer." />\r\n+ <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." />\r\n+ <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." />\r\n+ </stdio>\r\n+ <command>\t<![CDATA[\r\n+\r\n+ #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.expressionData:\r\n+\r\n+ ##start by selecting specific input data columns depending on user request\r\n+ #if $dataToCluster.dataToCluster_selector=="genericData" and $dataToCluster.columnToKeep:\r\n+ awk -v columns="$dataToCluster.columnToKeep" \'BEGIN{FS="\\t";OFS="";ORS="";split(columns,columnsTab,",")} FNR==1{for(iColumn=1;iColumn<=length(columnsTab);iColumn++)for(iField=2;iField<=NF;iField++){if(\\$iField==columnsTab[iColumn])colsToSelect[iColumn]=iField}} {line=\\$1;for(iColumn=1;iColumn<=length(columnsTab);iColumn++)line=line"\\t"\\$colsToSelect[iColumn];print line"\\n";}\' $dataToCluster.expressionData > ./selectedExpressionData;\r\n+ #else\r\n+ cp $dataToCluster.expressionData ./selectedExpressionData;\r\n+ #end if\r\n+\r\n+ ##reorder columns of input data based on factors file\r\n+ #if $dataToCluster.reorder_sample.reordering_selector=="factorFile" and $dataToCluster.reorder_sample.factorFileData and $dataToCluster.reorder_sample.factorToUse:\r\n+ awk -v factors="$dataToCluster.reorder_sample.factorToUse" \'BEGIN{FS="\\t";OFS="";ORS="";split(factors,factorsTab,",")} FNR==1{for(iFactor=1;iFactor<=length(factorsTab);iFactor++)for(iField=2;iField<=NF;iField++){if(\\$iField==factorsTab[iFactor])colsToSelect[iFactor]=iField}} FNR>1{line=\\$1;for(iFactor=1;iFactor<=length(factorsTab);iFactor++)line=line"\\t"\\$colsToSelect[iFactor];print line"\\n";}\' $dataToCluster.reorder_sample.factorFileData > ./orderingFactor;\r\n+\r\n+ sort -V -k2 ./orderingFactor > ./orderingSample;\r\n+\r\n+ awk \'BEGIN{FS="\\t";OFS="";ORS="";factorNumber=0} ARGIND==1{sampleOrdered[FNR]=\\$1;factorNumber=FNR} ARGIND==2 && FNR==1{for(iElemt=1;iElemt<=factorNumber;iElemt++)for(iPosit=2;iPosit<=NF;iPosit++)if(\\$iPosit==sampleOrdered[iElemt])positOrdered[iElemt]=iPosit} ARGIND==2{line=\\$1;for(iElemt=1;iElemt<=factorNumber;iElemt++)if(iElemt in positOrdered)line=line"\\t"\\$positOrdered[iElemt];print line"\\n"}\' ./orderingSample ./selectedExpressionData > ./orderedExpressionData;\r\n+\r\n+ ##check if some input data columns were lost during the process\r\n+ awk \'ARGIND==1 && FNR==1{colNumbA=NF} ARGIND==2 && FNR==1{colNumbB=NF} END{if(colNumbA!=colNumbB) print "[WA'..b' for readability and running time considerations only, number of displayed rows (genes) in heatmaps/circular plot can be limited. Clustering information in generated tabular file and scree plot are computed from a global clustering considering all genes (excepting those filtered out before clustering). Heatmap and circular plot are displayed for a random gene selection, to avoid such random selection we advise you to use input filtering option before clustering to have a gene number below this limit.\r\n+\r\n+\\- **Personalized heatmap colors** to build color gradient choosing start, middle and end colors.\r\n+\r\n+\\- **Output format** for circular plots only.\r\n+\r\n+\\- **Html snapshot format** for interactive plotly plots.\r\n+\r\n+\\- **Scale html snapshots** to increase resolution of snapshots taken from interactive plotly plots.\r\n+\r\n+-----\r\n+\r\n+**Outputs**\r\n+\r\n+\\- **Tabular clustering file** containing cluster information for each gene satifying filtering steps. If expression or generic data was clustered, a two columns file is generated with gene identifiers and cluster numbers with possibly additional columns containing informations used for filtering. If differential results was clustered, a similar file is returned with an additional column containing cluster numbers and differential statistics coresponding to comparisons used for filtering.\r\n+\r\n+\\- **HTML file** to access interactive version of heatmap and screeplot through PlotLy html pages, circular plot image and tabulated clustering results. As a reminder, when the number of genes to display in heatmap/circular plot exceeds the maximum gene number parameter, a random sampling is performed for plotting efficiency. Thus, clustering displayed on heatmap/circular plot may slighlty differ from clustering information contained in tabular file as heatmap/circular plot clustering is done over a subset of genes whereas tabular file contains clustering results performed on all genes.\r\n+\r\n+\\- **LOG file** containing information about execution. Useful especially if tool execution fails. Please attach this log file in any bug report.\r\n+\r\n+]]> \r\n+ </help>\r\n+ <citations>\r\n+ <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870}\r\n+ }</citation>\r\n+\r\n+ <citation type="bibtex">@article{,\r\n+ author = {Galili, Tal and O\'Callaghan, Alan and\r\n+ Sidi, Jonathan and Sievert, Carson},\r\n+ title = {heatmaply: an R package for creating interactive cluster\r\n+ heatmaps for online publishing},\r\n+ journal = {Bioinformatics},\r\n+ year = {2017},\r\n+ doi = {10.1093/bioinformatics/btx657},\r\n+ url = {http://dx.doi.org/10.1093/bioinformatics/btx657},\r\n+ eprint =\r\n+ {https://academic.oup.com/bioinformatics/article-pdf/doi/10.1093/bioinformatics/btx657/21358327/btx657.pdf}\r\n+ }</citation>\r\n+\r\n+ <citation type="bibtex">@article{doi:10.1093/bioinformatics/btu393,\r\n+ author = {Gu, Zuguang and Gu, Lei and Eils, Roland and Schlesner, Matthias and Brors, Benedikt},\r\n+ title = {circlize implements and enhances circular visualization in R },\r\n+ journal = {Bioinformatics},\r\n+ volume = {30},\r\n+ number = {19},\r\n+ pages = {2811-2812},\r\n+ year = {2014},\r\n+ doi = {10.1093/bioinformatics/btu393},\r\n+ URL = {http://dx.doi.org/10.1093/bioinformatics/btu393},\r\n+ eprint = {/oup/backfile/content_public/journal/bioinformatics/30/19/10.1093_bioinformatics_btu393/2/btu393.pdf}\r\n+ }</citation>\r\n+\r\n+ <citation type="bibtex">@online{plotly, author = {Plotly Technologies Inc.}, title = {Collaborative data science}, publisher = {Plotly Technologies Inc.}, address = {Montreal, QC}, year = {2015}, url = {https://plot.ly}\r\n+ }</citation>\r\n+\r\n+\r\n+ </citations>\r\n+\r\n+</tool>\r\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/ExprPlotsScript.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ExprPlotsScript.R Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,465 @@\n+# A command-line interface to basic plots for use with Galaxy\n+# written by Jimmy Vandel\n+# one of these arguments is required:\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+#get options\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)\n+args <- commandArgs()\n+\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "dataFile", "i", 1, "character",\n+ "factorInfo","t", 1, "character",\n+ "dataFileFormat","j",1,"character",\n+ "conditionNames","c",1,"character",\n+ "format", "f", 1, "character",\n+ "quiet", "q", 0, "logical",\n+ "log", "l", 1, "character",\n+ "histo" , "h", 1, "character",\n+ "maPlot" , "a", 1, "character",\n+ "boxplot" , "b", 1, "character",\n+ "microarray" , "m", 1, "character",\n+ "acp" , "p" , 1, "character",\n+ "screePlot" , "s" , 1, "character"),\n+ byrow=TRUE, ncol=4)\n+opt <- getopt(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) {\n+ addComment("[ERROR]\'dataFile\' and it format are required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$histo) & is.null(opt$maPlot) & is.null(opt$boxplot) & is.null(opt$microarray) & is.null(opt$acp)){\n+ addComment("[ERROR]Select at least one plot to draw",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+verbose <- if (is.null(opt$quiet)) {\n+ TRUE\n+}else{\n+ FALSE}\n+\n+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)\n+\n+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)\n+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)\n+\n+#directory for plots\n+dir.create(file.path(getwd(), "plotDir"))\n+dir.create(file.path(getwd(), "plotLyDir"))\n+\n+#silent package loading\n+suppressPackageStartupMessages({\n+ library("oligo")\n+ library("ff")\n+ library("ggplot2")\n+ library("plotly")\n+})\n+\n+\n+#chargement des fichiers en entr\xc3\xa9e\n+#fichier de type CEL\n+dataAreFromCel=FALSE\n+if(toupper(opt$dataFileFormat)=="CEL"){\n+ dataAreFromCel=TRUE\n+ celData=read.celfiles(unlist(strsplit(opt$dataFile,",")))\n+ #load all expressions\n+ dataMatrix=exprs(celData)\n+ #select "pm" probes\n+ probeInfo=getProbeInfo(celData,probeType = c("pm"),target="probeset")\n+ #reduce dataMatrix to log expression matrix for a randomly probe selection\n+ dataMatrix=log2(dataMatrix[sample(unique(probeInfo[,1]),min(100000,length(unique(probeInfo[,1])))),])\n+ addComment("[INFO]Raw data are log2 transformed",TRUE,opt$log,display=FALSE)\n+ remove(probeInfo)\n+}else{\n+ #fichier deja tabule\n+ dataMatrix=read.csv(file=opt$dataFile,header=F,sep="\\t",colClasses="character")\n+ #remove first row to convert it as colnames (to avoid X before colnames with header=T)\n+ colNamesData=dataMatrix[1,-1]\n+ dataMatrix=dataMatrix[-1,]\n+ #remove first colum to convert it as rownames\n+ rowNamesData=dataMatrix[,1]\n+ dataMatrix=dataMatrix[,-1]\n+ if(is.data.frame(dataMatrix)){\n+ dataMatrix=data.matrix(dataMatrix)\n+ }else{\n+ dataMatrix=data.matrix(as.numeric(dataMatrix))\n'..b'ot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = \'text\', inherit = F, text=rownames(PACres$x), hoverinfo=\'skip\', showlegend = FALSE, color=I(\'black\'))\n+ #save the plotly plot\n+ htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F)\n+ }\n+ }else{\n+ for(iFactorBis in (iFactor+1):length(orderedFactors)){\n+ if(length(table(factorInfoMatrix[,orderedFactors[iFactorBis]]))<=length(symbolset)){\n+ dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x), Attribute1=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactor]], Attribute2=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactorBis]], hoverLabel=unlist(lapply(rownames(PACres$x),function(x)paste(factorInfoMatrix[x,-1],collapse=","))))\n+ p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = \'scatter3d\', mode="markers", color=~Attribute1,colors=rainbow(length(levels(dataToPlot$Attribute1))+2),symbol=~Attribute2,symbols = symbolset,hoverinfo = \'text\', text = ~paste(Experiments,"\\n",hoverLabel),marker=list(size=5))%>%\n+ layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),\n+ legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))\n+ fileIdent=paste(correspondanceNameTable[orderedFactors[c(iFactor,iFactorBis)],1],collapse="_AND_")\n+ #add text label to plot\n+ ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = \'text\', inherit = F, text=rownames(PACres$x), hoverinfo=\'skip\', showlegend = FALSE, color=I(\'black\'))\n+ #save the plotly plot\n+ htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F)\n+ }else{\n+ addComment(c("[WARNING]PCA with",orderedFactors[iFactor],"and",orderedFactors[iFactorBis],"groups cannot be displayed, too many groups (max",length(symbolset),")"),T,opt$log,display=FALSE) \n+ }\n+ }\n+ }\n+ }\n+ }else{\n+ dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x))\n+ p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = \'scatter3d\', mode="markers",marker=list(size=5,color="salmon"),hoverinfo = \'text\',text = ~paste(Experiments))%>%\n+ layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),\n+ legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))\n+ ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = \'text\', inherit = F, text=rownames(PACres$x), hoverinfo=\'skip\', showlegend = FALSE, color=I(\'black\'))\n+ \n+ #save plotly files \n+ htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_plot.html"),collapse=""),selfcontained = F)\n+ }\n+ remove(p,dataToPlot,dataFiltered)\n+ addComment("[INFO]ACP plot drawn",T,opt$log,display=FALSE)\n+}\n+\n+#write correspondances between plot file names and displayed names in figure legends, usefull to define html items in xml file\n+write.table(correspondanceNameTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+#sessionInfo()\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/General_functions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/General_functions.py Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,206 @@\n+import re\n+import numpy as np\n+\n+def get_column_names( file_path, toNotConsider=-1, each=1):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tcpt=0\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif i!=toNotConsider:#to squeeze the first column\n+\t\t\tif cpt==0:\n+\t\t\t\toptions.append( ( field_component, field_component, False ) )\n+\t\t\tcpt+=1\n+\t\t\tif cpt==each:\n+\t\t\t\tcpt=0\n+\tinputfile.close()\n+\treturn options\n+\n+def get_column_names_filteredList( file_path, toNotConsider=[], each=1):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tcpt=0\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif i not in toNotConsider:#to squeeze the first columns\n+\t\t\tif cpt==0:\n+\t\t\t\toptions.append( ( field_component, field_component, False ) )\n+\t\t\tcpt+=1\n+\t\t\tif cpt==each:\n+\t\t\t\tcpt=0\n+\tinputfile.close()\n+\treturn options\n+\n+def get_column_names_mergeNumber(file_path, numberToMerge=1, toNotConsider=[]):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tif int(numberToMerge)>0:\n+\t\tiHeader=0\n+\t\tfor iCurrentLine in inputfile:\n+\t\t\tiHeader=iHeader+1\n+\t\t\tif iHeader>int(numberToMerge):\n+\t\t\t\tbreak\n+\t\t\tcurrentLine=iCurrentLine.strip().split("\\t")\n+\t\t\tiOption=-1\n+\t\t\tfor i, field_component in enumerate( currentLine ):\n+\t\t\t\tif i not in toNotConsider:#to squeeze specified columns\n+\t\t\t\t\tiOption=iOption+1\n+\t\t\t\t\tif iHeader==1:\n+\t\t\t\t\t\toptions.append( ( str(field_component), str(field_component), False ) )\n+\t\t\t\t\telse:\n+\t\t\t\t\t\toptions[iOption]=(options[iOption][0]+"_"+str(field_component),options[iOption][1]+"_"+str(field_component),False)\n+\telse:\n+\t\tcurrentLine = next(inputfile).strip().split("\\t")\n+\t\tfor i, field_component in enumerate( currentLine ):\n+\t\t\tif i not in toNotConsider:#to squeeze specified columns\n+\t\t\t\toptions.append( ( "Column_"+str(i), "Column_"+str(i), False ) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_row_names( file_path, factorName ):\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tiColumn=-1\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif field_component==factorName:#to test\n+\t\t\tiColumn=i\n+\toptions=[]\n+\tif iColumn!=-1:\n+\t\tfor nextLine in inputfile:\n+\t\t\tnextLine=nextLine.strip().split("\\t")\n+\t\t\tif len(nextLine)>1:\n+\t\t\t\tif (nextLine[iColumn], nextLine[iColumn], False) not in options:\n+\t\t\t\t\toptions.append( (nextLine[iColumn], nextLine[iColumn], False) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_condition_file_names( file_list, toNotConsider=-1, each=1):\n+\toptions=[]\n+\tif not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names\n+\t\tinputfile = open(file_list.file_name)\n+\t\tfirstLine = next(inputfile).strip().split("\\t")\n+\t\tcpt=0\n+\t\tfor i, field_component in enumerate( firstLine ):\n+\t\t\tif i!=toNotConsider:#to squeeze the first column\n+\t\t\t\tif cpt==0:\n+\t\t\t\t\toptions.append( ( field_component, field_component, False ) )\n+\t\t\t\tcpt+=1\n+\t\t\t\tif cpt==each:\n+\t\t\t\t\tcpt=0\n+\t\tinputfile.close()\n+\telse:#if input file is a .cel file list or a collection\n+\t\tif not hasattr(file_list[0],\'collection\'):#if it is not a collection, get name easily\n+\t\t\tfor i, field_component in enumerate( file_list ):\n+\t\t\t\toptions.append( ( field_component.name, field_component.name, False ) )\n+\t\telse:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object\n+\t\t\tfor i, field_component in enumerate( file_list[0].collection.elements ):\n+\t\t\t\toptions.append( ( field_component.element_identifier, field_component.element_identifier, False ) )\n+\treturn options\n+\n+def generateFactorFile( file_list, factor_list, outputFileName, logFile):\n+\tforbidenCharacters={"*",":",",","|"}\n+\toutputfile = open(outputFileName, \'w\')\n+\toutputLog = open(logFile, \'w\')\n+\tsampleList=[]\n+\tif not isinstance(file_list,list):\n+\t\tconditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)\n+\telse :\n+\t\tconditionNames=get_condition_file'..b': \'"+currentFactor+"\'\\n")\t\n+\t\t\t\t\treturn 4\n+\t\t\t#check if factor allready named like that\n+\t\t\tif not globalDict.get(currentFactor) is None:\n+\t\t\t\toutputLog.write("[ERROR] \'"+currentFactor+"\' is used several times as factor name\\n")\t\n+\t\t\t\treturn 3\n+\t\t\tglobalDict[currentFactor]=dict()\n+\t\t\tfirstLine=firstLine+"\\t"+currentFactor\n+\t\t\tfactorNameList.append(currentFactor)\n+\t\t\tif len(factor_component[\'valueList\'])<=1:#check if there is at least two value available\n+\t\t\t\toutputLog.write("[ERROR] at least two different values are necessary for \'"+currentFactor+"\' factor\\n")\n+\t\t\t\treturn 1\n+\t\t\telse:\n+\t\t\t\tfor iValue, value_component in enumerate( factor_component[\'valueList\'] ):\n+\t\t\t\t\tcurrentValue=str(value_component[\'valueName\'])\n+\t\t\t\t\t#check if factor name contains forbidden characters\n+\t\t\t\t\tfor specialCharacter in forbidenCharacters:\n+\t\t\t\t\t\tif currentValue.find(specialCharacter)!=-1:\n+\t\t\t\t\t\t\toutputLog.write("[ERROR] \'"+specialCharacter+"\' character is forbidden in value name : \'"+currentValue+"\'\\n")\t\n+\t\t\t\t\t\t\treturn 4\n+\t\t\t\t\tcurrentSample=str(value_component[\'valueConditions\']).split(",")\n+\t\t\t\t\tfor iSample, sample_component in enumerate (currentSample):\n+\t\t\t\t\t\tif not sample_component in currentSampleList:\n+\t\t\t\t\t\t\toutputLog.write("[ERROR] sample "+sample_component+" was assigned several times for factor \'"+currentFactor+"\'\\n")\n+\t\t\t\t\t\t\treturn 2\n+\t\t\t\t\t\tcurrentSampleList.remove(sample_component)\n+\t\t\t\t\t\tglobalDict[currentFactor][sample_component]=currentValue\n+\t\t\tif(len(currentSampleList)>0):\n+\t\t\t\toutputLog.write("[ERROR] for factor \'"+currentFactor+"\'\' sample "+str(currentSampleList)+" are not assigned to any value\\n")\n+\t\t\t\treturn 2\n+\toutputLog.write("[INFO] "+str(len(globalDict))+" factors are detected\\n")\n+\t#start writing the factor file\n+\toutputfile.write(firstLine+"\\n") \n+\tfor iSample, sample_component in enumerate(sampleList):\n+\t\tnewLine=sample_component\n+\t\tfor iFactor, factor_component in enumerate(factorNameList):\n+\t\t\tnewLine=newLine+"\\t"+globalDict[factor_component][sample_component]\n+\t\toutputfile.write(newLine+"\\n") \n+\toutputfile.close()\n+\toutputLog.close()\n+\treturn 0\n+\n+def selectSubSetTable(file_path,headerLine_number,columnsToAdd,columnNamesToKeep,outputFileName,logFile):\n+\toutputLog = open(logFile, \'w\')\n+\toutputLog.write("[INFO] header line number : "+ headerLine_number+" lines\\n")\t\n+\tavailableColumnsTuple=get_column_names_mergeNumber(file_path, headerLine_number)\n+\t#convert tuple list as a simple array\n+\tavailableColumns=[]\n+\tfor iTuple, tuple_content in enumerate (availableColumnsTuple): \n+\t\tavailableColumns.append(str(tuple_content[0]))\n+\tif len(availableColumns)==0:\n+\t\toutputLog.write("[ERROR] No detected columns in input file\\n")\t\n+\t\treturn 1\n+\tselectedColumns=list(columnsToAdd)\n+\tfor iVolcano, volcano_content in enumerate(columnNamesToKeep):\n+\t\tselectedColumns.append(availableColumns.index(volcano_content[\'pvalColumn\']))\n+\t\tif volcano_content[\'fdrColumn\'] in availableColumns:\n+\t\t\tselectedColumns.append(availableColumns.index(volcano_content[\'fdrColumn\']))\n+\t\telse:\n+\t\t\tselectedColumns.append(0)\n+\t\tselectedColumns.append(availableColumns.index(volcano_content[\'fcColumn\']))\n+\tif len(selectedColumns)!=(3*len(columnNamesToKeep)+len(columnsToAdd)):\n+\t\toutputLog.write("[ERROR] matching between input file colnames and requested column names failed\\n")\t\n+\t\treturn 1\n+\toutputLog.write("[INFO] columns kept : "+str(selectedColumns)+"\\n")\t\n+\t#start writting formatted file\n+\tinputfile = open(file_path)\n+\toutputfile = open(outputFileName, \'w\')\n+\tiLineCpt=-1\n+\tfor iCurrentLine in inputfile:\n+\t\tiLineCpt=iLineCpt+1\n+\t\tif iLineCpt>=int(headerLine_number):\n+\t\t\tcurrentLineFields=np.array(iCurrentLine.strip().split("\\t"))\n+\t\t\tnewLine="\\t".join(currentLineFields[selectedColumns])\n+\t\t\toutputfile.write(newLine+"\\n")\n+\tif iLineCpt<int(headerLine_number):\n+\t\toutputLog.write("[ERROR] not enough lines in input files ("+(iLineCpt+1)+" lines)\\n")\t\n+\t\treturn 1\n+\tinputfile.close()\n+\toutputfile.close()\n+\toutputLog.close()\n+\treturn 0\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/LIMMA_options.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/LIMMA_options.py Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,330 @@\n+import re\n+\n+def get_column_names( file_path, toNotConsider=None, toNotConsiderBis=None):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif i!=0 and field_component!=toNotConsider and field_component!=toNotConsiderBis:#to squeeze the first column\n+\t\t\toptions.append( ( field_component, field_component, False ) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_row_names( file_path, factorName ):\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tiColumn=-1\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif field_component==factorName:#to test\n+\t\t\tiColumn=i\n+\toptions=[]\n+\tif iColumn!=-1:\n+\t\tfor nextLine in inputfile:\n+\t\t\tnextLine=nextLine.strip().split("\\t")\n+\t\t\tif len(nextLine)>1:\n+\t\t\t\tif (nextLine[iColumn], nextLine[iColumn], False) not in options:\n+\t\t\t\t\toptions.append( (nextLine[iColumn], nextLine[iColumn], False) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_row_names_interaction( file_path, factorNameA, factorNameB ):\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tiColumnA=-1\n+\tiColumnB=-1\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif field_component==factorNameA:#to test\n+\t\t\tiColumnA=i\n+\t\tif field_component==factorNameB:#to test\n+\t\t\tiColumnB=i\n+\tpossibleValuesA=[]\n+\tpossibleValuesB=[]\n+\tif iColumnA!=-1 and iColumnB!=-1:\n+\t\tfor nextLine in inputfile:\n+\t\t\tnextLine=nextLine.strip().split("\\t")\n+\t\t\tif len(nextLine)>1:\n+\t\t\t\tif nextLine[iColumnA] not in possibleValuesA:\n+\t\t\t\t\tpossibleValuesA.append(nextLine[iColumnA])\n+\t\t\t\tif nextLine[iColumnB] not in possibleValuesB:\n+\t\t\t\t\tpossibleValuesB.append(nextLine[iColumnB])\n+\tinputfile.close()\t\n+\toptions=[]\n+\tif len(possibleValuesA)>=1 and len(possibleValuesB)>=1 and possibleValuesA[0]!="None" and possibleValuesB[0]!="None":\n+\t\tfor counterA in range(len(possibleValuesA)):\n+\t\t\tfor counterB in range(len(possibleValuesB)):\n+\t\t\t\toptions.append( (possibleValuesA[counterA]+"*"+possibleValuesB[counterB], possibleValuesA[counterA]+"*"+possibleValuesB[counterB], False) )\t\n+\treturn options\n+\n+def get_comparisonsA( factorA, valuesA ):\n+\toptions=[]\n+\tformatValuesA=re.sub("(^\\[u\')|(\'\\]$)","", str(valuesA))\n+\tpossibleValues=formatValuesA.split("\', u\'")\n+\tif len(possibleValues)>=2:\n+\t\tfor counter in range(len(possibleValues)-1):\n+\t\t\tfor innerCounter in range(counter+1,len(possibleValues)):\n+\t\t\t\toptions.append( (possibleValues[counter]+" - "+possibleValues[innerCounter], possibleValues[counter]+" - "+possibleValues[innerCounter], False) )\n+\t\t\t\toptions.append( (possibleValues[innerCounter]+" - "+possibleValues[counter], possibleValues[innerCounter]+" - "+possibleValues[counter], False) )\n+\treturn options\n+\n+def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):\n+\toptions=[]\n+\tformatValuesA=re.sub("(^\\[u\')|(\'\\]$)","", str(valuesA))\n+\tpossibleValuesA=formatValuesA.split("\', u\'")\n+\tformatValuesB=re.sub("(^\\[u\')|(\'\\]$)","", str(valuesB))\n+\tpossibleValuesB=formatValuesB.split("\', u\'")\n+\tif str(interaction)=="False":\n+\t\tif len(possibleValuesA)>=2:\n+\t\t\tfor counter in range(len(possibleValuesA)-1):\n+\t\t\t\tfor innerCounter in range(counter+1,len(possibleValuesA)):\n+\t\t\t\t\toptions.append( (possibleValuesA[counter]+" - "+possibleValuesA[innerCounter], possibleValuesA[counter]+" - "+possibleValuesA[innerCounter], False) )\n+\t\t\t\t\toptions.append( (possibleValuesA[innerCounter]+" - "+possibleValuesA[counter], possibleValuesA[innerCounter]+" - "+possibleValuesA[counter], False) )\n+\t\tif len(possibleValuesB)>=2:\n+\t\t\tfor counter in range(len(possibleValuesB)-1):\n+\t\t\t\tfor innerCounter in range(counter+1,len(possibleValuesB)):\n+\t\t\t\t\toptions.append( (possibleValuesB[counter]+" - "+possibleValuesB[innerCounter], possibleValuesB[counter]+" - "+possibleValuesB[innerCounter], False) )\n+\t\t\t\t\toptions.append( (possibleValuesB[innerCounter]+" - "+possibleValuesB[counter], possibleValuesB[innerCounter]+" - "+possible'..b'ield_component not in dico):\n+\t\t\t\t\t\tdico[field_component]="Condition"+str(iCondition)\n+\t\t\t\t\t\tnewCurrentLine="Condition"+str(iCondition)\n+\t\t\t\t\t\tiCondition+=1\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tnewCurrentLine=dico[field_component]\n+\t\t\t\telse:\n+\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\tdico[field_component]="Value"+str(iValue)\n+\t\t\t\t\t\tnewCurrentLine+="\\tValue"+str(iValue)\n+\t\t\t\t\t\tiValue+=1\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tnewCurrentLine+="\\t"+dico[field_component]\n+\t\toutputfile.write(newCurrentLine+"\\n")\n+\t\tfirstLine=0\n+\toutputfile.close()\n+\tinputfile.close()\n+\t##check if any entries in dictionnary contains forbiden character\n+\tfor key, value in dico.items():\n+\t\tfor specialCharacter in forbidenCharacters:\n+\t\t\tif value.startswith("Condition")==False and key.find(specialCharacter)!=-1:\n+\t\t\t\treturn 1\n+\t##then write dictionnary in a additional file\n+\toutputfile = open(ouputDictionnary, \'w\')\n+\tfor key, value in dico.items():\n+\t\toutputfile.write(key+"\\t"+value+"\\n")\n+\toutputfile.close()\n+\treturn 0\n+\n+\n+def replaceNamesBlockInFiles(expressionFile_name,conditionFile_name,blockingFile_name,outputExpressionFile,outputConditionFile,outputBlockingFile,ouputDictionnary):\n+\tdico={}\n+\tforbidenCharacters={"*",":",",","|"}\n+\t##start with expression file, read only the first line\n+\tinputfile = open(expressionFile_name)\n+\toutputfile = open(outputExpressionFile, \'w\')\n+\tfirstLine = next(inputfile).rstrip().split("\\t")\n+\tiCondition=1\n+\tnewFirstLine=""\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif (i>0):\n+\t\t\t#conditions names should not be redundant with other conditions\n+\t\t\tif(field_component not in dico):\n+\t\t\t\tdico[field_component]="Condition"+str(iCondition)\n+\t\t\t\tnewFirstLine+="\\t"+"Condition"+str(iCondition)\n+\t\t\t\tiCondition+=1\n+\t\t\telse:\n+\t\t\t\traise NameError(\'condition name allready exists!\')\n+\t\telse:\n+\t\t\tnewFirstLine+=field_component\n+\toutputfile.write(newFirstLine+"\\n")\n+\tfor line in inputfile:\n+\t\toutputfile.write(line)\n+\toutputfile.close()\n+\tinputfile.close()\n+\t#then parse condition file, read all lines in this case\n+\tiFactor=1\n+\tiValue=1\n+\tfor fileNum in range(2):\n+\t\tif fileNum==0:\n+\t\t\tinputfile = open(conditionFile_name)\n+\t\t\toutputfile = open(outputConditionFile, \'w\')\n+\t\telse:\n+\t\t\tinputfile = open(blockingFile_name)\n+\t\t\toutputfile = open(outputBlockingFile, \'w\')\n+\t\tfirstLine=1\n+\t\tfor line in inputfile:\n+\t\t\tcurrentLine = line.rstrip().split("\\t")\n+\t\t\tnewCurrentLine=""\n+\t\t\tfor i, field_component in enumerate( currentLine ):\n+\t\t\t\t#special treatment for the first line\n+\t\t\t\tif (firstLine==1):\n+\t\t\t\t\tif (i==0):\n+\t\t\t\t\t\tnewCurrentLine=field_component\n+\t\t\t\t\telse:\n+\t\t\t\t\t\t#factor names should not be redundant with other factors or conditions\n+\t\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\t\tdico[field_component]="Factor"+str(iFactor)\n+\t\t\t\t\t\t\tnewCurrentLine+="\\t"+"Factor"+str(iFactor)\n+\t\t\t\t\t\t\tiFactor+=1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\traise NameError(\'factor name allready exists!\')\n+\t\t\t\telse:\t\n+\t\t\t\t\tif (i==0):\n+\t\t\t\t\t\t#check if condition name allready exist and used it if it is, or create a new one if not\n+\t\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\t\tdico[field_component]="Condition"+str(iCondition)\n+\t\t\t\t\t\t\tnewCurrentLine="Condition"+str(iCondition)\n+\t\t\t\t\t\t\tiCondition+=1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tnewCurrentLine=dico[field_component]\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\t\tdico[field_component]="Value"+str(iValue)\n+\t\t\t\t\t\t\tnewCurrentLine+="\\tValue"+str(iValue)\n+\t\t\t\t\t\t\tiValue+=1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tnewCurrentLine+="\\t"+dico[field_component]\n+\t\t\toutputfile.write(newCurrentLine+"\\n")\n+\t\t\tfirstLine=0\n+\t\toutputfile.close()\n+\t\tinputfile.close()\n+\t##check if any entries in dictionnary contains forbiden character\n+\tfor key, value in dico.items():\n+\t\tfor specialCharacter in forbidenCharacters:\n+\t\t\tif value.startswith("Condition")==False and key.find(specialCharacter)!=-1:\n+\t\t\t\treturn 1\n+\t##then write dictionnary in a additional file\n+\toutputfile = open(ouputDictionnary, \'w\')\n+\tfor key, value in dico.items():\n+\t\toutputfile.write(key+"\\t"+value+"\\n")\n+\toutputfile.close()\n+\treturn 0\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/LIMMAscriptV4.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/LIMMAscriptV4.R Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,1002 @@\n+# A command-line interface for LIMMA to use with Galaxy\n+# written by Jimmy Vandel\n+# one of these arguments is required:\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)\n+args <- commandArgs()\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "dataFile", "i", 1, "character",\n+ "factorInfo","a", 1, "character",\n+ "blockingInfo","b", 1, "character",\n+ "dicoRenaming","g",1,"character",\n+ "blockingPolicy","u", 1, "character",\n+ "fdrThreshold","t", 1, "double",\n+ "thresholdFC","d", 1, "double",\n+ "format", "f", 1, "character",\n+ "histo","h", 1, "character",\n+ "volcano","v", 1, "character",\n+ "factorsContrast","r", 1, "character",\n+ "contrastNames","p", 1, "character",\n+ "firstGroupContrast","m", 1, "character",\n+ "secondGroupContrast","n", 1, "character",\n+ "controlGroups","c", 1, "character",\n+ "fratioFile","s",1,"character",\n+ "organismID","x",1,"character",\n+ "rowNameType","y",1,"character",\n+ "quiet", "q", 0, "logical",\n+ "log", "l", 1, "character",\n+ "outputFile" , "o", 1, "character",\n+ "outputDfFile" , "z", 1, "character"),\n+ byrow=TRUE, ncol=4)\n+opt <- getopt(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required\\n")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$dataFile)) {\n+ addComment("[ERROR]\'dataFile\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy) ) {\n+ addComment("[ERROR]blocking policy is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$dicoRenaming)) {\n+ addComment("[ERROR]renaming dictionnary is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$factorsContrast)) {\n+ addComment("[ERROR]factor informations are missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (length(opt$firstGroupContrast)!=length(opt$secondGroupContrast)) {\n+ addComment("[ERROR]some contrast groups seems to be empty",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$factorInfo)) {\n+ addComment("[ERROR]factors info is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fdrThreshold)) {\n+ addComment("[ERROR]\'FDR threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$outputFile) || is.null(opt$outputDfFile)){\n+ addComment("[ERROR]\'output files\' are required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$thresholdFC)){\n+ addComment("[ERROR]\'FC threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fratioFile)) {\n+ addComment("[ERROR]F-ratio parameter is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+#demande si le script sera bavard\n+verbose <- if (is.null(opt$quiet)) {\n+ TRUE\n+}else{\n+ FALSE\n+}\n+\n+#param\xc3\xa8tres internes\n+#pour savoir si on remplace les FC calcul\xc3\xa9s par LIMMA par un calcul du LS-MEAN (ie moyenne de moyennes de chaque groupe dans chaque terme du contraste plut\xc3\xb4t qu\'une moyenne globale dans chaque terme)\n+useLSmean=FALSE\n+\n+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)\n+\n+addComment(c("[INFO]Wor'..b'.eb$t)=rownames(data.fit.eb$adj_p.value)\n+ colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value)\n+ \n+ dfMatrix=dfMatrix[newOrder,,drop=FALSE]\n+}\n+\n+\n+#formating output matrices depending on genes to keep\n+if(length(genesToKeep)==0){\n+ outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))\n+ outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))\n+ outputData[,1]=c("LIMMA","Gene","noGene")\n+ outputData[,2]=c("Comparison","Info","noInfo")\n+ \n+ outputDfData=matrix(0,ncol=3+1,nrow=2)\n+ outputDfData[1,]=c("X","df.residual","df.prior","df.total")\n+ outputDfData[,1]=c("Statistics","noGene")\n+}else{\n+ if(length(genesToKeep)==1){\n+ outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))\n+ outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))\n+ outputData[,1]=c("LIMMA","Gene",genesToKeep)\n+ outputData[,2]=c("Comparison","Info","na")\n+ if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]\n+ outputData[3,seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)\n+ outputData[3,seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)\n+ outputData[3,seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)\n+ outputData[3,seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)\n+ outputData[3,seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)\n+ \n+ outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))\n+ outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")\n+ outputDfData[2,]=c(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual","df.prior","df.total")],digits=4))\n+ }else{\n+ #format matrix to be correctly read by galaxy (move headers in first column and row)\n+ outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=nrow(data.fit.eb$adj_p.value)+2)\n+ outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))\n+ outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))\n+ outputData[,1]=c("LIMMA","Gene",rownames(data.fit.eb$adj_p.value))\n+ outputData[,2]=c("Comparison","Info",rep("na",nrow(data.fit.eb$adj_p.value)))\n+ if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(data.fit.eb$adj_p.value)]\n+ outputData[3:nrow(outputData),seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)\n+ outputData[3:nrow(outputData),seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)\n+ outputData[3:nrow(outputData),seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)\n+ \n+ outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))\n+ outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")\n+ outputDfData[2:(1+nrow(dfMatrix)),]=cbind(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual")],digits=4),prettyNum(dfMatrix[,c("df.prior")],digits=4),prettyNum(dfMatrix[,c("df.total")],digits=4))\n+ }\n+}\n+addComment("[INFO]Formated output",T,opt$log,display=FALSE) \n+\n+#write output results\n+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+#write df info file\n+write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+#sessionInfo()\n+\n+\n+\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/VolcanoPlotsScript.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/VolcanoPlotsScript.R Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,426 @@\n+# R script to plot volcanos through Galaxy based GIANT tool \n+# written by Jimmy Vandel\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)\n+args <- commandArgs()\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "statisticsFile", "i", 1, "character",\n+ "volcanoName" , "n", 1, "character",\n+ "pvalColumnName" , "p", 1, "character",\n+ "fdrColumnName" , "m", 1, "character",\n+ "fcColumnName" , "c", 1, "character",\n+ "fcKind","d", 1, "character",\n+ "fdrThreshold","s", 1, "double",\n+ "fcThreshold","e", 1, "double",\n+ "organismID","x",1,"character",\n+ "rowNameType","y",1,"character",\n+ "log", "l", 1, "character",\n+ "outputFile" , "o", 1, "character",\n+ "format", "f", 1, "character",\n+ "quiet", "q", 0, "logical"),\n+ byrow=TRUE, ncol=4)\n+opt <- getopt(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required\\n")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$statisticsFile)) {\n+ addComment("[ERROR]\'statisticsFile\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (length(opt$pvalColumnName)==0 || length(opt$fdrColumnName)==0 || length(opt$fcColumnName)==0) {\n+ addComment("[ERROR]no selected columns",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (length(opt$pvalColumnName)!=length(opt$fcColumnName) || length(opt$pvalColumnName)!=length(opt$fdrColumnName)) {\n+ addComment("[ERROR]different number of selected columns between p.val, adj-p.val and FC ",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fcKind)) {\n+ addComment("[ERROR]\'fcKind\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fdrThreshold)) {\n+ addComment("[ERROR]\'FDR threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fcThreshold)) {\n+ addComment("[ERROR]\'FC threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$outputFile)) {\n+ addComment("[ERROR]\'output file\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+#demande si le script sera bavard\n+verbose <- if (is.null(opt$quiet)) {\n+ TRUE\n+}else{\n+ FALSE\n+}\n+\n+#param\xc3\xa8tres internes\n+addComment("[INFO]Parameters checked test mode !",T,opt$log,display=FALSE)\n+\n+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)\n+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)\n+\n+#directory for plots\n+dir.create(file.path(getwd(), "plotDir"))\n+dir.create(file.path(getwd(), "plotLyDir"))\n+\n+#charge des packages silencieusement\n+suppressPackageStartupMessages({\n+ library("methods")\n+ library("biomaRt")\n+ library("ggplot2")\n+ library("plotly")\n+ library("stringr")\n+})\n+\n+#define some usefull variable\n+nbVolcanosToPlot=length(opt$pvalColumnName)\n+\n+#load input file\n+statDataMatrix=read.csv(file=file.path(getwd(), opt$statisticsFile),header=F,sep="\\t",colClasses="character")\n+#remove first colum to convert it as rownames\n+rownames(statDataMatrix)=statDataMatrix[,1]\n+statDataMatrix=statDataMatrix[,-1]\n+\n+#identify lines without adjusted p-value info (should contain the sa'..b'sed only on the first comparison\n+#newOrder=order(dataFrame$adj_p.value[,1])\n+#dataFrame$adj_p.value=dataFrame$adj_p.value[newOrder,]\n+\n+#alternative sorting strategy based on the mean gene rank over all comparisons\n+if(length(genesToKeep)>1){\n+ currentRank=rep(0,nrow(dataFrame$adj_p.value))\n+ for(iVolcano in 1:ncol(dataFrame$adj_p.value)){\n+ currentRank=currentRank+rank(dataFrame$adj_p.value[,iVolcano])\n+ }\n+ currentRank=currentRank/ncol(dataFrame$adj_p.value)\n+ newOrder=order(currentRank)\n+ rownames(dataFrame)=rownames(dataFrame)[newOrder]\n+ \n+ dataFrame$adj_p.value=matrix(dataFrame$adj_p.value[newOrder,],ncol=ncol(dataFrame$adj_p.value))\n+ rownames(dataFrame$adj_p.value)=rownames(dataFrame$p.value)[newOrder]\n+ colnames(dataFrame$adj_p.value)=colnames(dataFrame$p.value)\n+ \n+ dataFrame$p.value=matrix(dataFrame$p.value[newOrder,],ncol=ncol(dataFrame$p.value))\n+ rownames(dataFrame$p.value)=rownames(dataFrame$adj_p.value)\n+ colnames(dataFrame$p.value)=colnames(dataFrame$adj_p.value)\n+ \n+ dataFrame$coefficients=matrix(dataFrame$coefficients[newOrder,],ncol=ncol(dataFrame$coefficients))\n+ rownames(dataFrame$coefficients)=rownames(dataFrame$adj_p.value)\n+ colnames(dataFrame$coefficients)=colnames(dataFrame$adj_p.value)\n+}\n+\n+#formating output matrix depending on genes to keep\n+if(length(genesToKeep)==0){\n+ outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(volcanoNameList,each=4))\n+ outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))\n+ outputData[,1]=c("Volcano","Gene","noGene")\n+ outputData[,2]=c("Comparison","Info","noInfo")\n+}else{\n+ if(length(genesToKeep)==1){\n+ outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(volcanoNameList,each=4))\n+ outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))\n+ outputData[,1]=c("Volcano","Gene",genesToKeep)\n+ outputData[,2]=c("Comparison","Info","na")\n+ if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]\n+ outputData[3,seq(3,ncol(outputData),4)]=prettyNum(dataFrame$p.value,digits=4)\n+ outputData[3,seq(4,ncol(outputData),4)]=prettyNum(dataFrame$adj_p.value,digits=4)\n+ outputData[3,seq(5,ncol(outputData),4)]=prettyNum(2^dataFrame$coefficients,digits=4)\n+ outputData[3,seq(6,ncol(outputData),4)]=prettyNum(dataFrame$coefficients,digits=4)\n+ }else{\n+ #format matrix to be correctly read by galaxy (move headers in first column and row)\n+ outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=nrow(dataFrame$adj_p.value)+2)\n+ outputData[1,]=c("X","X",rep(volcanoNameList,each=4))\n+ outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))\n+ outputData[,1]=c("Volcano","Gene",rownames(dataFrame$adj_p.value))\n+ outputData[,2]=c("Comparison","Info",rep("na",nrow(dataFrame$adj_p.value)))\n+ if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(dataFrame$adj_p.value)]\n+ outputData[3:nrow(outputData),seq(3,ncol(outputData),4)]=prettyNum(dataFrame$p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(4,ncol(outputData),4)]=prettyNum(dataFrame$adj_p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(5,ncol(outputData),4)]=prettyNum(2^dataFrame$coefficients,digits=4)\n+ outputData[3:nrow(outputData),seq(6,ncol(outputData),4)]=prettyNum(dataFrame$coefficients,digits=4)\n+ }\n+}\n+addComment("[INFO]Formated output",T,opt$log,display=FALSE) \n+\n+#write output results\n+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+\n+#sessionInfo()\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/getopt.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/getopt.R Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,773 @@\n+# Copyright (c) 2008-2010 Allen Day\n+# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu> \n+# \n+# Modified by J.Vandel 2017 to consider situation of multiple identical flag\n+# and concatenate as a vector the set of parameter for the same flag instead of\n+# keeping only the last value as done by the previous version.\n+#\n+# This file is free software: you may copy, redistribute and/or modify it \n+# under the terms of the GNU General Public License as published by the \n+# Free Software Foundation, either version 2 of the License, or (at your \n+# option) any later version. \n+# \n+# This file is distributed in the hope that it will be useful, but \n+# WITHOUT ANY WARRANTY; without even the implied warranty of \n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU \n+# General Public License for more details. \n+# \n+# You should have received a copy of the GNU General Public License \n+# along with this program. If not, see <http://www.gnu.org/licenses/>. \n+\n+#\' C-like getopt behavior\n+#\' \n+#\' getopt is primarily intended to be used with ``\\link{Rscript}\'\'. It\n+#\' facilitates writing ``\\#!\'\' shebang scripts that accept short and long\n+#\' flags/options. It can also be used from ``R\'\' directly, but is probably less\n+#\' useful in this context.\n+#\' \n+#\' getopt() returns a \\link{list} data structure containing \\link{names} of the\n+#\' flags that were present in the \\link{character} \\link{vector} passed in under\n+#\' the \\emph{opt} argument. Each value of the \\link{list} is coerced to the\n+#\' data type specified according to the value of the \\emph{spec} argument. See\n+#\' below for details.\n+#\' \n+#\' Notes on naming convention:\n+#\' \n+#\' 1. An \\emph{option} is one of the shell-split input strings.\n+#\' \n+#\' 2. A \\emph{flag} is a type of \\emph{option}. a \\emph{flag} can be defined as\n+#\' having no \\emph{argument} (defined below), a required \\emph{argument}, or an\n+#\' optional \\emph{argument}.\n+#\' \n+#\' 3. An \\emph{argument} is a type of \\emph{option}, and is the value associated\n+#\' with a flag.\n+#\' \n+#\' 4. A \\emph{long flag} is a type of \\emph{flag}, and begins with the string\n+#\' ``--\'\'. If the \\emph{long flag} has an associated \\emph{argument}, it may be\n+#\' delimited from the \\emph{long flag} by either a trailing \\emph{=}, or may be\n+#\' the subsequent \\emph{option}.\n+#\' \n+#\' 5. A \\emph{short flag} is a type of \\emph{flag}, and begins with the string\n+#\' ``-\'\'. If a \\emph{short flag} has an associated \\emph{argument}, it is the\n+#\' subsequent \\emph{option}. \\emph{short flags} may be bundled together,\n+#\' sharing a single leading ``-\'\', but only the final \\emph{short flag} is able\n+#\' to have a corresponding \\emph{argument}.\n+#\'\n+#\' Many users wonder whether they should use the getopt package, optparse package, \n+#\' or argparse package.\n+#\' Here is some of the major differences:\n+#\'\n+#\' Features available in \\code{getopt} unavailable in \\code{optparse}\n+#\'\n+#\' 1. As well as allowing one to specify options that take either\n+#\' no argument or a required argument like \\code{optparse},\n+#\' \\code{getopt} also allows one to specify option with an optional argument.\n+#\' \n+#\' Some features implemented in \\code{optparse} package unavailable in \\code{getopt}\n+#\'\n+#\' 1. Limited support for capturing positional arguments after the optional arguments\n+#\' when \\code{positional_arguments} set to TRUE in \\code{parse_args} \n+#\'\n+#\' 2. Automatic generation of an help option and printing of help text when encounters an "-h"\n+#\' \n+#\' 3. Option to specify default arguments for options as well the\n+#\' variable name to store option values\n+#\'\n+#\' There is also new package \\code{argparse} introduced in 2012 which contains\n+#\' all the features of both getopt and optparse but which has a dependency on\n+#\' Python 2.7 or 3.2+ and has not been used in production since 2008 or 2009\n+#\' like the getopt and optparse packages.\n+#\'\n+#\' Some Features unlikely to be implemented in \\code{g'..b'h it, increment the index, and move on to the next option. we don\'t allow arguments beginning with \'-\' UNLESS\n+ #specfile indicates the value is an "integer" or "double", in which case we allow a leading dash (and verify trailing digits/decimals).\n+ if ( substr(peek.optstring, 1, 1) != \'-\' |\n+ #match negative double\n+ ( substr(peek.optstring, 1, 1) == \'-\'\n+ & regexpr(\'^-[0123456789]*\\\\.?[0123456789]+$\',peek.optstring) > 0\n+ & spec[current.flag, col.mode]== \'double\'\n+ ) |\n+ #match negative integer\n+ ( substr(peek.optstring, 1, 1) == \'-\'\n+ & regexpr(\'^-[0123456789]+$\',peek.optstring) > 0\n+ & spec[current.flag, col.mode]== \'integer\'\n+ )\n+ ) {\n+ if ( debug ) print(paste(\' consuming argument *\',peek.optstring,\'*\',sep=\'\'));\n+ storage.mode(peek.optstring) = spec[current.flag, col.mode];\n+ #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);\n+ i = i + 1;\n+ \n+ #a lone dash\n+ } else if ( substr(peek.optstring, 1, 1) == \'-\' & length(strsplit(peek.optstring,\'\')[[1]]) == 1 ) {\n+ if ( debug ) print(\' consuming "lone dash" argument\');\n+ storage.mode(peek.optstring) = spec[current.flag, col.mode];\n+ #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones\n+ result[[spec[current.flag, col.long.name]]] =c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);\n+ i = i + 1;\n+ \n+ #no argument\n+ } else {\n+ if ( debug ) print(\' no argument!\');\n+ \n+ #if we require an argument, bail out\n+ if ( spec[current.flag, col.has.argument] == flag.required.argument ) {\n+ stop(paste(\'flag "\', this.flag, \'" requires an argument\', sep=\'\'));\n+ \n+ #otherwise set flag as present.\n+ } else if (\n+ spec[current.flag, col.has.argument] == flag.optional.argument |\n+ spec[current.flag, col.has.argument] == flag.no.argument \n+ ) {\n+ x = TRUE;\n+ storage.mode(x) = spec[current.flag, col.mode];\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);\n+ } else {\n+ stop(paste("This should never happen.",\n+ "Is your spec argument correct? Maybe you forgot to set",\n+ "ncol=4, byrow=TRUE in your matrix call?"));\n+ }\n+ }\n+ #trailing flag without required argument\n+ } else if ( spec[current.flag, col.has.argument] == flag.required.argument ) {\n+ stop(paste(\'flag "\', this.flag, \'" requires an argument\', sep=\'\'));\n+ \n+ #trailing flag without optional argument\n+ } else if ( spec[current.flag, col.has.argument] == flag.optional.argument ) {\n+ x = TRUE;\n+ storage.mode(x) = spec[current.flag, col.mode];\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);\n+ \n+ #trailing flag without argument\n+ } else if ( spec[current.flag, col.has.argument] == flag.no.argument ) {\n+ x = TRUE;\n+ storage.mode(x) = spec[current.flag, col.mode];\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);\n+ } else {\n+ stop("this should never happen (2). please inform the author.");\n+ }\n+ #no dangling flag, nothing to do.\n+ } else {\n+ }\n+ \n+ i = i+1;\n+ }\n+ return(result);\n+}\n+\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/heatMapClustering.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/heatMapClustering.R Fri Jun 26 09:38:23 2020 -0400 |
[ |
b'@@ -0,0 +1,896 @@\n+# A command-line interface to plot heatmap based on expression or diff. exp. analysis \n+# written by Jimmy Vandel\n+# one of these arguments is required:\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE, OutDec=".")\n+\n+#get options\n+args <- commandArgs()\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "expressionFile", "x", 1, "character",\n+ "diffAnalyseFile", "x", 1, "character",\n+ "factorInfo","x", 1, "character",\n+ "genericData","x", 0, "logical",\n+ "comparisonName","x",1,"character",\n+ "comparisonNameLow","x",1,"character",\n+ "comparisonNameHigh","x",1,"character",\n+ "filterInputOutput","x", 1, "character",\n+ "FCthreshold","x", 1, "double",\n+ "pvalThreshold","x", 1, "double",\n+ "geneListFiltering","x",1,"character",\n+ "clusterNumber","x",1,"integer",\n+ "maxRows","x",1,"integer",\n+ "sampleClusterNumber","x",1,"integer",\n+ "dataTransformation","x",1,"character",\n+ "distanceMeasure","x",1,"character",\n+ "aggloMethod","x",1,"character",\n+ "personalColors","x",1,"character",\n+ "sideBarColorPalette","x",1,"character",\n+ "format", "x", 1, "character",\n+ "quiet", "x", 0, "logical",\n+ "log", "x", 1, "character",\n+ "outputFile" , "x", 1, "character"),\n+ byrow=TRUE, ncol=4)\n+opt <- getoptLong(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$outputFile)) {\n+ addComment("[ERROR]\'output file\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if(is.null(opt$expressionFile) && !is.null(opt$genericData)){\n+ addComment("[ERROR]generic data clustering is based on expression clustering",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$clusterNumber) || opt$clusterNumber<2) {\n+ addComment("[ERROR]valid genes clusters number is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$sampleClusterNumber) || opt$sampleClusterNumber<1) {\n+ addComment("[ERROR]valid samples clusters number is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$dataTransformation)) {\n+ addComment("[ERROR]data transformation option is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$distanceMeasure)) {\n+ addComment("[ERROR]distance measure option is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$aggloMethod)) {\n+ addComment("[ERROR]agglomeration method option is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$maxRows) || opt$maxRows<2) {\n+ addComment("[ERROR]valid plotted row number is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (!is.null(opt[["comparisonName"]]) && nchar(opt[["comparisonName"]])==0){\n+ addComment("[ERROR]you have to specify comparison",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (!is.null(opt$comparisonNameLow) && nchar(opt$comparisonNameLow)==0){\n+ addComment("[ERROR]you have to specify comparisonLow",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (!is.null(opt$comparisonNameHigh) && nchar(opt$comparisonNameHigh)==0){\n+ addComment("[ERROR]y'..b' \n+ if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){\n+ addComment("[WARNING] some genes selected by the output filter are not in the expression file",T,opt$log)\n+ rowToKeep=intersect(rowToKeep,rownames(expressionMatrix))\n+ }\n+ addComment(c("[INFO]Output filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) \n+}\n+\n+#we add differential analysis info in output if it was directly used for clustering or when it was used for filtering with expression\n+\n+#in case of expression or generic data clustering without filtering based on external stats\n+if(expressionToCluster && is.null(comparisonMatrix)){\n+ if(length(rowToKeep)==0){\n+ addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)\n+ outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE)\n+ }else{\n+ outputData=matrix(0,ncol=2,nrow=length(rowToKeep)+1)\n+ outputData[1,]=c("Gene","Cluster")\n+ outputData[2:(length(rowToKeep)+1),1]=rowToKeep\n+ if(class(rowClust)!="logical" ){\n+ outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep]\n+ }else{\n+ outputData[2:(length(rowToKeep)+1),2]=0\n+ }\n+ }\n+}\n+\n+#in case of generic data clustering with filtering based on generic external data\n+if(!is.null(opt$genericData) && !is.null(comparisonMatrix)){\n+ if(length(rowToKeep)==0){\n+ addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)\n+ outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE)\n+ }else{\n+ outputData=matrix(0,ncol=2+nbComparisons,nrow=length(rowToKeep)+1)\n+ outputData[1,]=c("Gene","Cluster",colnames(comparisonMatrix))\n+ outputData[2:(length(rowToKeep)+1),1]=rowToKeep\n+ if(class(rowClust)!="logical" ){\n+ outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep]\n+ }else{\n+ outputData[2:(length(rowToKeep)+1),2]=0\n+ }\n+ outputData[2:(length(rowToKeep)+1),3:(ncol(comparisonMatrix)+2)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4)\n+ }\n+}\n+\n+#in case of expression data clustering with filtering based on diff. exp. results or diff. exp. results clustering\n+if(is.null(opt$genericData) && !is.null(comparisonMatrix)){\n+ if(length(rowToKeep)==0){\n+ addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)\n+ outputData=matrix(0,ncol=3,nrow=3)\n+ outputData[1,]=c("","","Comparison")\n+ outputData[2,]=c("Gene","Info","Cluster")\n+ outputData[3,]=c("noGene","noInfo","noClustering")\n+ }else{\n+ outputData=matrix(0,ncol=3+nbComparisons*nbColPerContrast,nrow=length(rowToKeep)+2)\n+ outputData[1,]=c("","","Comparison",rep(colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)],each=nbColPerContrast))\n+ outputData[2,]=c("Gene","Info","Cluster",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbComparisons))\n+ outputData[3:(length(rowToKeep)+2),1]=rowToKeep\n+ outputData[3:(length(rowToKeep)+2),2]=comparisonMatrixInfoGene[rowToKeep]\n+ if(class(rowClust)!="logical" ){\n+ outputData[3:(length(rowToKeep)+2),3]=cutree(rowClust,nbClusters)[rowToKeep]\n+ }else{\n+ outputData[3:(length(rowToKeep)+2),3]=0\n+ }\n+ outputData[3:(length(rowToKeep)+2),4:(ncol(comparisonMatrix)+3)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4)\n+ }\n+}\n+\n+addComment("[INFO]Formated output",T,opt$log,display=FALSE) \n+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+ \n+##----------------------\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+\n+#sessionInfo()\n+\n+\n+\n' |
b |
diff -r 000000000000 -r 14045c80a222 src/utils.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/utils.R Fri Jun 26 09:38:23 2020 -0400 |
[ |
@@ -0,0 +1,143 @@ +# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu> +# +# This file is free software: you may copy, redistribute and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 2 of the License, or (at your +# option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +#extendedDist function to correlation measure +distExtended <- function(x,method) { + if(method %in% c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"))return(dist(x,method = method)) + if(method %in% c("pearson", "spearman", "kendall"))return(as.dist(1-cor(t(x),method=method))/2) + if(method %in% c("absPearson", "absSpearman", "absKendall"))return(as.dist(1-abs(cor(t(x),method=method)))) + return(NULL) +} + +##comment function to display message and optionnaly add it to log file + +addComment <- function(text,addToFile=FALSE,fileName=NULL,append=TRUE,display=TRUE){ + if(display)cat(paste(c(text,"\n"),collapse = " ")) + if(addToFile)write(paste(text,collapse = " "),fileName,append=append) +} + +printSessionInfo <- function(fileName=NULL,append=TRUE){ + addComment("[INFO]R session info :",T,fileName,display=FALSE) + tempInfo=sessionInfo() + write(paste(tempInfo$R.version$version.string),fileName,append=append) + write(paste("Platform",tempInfo$platform,sep = " : "),fileName,append=append) + write(paste("Running under",tempInfo$running,sep = " : "),fileName,append=append) + write(paste("Local variables",tempInfo$locale,sep = " : "),fileName,append=append) + write(paste("Attached base packages",paste(tempInfo$basePkgs,collapse = "; "),sep = " : "),fileName,append=append) + if(length(tempInfo$otherPkgs)>0){ + lineToPrint="" + for(iPack in tempInfo$otherPkgs){ + lineToPrint=paste(lineToPrint,iPack$Package," ",iPack$Version,"; ",sep = "") + } + write(paste("Other attached packages",lineToPrint,sep = " : "),fileName,append=append) + } + if(length(tempInfo$loadedOnly)>0){ + lineToPrint="" + for(iPack in tempInfo$loadedOnly){ + lineToPrint=paste(lineToPrint,iPack$Package," ",iPack$Version,"; ",sep = "") + } + write(paste("Loaded packages",lineToPrint,sep = " : "),fileName,append=append) + } +} + +##negative of a mathematical expression +negativeExpression <- function(expression){ + expression=gsub("\\+","_toMinus_",expression) + expression=gsub("\\-","+",expression) + expression=gsub("_toMinus_","-",expression) + if(substr(expression,1,1)!="-" && substr(expression,1,1)!="+"){ + expression=paste(c("-",expression),collapse="") + } + + return(expression) +} + +#' Returns file name of calling Rscript +#' +#' \code{get_Rscript_filename} returns the file name of calling Rscript +#' @return A string with the filename of the calling script. +#' If not found (i.e. you are in a interactive session) returns NA. +#' +#' @export +get_Rscript_filename <- function() { + prog <- sub("--file=", "", grep("--file=", commandArgs(), value=TRUE)[1]) + if( .Platform$OS.type == "windows") { + prog <- gsub("\\\\", "\\\\\\\\", prog) + } + prog +} + +#' Recursively sorts a list +#' +#' \code{sort_list} returns a sorted list +#' @param unsorted_list A list. +#' @return A sorted list. +#' @export +sort_list <- function(unsorted_list) { + for(ii in seq(along=unsorted_list)) { + if(is.list(unsorted_list[[ii]])) { + unsorted_list[[ii]] <- sort_list(unsorted_list[[ii]]) + } + } + unsorted_list[sort(names(unsorted_list))] +} + + +# Multiple plot function +# +# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) +# - cols: Number of columns in layout +# - layout: A matrix specifying the layout. If present, 'cols' is ignored. +# +# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), +# then plot 1 will go in the upper left, 2 will go in the upper right, and +# 3 will go all the way across the bottom. +# +multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { + library(grid) + + # Make a list from the ... arguments and plotlist + plots <- c(list(...), plotlist) + + numPlots = length(plots) + + # If layout is NULL, then use 'cols' to determine layout + if (is.null(layout)) { + # Make the panel + # ncol: Number of columns of plots + # nrow: Number of rows needed, calculated from # of cols + layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), + ncol = cols, nrow = ceiling(numPlots/cols)) + } + + if (numPlots==1) { + print(plots[[1]]) + + } else { + # Set up the page + grid.newpage() + pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) + + # Make each plot, in the correct location + for (i in 1:numPlots) { + # Get the i,j matrix positions of the regions that contain this subplot + matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) + + print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, + layout.pos.col = matchidx$col)) + } + } +} |