Next changeset 1:2762a2622e0d (2020-09-09) |
Commit message:
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df" |
added:
galaxy/wrappers/ArrayNormalization.xml galaxy/wrappers/tool-data/aptTool.loc.sample galaxy/wrappers/tool_data_table_conf.xml.sample src/ExprPlotsScript.R src/General_functions.py src/LIMMA_options.py src/LIMMAscriptV4.R src/VolcanoPlotsScript.R src/getopt.R src/heatMapClustering.R src/utils.R |
b |
diff -r 000000000000 -r 708f43bda2b6 galaxy/wrappers/ArrayNormalization.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/ArrayNormalization.xml Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,361 @@\n+<tool name="GIANT-Normalization with APT Summarize" id="giant_aptsummarize" version="0.1.1">\r\n+ <description>Apply Affymetrix Power Tool summarize function to .CEL collection</description>\r\n+ <requirements>\r\n+ <requirement type="package" version="2.10.1">apt-probeset-summarize</requirement>\r\n+ </requirements>\r\n+ <stdio>\r\n+ <regex match="Execution halted"\r\n+ source="both"\r\n+ level="fatal"\r\n+ description="Execution halted, please contact tool developer or administrators." />\r\n+ <exit_code range="10" level="fatal" description="Error in post-treatments, see log file for more information." />\r\n+ <exit_code range="1:9" level="fatal" description="Error during apt command execution, see log file for more information." />\r\n+ </stdio>\r\n+\r\n+\r\n+\r\n+ <command>\t<![CDATA[\r\n+ ##ONLY FOR LOCAL TEST PURPOSE, COMMENT OTHERWISE\r\n+ ##set $pathToApt=\'/mnt/galaxy/home/galaxy/Software/ThermoFischer/apt-1.20.6-x86_64-intel-linux/bin\'\r\n+\r\n+ mkdir ./apt_output/;\r\n+\r\n+ #if $mainCondition.arraySelection=="other":\r\n+ #if $CDFfile:\r\n+ ##.cdf file should has original name\r\n+ cp $CDFfile ./${CDFfile.name};\r\n+ #end if\r\n+ #end if\r\n+\r\n+\r\n+##run APT command\r\n+\r\n+##ONLY FOR LOCAL TEST PURPOSE COMMENT IF NOT NECESSARY\r\n+##${pathToApt}/apt-probeset-summarize -a $commandLine \r\n+##AND UNCOMMENT LINE BELOW\r\n+apt-probeset-summarize -a $commandLine \r\n+\r\n+#if $mainCondition.arraySelection=="other":\r\n+ #if $PGFfile and $CLFfile:\r\n+ --use-pgf-names \r\n+ -p $PGFfile\r\n+ -c $CLFfile\r\n+ #end if \r\n+ #if $CDFfile:\r\n+ -d ./${CDFfile.name}\r\n+ #end if\r\n+ #if $MPSfile and $normLevel=="core":\r\n+ -m $MPSfile\r\n+ #end if\r\n+ #if $BGPfile:\r\n+ -b $BGPfile \r\n+ #end if\r\n+\r\n+#else:\r\n+ \r\n+ #if $mainCondition.arrayID.fields.pathPGF!="null" and $mainCondition.arrayID.fields.pathCLF!="null":\r\n+ --use-pgf-names \r\n+ -p $mainCondition.arrayID.fields.pathPGF\r\n+ -c $mainCondition.arrayID.fields.pathCLF\r\n+ #else:\r\n+ #if $mainCondition.arrayID.fields.pathCDF!="null":\r\n+ -d $mainCondition.arrayID.fields.pathCDF\r\n+ #end if\r\n+ #end if\r\n+ #if $normLevel=="core" and $mainCondition.arrayID.fields.pathMPS!="null":\r\n+ -m $mainCondition.arrayID.fields.pathMPS\r\n+ #end if\r\n+ #if $mainCondition.arrayID.fields.pathBGP!="null":\r\n+ -b $mainCondition.arrayID.fields.pathBGP\r\n+ #end if\r\n+#end if\r\n+\r\n+-o ./apt_output/\r\n+\r\n+\\$(echo \'$inputData\' | tr "," " ") >> $log 2>&1;\r\n+\r\n+ret_code=\\$?;\r\n+\r\n+printf "[INFO]Normalized files: "$inputData >> $log; \r\n+\r\n+if [ \\$ret_code != 0 ]; then\r\n+ exit \\$ret_code;\r\n+else\r\n+\r\n+ ##launch post-treatment scripts\r\n+\r\n+ bash $scriptFormat;\r\n+ #if $annotationCondition.addAnnotation=="true":\r\n+ echo "[INFO]Annotation file used:" >> $log;\r\n+ #if $mainCondition.arraySelection=="other":\r\n+ bash $scriptAnnotation $mainCondition.annotationFile;\r\n+ echo $mainCondition.annotationFile.name >> $log;\r\n+ #else:\r\n+ #if $normLevel=="core":\r\n+ bash $scriptAnnotation $mainCondition.arrayID.fields.pathAnnotTrans;\r\n+ #else:\r\n+ bash $scriptAnnotation $mainCondition.arrayID.fields.pathAnnotProbe;\r\n+ #end if\r\n+ cat $mainCondition.arrayID.fields.versionInfo >> $log;\r\n+ #end if\r\n+ #end if\r\n+ fi;\r\n+ printf "[INFO]End of tool script" >> $log; \r\n+\t]]>\r\n+ </command>\r\n+\r\n+\r\n+\r\n+ <configfiles>\r\n+ <configfile name="scriptFormat">\r\n+<![CDATA[\r\n+#for $inputDataset in $inputData\r\n+ echo \'${inputDataset.name}\' >> ./tempColumnName.txt\r\n+#end for\r\n+awk \' BEGIN{firstLine=1;OFS="";ORS=""} ARGIND==1{tab[FNR]=$1;nbCol=FNR} ARGIND==2 && !($1 ~ /\\\\#/){if(firstLine==0){print "\\n"\\$0}else{print "Conditions";for(i=1;i<=nbCol;i++){print "\\t"tab[i]};firstLine=0}}\' ./tempColumnName.txt ./apt_output/*summary.txt > $outputData\r\n+\r\n+if [ ! -e $outputData ]; then\r\n+ printf "[ERROR]Formating results failed" >> $log; \r\n+ exit 10\r\n+fi\r\n+]]>\r\n+ </configfile> \r\n+\r\n+ <configfile name="scriptAnnotation">\r\n+<![CDATA[\r\n+#if $annotationCondition.a'..b' select the array kind according to .CEL files, thus pre-loaded configuration files will be used automatically for normalization. If configuration files were not pre-loaded, you have to choose "array files selection" and then select manually configuration files for the corresponding array. Configuration files (.pgf, .clf, .cdf, .mps, .bgp) are available in library archive files from the Affymetrix webpage of the array (as an example, configuration files for HTA2.0 array are found in the \'Main Analysis Files\' in the \'Support file\' section of `this page`__). All these files are not mandatory for all normalizations. For basic normalization, both .pgf and.clf files or only .cdf file are mandatory. .mps file is requested for gene level normalization if it is available (see Normalization level parameter). .bgp file is requested for analysis pathway with specific background computation. Annotation file is a simple two column tabular file, with first column containing probe set names and second corresponding annotations, it should not contains any column header. This file can be generated from Affymetrix annotation file available in the same section as array configuration files in Affymetrix webpages, please keep a trace of used annotation version to insure reproducibility.\r\n+\r\n+Extract of annotation file::\r\n+\r\n+ TC01000020.hg.1 SAMD11 \r\n+ TC01000021.hg.1 KLHL17 \r\n+ TC01000022.hg.1 PLEKHN1 \r\n+ TC01000023.hg.1 ISG15 \r\n+ TC01000024.hg.1 AGRN \r\n+\r\n+\\- **Gene annotation** : add gene annotation when available (for pre-loaded arrays, annotation file path should be filled in the aptTool.loc file, for other arrays, annotation file should be selected manually). You can choose to keep or discard in normalized file (meta) probe sets without match in annotation file and select merging policy for probe sets sharing same gene annotation.\r\n+\r\n+-----\r\n+\r\n+**Advanced parameters**\r\n+\r\n+\\- **Additional parameter** to give to \'apt-probeset-summarize\' command line except -a, -o, -d, -p, -c, -m, -b parameters already filled by previous fields.\r\n+\r\n+-----\r\n+\r\n+**Outputs**\r\n+\r\n+\\- **tabular file** containing log2 transformed normalized data as tab delimited matrix. First row contains .CEL names and first column contains (meta) probe set names and/or annotated gene names::\r\n+\r\n+ Conditions 157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL 155_(HuGene-2_0-st).CEL 154_(HuGene-2_0-st).CEL \r\n+ DDX11L2 4.500872 4.429759 4.780281 4.996189 \r\n+ MIR1302-2 3.415065 3.520472 3.471503 3.567988 \r\n+ OR4F5 3.737956 3.011586 3.424494 3.497545\r\n+ VWA1 5.189621 5.129595 4.806793 5.227014\r\n+\r\n+\\- **LOG file** containing information about execution. Useful especially if tool execution fais. Please attach this log file in any bug report.\r\n+\r\n+ .. __: http://media.affymetrix.com/support/developer/powertools/changelog/apt-probeset-summarize.html#intro\r\n+\r\n+ .. __: https://www.thermofisher.com/order/catalog/product/902233#/902233\r\n+\r\n+]]> \r\n+ </help>\r\n+\r\n+\r\n+ <citations>\r\n+ <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870}\r\n+ }</citation>\r\n+\r\n+ <citation type="bibtex">@online{apt, author = {Thermo Fisher Scientific}, title = {Affymetrix Power Tools}, publisher = {Life Technologies Corp.}, address = {USA, MA}, version = {2.10.2}, year = {2018}, url = {https://www.affymetrix.com/support/developer/powertools/changelog/index.html}\r\n+ }</citation>\r\n+ </citations>\r\n+</tool>\r\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 galaxy/wrappers/tool-data/aptTool.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/tool-data/aptTool.loc.sample Fri Jun 26 09:35:11 2020 -0400 |
b |
@@ -0,0 +1,17 @@ +#This file lists the locations of all files required by apt tool +#under the "apt" directory (a directory that contains a directory +#for each chip). This file has the TAB delimited format : +# +#<shortName> <fullName> <pathPGF> <pathCLF> <pathMPS> <pathBGP> <pathCDF> <pathAnnotTrans> <pathAnnotProbe> <versionInfo> +# +# +HTA2.0 HTA 2.0 arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/HTA2.0.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/HTA2.0.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/HTA2.0.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/HTA2.0.bgp /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/HTA2.0.cdf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/formatedAnnotation36.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/formatedAnnotation36.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HTA2.0/versionInfo.txt +Mouse430A1.0 Mouse Genome 430A 1.0 arrays null null null null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430A_1.0/Mouse430A_1.0.cdf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430A_1.0/formatedAnnotation.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430A_1.0/formatedAnnotation.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430A_1.0/versionInfo.txt +Mouse430B1.0 Mouse Genome 430B 1.0 arrays null null null null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430B_1.0/Mouse430B_1.0.cdf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430B_1.0/formatedAnnotation.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430B_1.0/formatedAnnotation.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430B_1.0/versionInfo.txt +Mouse4302.0 Mouse Genome 430 2.0 arrays null null null null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430_2.0/Mouse430_2.0.cdf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430_2.0/formatedAnnotation.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430_2.0/formatedAnnotation.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MouseGenome430_2.0/versionInfo.txt +MoGene1.0ST Mouse Gene 1.0 ST arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/MoGene1.0ST.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/MoGene1.0ST.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/MoGene1.0ST.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/MoGene1.0ST.bgp null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/formatedAnnotation.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/formatedAnnotation.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.0ST/versionInfo.txt +MoGene1.1ST Mouse Gene 1.1 ST arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/MoGene1.1ST.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/MoGene1.1ST.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/MoGene1.1ST.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/MoGene1.1ST.bgp null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/formatedAnnotation.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/formatedAnnotation.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene1.1ST/versionInfo.txt +MoGene2.0ST Mouse Gene 2.0 ST arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/MoGene2.0ST.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/MoGene2.0ST.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/MoGene2.0ST.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/MoGene2.0ST.bgp null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/formatedAnnotation.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/formatedAnnotation.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MoGene2.0ST/versionInfo.txt +MTA1.0 Mouse Transcriptome 1.0 arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/MTA1.0.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/MTA1.0.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/MTA1.0.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/MTA1.0.bgp null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/formatedAnnotation.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/formatedAnnotation.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/MTA1.0/versionInfo.txt +HuGene1.0ST Human Gene 1.0 ST arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/HuGene1.0ST.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/HuGene1.0ST.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/HuGene1.0ST.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/HuGene1.0ST.bgp null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/formatedAnnotation.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/formatedAnnotation.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene1.0ST/versionInfo.txt +HuGene2.0ST Human Gene 2.0 ST arrays /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/HuGene2.0ST.pgf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/HuGene2.0ST.clf /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/HuGene2.0ST.mps /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/HuGene2.0ST.bgp null /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/formatedAnnotation.transcript.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/formatedAnnotation.probeset.csv /home/galaxy/Data_Storage/Galaxy_tool-data/affyChipConfig/HuGene2.0ST/versionInfo.txt |
b |
diff -r 000000000000 -r 708f43bda2b6 galaxy/wrappers/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/tool_data_table_conf.xml.sample Fri Jun 26 09:35:11 2020 -0400 |
b |
@@ -0,0 +1,13 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of files required for apt tool --> + <table name="aptTool" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, pathPGF, pathCLF, pathMPS, pathBGP, pathCDF, pathAnnotTrans, pathAnnotProbe, versionInfo</columns> + <file path="${__HERE__}/tool-data/aptTool.loc" /> + </table> + <!-- Locations of files required for LIMMA tool --> + <table name="LimmaTool" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, rowTypeID, rowTypeName</columns> + <file path="${__HERE__}/tool-data/LimmaTool.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 708f43bda2b6 src/ExprPlotsScript.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ExprPlotsScript.R Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,465 @@\n+# A command-line interface to basic plots for use with Galaxy\n+# written by Jimmy Vandel\n+# one of these arguments is required:\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+#get options\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)\n+args <- commandArgs()\n+\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "dataFile", "i", 1, "character",\n+ "factorInfo","t", 1, "character",\n+ "dataFileFormat","j",1,"character",\n+ "conditionNames","c",1,"character",\n+ "format", "f", 1, "character",\n+ "quiet", "q", 0, "logical",\n+ "log", "l", 1, "character",\n+ "histo" , "h", 1, "character",\n+ "maPlot" , "a", 1, "character",\n+ "boxplot" , "b", 1, "character",\n+ "microarray" , "m", 1, "character",\n+ "acp" , "p" , 1, "character",\n+ "screePlot" , "s" , 1, "character"),\n+ byrow=TRUE, ncol=4)\n+opt <- getopt(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) {\n+ addComment("[ERROR]\'dataFile\' and it format are required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$histo) & is.null(opt$maPlot) & is.null(opt$boxplot) & is.null(opt$microarray) & is.null(opt$acp)){\n+ addComment("[ERROR]Select at least one plot to draw",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+verbose <- if (is.null(opt$quiet)) {\n+ TRUE\n+}else{\n+ FALSE}\n+\n+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)\n+\n+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)\n+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)\n+\n+#directory for plots\n+dir.create(file.path(getwd(), "plotDir"))\n+dir.create(file.path(getwd(), "plotLyDir"))\n+\n+#silent package loading\n+suppressPackageStartupMessages({\n+ library("oligo")\n+ library("ff")\n+ library("ggplot2")\n+ library("plotly")\n+})\n+\n+\n+#chargement des fichiers en entr\xc3\xa9e\n+#fichier de type CEL\n+dataAreFromCel=FALSE\n+if(toupper(opt$dataFileFormat)=="CEL"){\n+ dataAreFromCel=TRUE\n+ celData=read.celfiles(unlist(strsplit(opt$dataFile,",")))\n+ #load all expressions\n+ dataMatrix=exprs(celData)\n+ #select "pm" probes\n+ probeInfo=getProbeInfo(celData,probeType = c("pm"),target="probeset")\n+ #reduce dataMatrix to log expression matrix for a randomly probe selection\n+ dataMatrix=log2(dataMatrix[sample(unique(probeInfo[,1]),min(100000,length(unique(probeInfo[,1])))),])\n+ addComment("[INFO]Raw data are log2 transformed",TRUE,opt$log,display=FALSE)\n+ remove(probeInfo)\n+}else{\n+ #fichier deja tabule\n+ dataMatrix=read.csv(file=opt$dataFile,header=F,sep="\\t",colClasses="character")\n+ #remove first row to convert it as colnames (to avoid X before colnames with header=T)\n+ colNamesData=dataMatrix[1,-1]\n+ dataMatrix=dataMatrix[-1,]\n+ #remove first colum to convert it as rownames\n+ rowNamesData=dataMatrix[,1]\n+ dataMatrix=dataMatrix[,-1]\n+ if(is.data.frame(dataMatrix)){\n+ dataMatrix=data.matrix(dataMatrix)\n+ }else{\n+ dataMatrix=data.matrix(as.numeric(dataMatrix))\n'..b'ot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = \'text\', inherit = F, text=rownames(PACres$x), hoverinfo=\'skip\', showlegend = FALSE, color=I(\'black\'))\n+ #save the plotly plot\n+ htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F)\n+ }\n+ }else{\n+ for(iFactorBis in (iFactor+1):length(orderedFactors)){\n+ if(length(table(factorInfoMatrix[,orderedFactors[iFactorBis]]))<=length(symbolset)){\n+ dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x), Attribute1=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactor]], Attribute2=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactorBis]], hoverLabel=unlist(lapply(rownames(PACres$x),function(x)paste(factorInfoMatrix[x,-1],collapse=","))))\n+ p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = \'scatter3d\', mode="markers", color=~Attribute1,colors=rainbow(length(levels(dataToPlot$Attribute1))+2),symbol=~Attribute2,symbols = symbolset,hoverinfo = \'text\', text = ~paste(Experiments,"\\n",hoverLabel),marker=list(size=5))%>%\n+ layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),\n+ legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))\n+ fileIdent=paste(correspondanceNameTable[orderedFactors[c(iFactor,iFactorBis)],1],collapse="_AND_")\n+ #add text label to plot\n+ ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = \'text\', inherit = F, text=rownames(PACres$x), hoverinfo=\'skip\', showlegend = FALSE, color=I(\'black\'))\n+ #save the plotly plot\n+ htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F)\n+ }else{\n+ addComment(c("[WARNING]PCA with",orderedFactors[iFactor],"and",orderedFactors[iFactorBis],"groups cannot be displayed, too many groups (max",length(symbolset),")"),T,opt$log,display=FALSE) \n+ }\n+ }\n+ }\n+ }\n+ }else{\n+ dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x))\n+ p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = \'scatter3d\', mode="markers",marker=list(size=5,color="salmon"),hoverinfo = \'text\',text = ~paste(Experiments))%>%\n+ layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")),\n+ legend=list(font = list(family = "sans-serif",size = 15,color = "#000")))\n+ ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = \'text\', inherit = F, text=rownames(PACres$x), hoverinfo=\'skip\', showlegend = FALSE, color=I(\'black\'))\n+ \n+ #save plotly files \n+ htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_plot.html"),collapse=""),selfcontained = F)\n+ }\n+ remove(p,dataToPlot,dataFiltered)\n+ addComment("[INFO]ACP plot drawn",T,opt$log,display=FALSE)\n+}\n+\n+#write correspondances between plot file names and displayed names in figure legends, usefull to define html items in xml file\n+write.table(correspondanceNameTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+#sessionInfo()\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/General_functions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/General_functions.py Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,206 @@\n+import re\n+import numpy as np\n+\n+def get_column_names( file_path, toNotConsider=-1, each=1):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tcpt=0\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif i!=toNotConsider:#to squeeze the first column\n+\t\t\tif cpt==0:\n+\t\t\t\toptions.append( ( field_component, field_component, False ) )\n+\t\t\tcpt+=1\n+\t\t\tif cpt==each:\n+\t\t\t\tcpt=0\n+\tinputfile.close()\n+\treturn options\n+\n+def get_column_names_filteredList( file_path, toNotConsider=[], each=1):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tcpt=0\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif i not in toNotConsider:#to squeeze the first columns\n+\t\t\tif cpt==0:\n+\t\t\t\toptions.append( ( field_component, field_component, False ) )\n+\t\t\tcpt+=1\n+\t\t\tif cpt==each:\n+\t\t\t\tcpt=0\n+\tinputfile.close()\n+\treturn options\n+\n+def get_column_names_mergeNumber(file_path, numberToMerge=1, toNotConsider=[]):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tif int(numberToMerge)>0:\n+\t\tiHeader=0\n+\t\tfor iCurrentLine in inputfile:\n+\t\t\tiHeader=iHeader+1\n+\t\t\tif iHeader>int(numberToMerge):\n+\t\t\t\tbreak\n+\t\t\tcurrentLine=iCurrentLine.strip().split("\\t")\n+\t\t\tiOption=-1\n+\t\t\tfor i, field_component in enumerate( currentLine ):\n+\t\t\t\tif i not in toNotConsider:#to squeeze specified columns\n+\t\t\t\t\tiOption=iOption+1\n+\t\t\t\t\tif iHeader==1:\n+\t\t\t\t\t\toptions.append( ( str(field_component), str(field_component), False ) )\n+\t\t\t\t\telse:\n+\t\t\t\t\t\toptions[iOption]=(options[iOption][0]+"_"+str(field_component),options[iOption][1]+"_"+str(field_component),False)\n+\telse:\n+\t\tcurrentLine = next(inputfile).strip().split("\\t")\n+\t\tfor i, field_component in enumerate( currentLine ):\n+\t\t\tif i not in toNotConsider:#to squeeze specified columns\n+\t\t\t\toptions.append( ( "Column_"+str(i), "Column_"+str(i), False ) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_row_names( file_path, factorName ):\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tiColumn=-1\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif field_component==factorName:#to test\n+\t\t\tiColumn=i\n+\toptions=[]\n+\tif iColumn!=-1:\n+\t\tfor nextLine in inputfile:\n+\t\t\tnextLine=nextLine.strip().split("\\t")\n+\t\t\tif len(nextLine)>1:\n+\t\t\t\tif (nextLine[iColumn], nextLine[iColumn], False) not in options:\n+\t\t\t\t\toptions.append( (nextLine[iColumn], nextLine[iColumn], False) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_condition_file_names( file_list, toNotConsider=-1, each=1):\n+\toptions=[]\n+\tif not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names\n+\t\tinputfile = open(file_list.file_name)\n+\t\tfirstLine = next(inputfile).strip().split("\\t")\n+\t\tcpt=0\n+\t\tfor i, field_component in enumerate( firstLine ):\n+\t\t\tif i!=toNotConsider:#to squeeze the first column\n+\t\t\t\tif cpt==0:\n+\t\t\t\t\toptions.append( ( field_component, field_component, False ) )\n+\t\t\t\tcpt+=1\n+\t\t\t\tif cpt==each:\n+\t\t\t\t\tcpt=0\n+\t\tinputfile.close()\n+\telse:#if input file is a .cel file list or a collection\n+\t\tif not hasattr(file_list[0],\'collection\'):#if it is not a collection, get name easily\n+\t\t\tfor i, field_component in enumerate( file_list ):\n+\t\t\t\toptions.append( ( field_component.name, field_component.name, False ) )\n+\t\telse:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object\n+\t\t\tfor i, field_component in enumerate( file_list[0].collection.elements ):\n+\t\t\t\toptions.append( ( field_component.element_identifier, field_component.element_identifier, False ) )\n+\treturn options\n+\n+def generateFactorFile( file_list, factor_list, outputFileName, logFile):\n+\tforbidenCharacters={"*",":",",","|"}\n+\toutputfile = open(outputFileName, \'w\')\n+\toutputLog = open(logFile, \'w\')\n+\tsampleList=[]\n+\tif not isinstance(file_list,list):\n+\t\tconditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)\n+\telse :\n+\t\tconditionNames=get_condition_file'..b': \'"+currentFactor+"\'\\n")\t\n+\t\t\t\t\treturn 4\n+\t\t\t#check if factor allready named like that\n+\t\t\tif not globalDict.get(currentFactor) is None:\n+\t\t\t\toutputLog.write("[ERROR] \'"+currentFactor+"\' is used several times as factor name\\n")\t\n+\t\t\t\treturn 3\n+\t\t\tglobalDict[currentFactor]=dict()\n+\t\t\tfirstLine=firstLine+"\\t"+currentFactor\n+\t\t\tfactorNameList.append(currentFactor)\n+\t\t\tif len(factor_component[\'valueList\'])<=1:#check if there is at least two value available\n+\t\t\t\toutputLog.write("[ERROR] at least two different values are necessary for \'"+currentFactor+"\' factor\\n")\n+\t\t\t\treturn 1\n+\t\t\telse:\n+\t\t\t\tfor iValue, value_component in enumerate( factor_component[\'valueList\'] ):\n+\t\t\t\t\tcurrentValue=str(value_component[\'valueName\'])\n+\t\t\t\t\t#check if factor name contains forbidden characters\n+\t\t\t\t\tfor specialCharacter in forbidenCharacters:\n+\t\t\t\t\t\tif currentValue.find(specialCharacter)!=-1:\n+\t\t\t\t\t\t\toutputLog.write("[ERROR] \'"+specialCharacter+"\' character is forbidden in value name : \'"+currentValue+"\'\\n")\t\n+\t\t\t\t\t\t\treturn 4\n+\t\t\t\t\tcurrentSample=str(value_component[\'valueConditions\']).split(",")\n+\t\t\t\t\tfor iSample, sample_component in enumerate (currentSample):\n+\t\t\t\t\t\tif not sample_component in currentSampleList:\n+\t\t\t\t\t\t\toutputLog.write("[ERROR] sample "+sample_component+" was assigned several times for factor \'"+currentFactor+"\'\\n")\n+\t\t\t\t\t\t\treturn 2\n+\t\t\t\t\t\tcurrentSampleList.remove(sample_component)\n+\t\t\t\t\t\tglobalDict[currentFactor][sample_component]=currentValue\n+\t\t\tif(len(currentSampleList)>0):\n+\t\t\t\toutputLog.write("[ERROR] for factor \'"+currentFactor+"\'\' sample "+str(currentSampleList)+" are not assigned to any value\\n")\n+\t\t\t\treturn 2\n+\toutputLog.write("[INFO] "+str(len(globalDict))+" factors are detected\\n")\n+\t#start writing the factor file\n+\toutputfile.write(firstLine+"\\n") \n+\tfor iSample, sample_component in enumerate(sampleList):\n+\t\tnewLine=sample_component\n+\t\tfor iFactor, factor_component in enumerate(factorNameList):\n+\t\t\tnewLine=newLine+"\\t"+globalDict[factor_component][sample_component]\n+\t\toutputfile.write(newLine+"\\n") \n+\toutputfile.close()\n+\toutputLog.close()\n+\treturn 0\n+\n+def selectSubSetTable(file_path,headerLine_number,columnsToAdd,columnNamesToKeep,outputFileName,logFile):\n+\toutputLog = open(logFile, \'w\')\n+\toutputLog.write("[INFO] header line number : "+ headerLine_number+" lines\\n")\t\n+\tavailableColumnsTuple=get_column_names_mergeNumber(file_path, headerLine_number)\n+\t#convert tuple list as a simple array\n+\tavailableColumns=[]\n+\tfor iTuple, tuple_content in enumerate (availableColumnsTuple): \n+\t\tavailableColumns.append(str(tuple_content[0]))\n+\tif len(availableColumns)==0:\n+\t\toutputLog.write("[ERROR] No detected columns in input file\\n")\t\n+\t\treturn 1\n+\tselectedColumns=list(columnsToAdd)\n+\tfor iVolcano, volcano_content in enumerate(columnNamesToKeep):\n+\t\tselectedColumns.append(availableColumns.index(volcano_content[\'pvalColumn\']))\n+\t\tif volcano_content[\'fdrColumn\'] in availableColumns:\n+\t\t\tselectedColumns.append(availableColumns.index(volcano_content[\'fdrColumn\']))\n+\t\telse:\n+\t\t\tselectedColumns.append(0)\n+\t\tselectedColumns.append(availableColumns.index(volcano_content[\'fcColumn\']))\n+\tif len(selectedColumns)!=(3*len(columnNamesToKeep)+len(columnsToAdd)):\n+\t\toutputLog.write("[ERROR] matching between input file colnames and requested column names failed\\n")\t\n+\t\treturn 1\n+\toutputLog.write("[INFO] columns kept : "+str(selectedColumns)+"\\n")\t\n+\t#start writting formatted file\n+\tinputfile = open(file_path)\n+\toutputfile = open(outputFileName, \'w\')\n+\tiLineCpt=-1\n+\tfor iCurrentLine in inputfile:\n+\t\tiLineCpt=iLineCpt+1\n+\t\tif iLineCpt>=int(headerLine_number):\n+\t\t\tcurrentLineFields=np.array(iCurrentLine.strip().split("\\t"))\n+\t\t\tnewLine="\\t".join(currentLineFields[selectedColumns])\n+\t\t\toutputfile.write(newLine+"\\n")\n+\tif iLineCpt<int(headerLine_number):\n+\t\toutputLog.write("[ERROR] not enough lines in input files ("+(iLineCpt+1)+" lines)\\n")\t\n+\t\treturn 1\n+\tinputfile.close()\n+\toutputfile.close()\n+\toutputLog.close()\n+\treturn 0\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/LIMMA_options.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/LIMMA_options.py Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,330 @@\n+import re\n+\n+def get_column_names( file_path, toNotConsider=None, toNotConsiderBis=None):\n+\toptions=[]\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif i!=0 and field_component!=toNotConsider and field_component!=toNotConsiderBis:#to squeeze the first column\n+\t\t\toptions.append( ( field_component, field_component, False ) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_row_names( file_path, factorName ):\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tiColumn=-1\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif field_component==factorName:#to test\n+\t\t\tiColumn=i\n+\toptions=[]\n+\tif iColumn!=-1:\n+\t\tfor nextLine in inputfile:\n+\t\t\tnextLine=nextLine.strip().split("\\t")\n+\t\t\tif len(nextLine)>1:\n+\t\t\t\tif (nextLine[iColumn], nextLine[iColumn], False) not in options:\n+\t\t\t\t\toptions.append( (nextLine[iColumn], nextLine[iColumn], False) )\n+\tinputfile.close()\n+\treturn options\n+\n+def get_row_names_interaction( file_path, factorNameA, factorNameB ):\n+\tinputfile = open(file_path)\n+\tfirstLine = next(inputfile).strip().split("\\t")\n+\tiColumnA=-1\n+\tiColumnB=-1\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif field_component==factorNameA:#to test\n+\t\t\tiColumnA=i\n+\t\tif field_component==factorNameB:#to test\n+\t\t\tiColumnB=i\n+\tpossibleValuesA=[]\n+\tpossibleValuesB=[]\n+\tif iColumnA!=-1 and iColumnB!=-1:\n+\t\tfor nextLine in inputfile:\n+\t\t\tnextLine=nextLine.strip().split("\\t")\n+\t\t\tif len(nextLine)>1:\n+\t\t\t\tif nextLine[iColumnA] not in possibleValuesA:\n+\t\t\t\t\tpossibleValuesA.append(nextLine[iColumnA])\n+\t\t\t\tif nextLine[iColumnB] not in possibleValuesB:\n+\t\t\t\t\tpossibleValuesB.append(nextLine[iColumnB])\n+\tinputfile.close()\t\n+\toptions=[]\n+\tif len(possibleValuesA)>=1 and len(possibleValuesB)>=1 and possibleValuesA[0]!="None" and possibleValuesB[0]!="None":\n+\t\tfor counterA in range(len(possibleValuesA)):\n+\t\t\tfor counterB in range(len(possibleValuesB)):\n+\t\t\t\toptions.append( (possibleValuesA[counterA]+"*"+possibleValuesB[counterB], possibleValuesA[counterA]+"*"+possibleValuesB[counterB], False) )\t\n+\treturn options\n+\n+def get_comparisonsA( factorA, valuesA ):\n+\toptions=[]\n+\tformatValuesA=re.sub("(^\\[u\')|(\'\\]$)","", str(valuesA))\n+\tpossibleValues=formatValuesA.split("\', u\'")\n+\tif len(possibleValues)>=2:\n+\t\tfor counter in range(len(possibleValues)-1):\n+\t\t\tfor innerCounter in range(counter+1,len(possibleValues)):\n+\t\t\t\toptions.append( (possibleValues[counter]+" - "+possibleValues[innerCounter], possibleValues[counter]+" - "+possibleValues[innerCounter], False) )\n+\t\t\t\toptions.append( (possibleValues[innerCounter]+" - "+possibleValues[counter], possibleValues[innerCounter]+" - "+possibleValues[counter], False) )\n+\treturn options\n+\n+def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):\n+\toptions=[]\n+\tformatValuesA=re.sub("(^\\[u\')|(\'\\]$)","", str(valuesA))\n+\tpossibleValuesA=formatValuesA.split("\', u\'")\n+\tformatValuesB=re.sub("(^\\[u\')|(\'\\]$)","", str(valuesB))\n+\tpossibleValuesB=formatValuesB.split("\', u\'")\n+\tif str(interaction)=="False":\n+\t\tif len(possibleValuesA)>=2:\n+\t\t\tfor counter in range(len(possibleValuesA)-1):\n+\t\t\t\tfor innerCounter in range(counter+1,len(possibleValuesA)):\n+\t\t\t\t\toptions.append( (possibleValuesA[counter]+" - "+possibleValuesA[innerCounter], possibleValuesA[counter]+" - "+possibleValuesA[innerCounter], False) )\n+\t\t\t\t\toptions.append( (possibleValuesA[innerCounter]+" - "+possibleValuesA[counter], possibleValuesA[innerCounter]+" - "+possibleValuesA[counter], False) )\n+\t\tif len(possibleValuesB)>=2:\n+\t\t\tfor counter in range(len(possibleValuesB)-1):\n+\t\t\t\tfor innerCounter in range(counter+1,len(possibleValuesB)):\n+\t\t\t\t\toptions.append( (possibleValuesB[counter]+" - "+possibleValuesB[innerCounter], possibleValuesB[counter]+" - "+possibleValuesB[innerCounter], False) )\n+\t\t\t\t\toptions.append( (possibleValuesB[innerCounter]+" - "+possibleValuesB[counter], possibleValuesB[innerCounter]+" - "+possible'..b'ield_component not in dico):\n+\t\t\t\t\t\tdico[field_component]="Condition"+str(iCondition)\n+\t\t\t\t\t\tnewCurrentLine="Condition"+str(iCondition)\n+\t\t\t\t\t\tiCondition+=1\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tnewCurrentLine=dico[field_component]\n+\t\t\t\telse:\n+\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\tdico[field_component]="Value"+str(iValue)\n+\t\t\t\t\t\tnewCurrentLine+="\\tValue"+str(iValue)\n+\t\t\t\t\t\tiValue+=1\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tnewCurrentLine+="\\t"+dico[field_component]\n+\t\toutputfile.write(newCurrentLine+"\\n")\n+\t\tfirstLine=0\n+\toutputfile.close()\n+\tinputfile.close()\n+\t##check if any entries in dictionnary contains forbiden character\n+\tfor key, value in dico.items():\n+\t\tfor specialCharacter in forbidenCharacters:\n+\t\t\tif value.startswith("Condition")==False and key.find(specialCharacter)!=-1:\n+\t\t\t\treturn 1\n+\t##then write dictionnary in a additional file\n+\toutputfile = open(ouputDictionnary, \'w\')\n+\tfor key, value in dico.items():\n+\t\toutputfile.write(key+"\\t"+value+"\\n")\n+\toutputfile.close()\n+\treturn 0\n+\n+\n+def replaceNamesBlockInFiles(expressionFile_name,conditionFile_name,blockingFile_name,outputExpressionFile,outputConditionFile,outputBlockingFile,ouputDictionnary):\n+\tdico={}\n+\tforbidenCharacters={"*",":",",","|"}\n+\t##start with expression file, read only the first line\n+\tinputfile = open(expressionFile_name)\n+\toutputfile = open(outputExpressionFile, \'w\')\n+\tfirstLine = next(inputfile).rstrip().split("\\t")\n+\tiCondition=1\n+\tnewFirstLine=""\n+\tfor i, field_component in enumerate( firstLine ):\n+\t\tif (i>0):\n+\t\t\t#conditions names should not be redundant with other conditions\n+\t\t\tif(field_component not in dico):\n+\t\t\t\tdico[field_component]="Condition"+str(iCondition)\n+\t\t\t\tnewFirstLine+="\\t"+"Condition"+str(iCondition)\n+\t\t\t\tiCondition+=1\n+\t\t\telse:\n+\t\t\t\traise NameError(\'condition name allready exists!\')\n+\t\telse:\n+\t\t\tnewFirstLine+=field_component\n+\toutputfile.write(newFirstLine+"\\n")\n+\tfor line in inputfile:\n+\t\toutputfile.write(line)\n+\toutputfile.close()\n+\tinputfile.close()\n+\t#then parse condition file, read all lines in this case\n+\tiFactor=1\n+\tiValue=1\n+\tfor fileNum in range(2):\n+\t\tif fileNum==0:\n+\t\t\tinputfile = open(conditionFile_name)\n+\t\t\toutputfile = open(outputConditionFile, \'w\')\n+\t\telse:\n+\t\t\tinputfile = open(blockingFile_name)\n+\t\t\toutputfile = open(outputBlockingFile, \'w\')\n+\t\tfirstLine=1\n+\t\tfor line in inputfile:\n+\t\t\tcurrentLine = line.rstrip().split("\\t")\n+\t\t\tnewCurrentLine=""\n+\t\t\tfor i, field_component in enumerate( currentLine ):\n+\t\t\t\t#special treatment for the first line\n+\t\t\t\tif (firstLine==1):\n+\t\t\t\t\tif (i==0):\n+\t\t\t\t\t\tnewCurrentLine=field_component\n+\t\t\t\t\telse:\n+\t\t\t\t\t\t#factor names should not be redundant with other factors or conditions\n+\t\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\t\tdico[field_component]="Factor"+str(iFactor)\n+\t\t\t\t\t\t\tnewCurrentLine+="\\t"+"Factor"+str(iFactor)\n+\t\t\t\t\t\t\tiFactor+=1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\traise NameError(\'factor name allready exists!\')\n+\t\t\t\telse:\t\n+\t\t\t\t\tif (i==0):\n+\t\t\t\t\t\t#check if condition name allready exist and used it if it is, or create a new one if not\n+\t\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\t\tdico[field_component]="Condition"+str(iCondition)\n+\t\t\t\t\t\t\tnewCurrentLine="Condition"+str(iCondition)\n+\t\t\t\t\t\t\tiCondition+=1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tnewCurrentLine=dico[field_component]\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tif(field_component not in dico):\n+\t\t\t\t\t\t\tdico[field_component]="Value"+str(iValue)\n+\t\t\t\t\t\t\tnewCurrentLine+="\\tValue"+str(iValue)\n+\t\t\t\t\t\t\tiValue+=1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tnewCurrentLine+="\\t"+dico[field_component]\n+\t\t\toutputfile.write(newCurrentLine+"\\n")\n+\t\t\tfirstLine=0\n+\t\toutputfile.close()\n+\t\tinputfile.close()\n+\t##check if any entries in dictionnary contains forbiden character\n+\tfor key, value in dico.items():\n+\t\tfor specialCharacter in forbidenCharacters:\n+\t\t\tif value.startswith("Condition")==False and key.find(specialCharacter)!=-1:\n+\t\t\t\treturn 1\n+\t##then write dictionnary in a additional file\n+\toutputfile = open(ouputDictionnary, \'w\')\n+\tfor key, value in dico.items():\n+\t\toutputfile.write(key+"\\t"+value+"\\n")\n+\toutputfile.close()\n+\treturn 0\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/LIMMAscriptV4.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/LIMMAscriptV4.R Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,1002 @@\n+# A command-line interface for LIMMA to use with Galaxy\n+# written by Jimmy Vandel\n+# one of these arguments is required:\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)\n+args <- commandArgs()\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "dataFile", "i", 1, "character",\n+ "factorInfo","a", 1, "character",\n+ "blockingInfo","b", 1, "character",\n+ "dicoRenaming","g",1,"character",\n+ "blockingPolicy","u", 1, "character",\n+ "fdrThreshold","t", 1, "double",\n+ "thresholdFC","d", 1, "double",\n+ "format", "f", 1, "character",\n+ "histo","h", 1, "character",\n+ "volcano","v", 1, "character",\n+ "factorsContrast","r", 1, "character",\n+ "contrastNames","p", 1, "character",\n+ "firstGroupContrast","m", 1, "character",\n+ "secondGroupContrast","n", 1, "character",\n+ "controlGroups","c", 1, "character",\n+ "fratioFile","s",1,"character",\n+ "organismID","x",1,"character",\n+ "rowNameType","y",1,"character",\n+ "quiet", "q", 0, "logical",\n+ "log", "l", 1, "character",\n+ "outputFile" , "o", 1, "character",\n+ "outputDfFile" , "z", 1, "character"),\n+ byrow=TRUE, ncol=4)\n+opt <- getopt(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required\\n")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$dataFile)) {\n+ addComment("[ERROR]\'dataFile\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy) ) {\n+ addComment("[ERROR]blocking policy is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$dicoRenaming)) {\n+ addComment("[ERROR]renaming dictionnary is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$factorsContrast)) {\n+ addComment("[ERROR]factor informations are missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (length(opt$firstGroupContrast)!=length(opt$secondGroupContrast)) {\n+ addComment("[ERROR]some contrast groups seems to be empty",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$factorInfo)) {\n+ addComment("[ERROR]factors info is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fdrThreshold)) {\n+ addComment("[ERROR]\'FDR threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$outputFile) || is.null(opt$outputDfFile)){\n+ addComment("[ERROR]\'output files\' are required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$thresholdFC)){\n+ addComment("[ERROR]\'FC threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fratioFile)) {\n+ addComment("[ERROR]F-ratio parameter is missing",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+#demande si le script sera bavard\n+verbose <- if (is.null(opt$quiet)) {\n+ TRUE\n+}else{\n+ FALSE\n+}\n+\n+#param\xc3\xa8tres internes\n+#pour savoir si on remplace les FC calcul\xc3\xa9s par LIMMA par un calcul du LS-MEAN (ie moyenne de moyennes de chaque groupe dans chaque terme du contraste plut\xc3\xb4t qu\'une moyenne globale dans chaque terme)\n+useLSmean=FALSE\n+\n+addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)\n+\n+addComment(c("[INFO]Wor'..b'.eb$t)=rownames(data.fit.eb$adj_p.value)\n+ colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value)\n+ \n+ dfMatrix=dfMatrix[newOrder,,drop=FALSE]\n+}\n+\n+\n+#formating output matrices depending on genes to keep\n+if(length(genesToKeep)==0){\n+ outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))\n+ outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))\n+ outputData[,1]=c("LIMMA","Gene","noGene")\n+ outputData[,2]=c("Comparison","Info","noInfo")\n+ \n+ outputDfData=matrix(0,ncol=3+1,nrow=2)\n+ outputDfData[1,]=c("X","df.residual","df.prior","df.total")\n+ outputDfData[,1]=c("Statistics","noGene")\n+}else{\n+ if(length(genesToKeep)==1){\n+ outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))\n+ outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))\n+ outputData[,1]=c("LIMMA","Gene",genesToKeep)\n+ outputData[,2]=c("Comparison","Info","na")\n+ if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]\n+ outputData[3,seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)\n+ outputData[3,seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)\n+ outputData[3,seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)\n+ outputData[3,seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)\n+ outputData[3,seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)\n+ \n+ outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))\n+ outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")\n+ outputDfData[2,]=c(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual","df.prior","df.total")],digits=4))\n+ }else{\n+ #format matrix to be correctly read by galaxy (move headers in first column and row)\n+ outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=nrow(data.fit.eb$adj_p.value)+2)\n+ outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))\n+ outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))\n+ outputData[,1]=c("LIMMA","Gene",rownames(data.fit.eb$adj_p.value))\n+ outputData[,2]=c("Comparison","Info",rep("na",nrow(data.fit.eb$adj_p.value)))\n+ if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(data.fit.eb$adj_p.value)]\n+ outputData[3:nrow(outputData),seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)\n+ outputData[3:nrow(outputData),seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)\n+ outputData[3:nrow(outputData),seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)\n+ \n+ outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))\n+ outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")\n+ outputDfData[2:(1+nrow(dfMatrix)),]=cbind(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual")],digits=4),prettyNum(dfMatrix[,c("df.prior")],digits=4),prettyNum(dfMatrix[,c("df.total")],digits=4))\n+ }\n+}\n+addComment("[INFO]Formated output",T,opt$log,display=FALSE) \n+\n+#write output results\n+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+#write df info file\n+write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+#sessionInfo()\n+\n+\n+\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/VolcanoPlotsScript.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/VolcanoPlotsScript.R Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,426 @@\n+# R script to plot volcanos through Galaxy based GIANT tool \n+# written by Jimmy Vandel\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)\n+args <- commandArgs()\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "statisticsFile", "i", 1, "character",\n+ "volcanoName" , "n", 1, "character",\n+ "pvalColumnName" , "p", 1, "character",\n+ "fdrColumnName" , "m", 1, "character",\n+ "fcColumnName" , "c", 1, "character",\n+ "fcKind","d", 1, "character",\n+ "fdrThreshold","s", 1, "double",\n+ "fcThreshold","e", 1, "double",\n+ "organismID","x",1,"character",\n+ "rowNameType","y",1,"character",\n+ "log", "l", 1, "character",\n+ "outputFile" , "o", 1, "character",\n+ "format", "f", 1, "character",\n+ "quiet", "q", 0, "logical"),\n+ byrow=TRUE, ncol=4)\n+opt <- getopt(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required\\n")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$statisticsFile)) {\n+ addComment("[ERROR]\'statisticsFile\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (length(opt$pvalColumnName)==0 || length(opt$fdrColumnName)==0 || length(opt$fcColumnName)==0) {\n+ addComment("[ERROR]no selected columns",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (length(opt$pvalColumnName)!=length(opt$fcColumnName) || length(opt$pvalColumnName)!=length(opt$fdrColumnName)) {\n+ addComment("[ERROR]different number of selected columns between p.val, adj-p.val and FC ",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fcKind)) {\n+ addComment("[ERROR]\'fcKind\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fdrThreshold)) {\n+ addComment("[ERROR]\'FDR threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$fcThreshold)) {\n+ addComment("[ERROR]\'FC threshold\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$outputFile)) {\n+ addComment("[ERROR]\'output file\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+#demande si le script sera bavard\n+verbose <- if (is.null(opt$quiet)) {\n+ TRUE\n+}else{\n+ FALSE\n+}\n+\n+#param\xc3\xa8tres internes\n+addComment("[INFO]Parameters checked test mode !",T,opt$log,display=FALSE)\n+\n+addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)\n+addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)\n+\n+#directory for plots\n+dir.create(file.path(getwd(), "plotDir"))\n+dir.create(file.path(getwd(), "plotLyDir"))\n+\n+#charge des packages silencieusement\n+suppressPackageStartupMessages({\n+ library("methods")\n+ library("biomaRt")\n+ library("ggplot2")\n+ library("plotly")\n+ library("stringr")\n+})\n+\n+#define some usefull variable\n+nbVolcanosToPlot=length(opt$pvalColumnName)\n+\n+#load input file\n+statDataMatrix=read.csv(file=file.path(getwd(), opt$statisticsFile),header=F,sep="\\t",colClasses="character")\n+#remove first colum to convert it as rownames\n+rownames(statDataMatrix)=statDataMatrix[,1]\n+statDataMatrix=statDataMatrix[,-1]\n+\n+#identify lines without adjusted p-value info (should contain the sa'..b'sed only on the first comparison\n+#newOrder=order(dataFrame$adj_p.value[,1])\n+#dataFrame$adj_p.value=dataFrame$adj_p.value[newOrder,]\n+\n+#alternative sorting strategy based on the mean gene rank over all comparisons\n+if(length(genesToKeep)>1){\n+ currentRank=rep(0,nrow(dataFrame$adj_p.value))\n+ for(iVolcano in 1:ncol(dataFrame$adj_p.value)){\n+ currentRank=currentRank+rank(dataFrame$adj_p.value[,iVolcano])\n+ }\n+ currentRank=currentRank/ncol(dataFrame$adj_p.value)\n+ newOrder=order(currentRank)\n+ rownames(dataFrame)=rownames(dataFrame)[newOrder]\n+ \n+ dataFrame$adj_p.value=matrix(dataFrame$adj_p.value[newOrder,],ncol=ncol(dataFrame$adj_p.value))\n+ rownames(dataFrame$adj_p.value)=rownames(dataFrame$p.value)[newOrder]\n+ colnames(dataFrame$adj_p.value)=colnames(dataFrame$p.value)\n+ \n+ dataFrame$p.value=matrix(dataFrame$p.value[newOrder,],ncol=ncol(dataFrame$p.value))\n+ rownames(dataFrame$p.value)=rownames(dataFrame$adj_p.value)\n+ colnames(dataFrame$p.value)=colnames(dataFrame$adj_p.value)\n+ \n+ dataFrame$coefficients=matrix(dataFrame$coefficients[newOrder,],ncol=ncol(dataFrame$coefficients))\n+ rownames(dataFrame$coefficients)=rownames(dataFrame$adj_p.value)\n+ colnames(dataFrame$coefficients)=colnames(dataFrame$adj_p.value)\n+}\n+\n+#formating output matrix depending on genes to keep\n+if(length(genesToKeep)==0){\n+ outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(volcanoNameList,each=4))\n+ outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))\n+ outputData[,1]=c("Volcano","Gene","noGene")\n+ outputData[,2]=c("Comparison","Info","noInfo")\n+}else{\n+ if(length(genesToKeep)==1){\n+ outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=3)\n+ outputData[1,]=c("X","X",rep(volcanoNameList,each=4))\n+ outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))\n+ outputData[,1]=c("Volcano","Gene",genesToKeep)\n+ outputData[,2]=c("Comparison","Info","na")\n+ if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]\n+ outputData[3,seq(3,ncol(outputData),4)]=prettyNum(dataFrame$p.value,digits=4)\n+ outputData[3,seq(4,ncol(outputData),4)]=prettyNum(dataFrame$adj_p.value,digits=4)\n+ outputData[3,seq(5,ncol(outputData),4)]=prettyNum(2^dataFrame$coefficients,digits=4)\n+ outputData[3,seq(6,ncol(outputData),4)]=prettyNum(dataFrame$coefficients,digits=4)\n+ }else{\n+ #format matrix to be correctly read by galaxy (move headers in first column and row)\n+ outputData=matrix(0,ncol=ncol(dataFrame$adj_p.value)*4+2,nrow=nrow(dataFrame$adj_p.value)+2)\n+ outputData[1,]=c("X","X",rep(volcanoNameList,each=4))\n+ outputData[2,]=c("X","X",rep(c("p-val","Adjusted.p-val","FC","log2(FC)"),ncol(dataFrame$adj_p.value)))\n+ outputData[,1]=c("Volcano","Gene",rownames(dataFrame$adj_p.value))\n+ outputData[,2]=c("Comparison","Info",rep("na",nrow(dataFrame$adj_p.value)))\n+ if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(dataFrame$adj_p.value)]\n+ outputData[3:nrow(outputData),seq(3,ncol(outputData),4)]=prettyNum(dataFrame$p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(4,ncol(outputData),4)]=prettyNum(dataFrame$adj_p.value,digits=4)\n+ outputData[3:nrow(outputData),seq(5,ncol(outputData),4)]=prettyNum(2^dataFrame$coefficients,digits=4)\n+ outputData[3:nrow(outputData),seq(6,ncol(outputData),4)]=prettyNum(dataFrame$coefficients,digits=4)\n+ }\n+}\n+addComment("[INFO]Formated output",T,opt$log,display=FALSE) \n+\n+#write output results\n+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+\n+#sessionInfo()\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/getopt.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/getopt.R Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,773 @@\n+# Copyright (c) 2008-2010 Allen Day\n+# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu> \n+# \n+# Modified by J.Vandel 2017 to consider situation of multiple identical flag\n+# and concatenate as a vector the set of parameter for the same flag instead of\n+# keeping only the last value as done by the previous version.\n+#\n+# This file is free software: you may copy, redistribute and/or modify it \n+# under the terms of the GNU General Public License as published by the \n+# Free Software Foundation, either version 2 of the License, or (at your \n+# option) any later version. \n+# \n+# This file is distributed in the hope that it will be useful, but \n+# WITHOUT ANY WARRANTY; without even the implied warranty of \n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU \n+# General Public License for more details. \n+# \n+# You should have received a copy of the GNU General Public License \n+# along with this program. If not, see <http://www.gnu.org/licenses/>. \n+\n+#\' C-like getopt behavior\n+#\' \n+#\' getopt is primarily intended to be used with ``\\link{Rscript}\'\'. It\n+#\' facilitates writing ``\\#!\'\' shebang scripts that accept short and long\n+#\' flags/options. It can also be used from ``R\'\' directly, but is probably less\n+#\' useful in this context.\n+#\' \n+#\' getopt() returns a \\link{list} data structure containing \\link{names} of the\n+#\' flags that were present in the \\link{character} \\link{vector} passed in under\n+#\' the \\emph{opt} argument. Each value of the \\link{list} is coerced to the\n+#\' data type specified according to the value of the \\emph{spec} argument. See\n+#\' below for details.\n+#\' \n+#\' Notes on naming convention:\n+#\' \n+#\' 1. An \\emph{option} is one of the shell-split input strings.\n+#\' \n+#\' 2. A \\emph{flag} is a type of \\emph{option}. a \\emph{flag} can be defined as\n+#\' having no \\emph{argument} (defined below), a required \\emph{argument}, or an\n+#\' optional \\emph{argument}.\n+#\' \n+#\' 3. An \\emph{argument} is a type of \\emph{option}, and is the value associated\n+#\' with a flag.\n+#\' \n+#\' 4. A \\emph{long flag} is a type of \\emph{flag}, and begins with the string\n+#\' ``--\'\'. If the \\emph{long flag} has an associated \\emph{argument}, it may be\n+#\' delimited from the \\emph{long flag} by either a trailing \\emph{=}, or may be\n+#\' the subsequent \\emph{option}.\n+#\' \n+#\' 5. A \\emph{short flag} is a type of \\emph{flag}, and begins with the string\n+#\' ``-\'\'. If a \\emph{short flag} has an associated \\emph{argument}, it is the\n+#\' subsequent \\emph{option}. \\emph{short flags} may be bundled together,\n+#\' sharing a single leading ``-\'\', but only the final \\emph{short flag} is able\n+#\' to have a corresponding \\emph{argument}.\n+#\'\n+#\' Many users wonder whether they should use the getopt package, optparse package, \n+#\' or argparse package.\n+#\' Here is some of the major differences:\n+#\'\n+#\' Features available in \\code{getopt} unavailable in \\code{optparse}\n+#\'\n+#\' 1. As well as allowing one to specify options that take either\n+#\' no argument or a required argument like \\code{optparse},\n+#\' \\code{getopt} also allows one to specify option with an optional argument.\n+#\' \n+#\' Some features implemented in \\code{optparse} package unavailable in \\code{getopt}\n+#\'\n+#\' 1. Limited support for capturing positional arguments after the optional arguments\n+#\' when \\code{positional_arguments} set to TRUE in \\code{parse_args} \n+#\'\n+#\' 2. Automatic generation of an help option and printing of help text when encounters an "-h"\n+#\' \n+#\' 3. Option to specify default arguments for options as well the\n+#\' variable name to store option values\n+#\'\n+#\' There is also new package \\code{argparse} introduced in 2012 which contains\n+#\' all the features of both getopt and optparse but which has a dependency on\n+#\' Python 2.7 or 3.2+ and has not been used in production since 2008 or 2009\n+#\' like the getopt and optparse packages.\n+#\'\n+#\' Some Features unlikely to be implemented in \\code{g'..b'h it, increment the index, and move on to the next option. we don\'t allow arguments beginning with \'-\' UNLESS\n+ #specfile indicates the value is an "integer" or "double", in which case we allow a leading dash (and verify trailing digits/decimals).\n+ if ( substr(peek.optstring, 1, 1) != \'-\' |\n+ #match negative double\n+ ( substr(peek.optstring, 1, 1) == \'-\'\n+ & regexpr(\'^-[0123456789]*\\\\.?[0123456789]+$\',peek.optstring) > 0\n+ & spec[current.flag, col.mode]== \'double\'\n+ ) |\n+ #match negative integer\n+ ( substr(peek.optstring, 1, 1) == \'-\'\n+ & regexpr(\'^-[0123456789]+$\',peek.optstring) > 0\n+ & spec[current.flag, col.mode]== \'integer\'\n+ )\n+ ) {\n+ if ( debug ) print(paste(\' consuming argument *\',peek.optstring,\'*\',sep=\'\'));\n+ storage.mode(peek.optstring) = spec[current.flag, col.mode];\n+ #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);\n+ i = i + 1;\n+ \n+ #a lone dash\n+ } else if ( substr(peek.optstring, 1, 1) == \'-\' & length(strsplit(peek.optstring,\'\')[[1]]) == 1 ) {\n+ if ( debug ) print(\' consuming "lone dash" argument\');\n+ storage.mode(peek.optstring) = spec[current.flag, col.mode];\n+ #remove the last argument put in result for current.flag that should be a TRUE and concatenate argument with previous ones\n+ result[[spec[current.flag, col.long.name]]] =c(result[[spec[current.flag, col.long.name]]][-length(result[[spec[current.flag, col.long.name]]])],peek.optstring);\n+ i = i + 1;\n+ \n+ #no argument\n+ } else {\n+ if ( debug ) print(\' no argument!\');\n+ \n+ #if we require an argument, bail out\n+ if ( spec[current.flag, col.has.argument] == flag.required.argument ) {\n+ stop(paste(\'flag "\', this.flag, \'" requires an argument\', sep=\'\'));\n+ \n+ #otherwise set flag as present.\n+ } else if (\n+ spec[current.flag, col.has.argument] == flag.optional.argument |\n+ spec[current.flag, col.has.argument] == flag.no.argument \n+ ) {\n+ x = TRUE;\n+ storage.mode(x) = spec[current.flag, col.mode];\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);\n+ } else {\n+ stop(paste("This should never happen.",\n+ "Is your spec argument correct? Maybe you forgot to set",\n+ "ncol=4, byrow=TRUE in your matrix call?"));\n+ }\n+ }\n+ #trailing flag without required argument\n+ } else if ( spec[current.flag, col.has.argument] == flag.required.argument ) {\n+ stop(paste(\'flag "\', this.flag, \'" requires an argument\', sep=\'\'));\n+ \n+ #trailing flag without optional argument\n+ } else if ( spec[current.flag, col.has.argument] == flag.optional.argument ) {\n+ x = TRUE;\n+ storage.mode(x) = spec[current.flag, col.mode];\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);\n+ \n+ #trailing flag without argument\n+ } else if ( spec[current.flag, col.has.argument] == flag.no.argument ) {\n+ x = TRUE;\n+ storage.mode(x) = spec[current.flag, col.mode];\n+ result[[spec[current.flag, col.long.name]]] = c(result[[spec[current.flag, col.long.name]]],x);\n+ } else {\n+ stop("this should never happen (2). please inform the author.");\n+ }\n+ #no dangling flag, nothing to do.\n+ } else {\n+ }\n+ \n+ i = i+1;\n+ }\n+ return(result);\n+}\n+\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/heatMapClustering.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/heatMapClustering.R Fri Jun 26 09:35:11 2020 -0400 |
[ |
b'@@ -0,0 +1,896 @@\n+# A command-line interface to plot heatmap based on expression or diff. exp. analysis \n+# written by Jimmy Vandel\n+# one of these arguments is required:\n+#\n+#\n+initial.options <- commandArgs(trailingOnly = FALSE)\n+file.arg.name <- "--file="\n+script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])\n+script.basename <- dirname(script.name)\n+source(file.path(script.basename, "utils.R"))\n+source(file.path(script.basename, "getopt.R"))\n+\n+#addComment("Welcome R!")\n+\n+# setup R error handling to go to stderr\n+options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+\n+# we need that to not crash galaxy with an UTF8 error on German LC settings.\n+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n+loc <- Sys.setlocale("LC_NUMERIC", "C")\n+\n+#get starting time\n+start.time <- Sys.time()\n+\n+\n+options(stringAsfactors = FALSE, useFancyQuotes = FALSE, OutDec=".")\n+\n+#get options\n+args <- commandArgs()\n+\n+# get options, using the spec as defined by the enclosed list.\n+# we read the options from the default: commandArgs(TRUE).\n+spec <- matrix(c(\n+ "expressionFile", "x", 1, "character",\n+ "diffAnalyseFile", "x", 1, "character",\n+ "factorInfo","x", 1, "character",\n+ "genericData","x", 0, "logical",\n+ "comparisonName","x",1,"character",\n+ "comparisonNameLow","x",1,"character",\n+ "comparisonNameHigh","x",1,"character",\n+ "filterInputOutput","x", 1, "character",\n+ "FCthreshold","x", 1, "double",\n+ "pvalThreshold","x", 1, "double",\n+ "geneListFiltering","x",1,"character",\n+ "clusterNumber","x",1,"integer",\n+ "maxRows","x",1,"integer",\n+ "sampleClusterNumber","x",1,"integer",\n+ "dataTransformation","x",1,"character",\n+ "distanceMeasure","x",1,"character",\n+ "aggloMethod","x",1,"character",\n+ "personalColors","x",1,"character",\n+ "sideBarColorPalette","x",1,"character",\n+ "format", "x", 1, "character",\n+ "quiet", "x", 0, "logical",\n+ "log", "x", 1, "character",\n+ "outputFile" , "x", 1, "character"),\n+ byrow=TRUE, ncol=4)\n+opt <- getoptLong(spec)\n+\n+# enforce the following required arguments\n+if (is.null(opt$log)) {\n+ addComment("[ERROR]\'log file\' is required")\n+ q( "no", 1, F )\n+}\n+addComment("[INFO]Start of R script",T,opt$log,display=FALSE)\n+if (is.null(opt$format)) {\n+ addComment("[ERROR]\'output format\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+if (is.null(opt$outputFile)) {\n+ addComment("[ERROR]\'output file\' is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if(is.null(opt$expressionFile) && !is.null(opt$genericData)){\n+ addComment("[ERROR]generic data clustering is based on expression clustering",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$clusterNumber) || opt$clusterNumber<2) {\n+ addComment("[ERROR]valid genes clusters number is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$sampleClusterNumber) || opt$sampleClusterNumber<1) {\n+ addComment("[ERROR]valid samples clusters number is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$dataTransformation)) {\n+ addComment("[ERROR]data transformation option is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$distanceMeasure)) {\n+ addComment("[ERROR]distance measure option is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$aggloMethod)) {\n+ addComment("[ERROR]agglomeration method option is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (is.null(opt$maxRows) || opt$maxRows<2) {\n+ addComment("[ERROR]valid plotted row number is required",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (!is.null(opt[["comparisonName"]]) && nchar(opt[["comparisonName"]])==0){\n+ addComment("[ERROR]you have to specify comparison",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (!is.null(opt$comparisonNameLow) && nchar(opt$comparisonNameLow)==0){\n+ addComment("[ERROR]you have to specify comparisonLow",T,opt$log)\n+ q( "no", 1, F )\n+}\n+\n+if (!is.null(opt$comparisonNameHigh) && nchar(opt$comparisonNameHigh)==0){\n+ addComment("[ERROR]y'..b' \n+ if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){\n+ addComment("[WARNING] some genes selected by the output filter are not in the expression file",T,opt$log)\n+ rowToKeep=intersect(rowToKeep,rownames(expressionMatrix))\n+ }\n+ addComment(c("[INFO]Output filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) \n+}\n+\n+#we add differential analysis info in output if it was directly used for clustering or when it was used for filtering with expression\n+\n+#in case of expression or generic data clustering without filtering based on external stats\n+if(expressionToCluster && is.null(comparisonMatrix)){\n+ if(length(rowToKeep)==0){\n+ addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)\n+ outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE)\n+ }else{\n+ outputData=matrix(0,ncol=2,nrow=length(rowToKeep)+1)\n+ outputData[1,]=c("Gene","Cluster")\n+ outputData[2:(length(rowToKeep)+1),1]=rowToKeep\n+ if(class(rowClust)!="logical" ){\n+ outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep]\n+ }else{\n+ outputData[2:(length(rowToKeep)+1),2]=0\n+ }\n+ }\n+}\n+\n+#in case of generic data clustering with filtering based on generic external data\n+if(!is.null(opt$genericData) && !is.null(comparisonMatrix)){\n+ if(length(rowToKeep)==0){\n+ addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)\n+ outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE)\n+ }else{\n+ outputData=matrix(0,ncol=2+nbComparisons,nrow=length(rowToKeep)+1)\n+ outputData[1,]=c("Gene","Cluster",colnames(comparisonMatrix))\n+ outputData[2:(length(rowToKeep)+1),1]=rowToKeep\n+ if(class(rowClust)!="logical" ){\n+ outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep]\n+ }else{\n+ outputData[2:(length(rowToKeep)+1),2]=0\n+ }\n+ outputData[2:(length(rowToKeep)+1),3:(ncol(comparisonMatrix)+2)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4)\n+ }\n+}\n+\n+#in case of expression data clustering with filtering based on diff. exp. results or diff. exp. results clustering\n+if(is.null(opt$genericData) && !is.null(comparisonMatrix)){\n+ if(length(rowToKeep)==0){\n+ addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE)\n+ outputData=matrix(0,ncol=3,nrow=3)\n+ outputData[1,]=c("","","Comparison")\n+ outputData[2,]=c("Gene","Info","Cluster")\n+ outputData[3,]=c("noGene","noInfo","noClustering")\n+ }else{\n+ outputData=matrix(0,ncol=3+nbComparisons*nbColPerContrast,nrow=length(rowToKeep)+2)\n+ outputData[1,]=c("","","Comparison",rep(colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)],each=nbColPerContrast))\n+ outputData[2,]=c("Gene","Info","Cluster",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbComparisons))\n+ outputData[3:(length(rowToKeep)+2),1]=rowToKeep\n+ outputData[3:(length(rowToKeep)+2),2]=comparisonMatrixInfoGene[rowToKeep]\n+ if(class(rowClust)!="logical" ){\n+ outputData[3:(length(rowToKeep)+2),3]=cutree(rowClust,nbClusters)[rowToKeep]\n+ }else{\n+ outputData[3:(length(rowToKeep)+2),3]=0\n+ }\n+ outputData[3:(length(rowToKeep)+2),4:(ncol(comparisonMatrix)+3)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4)\n+ }\n+}\n+\n+addComment("[INFO]Formated output",T,opt$log,display=FALSE) \n+write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\\t",col.names = F,row.names = F)\n+ \n+##----------------------\n+\n+end.time <- Sys.time()\n+addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)\n+\n+\n+addComment("[INFO]End of R script",T,opt$log,display=FALSE)\n+\n+printSessionInfo(opt$log)\n+\n+#sessionInfo()\n+\n+\n+\n' |
b |
diff -r 000000000000 -r 708f43bda2b6 src/utils.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/utils.R Fri Jun 26 09:35:11 2020 -0400 |
[ |
@@ -0,0 +1,143 @@ +# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu> +# +# This file is free software: you may copy, redistribute and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 2 of the License, or (at your +# option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +#extendedDist function to correlation measure +distExtended <- function(x,method) { + if(method %in% c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"))return(dist(x,method = method)) + if(method %in% c("pearson", "spearman", "kendall"))return(as.dist(1-cor(t(x),method=method))/2) + if(method %in% c("absPearson", "absSpearman", "absKendall"))return(as.dist(1-abs(cor(t(x),method=method)))) + return(NULL) +} + +##comment function to display message and optionnaly add it to log file + +addComment <- function(text,addToFile=FALSE,fileName=NULL,append=TRUE,display=TRUE){ + if(display)cat(paste(c(text,"\n"),collapse = " ")) + if(addToFile)write(paste(text,collapse = " "),fileName,append=append) +} + +printSessionInfo <- function(fileName=NULL,append=TRUE){ + addComment("[INFO]R session info :",T,fileName,display=FALSE) + tempInfo=sessionInfo() + write(paste(tempInfo$R.version$version.string),fileName,append=append) + write(paste("Platform",tempInfo$platform,sep = " : "),fileName,append=append) + write(paste("Running under",tempInfo$running,sep = " : "),fileName,append=append) + write(paste("Local variables",tempInfo$locale,sep = " : "),fileName,append=append) + write(paste("Attached base packages",paste(tempInfo$basePkgs,collapse = "; "),sep = " : "),fileName,append=append) + if(length(tempInfo$otherPkgs)>0){ + lineToPrint="" + for(iPack in tempInfo$otherPkgs){ + lineToPrint=paste(lineToPrint,iPack$Package," ",iPack$Version,"; ",sep = "") + } + write(paste("Other attached packages",lineToPrint,sep = " : "),fileName,append=append) + } + if(length(tempInfo$loadedOnly)>0){ + lineToPrint="" + for(iPack in tempInfo$loadedOnly){ + lineToPrint=paste(lineToPrint,iPack$Package," ",iPack$Version,"; ",sep = "") + } + write(paste("Loaded packages",lineToPrint,sep = " : "),fileName,append=append) + } +} + +##negative of a mathematical expression +negativeExpression <- function(expression){ + expression=gsub("\\+","_toMinus_",expression) + expression=gsub("\\-","+",expression) + expression=gsub("_toMinus_","-",expression) + if(substr(expression,1,1)!="-" && substr(expression,1,1)!="+"){ + expression=paste(c("-",expression),collapse="") + } + + return(expression) +} + +#' Returns file name of calling Rscript +#' +#' \code{get_Rscript_filename} returns the file name of calling Rscript +#' @return A string with the filename of the calling script. +#' If not found (i.e. you are in a interactive session) returns NA. +#' +#' @export +get_Rscript_filename <- function() { + prog <- sub("--file=", "", grep("--file=", commandArgs(), value=TRUE)[1]) + if( .Platform$OS.type == "windows") { + prog <- gsub("\\\\", "\\\\\\\\", prog) + } + prog +} + +#' Recursively sorts a list +#' +#' \code{sort_list} returns a sorted list +#' @param unsorted_list A list. +#' @return A sorted list. +#' @export +sort_list <- function(unsorted_list) { + for(ii in seq(along=unsorted_list)) { + if(is.list(unsorted_list[[ii]])) { + unsorted_list[[ii]] <- sort_list(unsorted_list[[ii]]) + } + } + unsorted_list[sort(names(unsorted_list))] +} + + +# Multiple plot function +# +# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) +# - cols: Number of columns in layout +# - layout: A matrix specifying the layout. If present, 'cols' is ignored. +# +# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), +# then plot 1 will go in the upper left, 2 will go in the upper right, and +# 3 will go all the way across the bottom. +# +multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { + library(grid) + + # Make a list from the ... arguments and plotlist + plots <- c(list(...), plotlist) + + numPlots = length(plots) + + # If layout is NULL, then use 'cols' to determine layout + if (is.null(layout)) { + # Make the panel + # ncol: Number of columns of plots + # nrow: Number of rows needed, calculated from # of cols + layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), + ncol = cols, nrow = ceiling(numPlots/cols)) + } + + if (numPlots==1) { + print(plots[[1]]) + + } else { + # Set up the page + grid.newpage() + pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) + + # Make each plot, in the correct location + for (i in 1:numPlots) { + # Get the i,j matrix positions of the regions that contain this subplot + matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) + + print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, + layout.pos.col = matchidx$col)) + } + } +} |