Repository 'maaslin'
hg clone https://toolshed.g2.bx.psu.edu/repos/george-weingart/maaslin

Changeset 6:ca61989bc3b4 (2015-02-08)
Previous changeset 5:232e262654eb (2015-02-08) Next changeset 7:c72e14eabb08 (2015-02-09)
Commit message:
Uploaded
added:
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/Figure1-Overview.png
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/MaAsLin_galaxy_ReadMe.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/Maaslin_Output.png
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/None
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/README.md
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/SConscript
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/SConscript_maaslin.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/datatypes_conf.xml
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/doc/MaAsLin_User_Guide_v3.docx
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/doc/Merge_Metadata_Read_Me.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/for_merge_metadata/maaslin_demo_measurements.pcl
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/for_merge_metadata/maaslin_demo_metadata.metadata
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.args
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.pcl
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin.xml
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin_format_input_selector.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin_wrapper.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/CreateReadConfigFile.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/Graphlan_settings.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/Maaslin.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/MaaslinToGraphlanAnnotation.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/PCLToGraphlanCoreGene.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/AnalysisModules.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/BoostGLM.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Constants.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/IO.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/MaaslinPlots.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Misc.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/SummarizeMaaslin.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Utility.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/ValidateData.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/scriptBiplotTSV.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/merge_metadata.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-AnalysisModules/test-AnalysisModules.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-BoostGLM/test-BoostGLM.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-IO/test-IO.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-Maaslin/test-Maaslin.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-SummarizeMaaslin/test-SummarizeMaaslin.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-Utility/test-Utility.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-ValidateData/test-ValidateData.R
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileAltKeyResult.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileAltSigResult.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileResult.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileAltKeyResult.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileAltSigResult.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileResult.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAltKeyAnswer.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAltSigAnswer.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAnswer.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices1.tsv
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices2.tsv
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_AllAnswer.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_AppendAnswer.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_SimpleAnswer.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTableTempDF1.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTableTempDF2.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTemp1.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTemp2.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTempDF1.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTempDF2.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/1Matrix.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/2Matrix.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/3Matrix.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMaaslin.read.config
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMaaslin.tsv
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMatrix.tsv
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/1/FuncSummarizeDirectory-1.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-1.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-2.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-3.txt
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/tmp/.keep
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/transpose.py
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/test-data/maaslin_input
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/test-data/maaslin_output
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/tool_dependencies.xml
maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/transpose.py
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/Figure1-Overview.png
b
Binary file maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/Figure1-Overview.png has changed
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/MaAsLin_galaxy_ReadMe.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/MaAsLin_galaxy_ReadMe.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+Installation instructions for maaslin in a galaxy environment.
+These instructions require the Mercurial versioning system, galaxy, and an internet connection.
+
+1. In the  "galaxy-dist/tools" directory install maaslin by typing in a terminal:
+hg clone https://bitbucket.org/biobakery/maaslin
+
+2. Update the file tool_conf.xml in the galaxy directory by adding the following:
+  <section name="maaslin" id="maaslin">
+    <tool file="maaslin/galaxy/maaslin.xml"/>
+  </section>
+
+3. Update the file datatypes_conf.xml in the galaxy directory by adding the following:
+ <datatype extension="maaslin" type="galaxy.datatypes.data:Text" subclass="true" display_in_upload="true"/>
+
+4. Restart (recycle) Galaxy so that the changes take effect.
+
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/Maaslin_Output.png
b
Binary file maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/Maaslin_Output.png has changed
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/None
b
Binary file maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/None has changed
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/README.md Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,413 @@\n+MaAsLin User Guide v3.1\n+=======================\n+\n+September 2013 - Updated  April 2014 for Galaxy\n+\n+Timothy Tickle and Curtis Huttenhower\n+\n+Table of Contents\n+-----------------\n+\n+A. Introduction to MaAsLin  \n+B. Related Projects and Scripts  \n+C. Installing MaAsLin  \n+D. MaAsLin Inputs  \n+E. Process Flow Overview  \n+D. Process Flow Detail  \n+G. Expected Output Files  \n+H. Troubleshooting  \n+I. Installation as an Automated Pipeline  \n+J. Commandline Options (Modifying Process and Figures)\n+\n+# A. Introduction to MaAsLin\n+\n+MaAsLin is a multivariate statistical framework that finds\n+associations between clinical metadata and potentially\n+high-dimensional experimental data. MaAsLin performs boosted additive\n+general linear models between one group of data (metadata/the\n+predictors) and another group (in our case relative taxonomic\n+abundances/the response).  In our context we use it to discover\n+associations between clinical metadata and microbial community\n+relative abundance or function; however, it is applicable to other\n+data types.\n+\n+Metagenomic data are sparse, and boosting is used to select metadata\n+that show some potential to be useful in a linear model between the\n+metadata and abundances. In the context of metadata and community\n+abundance, a sample\'s metadata is boosted for one Operational\n+Taxonomic Unit (OTU) (Yi). The metadata that are selected by boosting\n+are then used in a general linear model, with each combination of\n+metadata (as predictors) and OTU abundance (as response\n+variables). This occurs for every OTU and metadata combination. Given\n+we work with proportional data, the Yi (abundances) are\n+`arcsin(sqrt(Yi))` transformed. 
A final formula is as follows:\n+\n+![](https://bitbucket.org/biobakery/maaslin/downloads/maaslinformula2.png)\n+\n+For more information about maaslin please visit\n+[http://huttenhower.sph.harvard.edu/maaslin](http://huttenhower.sph.harvard.edu/maaslin).\n+\n+\n+# B. Related Projects and Scripts\n+\n+Other projects exist at www.bitbucket.com that may help in your\n+analysis:\n+\n+* **QiimeToMaAsLin** is a project that reformats abundance files from\n+    Qiime for MaAsLin. Several formats of Qiime consensus lineages are\n+    supported for this project. To download please visit\n+    [https://bitbucket.org/timothyltickle/qiimetomaaslin](https://bitbucket.org/timothyltickle/qiimetomaaslin).\n+\n+* **merge_metadata.py** is a script included in the MaAsLin project to\n+    generically merge a metadata file with a table of microbial (or\n+    other) measurements. This script is located in `maaslin/src` and\n+    is documented in `maaslin/doc/ Merge_Metadata_Read_Me.txt`.\n+\n+\n+# C. Installing MaAsLin\n+\n+R Libraries: Several libraries need to be installed in R these are\n+  the following:\n+\n+  * agricolae, gam, gamlss, gbm, glmnet, inlinedocs, logging, MASS, nlme, optparse, outliers, penalized, pscl, robustbase, testhat, vegan\n+\n+You can install them by typing R in a terminal and using the\n+  install.packages command:\n+\n+      install.packages(c(\'agricolae\', \'gam\', \'gamlss\', \'gbm\', \'glmnet\', \'inlinedocs\', \'logging\', \'MASS\', \'nlme\', \'optparse\', \'outliers\', \'penalized\', \'pscl\', \'robustbase\', \'testthat\'))\n+\n+# D. MaAsLin Inputs\n+\n+There are 3 input files for each project, the "\\*.read.config" file, the "\\*.pcl" file, and the "\\*.R" script. (If using the sfle automated pipeline, the "\\*" in the file names can be anything but need to be identical for all three files. All three files need to be in the `../sfle/input/maasalin/input` folder only if using sfle). Details of each file follow:\n+\n+### 1\\. 
"\\*.pcl"\n+\n+Required input file. A PCL file is the file that contains all the data\n+and metadata. This file is formatted so that metadata/data (otus or\n+bugs) are rows and samples are columns. All metadata rows should come\n+first before any abundance data. The file should be a tab delimited\n+text file with the extension ".pcl".\n+\n+### 2\\. "\\*.read.config"\n+\n+Required input file. A read config fil'..b' group that are added to this one at a time. To give a more concrete example: You may have metadata cage, diet, and treatment. You may always want to have the association of abundance evaluated controlling for cage but otherwise looking at the metadata one at a time. In this way the cage metadata is the \\D2forced\\D3 part of the evaluation while the others are not forced and evaluated in serial. The appropriate commandline to indicate this follows (placed in your args file if using sfle, otherwise added in the commandline call):\n+\n+> -a -F cage\n+\n+-a indicates all verses all is being used, -F indicates which metadata are forced (multiple metadata can be given comma delimited as shown here -F metadata1,metadata2,metadata3). This does not bypass the feature selection method so the metadata that are not forced are subject to feature selection and may be removed before coming to the evaluation. If you want all the metadata that are not forced to be evaluated in serial you will need to turn off feature selection and will have a final combined commandline as seen here:\n+\n+> -a -F cage -s none\n+\n+#I. Troubleshooting\n+\n+###1\\. (Only valid if using Sfle) ImportError: No module named sfle\n+\n+When using the command "scons output/maaslin/..." to run my projects I\n+get the message:\n+\n+    ImportError: No module named sfle:\n+      File "/home/user/sfle/SConstruct", line 2:\n+        import sfle\n+\n+**Solution:** You need to update your path. 
On a linux or MacOS terminal\n+in the sfle directory type the following.\n+\n+    export PATH=/usr/local/bin:`pwd`/src:$PATH\n+    export PYTHONPATH=$PATH\n+\n+\n+###2\\. When trying to run a script I am told I do not have permission\n+even though file permissions have been set for myself.\n+\n+**Solution:** Most likely, you need to set the main MaAsLin script\n+(Maaslin.R) to executable.\n+\n+#J. Installation as an Automated Pipeline\n+\n+SflE (pronounced souffle), is a framework for automation and\n+parallelization on a multiprocessor machine. MaAsLin has been\n+developed to be compatible with this framework. More information can\n+be found at\n+[http://huttenhower.sph.harvard.edu/sfle](http://huttenhower.sph.harvard.edu/sfle). If\n+interested in installing MaAsLin in a SflE environment. After\n+installing SflE, download or move the complete maaslin directory into\n+`sfle/input`. After setting up, one places all maaslin input files in\n+`sfle/input/maaslin/input`. To run the automated pipeline and analyze\n+all files in the `sfle/input/maaslin/input` directory, type: `scons output/maaslin`\n+in a terminal in the sfle directory. This will produce\n+output in the `sfle/output/maaslin` directory.\n+\n+#K. Commandline Options (Modifying Process and Figures)\n+\n+Although we recommend the use of default options, commandline\n+arguments exist to modify both MaAsLin methodology and figures. To see\n+an up-to-date listing of argument usage, in a terminal in the\n+`maaslin/src` directory type `./Maaslin.R -h`.\n+\n+An additional input file (the args file) can be used to apply\n+commandline arguments to a MaAsLin run. This is useful when using\n+MaAsLin as an automated pipeline (using SflE) and is a way to document\n+what commandline are used for different projects. The args file should\n+be named the same as the *.pcl file except using the extension .args\n+. This file should be placed in the `maaslin/input` directory with the\n+other matching project input files. 
In this file please have one line\n+of arguments and values (if needed; some arguments are logical flags\n+and do not require a value), each separated by a space. The contents\n+of this file will be directly added to the commandline call for\n+Maaslin.R. An example of the contents of an args file is given here.\n+\n+**Example.args:**\n+\n+    -v DEBUG -d 0.1 -b 5\n+\n+In this example MaAsLin is modified to produce verbose output for\n+debugging (-v DEBUG), to change the threshold for making pdfs to a\n+q-value equal to or less than 0.1 (-d 0.1), and to plot \n+5 data (bug) features in the biplot (-b 5).\n+\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/SConscript
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/SConscript Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,97 @@
+import sfle
+import csv
+
+Import( "*" )
+pE = DefaultEnvironment( )
+
+# File-name extensions. In this script as shown, none of these are used by the active code; some are referenced from the commented-out Graphlan section at the bottom.
+sGraphlanAnnotationFileExtension = "-ann.txt"
+sGraphlanCoreAnnotFileExtension = "-ann-core.txt"
+sGraphlanCoreGenesFileExtension = "-core.txt"
+sGraphlanFigureExtension = "-graphlan.pdf"
+sMaaslinDataFileExtension = ".txt"
+sMaaslinReadConfigFileExtension = ".read.config"
+sMaaslinSummaryFileExtension = ".txt"
+
+sCustomRScriptExtension = ".R"
+sPCLExtension = ".pcl"
+sTransposeExtension = ".tsv"
+
+# Files: name of the Graphlan settings file (looked up under fileDirSrc in the commented-out Graphlan section)
+strMaaslinGraphlanSettings = "Graphlan_settings.txt"
+
+# Scripts. NOTE(review): os and fileDirSrc are not defined in this file; presumably they arrive via Import( "*" ) -- confirm.
+sScriptGraphlan = File(os.path.join("..","graphlan","graphlan.py"))  # Graphlan is expected in a sibling "graphlan" directory (relative path)
+sScriptGraphlanAnnotate = File(os.path.join("..","graphlan","graphlan_annotate.py"))
+sScriptMaaslinSummaryToGraphlanAnnotation = File(sfle.d(fileDirSrc,"MaaslinToGraphlanAnnotation.py"))
+sScriptPCLToCoreGene = File(sfle.d(fileDirSrc,"PCLToGraphlanCoreGene.py"))
+
+sProgMaaslin = sfle.d(fileDirSrc,"Maaslin.R")  # same Maaslin.R as c_fileInputMaaslinR below, built without the pE argument
+
+# Settings for the Graphlan figure (passed as --dpi / --size / --pad in the commented-out graphlan call)
+iGraphlanDPI = 150
+iGraphlanFigureSize = 4
+iGraphlanPad = 0.2  # a float despite the "i" prefix
+strGraphlanDirectory = "graphlan"  # sub-directory name used for Graphlan outputs in the commented-out section
+
+c_fileDirLib = sfle.d( fileDirSrc, "lib" )
+c_fileInputMaaslinR = sfle.d( pE, fileDirSrc, "Maaslin.R" )
+c_afileTestsR = [sfle.d( pE, c_fileDirLib, s ) for s in  # lib scripts that get a testthat run in the "Test scripts" loop
+ ("IO.R", "SummarizeMaaslin.R", "Utility.R", "ValidateData.R")]
+
+c_afileDocsR = c_afileTestsR + [sfle.d( pE, c_fileDirLib, s ) for s in  # tested scripts plus the remaining lib scripts; all get inlinedocs PDFs
+ ( "AnalysisModules.R", "scriptBiplotTSV.R", "BoostGLM.R", "Constants.R", "MaaslinPlots.R")]
+
+##Test scripts: register one testthat run per library script as a default SCons target
+for fileInputR in c_afileTestsR:
+  strBase = sfle.rebase( fileInputR, True )  # presumably the script's base name without extension (test dirs are named test-IO, test-Utility, ...) -- confirm against sfle
+  #Testing summary file, written to <fileDirOutput>/<strBase>-TestReport.txt
+  fileTestingSummary = sfle.d( pE, fileDirOutput, strBase +"-TestReport.txt" )
+  dirTestingR = Dir( sfle.d( fileDirSrc, "test-" + strBase ) )  # directory holding the tests for this script
+  Default( sfle.testthat( pE, fileInputR, dirTestingR, fileTestingSummary ) )
+
+##Inline doc: register one inlinedocs PDF per script as a default SCons target
+for fileProg in c_afileDocsR:
+  filePDF = sfle.d( pE, fileDirOutput, sfle.rebase( fileProg, sfle.c_strSufR, sfle.c_strSufPDF ) )  # output name: R suffix swapped for the PDF suffix
+  Default( sfle.inlinedocs( pE, fileProg, filePDF, fileDirTmp ) )
+
+##Start regression suite
+execfile( "SConscript_maaslin.py" )  # Python 2 builtin; executes the file in this script's namespace, which is how MaAsLin() becomes callable below
+
+##Input pcl files: every file in fileDirInput ending with the PCL suffix
+lsMaaslinInputFiles = Glob( sfle.d( fileDirInput, "*" + sfle.c_strSufPCL ) )
+
+## Run MaAsLin and generate output
+for strPCLFile in lsMaaslinInputFiles:
+  Default( MaAsLin( strPCLFile ))  # NOTE(review): MaAsLin() in SConscript_maaslin.py has no return statement and already calls Default() on its own output, so this outer Default() receives None -- confirm this is intended
+
+#  #Graphlan figure
+#  #TODO Fix path dependent, better way to know it is installed?
+#  if(os.path.exists(sScriptGraphlan.get_abspath())):
+
+#    ## Run Graphlan on all output projects
+#    strProjectName = os.path.splitext(os.path.split(strPCLFile.get_abspath())[1])[0]
+#    strMaaslinOutputDir = sfle.d(fileDirOutput,strProjectName)
+
+#    ##Get maaslin data files
+#    strMaaslinSummaryFile = sfle.d(os.path.join(strMaaslinOutputDir, strProjectName + sMaaslinSummaryFileExtension))
+
+#    # Make core gene file
+#    sCoreGeneFile = File(sfle.d(strMaaslinOutputDir,  os.path.join(strGraphlanDirectory,sfle.rebase(strMaaslinSummaryFile, sMaaslinSummaryFileExtension,sGraphlanCoreGenesFileExtension))))
+#    sReadConfigFile = File(sfle.d(fileDirInput,sfle.rebase(strMaaslinSummaryFile, sMaaslinSummaryFileExtension,sMaaslinReadConfigFileExtension)))
+#    sfle.op(pE, sScriptPCLToCoreGene, [[False, strPCLFile],[False, sReadConfigFile],[True, sCoreGeneFile]])
+
+#    # Make annotation file
+#    sAnnotationFile = File(sfle.d(strMaaslinOutputDir, os.path.join(strGraphlanDirectory,sfle.rebase(strMaaslinSummaryFile, sMaaslinSummaryFileExtension,sGraphlanAnnotationFileExtension))))
+#    sfle.op(pE, sScriptMaaslinSummaryToGraphlanAnnotation, [[False, strMaaslinSummaryFile],[False,sCoreGeneFile],[False,File(sfle.d(fileDirSrc,strMaaslinGraphlanSettings))],[True,sAnnotationFile]])
+
+#    # Generate core gene annotation file names
+#    sCoreGeneAnnotationFile = File(sfle.d(strMaaslinOutputDir,  os.path.join(strGraphlanDirectory,sfle.rebase(strMaaslinSummaryFile, sMaaslinSummaryFileExtension,sGraphlanCoreAnnotFileExtension))))
+#    sfle.op(pE, sScriptGraphlanAnnotate, ["--annot",[sAnnotationFile],[False, sCoreGeneFile],[True, sCoreGeneAnnotationFile]])
+
+#    # Call graphlan
+#    # graphlan.py --dpi 150 --size 4 --pad 0.2 core_genes.annot.xml core_genes.png
+#    sGraphlanFigure = File(sfle.d(strMaaslinOutputDir,  os.path.join(strGraphlanDirectory, sfle.rebase(strMaaslinSummaryFile, sMaaslinSummaryFileExtension,sGraphlanFigureExtension))))
+#    sfle.op(pE, sScriptGraphlan, [[False, sCoreGeneAnnotationFile],[True, sGraphlanFigure],"--dpi",iGraphlanDPI,"--size",iGraphlanFigureSize,"--pad",iGraphlanPad])
+
+#    Default(sCoreGeneFile,sAnnotationFile,sCoreGeneAnnotationFile,sGraphlanFigure)
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/SConscript_maaslin.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/SConscript_maaslin.py Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+"""
+Authors: Timothy Tickle and Curtis Huttenhower
+Description: Find associations in two matrices of data.
+"""
+
+__author__ = "Timothy Tickle and Curtis Huttenhower"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle","Curtis Huttenhower"]
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@hsph.harvard.edu"
+
+import argparse
+import os
+import sfle
+import sys
+
+c_strSufRC = ".read.config"  # suffix of the optional read-config file that sits next to a PCL input
+
+c_fileDirSrc = Dir( sfle.d( os.path.dirname( sfle.current_file( ) ), sfle.c_strDirSrc ) )  # source directory resolved relative to sfle.current_file()
+c_fileProgMaaslin = File( sfle.d( c_fileDirSrc, "Maaslin.R" ) )
+sArgsExt = ".args"  # suffix of the optional per-project args file
+#Commandline options to drop when they appear in an args file; MaAsLin() adds -i / -I itself when the matching input files exist
+lsIgnore = ["-i","-I","--input_config","--input_process"]
+
+def MaAsLin( filePCL ):  # Registers the SCons steps that transpose one PCL file and run Maaslin.R on it. NOTE(review): body indentation was flattened by the diff viewer; nesting below is implied, not shown.
+ #Build the companion input file names (.R script, read config, args); each becomes a File node if it exists on disk, otherwise ""
+ strBase = filePCL.get_abspath().replace( sfle.c_strSufPCL, "" )  # absolute path with every occurrence of the PCL suffix removed
+ strR, strRC, strArgs = (( strBase + s ) for s in (sfle.c_strSufR, c_strSufRC, sArgsExt))
+ fileR, fileRC, fileArgs = (( File( s ) if os.path.exists( s ) else "" ) for s in (strR, strRC, strArgs))
+
+ ## Read in an args file if it exists, dropping every option listed in lsIgnore together with the token that follows it
+ lsArgs = []
+ if fileArgs:
+ fReader = csv.reader(open(fileArgs.get_abspath(),'r'), delimiter = " ")  # NOTE(review): the file handle is never explicitly closed; csv is not imported in this file and presumably comes from the namespace of the script that execfile()s it -- confirm
+ lsArgsTmp = []
+ [lsArgsTmp.extend(lsLine) for lsLine in fReader]  # flattens all rows into one token list (comprehension used only for its side effect)
+ fSkip = False  # True while the next token is the value of an ignored option
+ for s in lsArgsTmp:
+ if s in lsIgnore:
+ fSkip=True
+ continue
+ if fSkip:
+ fSkip = not fSkip
+ continue
+ lsArgs.append(s)
+
+ lsInputArgs = ["-I",[fileR]] if fileR else []  # -I <R script>, only when the script exists
+ lsInputArgs.extend(["-i",[fileRC]] if fileRC else [])  # -i <read config>, only when the config exists
+ lsArgs.extend(lsInputArgs)
+
+ strBase = os.path.basename( strBase )  # from here on strBase is just the project name, not a full path
+ fileTSVFile = File(sfle.d(fileDirTmp,sfle.rebase(filePCL,sfle.c_strSufPCL,sfle.c_strSufTSV)))  # transposed copy of the PCL, placed in fileDirTmp
+ strT = File( sfle.d( os.path.join(fileDirOutput.get_abspath(), strBase, strBase + sfle.c_strSufTXT) ) )  # output file: <fileDirOutput>/<project>/<project><TXT suffix>
+
+ #Transpose PCL (presumably pipes filePCL through the transpose program into fileTSVFile -- confirm against sfle.spipe). NOTE(review): pE, fileDirTmp, fileDirOutput and c_fileProgTranspose are not defined in this file.
+ sfle.spipe(pE, filePCL, c_fileProgTranspose, fileTSVFile)
+ #Run MaAsLin with the collected arguments, followed by the output file and the transposed TSV
+ sfle.op(pE, c_fileProgMaaslin, lsArgs+[[True,strT],[False, fileTSVFile]])
+ if fileArgs: Depends(c_fileProgMaaslin, fileArgs)  # declares the args file as a dependency of the Maaslin.R program node
+ Default(strT)  # make the output file a default build target; the function itself returns None
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/datatypes_conf.xml Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<datatypes>
+    <registration>
+  <datatype extension="maaslin" type="galaxy.datatypes.data:Text" subclass="true" display_in_upload="true"/>
+    </registration>
+</datatypes>
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/doc/MaAsLin_User_Guide_v3.docx
b
Binary file maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/doc/MaAsLin_User_Guide_v3.docx has changed
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/doc/Merge_Metadata_Read_Me.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/doc/Merge_Metadata_Read_Me.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,33 @@
+I. Quick start.
+
+The merge_metadata.py script has been included in the MaAsLin package to help add metadata to otu tables (or any tab delimited file where columns are the samples). This script was used to make the maaslin_demo.pcl file found in this project.
+
+The generic command to run the merge_metadata.py is:
+python merge_metadata.py input_metadata_file < input_measurements_file > output_pcl_file
+
+Examples of the expected files are found in this project in the directory maaslin/input/for_merge_metadata
+An example of how to run the command on the example files is as follows (when in the maaslin folder in a terminal):
+python src/merge_metadata.py input/for_merge_metadata/maaslin_demo_metadata.metadata < input/for_merge_metadata/maaslin_demo_measurements.pcl > input/maaslin_demo.pcl
+
+II. Script overview
+merge_metadata.py takes a tab delimited metadata file and adds it to an otu table. Both files have expected formats given below. Additionally, if a pipe-delimited consensus lineage is given in the IDs of the OTUs (for instance for the genus Bifidobacterium, Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium), the higher level clades in the consensus lineage are added to other otus in the same clade level generating all higher level clade information captured in the otu data*. This hierarchy is then normalized using the same hierarchical structure. This means, after using the script, a sample will sum to more than 1, typically somewhere around 6 but will depend on if your data is originally at genus, species, or another level of resolution. All terminal otus (or the original otus) in a sample should sum to 1.
+
+*To help combat multiple comparisons, additional clades are only added if they add information to the data set. This means if you have an otu Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium and no other related otus until Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales, Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae will not be added to the data set because it will be no different than the already existing and more specific Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium otu. Clades at and above Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales will be included depending on if there are other otus to add them to at those clade levels.
+
+
+III. Description of input files
+
+Metadata file:
+Please make the file as follows:
+1. Tab delimited
+2. Rows are samples, columns are metadata
+3. Sample Ids in the metadata file should match the sample ids in the otu table.
+4. Use NA for values which are not recorded.
+5. An example file is found at input/for_merge_metadata/maaslin_demo_metadata.metadata
+
+OTU table:
+Please make the file as follows:
+1. Tab delimited.
+2. Rows are otus, columns are samples (note this is transposed in comparison to the metadata file).
+3. If a consensus lineage is included in the otu name, use pipes as the delimiter.
+4. An example file is found at input/for_merge_metadata/maaslin_demo_measurements.pcl
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/for_merge_metadata/maaslin_demo_measurements.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/for_merge_metadata/maaslin_demo_measurements.pcl Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,14 @@
+ID Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8
+Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium|1 0.0507585 0.0861117 0.00168464 0.0011966 0.0164305 0.00592628 0.0367439 0.0663809
+Bacteria|Actinobacteria|Actinobacteria|Coriobacteriales|Coriobacteriaceae|1008 0 0.166041 0.16004 0.0984803 0.127644 0 0.00320332 0
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales|Bacteroidaceae|Bacteroides|101 0.0110852 0.0229631 0.019991 0.0329065 0.044465 0.020979 0 0.0450837
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales|Prevotellaceae|1010 0.1993 0 0.134883 0.179251 0 0.065189 0.349727 0.254737
+Bacteria|Firmicutes|Bacilli|Lactobacillales|Enterococcaceae|1023 0.290198 0.0119232 0 0.00538471 0.351818 0.0321204 0.0192199 0
+Bacteria|Firmicutes|Bacilli|Lactobacillales|Unclassified|1013 0.0869312 0 0.0982704 0.0971641 0.101253 0.10691 0 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Anaerostipes|1026 0 0 0.143194 0 0.131957 0.142349 0.228754 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Roseburia|1032 0.233372 0.41157 0.280773 0.329065 0.010269 0 0.0380629 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|1156 0.0641774 0.151248 0.0791779 0.0595908 0.00133498 0 0.00471076 0
+Bacteria|Firmicutes|Erysipelotrichi|Erysipelotrichales|Erysipelotrichaceae|Coprobacillus|1179 0 0.00971517 0.0049416 0.123489 0 0.380586 0 0.380998
+Bacteria|Firmicutes|Unclassified|1232 0.0641774 0.13535 0.0701932 0.0538471 0.0667488 0.0681522 0.127191 0.0622321
+Bacteria|Proteobacteria|Betaproteobacteria|Burkholderiales|Alcaligenaceae|Parasutterella|1344 0 0.00507838 0.0012354 0.00167524 0.0351201 0 0.00395704 0
+Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia/Shigella|1532 0 0 0.00561545 0.017949 0.11296 0.177788 0.18843 0.190568
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/for_merge_metadata/maaslin_demo_metadata.metadata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/for_merge_metadata/maaslin_demo_metadata.metadata Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,9 @@
+ID Cohort Age Height Weight Sex Smoking Star_Trek_Fan Favorite_color
+Sample1 Healthy 87 60 151 0 0 1 Yellow
+Sample2 Healthy 78 72 258 1 0 1 Blue
+Sample3 Healthy 3 63 195 0 1 0 Green
+Sample4 Healthy 2 67 172 1 0 0 Yellow
+Sample5 IBD 32 71 202 1 1 1 Green
+Sample6 IBD 10 65 210 0 1 0 Blue
+Sample7 IBD 39 61 139 1 1 0 Green
+Sample8 IBD 96 64 140 0 0 1 Blue
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.R Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,4 @@
+processFunction = function( frmeData, aiMetadata, aiGenetics, aiData )  # Pass-through processing hook for the demo: returns its inputs unchanged
+{
+  return( list(frmeData = frmeData, aiMetadata = aiMetadata, aiData = aiData) )  # aiGenetics is accepted but not included in the returned list
+}
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.args
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.args Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,1 @@
+-v DEBUG
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.pcl Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,30 @@
+sample Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8
+Age 87 78 3 2 32 10 39 96
+Cohort Healthy Healthy Healthy Healthy IBD IBD IBD IBD
+Favorite_color Yellow Blue Green Yellow Green Blue Green Blue
+Height 60 72 63 67 71 65 61 64
+Sex 0 1 0 1 1 0 1 0
+Smoking 0 0 1 0 1 1 1 0
+Star_Trek_Fan 1 1 0 0 1 0 0 1
+Weight 151 258 195 172 202 210 139 140
+Bacteria 1 1 1 1 1 1 1 1
+Bacteria|Actinobacteria|Actinobacteria 0.0507585 0.252153 0.161725 0.0996769 0.144075 0.00592628 0.0399472 0.0663809
+Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium|1 0.0507585 0.0861117 0.00168464 0.0011966 0.0164305 0.00592628 0.0367439 0.0663809
+Bacteria|Actinobacteria|Actinobacteria|Coriobacteriales|Coriobacteriaceae|1008 0 0.166041 0.16004 0.0984803 0.127644 0 0.00320332 0
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales 0.210385 0.0229631 0.154874 0.212157 0.044465 0.0861681 0.349727 0.29982
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales|Bacteroidaceae|Bacteroides|101 0.0110852 0.0229631 0.019991 0.0329065 0.044465 0.020979 0 0.0450837
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales|Prevotellaceae|1010 0.1993 0 0.134883 0.179251 0 0.065189 0.349727 0.254737
+Bacteria|Firmicutes 0.738856 0.719806 0.67655 0.668541 0.663381 0.730117 0.417939 0.443231
+Bacteria|Firmicutes|Bacilli|Lactobacillales 0.37713 0.0119232 0.0982704 0.102549 0.45307 0.13903 0.0192199 0
+Bacteria|Firmicutes|Bacilli|Lactobacillales|Enterococcaceae|1023 0.290198 0.0119232 0 0.00538471 0.351818 0.0321204 0.0192199 0
+Bacteria|Firmicutes|Bacilli|Lactobacillales|Unclassified|1013 0.0869312 0 0.0982704 0.0971641 0.101253 0.10691 0 0
+Bacteria|Firmicutes|Clostridia|Clostridiales 0.29755 0.562817 0.503145 0.388656 0.143561 0.142349 0.271528 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae 0.233372 0.41157 0.423967 0.329065 0.142226 0.142349 0.266817 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Anaerostipes|1026 0 0 0.143194 0 0.131957 0.142349 0.228754 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Roseburia|1032 0.233372 0.41157 0.280773 0.329065 0.010269 0 0.0380629 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|1156 0.0641774 0.151248 0.0791779 0.0595908 0.00133498 0 0.00471076 0
+Bacteria|Firmicutes|Erysipelotrichi|Erysipelotrichales|Erysipelotrichaceae|Coprobacillus|1179 0 0.00971517 0.0049416 0.123489 0 0.380586 0 0.380998
+Bacteria|Firmicutes|Unclassified|1232 0.0641774 0.13535 0.0701932 0.0538471 0.0667488 0.0681522 0.127191 0.0622321
+Bacteria|Proteobacteria 0 0.00507838 0.00685085 0.0196243 0.14808 0.177788 0.192387 0.190568
+Bacteria|Proteobacteria|Betaproteobacteria|Burkholderiales|Alcaligenaceae|Parasutterella|1344 0 0.00507838 0.0012354 0.00167524 0.0351201 0 0.00395704 0
+Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia/Shigella|1532 0 0 0.00561545 0.017949 0.11296 0.177788 0.18843 0.190568
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/input/maaslin_demo2.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,5 @@
+Matrix: Metadata
+Read_PCL_Rows: -Weight
+
+Matrix: Abundance
+Read_PCL_Rows: Bacteria-
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin.xml Sun Feb 08 23:39:43 2015 -0500
b
b'@@ -0,0 +1,170 @@\n+<tool id="maaslin_run" name="MaAsLin" version="1.0.1">\r\n+<code file="maaslin_format_input_selector.py"/> \r\n+<description></description>\r\n+<command interpreter="python">maaslin_wrapper.py \r\n+--lastmeta $cls_x \r\n+--input $inp_data\r\n+--output $out_file1\r\n+--alpha $alpha\r\n+--min_abd $min_abd\r\n+--min_samp $min_samp\r\n+--zip_file $zip_file\r\n+--tool_option1 $tool_option1\r\n+</command>\r\n+\r\n+  <inputs>\r\n+\t<param format="maaslin" name="inp_data" type="data" label="pcl file of metadata and microbial community measurements: Upload using Get Data-Upload file - Use File-Format = maaslin - Sample file below"/>\r\n+\t<param name="cls_x" type="select" label="Last metadata row (Select \'Weight\' for demo data set)"  multiple="False" size ="70"  dynamic_options="get_cols(inp_data,\'0\')"/>\r\n+\t<param name="alpha" type="float" size="8" value="0.05" label="Maximum false discovery rate (significance threshold)"/>\r\n+\t<param name="min_abd" type="float" size="8" value="0.0001" label="Minimum for feature relative abundance filtering"/>\r\n+\t<param name="min_samp" type="float" size="8" value="0.01" label="Minimum for feature prevalence filtering"/>\r\n+\r\n+\t<param name="tool_option1" type="select" label="Type of output">\r\n+           <option value="1">Single File: Summary</option>\r\n+           <option value="2">Two Files: Complete zipped results + Summary</option>\r\n+\t</param>\r\n+        </inputs>\r\n+    <outputs>\r\n+        <data format="tabular" name="out_file1"  />\r\n+        <data  name="zip_file"  format="zip">\r\n+            <filter>tool_option1 == "2"</filter>\r\n+        </data>\r\n+    </outputs>\r\n+ <requirements>\r\n+    <requirement type="set_environment">maaslin_SCRIPT_PATH</requirement>\r\n+  </requirements>\r\n+   <tests>\r\n+       <test>\r\n+             <param name="inp_data" value="maaslin_input"  ftype="maaslin"  />\r\n+             <param name="cls_x" value="9" />\r\n+             <param name="alpha" 
value="0.05"  />\r\n+             <param name="min_abd" value="0.0001"  />\r\n+             <param name="min_samp" value="0.01"  />\r\n+             <param name="tool_option1" value="1"  />\r\n+             <output name="out_file1" file="maaslin_output"  />\r\n+                <assert_contents>\r\n+                   <has_text text="Variable     Feature Value   Coefficient     N       N.not.0 P.value Q.value" />\r\n+                </assert_contents>\r\n+       </test>\r\n+   </tests>\r\n+  <help>\r\n+\r\n+Feedback?  Not working?  Please contact us at Maaslin_google_group_ .\r\n+\r\n+\r\n+MaAsLin: Multivariate Analysis by Linear Models\r\n+-----------------------------------------------\r\n+\r\n+MaAsLin is a multivariate statistical framework that finds associations between clinical metadata and microbial community abundance or function. The clinical metadata can be of any type continuous (for example age and weight), boolean (sex, stool/biopsy), or discrete/factor (cohort groupings and phenotypes). MaAsLin is best used in the case when you are associating many metadata with microbial measurements. When this is the case each metadatum can be a different type. For example, you could include age, weight, sex, cohort and phenotype in the same input file to be analyzed in the same MaAsLin run. The microbial measurements are expected to be normalized before using MaAsLin and so are proportional data ranging from 0 to 1.0.\r\n+\r\n+The results of a MaAsLin run are the association of a specific microbial community member with metadata. These associations are without the influence of the other metadata in the study. There are certain factors known that can influence the microbiome (for example diet, age, geography, fecal or biopsy sample origin). MaAsLin allows one to detect the effect of a metadata, possibly a phenotype, deconfounding the effects of diet, age, sample origin or any other metadata captured in the study!\r\n+\r\n+.. 
image:: https://bytebucket.org/biobakery/galaxy_maaslin/wiki/Figure1-Overview.png         \r\n+    :height: 500   \r\n+    :width: 600 \r\n+\r\n+\r\n+*Maaslin Analysis Overview* MaAsLin perf'..b'etadata row which is only followed by rows which are microbial measurements.\r\n+\r\n+**Maximum false discovery rate (Significance threshold)** Associations are found significant if their q-value is equal to or less than this threshold.\r\n+\r\n+**Minimum for feature relative abundance filtering** The minimum relative abundance allowed in the data. Values below this are removed and imputed as the median of the sample data.\r\n+\r\n+**Minimum for feature prevalence filtering** The minimum percentage of samples a feature can have abundance in before being removed.\r\n+\r\n+**Type of Output** Select one of the two options for output (summary or detailed results).\r\n+\r\n+Outputs\r\n+-------\r\n+\r\n+The Run MaAsLin module will create either A) a summary text file of plotted significant associations or B) a compressed directory of associations (significant and not significant).\r\n+\r\n+A. Any association that had a q-value less than or equal to the significance threshold will be included in a tab-delimited file.\r\n+\r\n+B. The following files will be generated per MaAsLin run. In the following listing the term projectname refers to what you named your pcl file without the extension.\r\n+\r\n+**Analysis** (These files are useful for analysis):\r\n+\r\n+**projectname-metadata.txt** Each metadata will have a file of associations. Any associations indicated to be performed after initial boosting is recorded here. Included are the information from the final general linear model (performed after the boosting) and the FDR corrected p-value (q-value). Can be opened as a text file or spreadsheet.\r\n+\r\n+**projectname-metadata.pdf** Any association that had a q-value less than or equal to the significance threshold will be plotted here. 
If this file does not exist, the projectname-metadata.txt should not have an entry that is less than or equal to the threshold. Factor and boolean data is plotted as notched box plots; continuous data is plotted as a scatter plot with a line of best fit.\r\n+\r\n+.. image:: https://bytebucket.org/biobakery/galaxy_maaslin/wiki/Maaslin_Output.png      \r\n+    :height: 500        \r\n+    :width: 600   \r\n+\r\n+\r\n+\r\n+*Example of the projectname-metadata.pdf file* Significant associations are combined in files of associations per metadata. Factor and boolean data is plotted as notched box plots; continuous data is plotted as a scatter plot with a line of best fit. Plots show raw data, header data show information from the reduced \r\n+\r\n+**projectname_Summary.txt** Any entry in the projectname-metadata.pdf are collected together here. Can be opened as a text file or spreadsheet.\r\n+\r\n+**Troubleshooting** (These files are typically not used for analysis but are there for documenting the process and troubleshooting):\r\n+\r\n+**projectname.txt** Contains the detail for the statistical engine. Is useful for detailed troubleshooting.\r\n+\r\n+**data.tsv** The data matrix that was read in (transposed). Useful for making sure the correct data was read in.\r\n+\r\n+**data.read.config** Can be used to read in the data.tsv .\r\n+\r\n+**metadata.tsv** The metadata that was read in (transposed). Useful for making sure the correct metadata was read in.\r\n+\r\n+**metadata.read.config** Can be used to read in the metadata.tsv .\r\n+\r\n+**read_merged.tsv** The data and metadata merged (transposed). Useful for making sure the merging occurred correctly.\r\n+\r\n+**read_merged.read.config** Can be used to read in the read_merged.tsv .\r\n+\r\n+**read_cleaned.tsv** The data read in, merged, and then cleaned. 
After this process the data is written to this file for reference if needed.\r\n+\r\n+**read_cleaned.read.config** Can be used to read in read_cleaned.tsv .\r\n+\r\n+**ProcessQC.txt** Contains quality control for the MaAsLin analysis. This includes information on the magnitude of outlier removal.\r\n+\r\n+Contacts\r\n+--------\r\n+\r\n+Please feel free to contact us at ttickle@hsph.harvard.edu  for any questions or comments!\r\n+\r\n+.. _Maaslin_google_group: https://groups.google.com/d/forum/maaslin-users\r\n+\r\n+ </help>\r\n+</tool>\r\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin_format_input_selector.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin_format_input_selector.py Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+
+"""
+Author: George Weingart
+Description: Dynamically read columns from input file for UI
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "George Weingart"
+__copyright__ = "Copyright 2012"
+__credits__ = ["George Weingart"]
+__license__ = "MIT"
+__maintainer__ = "George Weingart"
+__email__ = "george.weingart@gmail.com"
+__status__ = "Development"
+
+import sys,string,time
+from pprint import pprint
+
def red(st,l):
    """Shorten ``st`` for display by keeping its head and tail around "..".

    Strings of length ``l`` or less are returned unchanged.  Longer strings
    are reduced to their first ``l // 2`` and last ``l // 2`` characters
    joined by "..", so the result may be up to two characters longer than
    ``l``.
    """
    if len(st) <= l:
        return st
    # Floor division keeps the slice bounds integral on Python 3 as well;
    # plain "/" produces a float there and slicing raises TypeError.
    half = l // 2
    return st[:half] + ".." + st[len(st) - half:]


def _read_first_columns(data):
    """Return the text before the first tab of every line in the dataset file."""
    # "with" guarantees the handle is closed even if reading fails part-way.
    with open(data.dataset.file_name) as input_file:
        return [line.split('\t')[0] for line in input_file]


def _row_number(lastmeta, default=1):
    """Convert ``lastmeta`` to an int, falling back to ``default`` if it is not numeric."""
    try:
        return int(lastmeta)
    except (TypeError, ValueError, OverflowError):
        return default


def _build_options(names, max_len=32):
    """Turn row names into ``(label, position, selected)`` option tuples.

    Positions are 1-based strings counted over *all* entries of ``names``;
    empty names are skipped but still consume a position.  If any non-empty
    name consists only of whitespace (for example a blank line in the file),
    the whole list is replaced by the single catch-all entry the original
    code produced.
    """
    try:
        return [(red(name.split()[0], max_len), '%d' % (pos + 1), False)
                for pos, name in enumerate(names) if name]
    except IndexError:
        # name.split() was empty: keep the historical "*ALL*" fallback
        # (a list with an integer value) unchanged for callers.
        return [['*ALL*', 1, False]]


def get_cols(data,full_names):
    """List the first column of every row of ``data`` as select options.

    Referenced from the tool XML as ``dynamic_options="get_cols(inp_data,'0')"``.
    ``data`` must expose ``dataset.file_name``; an empty string yields an
    empty option list.  ``full_names`` is accepted but not used.
    """
    if data == "": return []
    return _build_options(_read_first_columns(data))


def get_cols_add_line(data,full_names,lastmeta):
    """Like ``get_cols`` but with a leading "-" entry, optionally truncated.

    When ``lastmeta`` parses to an integer other than 1, every entry after
    index ``lastmeta + 1`` (counting the inserted "-") is dropped.  A
    non-numeric ``lastmeta`` is treated as 1, i.e. no truncation.
    """
    if data == "": return []
    display_to = _row_number(lastmeta)
    table_lines = _read_first_columns(data)
    table_lines.insert(0, '-')
    if display_to != 1:
        del table_lines[display_to + 1:]
    return _build_options(table_lines)


def get_cols_features(data,full_names,lastmeta):
    """List the rows that follow the first ``lastmeta`` rows as select options.

    Positions restart at 1 for the first remaining row.  A non-numeric
    ``lastmeta`` is treated as 1, so only the first row is skipped.
    """
    if data == "": return []
    display_from = _row_number(lastmeta)
    table_lines = _read_first_columns(data)
    del table_lines[:display_from]
    return _build_options(table_lines)
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/maaslin_wrapper.py Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+
+"""
+Author: George Weingart
+Description: Wrapper program for maaslin
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "George Weingart"
+__copyright__ = "Copyright 2012"
+__credits__ = ["George Weingart"]
+__license__ = "MIT"
+__maintainer__ = "George Weingart"
+__email__ = "george.weingart@gmail.com"
+__status__ = "Development"
+
+from cStringIO import StringIO
+import sys,string
+import os
+import tempfile 
+from pprint import pprint
+import argparse
+
+######################################################################################
+#  Parse input parms                                                                 #
+######################################################################################
def read_params(x):
    """Build the command-line parser for the MaAsLin wrapper.

    ``x`` is accepted for call compatibility but is not consulted; the
    caller invokes ``parse_args()`` on the returned parser itself.
    Every option takes an optional value (``nargs='?'``); the three numeric
    thresholds are parsed as floats and carry defaults.
    """
    option_table = [
        ('lastmeta', {}),
        ('input', {}),
        ('output', {}),
        ('zip_file', {}),
        ('alpha', {'type': float, 'default': 0.05}),
        ('min_abd', {'type': float, 'default': 0.0001}),
        ('min_samp', {'type': float, 'default': 0.01}),
        ('tool_option1', {}),
    ]
    arg_parser = argparse.ArgumentParser(description='MaAsLin Argparser')
    for dest_name, extra in option_table:
        arg_parser.add_argument('--' + dest_name, action="store", dest=dest_name, nargs='?', **extra)
    return arg_parser
+
+
+
+######################################################################################
+#  Build read config file                                                            #
+######################################################################################
def build_read_config_file(strTempDir,results, DSrc, DMaaslin, root_dir):
    """Create the MaAsLin read.config file and return its path.

    Runs ``CreateReadConfigFile.R`` (found in ``DSrc``) twice from within
    ``DMaaslin``: once to write the "Metadata" matrix covering columns
    ``2-<lastmeta>``, and once in append mode to add the "Abundance" matrix
    covering columns ``<lastmeta + 1>-``.

    Parameters:
        strTempDir: directory in which ``Test.read.config`` is written.
        results:    parsed arguments; only ``results.lastmeta`` is used.
        DSrc:       directory containing ``CreateReadConfigFile.R``.
        DMaaslin:   directory to run the R script from.
        root_dir:   directory the process is switched to before returning.

    Returns:
        The path ``<strTempDir>/Test.read.config``.

    Raises:
        ValueError/TypeError if ``results.lastmeta`` is not an integer.

    Failures of the R script itself are not reported, matching the
    original best-effort behaviour (its exit status was ignored).
    """
    # Imported here so the block does not depend on the file-level imports.
    import subprocess

    TopLimit = int(results.lastmeta)
    ReadConfigFileName = os.path.join(strTempDir, "Test.read.config")
    script = os.path.join(DSrc, "CreateReadConfigFile.R")

    # Argument lists instead of shell strings: paths containing spaces no
    # longer break the command, and the column ranges need no shell quoting.
    commands = [
        [script, "-c", "2-" + str(TopLimit), ReadConfigFileName, "Metadata"],
        [script, "-a", "-c", str(TopLimit + 1) + "-", ReadConfigFileName, "Abundance"],
    ]

    os.chdir(DMaaslin)
    try:
        with open(os.devnull, "w") as devnull:
            for command in commands:
                try:
                    subprocess.call(command, stdout=devnull, stderr=subprocess.STDOUT)
                except OSError:
                    # Script missing or not executable: stay best-effort,
                    # exactly as the ignored os.system() status was.
                    pass
    finally:
        # Always leave the process in root_dir, even if something above fails.
        os.chdir(root_dir)
    return ReadConfigFileName
+
+
+######################################################################################
+#   Main  Program                                                                    #
+######################################################################################
+
# Parse commandline in
# Imported here so this script body does not depend on edits to the
# file-level import list.
import shutil
import subprocess


def _call(cmd_args, quiet=True, **kwargs):
    """Run ``cmd_args`` without a shell and return its exit status.

    With ``quiet`` set, stdout and stderr are discarded.  Returns None when
    the program cannot be started, so callers stay best-effort like the
    os.system() calls this replaces.
    """
    try:
        if quiet:
            with open(os.devnull, "w") as devnull:
                return subprocess.call(cmd_args, stdout=devnull, stderr=subprocess.STDOUT, **kwargs)
        return subprocess.call(cmd_args, **kwargs)
    except OSError:
        return None


parser = read_params( sys.argv )
results = parser.parse_args()
root_dir = os.environ.get('maaslin_SCRIPT_PATH')
if not root_dir:
    # Without this the script used to die later with an opaque TypeError.
    sys.exit("maaslin_SCRIPT_PATH is not set; cannot locate the MaAsLin scripts")

### If option 2 is selected inform user on 2 outputs
if results.tool_option1 == "2":
    print("***Please note: 2 output files are  generated: Complete zipped results + Summary  ***")

### Project name
strProjectName = os.path.splitext(os.path.basename(results.input))[0]

### Define directory locations
DSrc = os.path.join(root_dir,"src")
DMaaslin = os.path.join(root_dir)
DMaaslinGalaxy = os.path.join(root_dir)

### Make temporary folder to work in
### Change permissions to make useable
strTempDir = tempfile.mkdtemp()
os.chmod(strTempDir, 0o755)
strProjectDir = os.path.join(strTempDir,strProjectName)
if not os.path.isdir(strProjectDir):
    os.mkdir(strProjectDir)
# chmod explicitly: os.mkdir's mode is filtered by the umask, "mkdir -m" is not.
os.chmod(strProjectDir, 0o755)

### Transpose the pcl file to a tsv file
TsvFile = os.path.join(strTempDir,"output.tsv")
try:
    with open(results.input) as pcl_in, open(TsvFile, "w") as tsv_out:
        # stderr is left attached, as it was with the shell redirection.
        _call(["python", os.path.join(DMaaslinGalaxy, "transpose.py")],
              quiet=False, stdin=pcl_in, stdout=tsv_out)
except (IOError, OSError) as err:
    # Keep going: the missing-output branch below reports the failure.
    sys.stderr.write("Could not transpose input file: %s\n" % err)

### Make path for target output file
OutputFile = os.path.join(strTempDir,strProjectName,strProjectName+".txt")

### Make read config file
ReadConfigFileName = build_read_config_file(strTempDir,results, DSrc, DMaaslin, root_dir)

### Build MaAsLin command
CmdsArray = [
    os.path.join(DSrc,"Maaslin.R"),
    "-d", str(results.alpha),
    "-r", str(results.min_abd),
    "-p", str(results.min_samp),
    "-i", ReadConfigFileName,
    OutputFile,
    TsvFile,
    "-v", "ERROR",
]

### Call MaAsLin (output discarded, exit status ignored as before)
_call(CmdsArray)

### Copy output file to make available to galaxy
MsgFileName = os.path.join(strTempDir,strProjectName,"output.txt")
if not os.path.isfile(MsgFileName):
    # MaAsLin produced no summary: hand the user an explanation instead.
    with open(MsgFileName,"w") as OutFile:
        OutFile.write(
            "A MaAsLin error has occurred\n"
            "It typically happens when incorrect 'Last metadata row' was selected\n"
            "For demo data please choose 'Weight'\n"
        )
shutil.copyfile(MsgFileName, results.output)

### Zip up output folder and copy it to make available to galaxy
# Only when a zip destination was really given.  Previously the archive was
# always copied, so a literal "None" destination (presumably what Galaxy
# substitutes when the zip output is filtered out - TODO confirm) produced a
# stray file named "None" in the working directory.
if results.zip_file and results.zip_file != "None":
    ZipFileName = os.path.join(strTempDir,strProjectName+".zip")
    _call(["zip", "-jr", ZipFileName, strProjectDir])
    if os.path.isfile(ZipFileName):
        shutil.copyfile(ZipFileName, results.zip_file)

### NOTE: the temp directory is deliberately left in place; its removal was
### already disabled in the original script.
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/CreateReadConfigFile.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/CreateReadConfigFile.R Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,63 @@
+#!/usr/bin/env Rscript
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+
inlinedocs <- function(
##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
##description<< Allows read config files to be created.
) {
  # pArgs is not a parameter; it is resolved from the enclosing environment
  # when the function is called.
  pArgs
}
+
### Logging class
suppressMessages(library( logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
### Class for commandline argument processing
suppressMessages(library( optparse, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))

### Source the IO.R for the script
### The paths are relative, so the script must be run from the directory
### that contains src/
source(file.path("src","lib","IO.R"))
source(file.path("src","lib","Constants.R"))

### Create command line argument parser
### Positional arguments:
###   strOutputRC    the read.config file to write (or append to)
###   strMatrixName  the name of the matrix described by this entry
pArgs <- OptionParser( usage = "%prog [optional] <strOutputRC> <strMatrixName>" )
# Settings for Read config
## row indices
pArgs <- add_option( pArgs, c("-r", "--rows"), type="character", action="store", dest="strRows", default=NA, metavar="row_indices", help="Rows to read by index starting with 1.")
## column indices
pArgs <- add_option( pArgs, c("-c", "--columns"), type="character", action="store", dest="strColumns", default=NA, metavar="column_indices", help="Columns to read in by index starting with 1.")
## delimiter
pArgs <- add_option( pArgs, c("-d", "--delimiter"), type="character", action="store", dest="charDelimiter", default="\t", metavar="delimiter", help="Delimiter to read the matrix.")
## append to current file
pArgs <- add_option( pArgs, c("-a", "--append"), type="logical", action="store_true", dest="fAppend", default=FALSE, metavar="append", help="Append to existing data. Default no append.")
### Parse arguments
lsArgs <- parse_args( pArgs, positional_arguments = TRUE )

#Get positional arguments
if( length( lsArgs$args ) != 2 )
{
  # print_help() writes the usage text itself and returns NULL invisibly, so
  # passing its result to stop() raised an error without any message.
  print_help( pArgs )
  stop( "Expected exactly two positional arguments: <strOutputRC> <strMatrixName>" )
}

### Write to file the read config script
### funcWriteMatrixToReadConfigFile is not defined in this script; presumably
### it comes from the sourced src/lib/IO.R
funcWriteMatrixToReadConfigFile(strConfigureFileName=lsArgs$args[1], strMatrixName=lsArgs$args[2], strRowIndices=lsArgs$options$strRows,
  strColIndices=lsArgs$options$strColumns,acharDelimiter=lsArgs$options$charDelimiter,fAppend=lsArgs$options$fAppend)
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/Graphlan_settings.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/Graphlan_settings.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,19 @@
+title Metadata Associations
+title_font_size 13
+total_plotted_degrees 280
+start_rotation 270
+internal_labels_rotation 270
+annotation_background_alpha 0.15
+clade_separation 0.35
+class_legend_font_size 12
+annotation_legend_font_size 11
+annotation_font_size 5
+annotation_font_stretch 0
+clade_marker_size 5
+branch_bracket_depth 0.5
+branch_thickness 1.2
+internal_label 1 Ph.
+internal_label 2 Classes
+internal_label 3 Orders
+internal_label 4 Families
+internal_label 5 Genera
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/Maaslin.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/Maaslin.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,607 @@\n+#!/usr/bin/env Rscript\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+inlinedocs <- function(\n+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>\n+##description<< Main driver script. 
Should be called to perform MaAsLin Analysis.\n+) { return( pArgs ) }\n+\n+\n+### Install packages if not already installed\n+vDepLibrary = c("agricolae", "gam", "gamlss", "gbm", "glmnet", "inlinedocs", "logging", "MASS", "nlme", "optparse", "outliers", "penalized", "pscl", "robustbase", "testthat")\n+for(sDepLibrary in vDepLibrary)\n+{\n+  if(! require(sDepLibrary, character.only=TRUE) )\n+  {\n+    install.packages(pkgs=sDepLibrary, repos="http://cran.us.r-project.org")\n+  }\n+}\n+\n+### Logging class\n+suppressMessages(library( logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+### Class for commandline argument processing\n+suppressMessages(library( optparse, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+\n+\n+### Create command line argument parser\n+pArgs <- OptionParser( usage = "%prog [options] <output.txt> <data.tsv>" )\n+\n+# Input files for MaAsLin\n+## Data configuration file\n+pArgs <- add_option( pArgs, c("-i", "--input_config"), type="character", action="store", dest="strInputConfig", metavar="data.read.config", help="Optional configuration file describing data input format.")\n+## Data manipulation/normalization file\n+pArgs <- add_option( pArgs, c("-I", "--input_process"), type="character", action="store", dest="strInputR", metavar="data.R", help="Optional configuration script normalizing or processing data.")\n+\n+# Settings for MaAsLin\n+## Maximum false discovery rate\n+pArgs <- add_option( pArgs, c("-d", "--fdr"), type="double", action="store", dest="dSignificanceLevel", default=0.25, metavar="significance", help="The threshold to use for significance for the generated q-values (BH FDR). Anything equal to or lower than this is significant.  [Default %default]")\n+## Minimum feature relative abundance filtering\n+pArgs <- add_option( pArgs, c("-r", "--minRelativeAbundance"), type="double", action="store", dest="dMinAbd", default=0.0001, metavar="minRelativeAbundance", help="The minimum relative abundance allowed in the data. 
Values below this are removed and imputed as the median of the sample data.  [Default %default]")\n+## Minimum feature prevalence filtering\n+pArgs <- add_option( pArgs, c("-p", "--minPrevalence"), type="double", action="store", dest="dMinSamp", default=0.1, metavar="minPrevalence", help="The minimum percentage of samples a feature can have '..b'r color) and factor (for shape) data with the most significant association\n+    if(is.null(lsArgs$options$strBiplotColor)||is.null(lsArgs$options$strBiplotShapeBy))\n+    {\n+      for(sMetadata in lsSigMetadata)\n+      {\n+        if(is.factor(lsRet$frmeRaw[[sMetadata]]))\n+        {\n+          if(is.null(lsArgs$options$strBiplotShapeBy))\n+          {\n+            lsArgs$options$strBiplotShapeBy = sMetadata\n+            if(!is.null(lsArgs$options$strBiplotColor))\n+            {\n+              break\n+            }\n+          }\n+        }\n+        if(is.numeric(lsRet$frmeRaw[[sMetadata]]))\n+        {\n+          if(is.null(lsArgs$options$strBiplotColor))\n+          {\n+            lsArgs$options$strBiplotColor = sMetadata\n+            if(!is.null(lsArgs$options$strBiplotShapeBy))\n+            {\n+              break\n+            }\n+          }\n+        }\n+      }\n+    }\n+\n+    #If a user defines a feature, make sure it is in the bugs/data indices\n+    if(!is.null(lsFeaturesToPlot) || !is.null(lsArgs$options$strBiplotColor) || !is.null(lsArgs$options$strBiplotShapeBy))\n+    {\n+      lsCombinedFeaturesToPlot = unique(c(lsFeaturesToPlot,lsArgs$options$strBiplotColor,lsArgs$options$strBiplotShapeBy))\n+      lsCombinedFeaturesToPlot = lsCombinedFeaturesToPlot[!is.null(lsCombinedFeaturesToPlot)]\n+\n+      # If bugs to plot were given then do not use the significant bugs from the MaAsLin output which is default\n+      if(!is.null(lsFeaturesToPlot))\n+      {\n+        lsSigBugs = c()\n+        liSigMetadata = c()\n+      }\n+      liSigMetadata = unique(c(liSigMetadata,which(colnames(lsRet$frmeData) 
%in% setdiff(lsCombinedFeaturesToPlot, lsOriginalFeatureNames))))\n+      lsSigBugs = unique(c(lsSigBugs, intersect(lsCombinedFeaturesToPlot, lsOriginalFeatureNames)))\n+    }\n+\n+    # Convert bug names and metadata names to comma delimited strings\n+    vsBugs = paste(lsSigBugs,sep=",",collapse=",")\n+    vsMetadata = paste(colnames(lsRet$frmeData)[liSigMetadata],sep=",",collapse=",")\n+    vsMetadataByLevel = c()\n+\n+    # Possibly remove the NA levels depending on the preferences\n+    vsRemoveNA = c(NA, "NA", "na", "Na", "nA")\n+    if(!lsArgs$options$fPlotNA){ vsRemoveNA = c() }\n+    for(aiMetadataIndex in liSigMetadata)\n+    {\n+      lxCurMetadata = lsRet$frmeData[[aiMetadataIndex]]\n+      sCurName = names(lsRet$frmeData[aiMetadataIndex])\n+      if(is.factor(lxCurMetadata))\n+      {\n+        vsMetadataByLevel = c(vsMetadataByLevel,paste(sCurName, setdiff( levels(lxCurMetadata), vsRemoveNA),sep="_"))\n+      } else {\n+        vsMetadataByLevel = c(vsMetadataByLevel,sCurName)\n+      }\n+    }\n+\n+    # If NAs should not be plotted, make them the background color\n+    # Unless explicitly asked to be plotted\n+    sPlotNAColor = "white"\n+    if(lsArgs$options$fInvert){sPlotNAColor = "black"}\n+    if(lsArgs$options$fPlotNA){sPlotNAColor = "grey"}\n+    sLastMetadata = lsOriginalMetadataNames[max(which(lsOriginalMetadataNames %in% names(lsRet$frmeData)))]\n+\n+    # Plot biplot\n+    logdebug("PlotBiplot:Started")\n+    funcDoBiplot(\n+      sBugs = vsBugs,\n+      sMetadata = vsMetadataByLevel,\n+      sColorBy = lsArgs$options$strBiplotColor,\n+      sPlotNAColor = sPlotNAColor,\n+      sShapeBy = lsArgs$options$strBiplotShapeBy,\n+      sShapes = lsArgs$options$sShapes,\n+      sDefaultMarker = "16",\n+      sRotateByMetadata = lsArgs$options$sRotateByMetadata,\n+      dResizeArrow = lsArgs$options$dBiplotMetadataScale,\n+      sInputFileName = lsRet$frmeRaw,\n+      sLastMetadata = sLastMetadata,\n+      sOutputFileName = 
file.path(outputDirectory,paste(strBase,"-biplot.pdf",sep="")))\n+    logdebug("PlotBiplot:Stopped")\n+  }\n+}\n+\n+# This is the equivalent of __name__ == "__main__" in Python.\n+# That is, if it\'s true we\'re being called as a command line script;\n+# if it\'s false, we\'re being sourced or otherwise included, such as for\n+# library or inlinedocs.\n+if( identical( environment( ), globalenv( ) ) &&\n+\t!length( grep( "^source\\\\(", sys.calls( ) ) ) ) {\n+\tmain( pArgs ) }\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/MaaslinToGraphlanAnnotation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/MaaslinToGraphlanAnnotation.py Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,213 @@\n+#!/usr/bin/env python\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = ""\n+__version__ = ""\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+import argparse\n+import csv\n+import math\n+from operator import itemgetter\n+import re\n+import string\n+import sys\n+\n+#def funcGetColor(fNumeric,fMax):\n+#  if fNumeric>0:\n+#    
return("#"+str(int(99*fNumeric/fMax)).zfill(2)+"0000")\n+#  if fNumeric<0:\n+#    return("#00"+str(int(99*abs(fNumeric/fMax))).zfill(2)+"00")\n+#  return("#000000")\n+\n+def funcGetColor(fNumeric):\n+  if fNumeric>0:\n+    return sRingPositiveColor\n+  else:\n+    return sRingNegativeColor\n+\n+def funcGetAlpha(fNumeric,fMax):\n+  return max(abs(fNumeric/fMax),dMinAlpha)\n+\n+#Constants\n+sAnnotation = "annotation"\n+sAnnotationColor = "annotation_background_color"\n+sClass = "class"\n+sRingAlpha = "ring_alpha"\n+dMinAlpha = .075\n+sRingColor = "ring_color"\n+sRingHeight = "ring_height"\n+#sRingHeightMin = 0.5\n+sStandardizedRingHeight = "1.01"\n+sRingLabel = "ring_label"\n+sRingLabelSizeWord = "ring_label_font_size"\n+sRingLabelSize = 10\n+sRingLineColor = "#999999"\n+sRingPositiveWord = "Positive"\n+sRingPositiveColor = "#990000"\n+sRingNegativeWord = "Negative"\n+sRingNegativeColor = "#009900"\n+sRingLineColorWord = "ring_separator_color"\n+sRingLineThickness = "0.5"\n+sRingLineThicknessWord = "ring_internal_separator_thickness"\n+sCladeMarkerColor = "clade_marker_color"\n+sCladeMarkerSize = "clade_marker_size"\n+sHighlightedMarkerSize = "10"\n+c_dMinDoubleValue = 0.00000000001\n+\n+#Set up arguments reader\n+argp = argparse.ArgumentParser( prog = "MaaslinToGraphlanAnnotation.py",\n+    description = """Converts summary files to graphlan annotation files.""" )\n+\n+#### Read in information\n+#Arguments\n+argp.add_argument("strInputSummary", metavar = "SummaryFile", type = argparse.FileType("r"), help ="Input summary file produced by maaslin")\n+argp.add_argument("strInputCore", metavar = "CoreFile", type = argparse.FileType("r"), help ="Core file produced by Graphlan from the maaslin pcl")\n+argp.add_argument("strInputHeader", metavar = "HeaderFile", type = argparse.FileType("r"), help ="Input header file to append to the generated annotation file.")\n+argp.add_argument("strOutputAnnotation", metavar = "AnnotationFile", type = argparse.FileType("w"), help 
="Output annotation file for graphlan")\n+\n+args = argp.parse_args( )\n+\n+#Read in the summary file and transform to cla'..b's,sLine[1]] for sLine in fSum]\n+\n+  ### Make rings\n+  #Setup rings\n+  dictRings = dict([[enumData[1],enumData[0]] for enumData in enumerate(set([lsData[0] for lsData in lsAssociationsModForOTU]))])\n+\n+  #Ring graphlan setting: rings represent a metadata that associates with a feature\n+  #Rings have a line to help differetiate them\n+  lsRingSettings = [[sRingLabel,lsPair[1],lsPair[0]] for lsPair in dictRings.items()]\n+  lsRingLineColors = [[sRingLineColorWord,lsPair[1],sRingLineColor] for lsPair in dictRings.items()]\n+  lsRingLineThick = [[sRingLineThicknessWord,lsPair[1],sRingLineThickness] for lsPair in dictRings.items()]\n+  lsRingLineLabelSize = [[sRingLabelSizeWord,lsPair[1], sRingLabelSize] for lsPair in dictRings.items()]\n+\n+  #Create coloring for rings color represents the directionality of the relationship\n+  dMaxCoef = max([abs(float(sAssociation[2])) for sAssociation in lsAssociationsModForOTU])\n+  lsRingColors = [[lsAssociation[1], sRingColor, dictRings[lsAssociation[0]], funcGetColor(float(lsAssociation[2]))] for lsAssociation in lsAssociationsModForOTU]\n+  lsRingAlpha = [[lsAssociation[1], sRingAlpha, dictRings[lsAssociation[0]], funcGetAlpha(float(lsAssociation[2]), dMaxCoef)] for lsAssociation in lsAssociationsModForOTU]\n+\n+  #Create height for rings representing the log tranformed q-value?\n+  dMaxQValue = max([-1*math.log(max(float(sAssociation[3]), c_dMinDoubleValue)) for sAssociation in lsAssociationsModForOTU])\n+  #lsRingHeights = [[lsAssociation[1], sRingHeight, dictRings[lsAssociation[0]], ((-1*math.log(max(float(lsAssociation[3]), c_dMinDoubleValue)))/dMaxQValue)+sRingHeightMin] for lsAssociation in lsAssociationsModForOTU]\n+  lsRingHeights = [[lsAssociation[1], sRingHeight, dictRings[lsAssociation[0]], sStandardizedRingHeight] for lsAssociation in lsAssociationsModForOTU]\n+\n+  #### Marker\n+  # 
Marker colors (mainly to make legend\n+  lsMarkerColors = [[lsAssociation[1], sCladeMarkerColor, funcGetColor(float(lsAssociation[2]))] for lsAssociation in lsAssociationsModForOTU]\n+  lsMarkerSizes = [[lsAssociation[1], sCladeMarkerSize, sHighlightedMarkerSize] for lsAssociation in lsAssociationsModForOTU]\n+\n+  #### Make internal highlights\n+  #Highlight the associated clades\n+  lsUniqueAssociatedTaxa = sorted(list(set([lsAssociation[1] for lsAssociation in lsAssociationsModForOTU])))\n+\n+  lsHighlights = []\n+  sABCPrefix = ""\n+  sListABC = string.ascii_lowercase\n+  iListABCIndex = 0\n+  for lsHighlight in lsUniqueAssociatedTaxa:\n+    lsTaxa = lsHighlight.split(".")\n+    sLabel = sABCPrefix+sListABC[iListABCIndex]+":"+lsTaxa[-1] if len(lsTaxa) > 2 else lsTaxa[-1]\n+    lsHighlights.append([lsHighlight, sAnnotation, sLabel])\n+    iListABCIndex = iListABCIndex + 1\n+    if iListABCIndex > 25:\n+      iListABCIndex = 0\n+      sABCPrefix = sABCPrefix + sListABC[len(sABCPrefix)]\n+\n+  #Read in the core file\n+  csvCore = open(args.strInputCore,\'r\') if isinstance(args.strInputCore, str) else args.strInputCore\n+  fSum = csv.reader(csvCore, delimiter="\\t")\n+\n+  #Add in all phylum just incase they were not already included here\n+  lsAddSecondLevel = list(set([sUnique[0].split(".")[1] for sUnique in fSum if len(sUnique[0].split(".")) > 1]))\n+  lsHighlights.extend([[sSecondLevel, sAnnotation, sSecondLevel] for sSecondLevel in lsAddSecondLevel])\n+  lsHighlightColor = [[lsHighlight[0], sAnnotationColor,"b"] for lsHighlight in lsHighlights]\n+\n+  #### Write the remaining output annotation file\n+  fAnn.writerows(lsRingSettings)\n+  fAnn.writerows(lsRingLineColors)\n+  fAnn.writerows(lsRingColors)\n+  fAnn.writerows(lsRingAlpha)\n+  fAnn.writerows(lsRingLineThick)\n+  fAnn.writerows(lsRingLineLabelSize)\n+  fAnn.writerows(lsRingHeights)\n+  fAnn.writerows(lsMarkerColors)\n+  fAnn.writerows(lsMarkerSizes)\n+  fAnn.writerows([[sRingPositiveWord, 
sCladeMarkerColor, sRingPositiveColor]])\n+  fAnn.writerows([[sRingNegativeWord, sCladeMarkerColor, sRingNegativeColor]])\n+  fAnn.writerows(lsHighlights)\n+  fAnn.writerows(lsHighlightColor)\n+  csvAnn.close()\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/PCLToGraphlanCoreGene.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/PCLToGraphlanCoreGene.py Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = ""
+__version__ = ""
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import argparse
+import csv
+from operator import itemgetter
+import re
+import sys
+
+#Helper function which returns a boolean indicator of an input string being parsable as an int
+def funcIsInt(strInt):
+  try:
+    int(strInt)
+    return True
+  except:
+    return False
+
+#Helper function that gets the index of the name and gives the last value of the list for - or the first value depending on the position
+# This supports the ranging in the read.config files
+#If no range is given then the result is just one index of the given name
+def funcGetIndices(lsFeature, lsFunctionNames):
+  if(len(lsFeature)) == 1:
+      if(funcIsInt(lsFeature[0])):
+        return int(lsFeature[0])-1
+      return [lsFeatureNames.index(lsFeature[0])]
+  if(len(lsFeature)) == 2:
+    iIndices = []
+    iPosition = 1
+    for sFeature in lsFeature:
+      if(sFeature==""):
+        if(iPosition==1):
+          iIndices.append(2)
+        elif(iPosition==2):
+          iIndices.append(len(lsFunctionNames)-1)
+      elif(funcIsInt(sFeature)):
+        iIndices.append(int(sFeature)-1)
+      else:
+        iIndices.append(lsFeatureNames.index(sFeature))
+      iPosition = iPosition + 1
+    return iIndices
+
+#Constants
+#The line indicating the rows to read
+c_MatrixName = "Matrix:"
+c_DataMatrix = "Abundance"
+c_strRows = "Read_PCL_Rows:"
+
+#Set up arguments reader
+argp = argparse.ArgumentParser( prog = "PCLToGraphlanCoreGene.py",
+    description = """Converts PCL files to Graphlan core gene files.""" )
+
+#Arguments
+argp.add_argument("strInputPCL", metavar = "PCLFile", type = argparse.FileType("r"), help ="Input PCl file used in maaslin")
+argp.add_argument("strInputRC", metavar = "RCFile", type = argparse.FileType("r"), help ="Input read config file used in maaslin")
+argp.add_argument("strOutputCoreGene", metavar = "CoreGeneFile", type = argparse.FileType("w"), help ="Output core gene file for graphlan")
+
+args = argp.parse_args( )
+
+#Read in read config table and get the rows/columns to use
+#Indicates if we are reading a data matrix
+fIsData = False
+#Holds the indices ranges
+#List of lists,each internal list hold 1 or 2 indices, if two it indicates a range from the first to the second
+llsIndices = []
+csvRC = open(args.strInputRC,'r') if isinstance(args.strInputRC, str) else args.strInputRC
+fRC = csv.reader(csvRC, delimiter=" ")
+for sLine in fRC:
+  #Get the row indices or names
+  if len(sLine):
+    if sLine[0] == c_MatrixName:
+      fIsData = sLine[1] == c_DataMatrix
+    if sLine[0] == c_strRows:
+      if fIsData:
+        llsIndices = [sIndexRange.split("-") for sIndexRange in sLine[1].split(",")]
+        break
+csvRC.close()
+
+# Check to make sure RC file is read
+if len(llsIndices)==0:
+  print("PCLToGraphlanCoreGene:: Could Not find indices in RC file "+args.strInputRC+".")
+
+#Read in the PCL file and parse the file names to core genes format
+csvPCL = open(args.strInputPCL,'r') if isinstance(args.strInputPCL, str) else args.strInputPCL
+fPCL = csv.reader(csvPCL,delimiter="\t")
+#The first column of the csv file
+lsFeatureNames = [sLine[0] for sLine in fPCL]
+csvPCL.close()
+
+# Check to make sure PCL file is read
+if len(lsFeatureNames)==0:
+  print("PCLToGraphlanCoreGene:: Could Not find features in PCL file "+args.strInputPCL+".")
+
+#If the indices are names switch with numbers otherwise subtract 1 because they are ment for R
+liConvertedRangedIndices = [funcGetIndices(sIndex,lsFeatureNames) for sIndex in llsIndices] if len(llsIndices)>0 else []
+llsIndices = None
+
+#If there are any ranges, reduce to lists of indices
+liConvertedIndices = []
+for lsIndices in liConvertedRangedIndices:
+  lsIndices.sort()
+  iLenIndices = len(lsIndices)
+  if iLenIndices > 2:
+    print "Error, received more than 2 indices in a range. Stopped."
+    exit()
+  liConvertedIndices.extend(lsIndices if iLenIndices == 1 else range(lsIndices[0],lsIndices[1]+1))
+liConvertedRangedIndices = None
+
+#Collapse all indices to a set which is then sorted
+liConvertedIndices = sorted(list(set(liConvertedIndices)))
+
+#Reduce name of features to just bugs indicated by indices
+lsFeatureNames = itemgetter(*liConvertedIndices)(lsFeatureNames)
+liConvertedIndices = None
+
+#Change the bug names to the correct formatting (clades seperated by .)
+lsFeatureNames = sorted(lsFeatureNames)
+lsFeatureNames = [re.sub("^[A-Za-z]__","",sBug) for sBug in lsFeatureNames]
+lsFeatureNames = [[re.sub("\|*[A-Za-z]__|\|",".",sBug)] for sBug in lsFeatureNames]
+
+#If this is an OTU, append the number and the genus level together for a more descriptive termal name
+lsFeatureNamesModForOTU = []
+for sBug in lsFeatureNames:
+  lsBug = sBug[0].split(".")
+  if(len(lsBug))> 1:
+    if(lsBug[-1].isdigit()):
+      lsBug[-2]=lsBug[-2]+"_"+lsBug[-1]
+      lsBug = lsBug[0:-1]
+    lsFeatureNamesModForOTU.append([".".join(lsBug)])
+  else:
+    lsFeatureNamesModForOTU.append([lsBug[0]])
+
+#Output core gene file
+csvCG = open(args.strOutputCoreGene,'w') if isinstance(args.strOutputCoreGene, str) else args.strOutputCoreGene
+fCG = csv.writer(csvCG)
+fCG.writerows(lsFeatureNamesModForOTU)
+csvCG.close()
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/AnalysisModules.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/AnalysisModules.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,1237 @@\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+inlinedocs <- function(\n+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>\n+##description<< Allows one to plug in new modules to perform analysis (univariate or multivariate), regularization, and data (response) transformation.\n+) { return( pArgs ) }\n+\n+# Libraries\n+suppressMessages(library( agricolae, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Needed for the pot-hoc Kruskal wallis 
comparisons\n+suppressMessages(library( penalized, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Needed for stepAIC\n+suppressMessages(library( MASS, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Needed for na action behavior\n+suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Needed for boosting\n+suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Needed for LASSO\n+suppressMessages(library( glmnet, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Needed for mixed models\n+#suppressMessages(library( lme4, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( nlme, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+\n+# Needed for zero inflated models\n+#suppressMessages(library( MCMCglmm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( gamlss, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+# Do not use #suppressMessages(library( glmmADMB, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+\n+fAddBack = TRUE\n+dUnevenMax = .9\n+\n+\n+### Helper functions\n+# OK\n+funcMakeContrasts <- function\n+### Makes univariate contrasts of all predictors in the model formula with the response.\n+(strFormula, \n+### lm style string defining reponse and predictors\n+strRandomFormula,\n+### mixed model string defining the fixed covariates\n+frmeTmp,\n+### The data frame to find predictor data in\n+iTaxon,\n+### Taxon\n+functionContrast,\n+### functionContrast The univariate test to perform\n+lsQCCounts\n+### QC info\n+){\n+  #TODO are we updating the QCCounts?\n+  lsSig = list()\n+  ### Holds all the significance results from the tests\n+  adP = c()\n+  ### Holds the p-values\n+  sCurDataName = names(frmeTmp)[iTaxon]\n+  ### The name of the taxon (data row) that is being associated (always assumed to be numeric)\n+  #Get test 
comparisons (predictor names from formula string)\n+  asComparisons  = unique(c(funcFormulaStrToL'..b'as.formula(strFormula), data=frmeTmp, dist="poisson"))) # pscl\n+  } else {\n+    #Check to see if | is in the model, if so use a lmm otherwise the standard glm is ok\n+    if(!is.null(strRandomFormula))\n+    {\n+      return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family= quasipoisson, data=frmeTmp)))\n+      #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)\n+      #return(try ( glmer(as.formula(strFormula), data=frmeTmp, family=quasipoisson, na.action=c_strNA_Action) ))\n+    } else {\n+      return(try( glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action) ))\n+    }\n+  }\n+  ### lmod result object from lm\n+}\n+\n+### Transformations\n+# Tested\n+funcArcsinSqrt <- function(\n+# Transform data with arcsin sqrt transformation\n+aData\n+### The data on which to perform the transformation\n+){\n+  return(asin(sqrt(aData)))\n+  ### Transformed data\n+}\n+\n+funcSquareSin <- function(\n+# Transform data with square sin transformation\n+# Opposite of the funcArcsinSqrt\n+aData\n+### The data on which to perform the transformation\n+){\n+  return(sin(aData)^2)\n+  ### Transformed data\n+}\n+\n+# Tested\n+funcNoTransform <-function(\n+### Pass data without transform\n+aData\n+### The data on which to perform the transformation\n+### Only given here to preserve the pattern, not used.\n+){\n+  return(aData)\n+  ### Transformed data\n+}\n+\n+funcGetAnalysisMethods <- function(\n+### Returns the appropriate functions for regularization, analysis, data transformation, and analysis object inspection.\n+### This allows modular customization per analysis step.\n+### To add a new method insert an entry in the switch for either the selection, transform, or method\n+### Insert them by using the pattern 
optparse_keyword_without_quotes = function_in_AnalysisModules\n+### Order in the return listy is currently set and expected to be selection, transforms/links, analysis method\n+### none returns null\n+sModelSelectionKey,\n+### Keyword defining the method of model selection\n+sTransformKey,\n+### Keyword defining the method of data transformation\n+sMethodKey,\n+### Keyword defining the method of analysis\n+fZeroInflated = FALSE\n+### Indicates if using zero inflated models\n+){\n+  lRetMethods = list()\n+  #Insert selection methods here\n+  lRetMethods[[c_iSelection]] = switch(sModelSelectionKey,\n+    boost = funcBoostModel,\n+    penalized = funcPenalizedModel,\n+    forward = funcForwardModel,\n+    backward = funcBackwardsModel,\n+    none = NA)\n+\n+  #Insert transforms\n+  lRetMethods[[c_iTransform]] = switch(sTransformKey,\n+    asinsqrt = funcArcsinSqrt,\n+    none = funcNoTransform)\n+\n+  #Insert untransform\n+  lRetMethods[[c_iUnTransform]] = switch(sTransformKey,\n+    asinsqrt = funcNoTransform,\n+    none = funcNoTransform)\n+\n+  #Insert analysis\n+  lRetMethods[[c_iAnalysis]] = switch(sMethodKey,\n+    neg_binomial = funcBinomialMult,\n+    quasi = funcQuasiMult,\n+    univariate = funcDoUnivariate,\n+    lm = funcLM,\n+    none = NA)\n+\n+  # If a univariate method is used it is required to set this to true\n+  # For correct handling.\n+  lRetMethods[[c_iIsUnivariate]]=sMethodKey=="univariate"\n+\n+  #Insert method to get results\n+  if(fZeroInflated)\n+  {\n+    lRetMethods[[c_iResults]] = switch(sMethodKey,\n+      neg_binomial = funcGetZeroInflatedResults,\n+      quasi = funcGetZeroInflatedResults,\n+      univariate = funcGetUnivariateResults,\n+      lm = funcGetZeroInflatedResults,\n+      none = NA)\n+  } else {\n+    lRetMethods[[c_iResults]] = switch(sMethodKey,\n+      neg_binomial = funcGetLMResults,\n+      quasi = funcGetLMResults,\n+      univariate = funcGetUnivariateResults,\n+      lm = funcGetLMResults,\n+      none = NA)\n+  
}\n+\n+  return(lRetMethods)\n+  ### Returns a list of functions to be passed for regularization, data transformation, analysis,\n+  ### and custom analysis results introspection functions to pull from return objects data of interest\n+}\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/BoostGLM.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/BoostGLM.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,887 @@\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+inlinedocs <- function(\n+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>\n+##description<< Manages the quality control of data and the performance of analysis (univariate or multivariate), regularization, and data (response) transformation.\n+) { return( pArgs ) }\n+\n+### Load libraries quietly\n+suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( 
gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( outliers, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( robustbase, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))\n+\n+### Get constants\n+#source(file.path("input","maaslin","src","Constants.R"))\n+#source("Constants.R")\n+\n+## Get logger\n+c_logrMaaslin <- getLogger( "maaslin" )\n+\n+funcDoGrubbs <- function(\n+### Use the Grubbs Test to identify outliers\n+iData,\n+### Column index in the data frame to test\n+frmeData,\n+### The data frame holding the data\n+dPOutlier,\n+### P-value threshold to indicate an outlier is significant\n+lsQC\n+### List holding the QC info of the cleaning step. Which indices are outliers is added.\n+){\n+  adData <- frmeData[,iData]\n+\n+  # Original number of NA\n+  viNAOrig = which(is.na(adData))\n+\n+  while( TRUE )\n+  {\n+    lsTest <- try( grubbs.test( adData ), silent = TRUE )\n+    if( ( class( lsTest ) == "try-error" ) || is.na( lsTest$p.value ) || ( lsTest$p.value > dPOutlier ) )\n+    {break}\n+    viOutliers = outlier( adData, logical = TRUE )\n+    adData[viOutliers] <- NA\n+  }\n+\n+  # Record removed data\n+  viNAAfter = which(is.na(adData))\n+\n+  # If all were set to NA then ignore the filtering\n+  if(length(adData)==length(viNAAfter))\n+  {\n+    viNAAfter = viNAOrig\n+    adData = frmeData[,iData]\n+    c_logrMaaslin$info( paste("Grubbs Test:: Identifed all data as outliers so was inactived for index=",iData," data=",paste(as.vector(frmeData[,iData]),collapse=","), "number zeros=", length(which(frmeData[,iData]==0)), sep = " " ))\n+  } else if(mean(adData, na.rm=TRUE) == 0) {\n+    viNAAfter = viNAOrig\n+    adData = frmeData[,iData]\n+    c_logrMaaslin$info( paste("Grubbs Test::Removed all values but 0, ignored. 
Index=",iData,".",sep=" " ) )\n+  } else {\n+    # Document removal\n+    if( sum( is.na( adData )'..b'ntransform any of the metadata that failed\n+  # Failed means true for uneven occurences of zeros\n+#  if( fIsTransformed )\n+#  {\n+#    vdUnevenZeroCheck = funcUnTransform( frmeData[[ iTaxon ]] )\n+#    if( funcZerosAreUneven( vdRawData=vdUnevenZeroCheck, funcTransform=funcTransform, vsStratificationFeatures=astrTerms, dfData=frmeData ) )\n+#    {\n+#      frmeData[[ iTaxon ]] = vdUnevenZeroCheck\n+#      c_logrMaaslin$debug( paste( "Taxon transformation reversed due to unevenness of zero distribution.", iTaxon ) )\n+#    }\n+#  }\n+\n+  # Run association analysis if predictors exist and an analysis function is specified\n+  # Run analysis\n+  if(!is.na(funcAnalysis) )\n+  {\n+    #If there are selected and forced fixed covariates\n+    if( length( astrTerms ) )\n+    {\n+      #Count the association attempt\n+      lsData$lsQCCounts$iLms = lsData$lsQCCounts$iLms + 1\n+\n+      #Make the lm formula\n+      #Build formula for simple mixed effects models using random covariates\n+      strRandomCovariatesFormula = NULL\n+      #Random covariates are forced\n+      if(length(lsRandomCovariates)>0)\n+      {\n+        #Format for lme\n+        #Needed for changes to not allowing random covariates through the boosting process\n+        strRandomCovariatesFormula <- paste( "adCur ~ ", paste( sprintf( "1|`%s`", lsRandomCovariates), collapse = " + " ))\n+      }\n+\n+      #Set up a list of formula containing selected fixed variables changing and the forced fixed covariates constant\n+      vstrFormula = c()\n+      #Set up suppressing forced covariates in a all v all scenario only\n+      asSuppress = c()\n+      #Enable all against all comparisons\n+      if(fAllvAll && !fIsUnivariate)\n+      {\n+        lsVaryingCovariates = setdiff(astrTerms,lsNonPenalizedPredictors)\n+        lsConstantCovariates = setdiff(lsNonPenalizedPredictors,lsRandomCovariates)\n+        
strConstantFormula = paste( sprintf( "`%s`", lsConstantCovariates ), collapse = " + " )\n+        asSuppress = lsConstantCovariates\n+\n+        if(length(lsVaryingCovariates)==0L)\n+        {\n+          vstrFormula <- c( paste( "adCur ~ ", paste( sprintf( "`%s`", lsConstantCovariates ), collapse = " + " )) )\n+        } else {\n+          for( sVarCov in lsVaryingCovariates )\n+          {\n+            strTempFormula = paste( "adCur ~ `", sVarCov,"`",sep="")\n+            if(length(lsConstantCovariates)>0){ strTempFormula = paste(strTempFormula,strConstantFormula,sep=" + ") }\n+            vstrFormula <- c( vstrFormula, strTempFormula )\n+          }\n+        }\n+      } else {\n+        #This is either the multivariate case formula for all covariates in an lm or fixed covariates in the lmm\n+        vstrFormula <- c( paste( "adCur ~ ", paste( sprintf( "`%s`", astrTerms ), collapse = " + " )) )\n+      }\n+\n+      #Run the association\n+      for( strAnalysisFormula in vstrFormula )\n+      {\n+        i = length(llmod)+1\n+        llmod[[i]] = funcAnalysis(strFormula=strAnalysisFormula, frmeTmp=frmeTmp, iTaxon=iTaxon, lsHistory=list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts), strRandomFormula=strRandomCovariatesFormula, fZeroInflated=fZeroInflated)\n+\n+        liTaxon[[i]] = iTaxon\n+        lastrTerms[[i]] = funcFormulaStrToList(strAnalysisFormula)\n+      }\n+    } else {\n+      #If there are no selected or forced fixed covariates\n+      lsData$lsQCCounts$iNoTerms = lsData$lsQCCounts$iNoTerms + 1\n+      return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts))\n+    }\n+  }\n+\n+  #Call funcBugResults and return it\'s return\n+  if(!is.na(funcGetResult))\n+  {\n+    #Format the results to a consistent expected result.\n+    return( funcGetResult( llmod=llmod, frmeData=frmeData, liTaxon=liTaxon, dSig=dSig, adP=adP, lsSig=lsSig, strLog=strLog, lsQCCounts=lsData$lsQCCounts, lastrCols=lastrTerms, asSuppressCovariates=asSuppress ) )\n+  } 
else {\n+    return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts))\n+  }\n+  ### List containing a list of pvalues, a list of significant data per association, and a list of QC data\n+}\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Constants.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Constants.R Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,119 @@
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+
+inlinedocs <- function(
+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
+##description<< Global project constants.
+) { return( pArgs ) } # NOTE(review): looks like a documentation-only stub (##author<< / ##description<< markers); pArgs is not assigned anywhere in this file -- confirm this function is never called
+
+#General-purpose separator characters
+c_COMMA = ","
+c_DASH = "-"
+
+#For reading IO: keyword labels (e.g. for read.config files)
+c_MATRIX_NAME = "Matrix:"
+c_FILE_NAME = "File:"
+c_DELIMITER = "Delimiter:"
+c_ID_ROW = "Name_Row_Number:"
+c_ID_COLUMN = "Name_Column_Number:"
+c_ROWS = "Read_Rows:"
+c_PCLROWS = "Read_PCL_Rows:"
+c_TSVROWS = "Read_TSV_Rows:"
+c_COLUMNS = "Read_Columns:"
+c_PCLCOLUMNS = "Read_PCL_Columns:"
+c_TSVCOLUMNS = "Read_TSV_Columns:"
+c_CHARACTER_DATA_TYPE = "DT_Character:"
+c_FACTOR_DATA_TYPE = "DT_Factor:"
+c_INTEGER_DATA_TYPE = "DT_Integer:"
+c_LOGICAL_DATA_TYPE = "DT_Logical:"
+c_NUMERIC_DATA_TYPE = "DT_Numeric:"
+c_ORDEREDFACTOR_DATA_TYPE = "DT_Ordered_Factor:"
+
+### The name of the data matrix read in using a read.config file
+c_strMatrixData  <- "Abundance"
+### The name of the metadata matrix read in using a read.config file
+c_strMatrixMetadata <- "Metadata"
+# Settings for MFA visualization/ordination
+c_iMFA <- 30
+c_dHeight <- 9
+c_dDefaultScale = 0.5
+# The column used to decide whether a result meets the significance threshold (dSignificanceLevel) for inclusion in the Summary text file
+c_strKeywordEvaluatedForInclusion <- "Q.value"
+#The name of the custom process function
+c_strCustomProcessFunction = "processFunction"
+
+#Delimiters
+#Feature name delimiter
+c_cFeatureDelim = "|"
+c_cFeatureDelimRex = "\\|" # c_cFeatureDelim escaped for use in a regular expression
+
+#The word used for unclassified
+c_strUnclassified = "unclassified"
+
+#Maaslincore settings
+#If a metadata does not have more than this count of unique values, it is changed to factor data mode.
+c_iNonFactorLevelThreshold = 3
+
+#File name suffixes / extensions
+c_sDetailFileSuffix = ".txt"
+c_sSummaryFileSuffix = ".txt"
+c_sLogFileSuffix = "_log"
+
+#Delimiter for output tables
+c_cTableDelimiter="\t"
+
+#Testing Related
+c_strTestingDirectory = "testing"
+c_strCorrectAnswers = "answers"
+c_strTemporaryFiles = "tmp"
+c_strTestingInput = "input"
+
+#Reading matrix defaults
+c_strDefaultMatrixDelimiter = "\t"
+c_strDefaultMatrixRowID = "1"
+c_strDefaultMatrixColID = "1"
+c_strDefaultReadRows = "-"
+c_strDefaultReadCols = "-"
+
+#Separator used when collapsing factor names
+c_sFactorNameSep = ""
+
+#Separators used by the MFA
+c_sMFANameSep1 = "_"
+c_sMFANameSep2 = "."
+
+#Analysis Module list positioning (index of each entry in the list)
+c_iSelection = 1
+c_iTransform = 2
+c_iAnalysis = 3
+c_iResults = 4
+c_iUnTransform = 5
+c_iIsUnivariate = 6
+
+#Count based models
+c_vCountBasedModels = c("neg_binomial","quasi")
+
+# NA action in analysis, placed here to standardize
+c_strNA_Action = "na.omit"
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/IO.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/IO.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,403 @@\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+inlinedocs <- function(\n+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>\n+##description<< Collection of functions centered on custom reading of data and some IO services.\n+) { return( pArgs ) }\n+\n+#Project Constants\n+\n+c_astrNA <- c(""," ","  ","NA","na")\n+\n+#Do not report warnings\n+options(warn=-1)\n+\n+funcWriteMatrixToReadConfigFile = function(\n+### Writes a read config file. 
Will write over a file by default\n+strConfigureFileName,\n+### Matrix that will be read\n+strMatrixName,\n+### Name of matrix that will be read\n+strRowIndices=NA,\n+### Rows which will be Read (TSV) by default all will be read\n+strColIndices=NA,\n+### Cols which will be Read (TSV) by default all will be read\n+acharDelimiter=c_strDefaultMatrixDelimiter,\n+### Delimiter for the matrix that will be read in\\\n+fAppend=FALSE\n+### Append to a current read config file\n+){\n+  #If no append delete previous file\n+  if(!fAppend){unlink(strConfigureFileName)}\n+\n+  #Make delimiter readable\n+  switch(acharDelimiter,\n+    "\\t" = {acharDelimiter = "TAB"},\n+    " " = {acharDelimiter = "SPACE"},\n+    "\\r" = {acharDelimiter = "RETURN"},\n+    "\\n" = {acharDelimiter = "ENDLINE"})\n+    \n+  #Manage NAs\n+  if(is.na(strRowIndices)){strRowIndices="-"}\n+  if(is.na(strColIndices)){strColIndices="-"}\n+\n+  #Required output\n+  lsDataLines = c(paste(c_MATRIX_NAME,strMatrixName,sep=" "),\n+    paste(c_DELIMITER,acharDelimiter,sep=" "),\n+    paste(c_ID_ROW,"1",sep=" "),\n+    paste(c_ID_COLUMN,"1",sep=" "),\n+    paste(c_TSVROWS,strRowIndices,sep=" "),\n+    paste(c_TSVCOLUMNS,strColIndices,sep=" "))\n+\n+  lsDataLines = c(lsDataLines,"\\n")\n+\n+  #Output to file\n+  lapply(lsDataLines, cat, file=strConfigureFileName, sep="\\n", append=TRUE)\n+}\n+\n+funcWriteMatrices = function(\n+### Write data frame data files with config files\n+dataFrameList,\n+### A named list of data frames (what you get directly from the read function)\n+saveFileList,\n+### File names to save the data matrices in (one name per data frame)\n+configureFileName,\n+### Name of the configure file to be written which will direct the reading of these data\n+acharDelimiter=c_strDefaultMatrixDelimiter,\n+### Matrix delimiter\n+log = FALSE\n+### Indicates if logging should occur\n+){\n+  #Get names\n+  dataFrameNames = names(dataFrameList)\n+\n+  #Get length of dataFrameList\n+  dataFrameListLength = 
length(dataFrameList)\n+\n+  #Get length of save file list\n+  saveFileListLength = length(saveFileList)\n+\n+  #If the save file list len'..b'newList)\n+\n+  matrixName <- NA\n+  fileName <- defaultFile\n+\n+  #Hold information on matrices to be read\n+  matrixInformationList = list()\n+  matrixInformationListCount = 1\n+\n+  for(textIndex in c(1:length(fileDataList)))\n+  {\n+    if(textIndex > length(fileDataList)) {break}\n+    #Start at the Matrix name\n+    #Keep this if statement first so that you scan through until you find a matrix block\n+    if(fileDataList[textIndex] == c_MATRIX_NAME)\n+    {\n+      #If the file name is not NA then that is sufficient for a matrix, store\n+      #Either way reset\n+      if(funcIsValid(fileName)&&funcIsValid(matrixName))\n+      {\n+        matrixInformationList[[matrixInformationListCount]] = c(matrixName,fileName,delimiter,rows,columns)\n+        matrixInformationListCount = matrixInformationListCount + 1\n+      }\n+\n+      #Get the matrix name and store\n+      matrixName = fileDataList[textIndex + 1]\n+\n+      fileName = defaultFile\n+      delimiter = "\\t"\n+      rows = NA\n+      columns = NA\n+      #If is not matrix name and no matrix name is known skip until you find the matrix name\n+      #If matrix name is known, continue to collect information about that matrix\n+    } else if(is.na(matrixName)){next}\n+\n+    #Parse different keywords\n+    strParseKey = fileDataList[textIndex]\n+    if(strParseKey == c_FILE_NAME){fileName=fileDataList[textIndex+1]}\n+    else if(strParseKey==c_FILE_NAME){fileName=fileDataList[textIndex+1]}\n+    else if(strParseKey %in% c(c_TSVROWS,c_PCLCOLUMNS,c_ROWS)){rows=fileDataList[textIndex+1]}\n+    else if(strParseKey %in% c(c_TSVCOLUMNS,c_PCLROWS,c_COLUMNS)){columns=fileDataList[textIndex+1]}\n+    else if(strParseKey==c_DELIMITER)\n+    {\n+        switch(fileDataList[textIndex + 1],\n+        "TAB" = {delimiter = "\\t"},\n+        "SPACE" = {delimiter = " "},\n+        
"RETURN" = {delimiter = "\\r"},\n+        "ENDLINE" = {delimiter = "\\n"})\n+    }\n+  }\n+  #If there is matrix information left\n+  if((!is.na(matrixName)) && (!is.na(fileName)))\n+  {\n+    matrixInformationList[[matrixInformationListCount]] = c(matrixName,fileName,delimiter,rows,columns)\n+    matrixInformationListCount = matrixInformationListCount + 1\n+  }\n+  return(matrixInformationList)\n+}\n+\n+funcParseIndexSlices = function(\n+### Take a string of comma or dash seperated integer strings and convert into a vector\n+### of integers to use in index slicing\n+strIndexString,\n+### String to be parsed into indicies vector\n+cstrNames\n+### Column names of the data so names can be resolved to indicies\n+){\n+  #If the slices are NA then return\n+  if(is.na(strIndexString)){return(strIndexString)}\n+\n+  #List of indices to return\n+  viRetIndicies = c()\n+\n+  #Split on commas\n+  lIndexString = sapply(strsplit(strIndexString, c_COMMA),function(x) return(x))\n+  for(strIndexItem in lIndexString)\n+  {\n+    #Handle the - case\n+    if(strIndexItem=="-"){strIndexItem = paste("2-",length(cstrNames),sep="")}\n+\n+    #Split on dash and make sure it makes sense\n+    lItemElement = strsplit(strIndexItem, c_DASH)[[1]]\n+    if(length(lItemElement)>2){stop("Error in index, too many dashes, only one is allowed. 
Index = ",strIndexItem,sep="")}\n+\n+    #Switch names to numbers\n+    aiIndices = which(is.na(as.numeric(lItemElement)))\n+    for( iIndex in aiIndices )\n+    {\n+      lItemElement[iIndex] = which(cstrNames==lItemElement[iIndex])[1]\n+    }\n+\n+    #Make numeric\n+    liItemElement = unlist(lapply(lItemElement, as.numeric))\n+\n+    #If dash is at the end or the beginning add on the correct number\n+    if(substr(strIndexItem,1,1)==c_DASH){liItemElement[1]=2}\n+    if(substr(strIndexItem,nchar(strIndexItem),nchar(strIndexItem))==c_DASH){liItemElement[2]=length(cstrNames)}\n+\n+    #If multiple numbers turn to a slice\n+    if(length(liItemElement)==2){liItemElement = c(liItemElement[1]:liItemElement[2])}\n+\n+    #Update indices\n+    viRetIndicies = c(viRetIndicies, liItemElement)\n+  }\n+  if(length(viRetIndicies)==0){return(NA)}\n+  return(sort(unique(viRetIndicies)))\n+  ### Sorted indicies vector\n+}\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/MaaslinPlots.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/MaaslinPlots.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,428 @@\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+inlinedocs <- function(\n+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>\n+##description<< Holds MaAsLin related plotting\n+) { return( pArgs ) }\n+\n+funcPDF <- function(\n+### Function to plot raw data with linear model information.\n+### Continuous and integer variables are plotted with a line of best fit.\n+### Other data is plotted as boxplots.\n+frmeTmp,\n+lsCur,\n+### Linear model 
information\n+curPValue,\n+### Pvalue to display\n+curQValue,\n+### Qvalue to display\n+strFilePDF,\n+### PDF file to create or to which to append\n+strBaseOut,\n+### Project directory to place pdf in\n+strName,\n+### Name of taxon\n+funcUnTransform=NULL,\n+### If a transform is used the appropriate of that transfor must be used on the residuals in the partial residual plots\n+fDoResidualPlot = TRUE,\n+### Plot the residual plots\n+fInvert = FALSE,\n+### Invert the figure so the background is black\n+liNaIndices = c()\n+### Indices of NA data that was imputed\n+){\n+  if( is.na( strFilePDF ) )\n+  {\n+    strFilePDF <- sprintf( "%s-%s.pdf", strBaseOut, strName )\n+    pdf( strFilePDF, width = 11, useDingbats=FALSE )\n+  }\n+  \n+  #Invert plots\n+  adColorMin <- c(1, 0, 0)\n+  adColorMax <- c(0, 1, 0)\n+  adColorMed <- c(0, 0, 0)\n+  if( fInvert )\n+  {\n+    par( bg = "black", fg = "white", col.axis = "white", col.lab = "white", col.main = "white", col.sub = "white" )\n+    adColorMin <- c(1, 1, 0)\n+    adColorMax <- c(0, 1, 1)\n+    adColorMed <- c(1, 1, 1)\n+  }\n+\n+  #Create linear model title data string\n+  strTitle <- sprintf( "%s (%.3g sd %.3g, p=%.3g, q=%.3g)", lsCur$orig, lsCur$value, lsCur$std, curPValue, curQValue )\n+  adMar <- c(5, 4, 4, 2) + 0.1\n+  dLine <- NA\n+  strTaxon <- lsCur$taxon\n+  if( nchar( strTaxon ) > 80 )\n+  {\n+    dCEX <- 0.75\n+    iLen <- nchar( strTaxon )\n+    if( iLen > 120 )\n+    {\n+      dLine <- 2.5\n+      i <- round( iLen / 2 )\n+      strTaxon <- paste( substring( strTaxon, 0, i ), substring( strTaxon, i + 1 ), sep = "\\n" )\n+      adMar[2] <- adMar[2] + 1\n+    }\n+  } else { dCEX = 1 }\n+\n+  #Plot 1x2 graphs per page\n+  if(fDoResidualPlot){par(mfrow=c(1,2))}\n+\n+  # Plot factor data as boxplot if is descrete data\n+  # Otherwise plot as a line\n+  adCur <- lsCur$metadata\n+  adY <- lsCur$data\n+\n+  # Remove NAs from data visualization if set to do so (if liNaIndices is not empty)\n+  if(lsCur$name %in% 
names(liNaIndices)&&(length(liNaIndices[[lsCur$name]])>0))\n+  {\n+    adY <- adY[-1*liNaIndices[[lsCur$name]]]\n+    adCur = adCur[-1*liN'..b'CurXValues, levels=c(vsLevels,"NA"))\n+    }\n+  }\n+\n+  # Scale to the original range\n+  if(!(class( adCurXValues ) == "factor" ))\n+  {\n+    vY = vY + mean(adCurXValues,rm.na=TRUE)\n+  }\n+\n+  # Plot Partial Residual Plot\n+  # If we are printing discontinuous data\n+  # Get the color of the box plots\n+  # Plot box plots\n+  # Plot data as strip charts\n+  if(is.factor(adCurXValues))\n+  {\n+#    adCurXValues = factor(adCurXValues)\n+    astrColors = funcGetFactorBoxColors(adCurXValues,vY,adColorMin,adColorMax,adColorMed)\n+    asNames = c()\n+    for(sLevel in levels(adCurXValues))\n+    {\n+      asNames =  c(asNames,sprintf( "%s (%d)", sLevel, sum( adCurXValues == sLevel, na.rm = TRUE ) ))\n+    }\n+\n+    plot(adCurXValues, vY, xlab=sCovariateOfInterest, ylab=sYLabel, names=asNames, notch = TRUE,mar = adMar,col = astrColors, main=sTitle, outpch = 4, outcex = 0.5 )\n+    stripchart( vY ~ adCurXValues, add = TRUE, col = astrColors, method = "jitter", vertical = TRUE, pch = 20 )\n+\n+  } else {\n+    plot( adCurXValues, vY, mar = adMar, main = sTitle, xlab=sCovariateOfInterest, col = sprintf( "%s99", funcGetColor( ) ), pch = 20,ylab = sYLabel, xaxt = "s" )\n+\n+    lmodLine = lm(vY~adCurXValues)\n+\n+    dColor <- lmodLine$coefficients[2] * mean( adCurXValues, na.rm = TRUE ) / mean( vY, na.rm = TRUE )\n+    strColor <- sprintf( "%sDD", funcColor( dColor, adMax = adColorMin, adMin = adColorMax, adMed = adColorMed ) )\n+    abline( reg =lmodLine, col = strColor, lwd = 3 )\n+  }\n+}\n+\n+funcBoostInfluencePlot <- function(\n+# Plot to show the rel.inf from boosting, what to know if the rank order is correct, better ranks for spiked data.\n+# Show the cut off and features identified as uneven.\n+vdRelInf, \n+sFeature,\n+vsPredictorNames,\n+vstrKeepMetadata,\n+vstrUneven = c()\n+){\n+  vsCol = 
rep("black",length(vdRelInf))\n+  vsCol[which(vsPredictorNames %in% vstrKeepMetadata)]="green"\n+  vsCol[which(vsPredictorNames %in% vstrUneven)] = "orange"\n+  plot(vdRelInf, col=vsCol, main=sFeature, xlab="Index", ylab="Relative Influence")\n+  legend("topright", pch = paste(1:length(vsPredictorNames)), legend= vsPredictorNames, text.col=vsCol, col=vsCol)\n+}\n+\n+funcResidualPlot <- function(\n+### Plot to data after confounding.\n+### That is, in a linear model with significant coefficient b1 for variable x1,\n+### that\'s been sparsified to some subset of terms: y = b0 + b1*x1 + sum(bi*xi)\n+### Plot x1 on the X axis, and instead of y on the Y axis, instead plot:\n+### y\' = b0 + sum(bi*xi)\n+lsCur,\n+### Assocation to plot\n+frmeTmp,\n+### Data frame of orginal data\n+adColorMin,\n+### Min color in color range for markers\n+adColorMax,\n+### Max color in color range for markers\n+adColorMed,\n+### Medium color in color range for markers\n+adMar,\n+### Standardized margins\n+funcUnTransform,\n+### If a transform is used the opporite of that transfor must be used on the residuals in the partial residual plots\n+liNaIndices = c()\n+### Indices of NA data that was imputed\n+){\n+  #Now plot residual hat plot\n+  #Get coefficient names\n+  asAllCoefs = setdiff(names(lsCur$allCoefs),c("(Intercept)"))\n+  asAllColNames = c()\n+  for(sCoef in asAllCoefs)\n+  {\n+    asAllColNames = c(asAllColNames,funcCoef2Col(sCoef,frmeData=frmeTmp))\n+  }\n+  asAllColNames = unique(asAllColNames)\n+\n+  # All coefficients except for the one of interest\n+  lsOtherCoefs = setdiff(asAllColNames, c(lsCur$name))\n+\n+  lsCovariatesToPlot = NULL\n+  if(is.factor(lsCur$metadata))\n+  {\n+    lsCovariatesToPlot = paste(lsCur$name,levels(lsCur$metadata),sep="")\n+  }else{lsCovariatesToPlot=c(lsCur$orig)}\n+\n+  # If there are no other coefficients then skip plot\n+#  if(!length(lsOtherCoefs)){return()}\n+\n+  # Plot residuals\n+  funcResidualPlotHelper(frmTmp=frmeTmp, 
sResponseFeature=lsCur$taxon, lsFullModelCovariateNames=asAllColNames, lsCovariateToControlForNames=lsCovariatesToPlot, sCovariateOfInterest=lsCur$name, adColorMin=adColorMin, adColorMax=adColorMax, adColorMed=adColorMed, adMar=adMar, funcUnTransform=funcUnTransform, liNaIndices=liNaIndices)\n+}\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Misc.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Misc.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,208 @@\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+### Modified Code\n+### This code is from the package agricolae by Felipe de Mendiburu\n+### Modifications here are minimal and allow one to use the p.values from the post hoc comparisons\n+### Authors do not claim credit for this solution only needed to modify code to use the output.\n+kruskal <- function (y, trt, alpha = 0.05, p.adj = c("none", "holm", "hochberg", \n+    "bonferroni", "BH", "BY", "fdr"), group = TRUE, main = NULL) \n+{\n+    dfComparisons=NULL\n+  
  dfMeans=NULL\n+    dntStudent=NULL\n+    dLSD=NULL\n+    dHMean=NULL\n+    name.y <- paste(deparse(substitute(y)))\n+    name.t <- paste(deparse(substitute(trt)))\n+    p.adj <- match.arg(p.adj)\n+    junto <- subset(data.frame(y, trt), is.na(y) == FALSE)\n+    N <- nrow(junto)\n+    junto[, 1] <- rank(junto[, 1])\n+    means <- tapply.stat(junto[, 1], junto[, 2], stat = "sum")\n+    sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd")\n+    nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length")\n+    means <- data.frame(means, replication = nn[, 2])\n+    names(means)[1:2] <- c(name.t, name.y)\n+    ntr <- nrow(means)\n+    nk <- choose(ntr, 2)\n+    DFerror <- N - ntr\n+    rs <- 0\n+    U <- 0\n+    for (i in 1:ntr) {\n+        rs <- rs + means[i, 2]^2/means[i, 3]\n+        U <- U + 1/means[i, 3]\n+    }\n+    S <- (sum(junto[, 1]^2) - (N * (N + 1)^2)/4)/(N - 1)\n+    H <- (rs - (N * (N + 1)^2)/4)/S\n+#    cat("\\nStudy:", main)\n+#    cat("\\nKruskal-Wallis test\'s\\nTies or no Ties\\n")\n+#    cat("\\nValue:", H)\n+#    cat("\\ndegrees of freedom:", ntr - 1)\n+    p.chisq <- 1 - pchisq(H, ntr - 1)\n+#    cat("\\nPvalue chisq  :", p.chisq, "\\n\\n")\n+    DFerror <- N - ntr\n+    Tprob <- qt(1 - alpha/2, DFerror)\n+    MSerror <- S * ((N - 1 - H)/(N - ntr))\n+    means[, 2] <- means[, 2]/means[, 3]\n+#    cat(paste(name.t, ",", sep = ""), " means of the ranks\\n\\n")\n+    dfMeans=data.frame(row.names = means[, 1], means[, -1])\n+    if (p.adj != "none") {\n+#        cat("\\nP value adjustment method:", p.adj)\n+        a <- 1e-06\n+        b <- 1\n+        for (i in 1:100) {\n+            x <- (b + a)/2\n+            xr <- rep(x, nk)\n+            d <- p.adjust(xr, p.adj)[1] - alpha\n+            ar <- rep(a, nk)\n+            fa <- p.adjust(ar, p.adj)[1] - alpha\n+            if (d * fa < 0) \n+                b <- x\n+            if (d * fa > 0) \n+                a <- x\n+        }\n+        Tprob <- qt(1 - x/2, DFerror)\n+    }\n+    nr <- 
unique(means[, 3])\n+    if (group) {\n+        Tprob <- qt(1 - alpha/2, DFerror)\n+#        cat("\\nt-Student:", T'..b'ror/means[, \n+            3]))\n+    }\n+    if (!group) {\n+        comb <- combn(ntr, 2)\n+        nn <- ncol(comb)\n+        dif <- rep(0, nn)\n+        LCL <- dif\n+        UCL <- dif\n+        pvalue <- dif\n+        sdtdif <- dif\n+        for (k in 1:nn) {\n+            i <- comb[1, k]\n+            j <- comb[2, k]\n+            if (means[i, 2] < means[j, 2]) {\n+                comb[1, k] <- j\n+                comb[2, k] <- i\n+            }\n+            dif[k] <- abs(means[i, 2] - means[j, 2])\n+            sdtdif[k] <- sqrt(S * ((N - 1 - H)/(N - ntr)) * (1/means[i, \n+                3] + 1/means[j, 3]))\n+            pvalue[k] <- 2 * round(1 - pt(dif[k]/sdtdif[k], DFerror), \n+                6)\n+        }\n+        if (p.adj != "none") \n+            pvalue <- round(p.adjust(pvalue, p.adj), 6)\n+        LCL <- dif - Tprob * sdtdif\n+        UCL <- dif + Tprob * sdtdif\n+        sig <- rep(" ", nn)\n+        for (k in 1:nn) {\n+            if (pvalue[k] <= 0.001) \n+                sig[k] <- "***"\n+            else if (pvalue[k] <= 0.01) \n+                sig[k] <- "**"\n+            else if (pvalue[k] <= 0.05) \n+                sig[k] <- "*"\n+            else if (pvalue[k] <= 0.1) \n+                sig[k] <- "."\n+        }\n+        tr.i <- means[comb[1, ], 1]\n+        tr.j <- means[comb[2, ], 1]\n+        dfComparisons <- data.frame(Difference = dif, p.value = pvalue, \n+            sig, LCL, UCL)\n+        rownames(dfComparisons) <- paste(tr.i, tr.j, sep = " - ")\n+#        cat("\\nComparison between treatments mean of the ranks\\n\\n")\n+#        print(output)\n+        dfMeans <- data.frame(trt = means[, 1], means = means[, \n+            2], M = "", N = means[, 3])\n+    }\n+#    invisible(output)\n+     invisible(list(study=main,test="Kruskal-Wallis test",value=H,df=(ntr - 
1),chisq.p.value=p.chisq,p.adj.method=p.adj,ntStudent=dntStudent,alpha=alpha,LSD=dLSD,Harmonic.mean=dHMean,comparisons=dfComparisons,means=dfMeans))\n+}\n+\n+### This function is NOT original code but is from the gamlss package.\n+### It is written here in an effort to over write the gamlss object summary method\n+### so that I can return information of interest.\n+estimatesgamlss<-function (object, Qr, p1, coef.p, \n+                           est.disp , df.r, \n+                           digits = max(3, getOption("digits") - 3),\r\n+                           covmat.unscaled , ...)\r\n+{\r\n+  #covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE])\r\n+  dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p))\r\n+  covmat <- covmat.unscaled #in glm is=dispersion * covmat.unscaled, but here is already multiplied by the dispersion\r\n+  var.cf <- diag(covmat)\r\n+  s.err <- sqrt(var.cf)\r\n+  tvalue <- coef.p/s.err\r\n+  dn <- c("Estimate", "Std. Error")\r\n+  if (!est.disp) \r\n+  {\r\n+    pvalue <- 2 * pnorm(-abs(tvalue))\r\n+    coef.table <- cbind(coef.p, s.err, tvalue, pvalue)\r\n+    dimnames(coef.table) <- list(names(coef.p), c(dn, "z value","Pr(>|z|)"))\r\n+  } else if (df.r > 0) {\r\n+    pvalue <- 2 * pt(-abs(tvalue), df.r)\r\n+    coef.table <- cbind(coef.p, s.err, tvalue, pvalue)\r\n+    dimnames(coef.table) <- list(names(coef.p), c(dn, "t value","Pr(>|t|)"))\r\n+  } else {\r\n+    coef.table <- cbind(coef.p, Inf)\r\n+    dimnames(coef.table) <- list(names(coef.p), dn)\r\n+  }\r\n+  return(coef.table)\r\n+}\n+\n+### This function is NOT original code but is from the gamlss package.\n+### It is written here in an effort to over write the gamlss object summary method\n+### so that I can return information of interest.\n+summary.gamlss<- function (object, type = c("vcov", "qr"), save = FALSE, ...) 
\r\n+{\n+  return(as.data.frame(estimatesgamlss(object=object,Qr=object$mu.qr, p1=1:(object$mu.df-object$mu.nl.df), \r\n+    coef.p=object$mu.coefficients[object$mu.qr$pivot[1:(object$mu.df-object$mu.nl.df)]], \r\n+    est.disp =TRUE, df.r=(object$noObs - object$mu.df),\r\n+    covmat.unscaled=chol2inv(object$mu.qr$qr[1:(object$mu.df-object$mu.nl.df), 1:(object$mu.df-object$mu.nl.df), drop = FALSE]) )) )\n+}\n\\ No newline at end of file\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/SummarizeMaaslin.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/SummarizeMaaslin.R Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,86 @@
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+
+inlinedocs <- function(
+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
+##description<< Creates a summary of association detail files.
+) { return( pArgs ) }
+
+# Load the logging package quietly, suppressing its startup messages
+suppressMessages(library(logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
+
+# Get the logger named "maaslin"; it is passed to the logging calls in this file
+c_logrMaaslin <- getLogger( "maaslin" )
+
+funcSummarizeDirectory = function(
+### Combines the MaAsLin detail files into one summary file, keeping only rows at or below the significance level; returns the kept rows ordered by P.value, or NULL if no detail files were found.
+astrOutputDirectory,
+### The output directory in which to look for the MaAsLin detail files.
+strBaseName,
+### The prefix with which the detail file names start (it is pasted into the regular expression used to find them).
+astrSummaryFileName,
+### The path of the summary file to write; any existing file at this path is removed first.
+astrKeyword,
+### The name of the column whose values are compared against afSignificanceLevel to decide whether a detail row goes into the summary
+afSignificanceLevel
+### The value of significance the data must be at or below to be included in the summary (0.0 is most significant; like p-values)
+){
+  #Accumulates the significant rows from all detail files (stays NULL if no detail file is read)
+  dfSignificantData = NULL
+
+  #Get detail files in output directory (names matching <strBaseName>-<printable characters><c_sDetailFileSuffix>)
+  astrlsDetailFiles = list.files(astrOutputDirectory, pattern=paste(strBaseName,"-","[[:print:]]*",c_sDetailFileSuffix,sep=""), full.names=TRUE)
+  logdebug(format(astrlsDetailFiles),c_logrMaaslin)
+
+  #For each detail file found
+  for(astrFile in astrlsDetailFiles)
+  {
+    #Read in data and keep only the rows at or below the significance level
+    dfDetails = read.table(astrFile, header=TRUE, sep=c_cTableDelimiter)
+    dfDetails = dfDetails[which(dfDetails[astrKeyword] <= afSignificanceLevel),]
+
+    #Start the combined table with the first file's rows, append the rows of later files
+    if(is.null(dfSignificantData))
+    {
+      dfSignificantData = dfDetails
+    } else {
+      dfSignificantData = rbind(dfSignificantData,dfDetails)
+    }
+  }
+  
+  #Remove any previous summary and write the new one. NOTE(review): the NULL branch is only reached when no detail files matched; files with no significant rows give a zero-row data frame, which is written as a table instead.
+  unlink(astrSummaryFileName)
+  if(is.null(dfSignificantData))
+  {
+    funcWrite("No significant data found.",astrSummaryFileName)
+    return( NULL )
+  } else {
+    #Sort by the Value column and then by P.value (both ascending) before writing
+    dfSignificantData = dfSignificantData[order(dfSignificantData$Value, dfSignificantData$P.value, decreasing = FALSE),]
+    funcWriteTable( dfSignificantData, astrSummaryFileName, fAppend = FALSE )
+    # Return the rows ordered by P.value (ascending). NOTE(review): this comment used to say q.value; confirm which column is intended.
+    return( dfSignificantData[ order( dfSignificantData$P.value, decreasing = FALSE ), ] )
+  }
+} 
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Utility.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/Utility.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,503 @@\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+\n+inlinedocs <- function(\n+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>\n+##description<< Collection of minor utility scripts\n+) { return( pArgs ) }\n+\n+#source("Constants.R")\n+\n+funcRename <- function(\n+### Modifies labels for plotting\n+### If the name is not an otu collapse to the last two clades\n+### Otherwise use the most terminal clade\n+astrNames\n+### Names to modify for plotting\n+){\n+  
astrRet <- c()\n+  for( strName in astrNames )\n+  {\n+    astrName <- strsplit( strName, c_cFeatureDelimRex )[[1]]\n+    i <- length( astrName )\n+    if( ( astrName[i] == c_strUnclassified ) || !is.na( as.numeric( astrName[i] ) ) )\n+    {\n+      strRet <- paste( astrName[( i - 1 ):i], collapse = c_cFeatureDelim )\n+    } else {\n+    strRet <- astrName[i]\n+    }\n+    astrRet <- c(astrRet, strRet)\n+  }\n+  return( astrRet )\n+  ### List of modified names\n+}\n+\n+funcBonferonniCorrectFactorData <- function\n+### Bonferroni correct for factor data\n+(dPvalue,\n+### P-value to correct\n+vsFactors,\n+### Factors of the data to correct\n+fIgnoreNAs = TRUE\n+){\n+  vsUniqueFactors = unique( vsFactors )\n+  if( fIgnoreNAs ){ vsUniqueFactors = setdiff( vsUniqueFactors, c("NA","na","Na","nA") ) }\n+  return( dPvalue * max( 1, ( length( vsUniqueFactors ) - 1 ) ) )\n+  ### Numeric p-value that is correct for levels (excluding NA levels)\n+}\n+\n+funcCalculateTestCounts <- function(\n+### Calculates the number of tests used in inference\n+iDataCount,\n+asMetadata,\n+asForced,\n+asRandom,\n+fAllvAll\n+){\n+  iMetadata = length(asMetadata)\n+  iForced = length(setdiff(intersect( asForced, asMetadata ), asRandom))\n+  iRandom = length(intersect( asRandom, asMetadata ))\n+  if(fAllvAll)\n+  {\n+    #AllvAll flow formula\n+    return((iMetadata-iForced-iRandom) * iDataCount)\n+  }\n+\n+  #Normal flow formula\n+  return((iMetadata-iRandom) * iDataCount)\n+}\n+\n+funcGetRandomColors=function(\n+#Generates a given number of random colors\n+tempNumberColors = 1\n+### Number of colors to generate\n+){\n+  adRet = c()\n+  return(sapply(1:tempNumberColors, function(x){\n+    adRGB <- ( runif( 3 ) * 0.66 ) + 0.33\n+    adRet <- c(adRet, rgb( adRGB[1], adRGB[2], adRGB[3] ))\n+  }))\n+}\n+\n+funcCoef2Col <- function(\n+### Searches through a dataframe and looks for a column that would match the coefficient\n+### by the name of the column or the column name and level appended 
together.\n+strCoef,\n+### String coefficient name\n+frmeData,\n+### Data frame of data\n+astrCols = c()\n+### Column names of int'..b'funcWrite("\\nInitial Metadata Count: ", strProcessFileName )\n+  funcWrite(lsQCData$aiMetadataInitial, strProcessFileName )\n+  funcWrite("\\nData Count after preprocess: ", strProcessFileName )\n+  funcWrite(lsQCData$aiAfterPreprocess, strProcessFileName )\n+  funcWrite("\\nRemoved for missing metadata: ", strProcessFileName )\n+  funcWrite(lsQCData$iMissingMetadata, strProcessFileName )\n+  funcWrite("\\nRemoved for missing data: ", strProcessFileName )\n+  funcWrite(lsQCData$iMissingData, strProcessFileName )\n+  funcWrite("\\nDetailed outlier indices: ", strProcessFileName )\n+  for(sFeature in names(lsQCData$liOutliers))\n+  {\n+    funcWrite(paste("Feature",sFeature,"Outlier indice(s):", paste(lsQCData$liOutliers[[sFeature]],collapse=",")), strProcessFileName )\n+  }\n+  funcWrite("\\nMetadata which survived clean: ", strProcessFileName )\n+  funcWrite(lsQCData$aiMetadataCleaned, strProcessFileName )\n+  funcWrite("\\nData which survived clean: ", strProcessFileName )\n+  funcWrite(lsQCData$aiDataCleaned, strProcessFileName )\n+}\n+\n+funcLMToNoNAFormula <-function(\n+lMod,\n+frmeTmp,\n+adCur\n+){\n+  dfCoef = coef(lMod)\n+  astrCoefNames = setdiff(names(dfCoef[as.vector(!is.na(dfCoef))==TRUE]),"(Intercept)")\n+  astrPredictors = unique(as.vector(sapply(astrCoefNames,funcCoef2Col, frmeData=frmeTmp)))\n+  strFormula = paste( "adCur ~", paste( sprintf( "`%s`", astrPredictors ), collapse = " + " ), sep = " " )\n+  return(try( lm(as.formula( strFormula ), data=frmeTmp )))\n+}\n+\n+funcFormulaStrToList <- function(\n+#Takes a lm or mixed model formula and returns a list of covariate names in the formula\n+strFormula\n+#Formula to extract covariates from\n+){\n+  #Return list\n+  lsRetComparisons = c()\n+\n+  #If you get a null or na just return\n+  if(is.null(strFormula)||is.na(strFormula)){return(lsRetComparisons)}\n+\n+  #Get 
test comparisons (predictor names from formula string)\n+  asComparisons = gsub("`","",setdiff(unlist(strsplit(unlist(strsplit(strFormula,"~"))[2]," ")),c("","+")))\n+\n+  #Change metadata in formula to univariate comparisons\n+  for(sComparison in asComparisons)\n+  {\n+    #Removed random covariate formating\n+    lsParse = unlist(strsplit(sComparison, "[\\\\(\\\\|\\\\)]", perl=FALSE))\n+    lsRetComparisons = c(lsRetComparisons,lsParse[length(lsParse)])\n+  }\n+  return(lsRetComparisons)\n+}\n+\n+funcFormulaListToString <- function(\n+# Using covariate and random covariate names, creates a lm or mixed model formula\n+# returns a vector of c(strLM, strMixedModel), one will be NA given the existance of random covariates.\n+# On error c(NA,NA) is given\n+astrTerms,\n+#Fixed covariates or all covariates if using an lm\n+astrRandomCovariates = NULL\n+#Random covariates for a mixed model\n+){\n+  strRetLMFormula = NA\n+  strRetMMFormula = NA\n+\n+  #If no covariates return NA\n+  if(is.null(astrTerms)){return(c(strRetLMFormula, strRetMMFormula))}\n+\n+  #Get fixed covariates\n+  astrFixedCovariates = setdiff(astrTerms,astrRandomCovariates)\n+\n+  #If no fixed coavariates return NA\n+  # Can not run a model with no fixed covariate, restriction of lmm\n+  if(length(astrFixedCovariates)==0){return(c(strRetLMFormula, strRetMMFormula))}\n+\n+  # Fixed Covariates\n+  strFixedCovariates = paste( sprintf( "`%s`", astrFixedCovariates ), collapse = " + " )\n+\n+  #If random covariates, set up a formula for mixed models\n+  if(length(astrRandomCovariates)>0)\n+  {\n+    #Format for lmer\n+    #strRetFormula <- paste( "adCur ~ ", paste( sprintf( "(1|`%s`))", intersect(astrRandomCovariates, astrTerms)), collapse = " + " ))\n+    #Format for glmmpql\n+    strRandomCovariates = paste( sprintf( "1|`%s`", setdiff(astrRandomCovariates, astrTerms)), collapse = " + " )\n+    strRetMMFormula <- paste( "adCur ~ ", strFixedCovariates, " + ", strRandomCovariates, sep="")\n+  } else {\n+    
#This is either the formula for all covariates in an lm or fixed covariates in the lmm\n+    strRetLMFormula <- paste( "adCur ~ ", strFixedCovariates, sep="")\n+  }\n+  return(c(strRetLMFormula, strRetMMFormula))\n+}\n\\ No newline at end of file\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/ValidateData.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/ValidateData.R Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,93 @@
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), 
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+
+inlinedocs <- function(
+##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
+##description<< Minor validation functions to check data typing when needed.
+) { return( pArgs ) }
+
+funcIsValid <- function(
+### Requires a value to not be NULL and, when it has length 1, to not be NA
+### Returns TRUE on meeting these requirements, returns FALSE otherwise
+### Values whose length is not 1 are only checked for being non-NULL; their elements are not inspected
+tempData = NA
+### The value to check (defaults to NA, which is invalid)
+){
+  #NULL is invalid; a length-1 value is valid unless it is NA; any other non-NULL value is valid
+  if(!is.null(tempData))
+  {
+    if(length(tempData)==1){ return(!is.na(tempData)) }
+    return(TRUE)
+  }
+  return(FALSE)
+  ### True (Valid) false (invalid)
+}
+
+funcIsValidString <- function(
+### Requires a value to pass funcIsValid (not NULL, not a single NA), to have class "character", and to have length 1
+### Returns TRUE on meeting these requirements, returns FALSE otherwise
+### Return boolean Indicator of identity as a single string
+tempData = NA
+### The value to check (defaults to NA, which is invalid)
+){
+  #If it is not valid data (NULL or a single NA) return false
+  if(!funcIsValid(tempData))
+  {
+    return(FALSE)
+  }
+  #If it is a character vector of length 1 return true
+  if((class(tempData)=="character")&&(length(tempData)==1))
+  {
+    return(TRUE)
+  }
+  return(FALSE)
+  ### True (Valid) false (invalid)
+}
+
+funcIsValidFileName <- function(
+### Requires a value to be a valid string (see funcIsValidString)
+### which names a path that exists (as tested by file.exists)
+### Returns TRUE on meeting these requirements, returns FALSE otherwise
+### Return boolean Indicator of identity as a file name
+tempData = NA,
+### The value to check as a file name (defaults to NA, which is invalid)
+fVerbose=FALSE
+### If TRUE, prints a message containing the value when it fails either check.
+){
+  #If is not valid string return false
+  if(!(funcIsValidString(tempData)))
+  {
+    if(fVerbose){print(paste("FunctIsValidFileName: InvalidString. Value=",tempData,sep=""))}
+    return(FALSE)
+  }
+  #It is a valid string; return true if the path it names exists
+  if(file.exists(tempData))
+  {
+    return(TRUE)
+  }
+  if(fVerbose){print(paste("FunctIsValidFileName: Path does not exist. Value=",tempData,sep=""))}
+  return(FALSE)
+  ### True (Valid) false (invalid)
+}
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/scriptBiplotTSV.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/lib/scriptBiplotTSV.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,515 @@\n+#!/usr/bin/env Rscript\n+\n+library(vegan)\n+library(optparse)\n+\n+funcGetCentroidForMetadatum <- function(\n+### Given a binary metadatum, calculate the centroid of the samples associated with the metadata value of 1\n+# 1. Get all samples that have the metadata value of 1\n+# 2. Get the x and y coordinates of the selected samples\n+# 3. Get the median value for the x and ys\n+# 4. Return those coordinates as the centroid\'s X and Y value\n+vfMetadata,\n+### Logical or integer (0,1) vector, TRUE or 1 values indicate correspoinding samples in the\n+### mSamplePoints which will be used to define the centroid\n+mSamplePoints\n+### Coordinates (columns;n=2) of samples (rows) corresponding to the vfMetadata\n+){\n+  # Check the lengths which should be equal\n+  if(length(vfMetadata)!=nrow(mSamplePoints))\n+  {\n+    print(paste("funcGetCentroidForMetadata::Error: Should have received metadata and samples of the same length, received metadata length ",length(vfMetadata)," and sample ",nrow(mSamplePoints)," length.",sep=""))\n+    return( FALSE )\n+  }\n+\n+  # Get all the samples that have the metadata value of 1\n+  viMetadataSamples = which(as.integer(vfMetadata)==1)\n+\n+  # Get the x and y coordinates for the selected samples\n+  mSelectedPoints = mSamplePoints[viMetadataSamples,]\n+\n+  # Get the median value for the x and the ys\n+  if(!is.null(nrow(mSelectedPoints)))\n+  {\n+    return( list(x=median(mSelectedPoints[,1],na.rm = TRUE),y=median(mSelectedPoints[,2],na.rm = TRUE)) )\n+  } else {\n+    return( list(x=mSelectedPoints[1],y=mSelectedPoints[2]) )\n+  }\n+}\n+\n+funcGetMaximumForMetadatum <- function(\n+### Given a continuous metadata\n+### 1. Use the x and ys from mSamplePoints for coordinates and the metadata value as a height (z)\n+### 2. Use lowess to smooth the landscape\n+### 3. Take the maximum of the landscape\n+### 4. 
Return the coordiantes for the maximum as the centroid\n+vdMetadata,\n+### Continuous (numeric or integer) metadata\n+mSamplePoints\n+### Coordinates (columns;n=2) of samples (rows) corresponding to the vfMetadata\n+){\n+  # Work with data frame\n+  if(class(mSamplePoints)=="matrix")\n+  {\n+    mSamplePoints = data.frame(mSamplePoints)\n+  }\n+  # Check the lengths of the dataframes and the metadata\n+  if(length(vdMetadata)!=nrow(mSamplePoints))\n+  {\n+    print(paste("funcGetMaximumForMetadatum::Error: Should have received metadata and samples of the same length, received metadata length ",length(vdMetadata)," and sample ",nrow(mSamplePoints)," length.",sep=""))\n+    return( FALSE )\n+  }\n+\n+  # Add the metadata value to the points\n+  mSamplePoints[3] = vdMetadata\n+  names(mSamplePoints) = c("x","y","z") \n+\n+  # Create lowess to smooth the surface\n+  # And calculate the fitted heights\n+  # x = sample coordinate 1\n+  # y = sample coordinate 2\n+  # z = metadata value\n+  loessSamples = loess(z~x*y, data=mSamplePoints, degree = 1, normalize = FALSE, na.action=na.omit)\n+\n+  # Naively get the max\n+  vdCoordinates = loessSamples$x[which(loessSamples$y==max(loessSamples$y)),]\n+  return(list(lsmod = loessSamples, x=vdCoordinates[1],y=vdCoordinates[2]))\n+}\n+\n+funcMakeShapes <- function(\n+### Takes care of defining shapes for the plot\n+dfInput,\n+### Data frame of metadata measurements\n+sShapeBy,\n+### The metadata to shape by\n+sShapes,\n+### List of custom metadata (per level if factor).\n+### Should correspond to the number of levels in shapeBy; the format is level:shape,level:shape for example HighLuminosity:14,LowLuminosity:2,HighPH:10,LowPH:18 \n+cDefaultShape\n+### Shape to default to if custom shapes are not used\n+){\n+  lShapes = list()\n+  vsShapeValues = c()\n+  vsShapeShapes = c()\n+  vsShapes = c()\n+  sMetadataId = sShapeBy\n+\n+  # Set default shape, color, and color ranges \n+  if(!is.null(cDefaultShape))\n+  {\n+    # Default shape 
should be an int for the int pch options\n+    if(!is.na(as.integer(cDefaultShape)))\n+    {\n+      cDefaultShape = as.integer(cDefaultShape)\n+    }\n+  } else {\n+    cDefaultShape = 16\n+  }\n+\n+  # Make shapes\n+  vs'..b'  # Plot text\n+    if(length(viMetadataDummy)==1)\n+    {\n+      text(x=mMetadataCoordinates[viMetadataDummy,][1]*dResizeArrow*0.8, y=mMetadataCoordinates[viMetadataDummy,][2]*dResizeArrow*0.8, labels=row.names(mMetadataCoordinates)[viMetadataDummy],col=sArrowTextColor)\n+    } else {\n+      text(x=mMetadataCoordinates[viMetadataDummy,1]*dResizeArrow*0.8, y=mMetadataCoordinates[viMetadataDummy,2]*dResizeArrow*0.8, labels=row.names(mMetadataCoordinates)[viMetadataDummy],col=sArrowTextColor)\n+    }\n+  }\n+\n+  # Create Legend\n+  # The text default is the colorMetadata_level (one per level) plus the ShapeMetadata_level (one per level)\n+  # The color default is already determined colors plus grey for shapes.\n+  sLegendText = c(paste(vsColorValues,sColorBy,sep="_"),paste(sMetadataShape,vsShapeValues,sep="_"))\n+  sLegendColors = c(vsColorRBG,rep(cDefaultColor,length(vsShapeValues)))\n+\n+  # If the color values are numeric\n+  # Too many values may be given in the legend (given they may be a continuous range of values)\n+  # To reduce this they are summarized instead, given the colors and values for the extreme ends.\n+  if( !sum( is.na( as.numeric( vsColorValues[ which( !is.na( vsColorValues ) ) ] ) ) ) )\n+  {\n+    vdNumericColors = as.numeric( vsColorValues )\n+    vdNumericColors = vdNumericColors[ which( !is.na( vdNumericColors ) ) ]\n+    vdSortedNumericColors = sort( vdNumericColors )\n+    sLegendText = c( paste( sColorBy, vdSortedNumericColors[ 1 ], sep="_" ), \n+                     paste( sColorBy, vdSortedNumericColors[ length(vdSortedNumericColors) ], sep="_" ),\n+                     paste( sMetadataShape, vsShapeValues, sep="_" ) )\n+    sLegendColors = c(vsColorRBG[ which( vdNumericColors == vdSortedNumericColors[ 1 ] )[ 1 
] ],\n+                      vsColorRBG[ which( vdNumericColors == vdSortedNumericColors[ length( vdSortedNumericColors ) ] )[ 1 ] ],\n+                      rep(cDefaultColor,length(vsShapeValues)))\n+  }\n+  sLegendShapes = c( rep( cDefaultShape, length( sLegendText ) - length( vsShapeShapes ) ), vsShapeShapes )\n+\n+  # If any legend text was constructed then make the legend.\n+  if( length( sLegendText ) >0 )\n+  {\n+    legend( "topright", legend = sLegendText, pch = sLegendShapes, col = sLegendColors )\n+  }\n+\n+  # Original biplot call if you want to check the custom plotting of the script\n+  # There will be one difference where the biplot call scales an axis, this one does not. In relation to the axes, the points, text and arrows should still match.\n+  # Axes to the top and right are for the arrow, others are for markers and bug names.\n+  #biplot(mNMDSData$points,mMetadataCoordinates[viMetadataDummy,],xlabs=vsShapes,xlab=paste("MDS1","Stress=",mNMDSData$stress),main="Biplot function Bugs and Sampes - Metadata Plotted with Centroids")\n+  dev.off()\n+}\n+\n+# This is the equivalent of __name__ == "__main__" in Python.\n+# That is, if it\'s true we\'re being called as a command line script;\n+# if it\'s false, we\'re being sourced or otherwise included, such as for\n+# library or inlinedocs.\n+if( identical( environment( ), globalenv( ) ) &&\n+\t!length( grep( "^source\\\\(", sys.calls( ) ) ) )\n+{\n+  lsArgs <- parse_args( pArgs, positional_arguments=TRUE )\n+\n+  funcDoBiplot(\n+    sBugs = lsArgs$options$sBugs,\n+    sMetadata = lsArgs$options$sMetadata,\n+    sColorBy = lsArgs$options$sColorBy,\n+    sColorRange = lsArgs$options$sColorRange,\n+    sTextColor = lsArgs$options$sTextColor,\n+    sArrowColor = lsArgs$options$sArrowColor,\n+    sArrowTextColor = lsArgs$options$sArrowTextColor,\n+    sPlotNAColor = lsArgs$options$sPlotNAColor,\n+    sShapeBy = lsArgs$options$sShapeBy,\n+    sShapes = lsArgs$options$sShapes,\n+    sDefaultMarker = 
lsArgs$options$sDefaultMarker,\n+    sRotateByMetadata = lsArgs$options$sRotateByMetadata,\n+    dResizeArrow = lsArgs$options$dResizeArrow,\n+    sTitle = lsArgs$options$sTitle,\n+    sInputFileName = lsArgs$args[2],\n+    sLastMetadata = lsArgs$args[1],\n+    sOutputFileName = lsArgs$options$sOutputFileName)\n+}\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/merge_metadata.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/merge_metadata.py Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,454 @@\n+#!/usr/bin/env python\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#\n+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), \n+# authored by the Huttenhower lab at the Harvard School of Public Health\n+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).\n+#####################################################################################\n+"""\n+Examples\n+~~~~~~~~\n+\n+``metadata.txt``::\n+\n+\t-\tY\tZ\n+\ta\t1\tx\n+\tb\t0\ty\n+\tc\t\tz\n+\n+``data.pcl``::\n+\n+\t-\ta\tb\tc\n+\tA|B\t1\t2\t3\n+\tA|C\t4\t5\t6\n+\tD|E\t7\t8\t9\n+\n+``Examples``::\n+\n+\t$ merge_metadata.py metadata.txt < 
data.pcl\n+\tsample\ta\tb\tc\n+\tY\t1\t0\n+\tZ\tx\ty\tz\n+\tA\t0.416667\t0.466667\t0.5\n+\tA|B\t0.0833333\t0.133333\t0.166667\n+\tA|C\t0.333333\t0.333333\t0.333333\n+\tD|E\t0.583333\t0.533333\t0.5\n+\n+\t$ merge_metadata.py metadata.txt -t 0 < data.pcl\n+\tsample\ta\tb\tc\n+\tY\t1\t0\n+\tZ\tx\ty\tz\n+\tA|B\t0.0833333\t0.133333\t0.166667\n+\tA|C\t0.333333\t0.333333\t0.333333\n+\tD|E\t0.583333\t0.533333\t0.5\n+\n+\t$ merge_metadata.py metadata.txt -t 1 < data.pcl\n+\tsample\ta\tb\tc\n+\tY\t1\t0\n+\tZ\tx\ty\tz\n+\tA\t0.416667\t0.466667\t0.5\n+\tD\t0.583333\t0.533333\t0.5\n+\n+\t$ merge_metadata.py metadata.txt -t 0 -n < data.pcl\n+\tsample\ta\tb\tc\n+\tY\t1\t0\n+\tZ\tx\ty\tz\n+\tA|B\t1\t2\t3\n+\tA|C\t4\t5\t6\n+\tD|E\t7\t8\t9\n+\n+\t$ merge_metadata.py metadata.txt -t 0 -m 0.8 -s "-" < data.pcl\n+\tsample\tb\tc\n+\tY\t0\t-\n+\tZ\ty\tz\n+\tA|B\t0.133333\t0.166667\n+\tA|C\t0.333333\t0.333333\n+\tD|E\t0.533333\t0.5\n+\n+\t$ merge_metadata.py -t 0 < data.pcl\n+\tsample\ta\tb\tc\n+\tA|B\t1\t2\t3\n+\tA|C\t4\t5\t6\n+\tD|E\t7\t8\t9\n+\n+.. 
testsetup::\n+\n+\tfrom merge_metadata import *\n+"""\n+\n+import argparse\n+import blist\n+import csv\n+import re\n+import sys\n+\n+c_dTarget\t= 1.0\n+c_fRound\t= False\n+\n+class CClade:\n+\t\n+\tdef __init__( self ):\n+\t\t\n+\t\tself.m_hashChildren = {}\n+\t\tself.m_adValues = None\n+\t\n+\tdef get( self, astrClade ):\n+\t\t\n+\t\treturn self.m_hashChildren.setdefault(\n+\t\t\tastrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self\n+\t\t\t\n+\tdef set( self, adValues ):\n+\t\t\n+\t\tself.m_adValues = blist.blist( [0] ) * len( adValues )\n+\t\tfor i, d in enumerate( adValues ):\n+\t\t\tif d:\n+\t\t\t\tself.m_adValues[i] = d\n+\t\t\n+\tdef impute( self ):\n+\t\t\n+\t\tif not self.m_adValues:\n+\t\t\tfor pChild in self.m_hashChildren.values( ):\n+\t\t\t\tadChild = pChild.impute( )\n+\t\t\t\tif self.m_adValues:\n+\t\t\t\t\tfor i in range( len( adChild or [] ) ):\n+\t\t\t\t\t\tif adChild[i]:\n+\t\t\t\t\t\t\tself.m_adValues[i] += adChild[i]\n+\t\t\t\telif adChild:\n+\t\t\t\t\tself.m_adValues = adChild[:] \n+\t\t\t\t\t\n+\t\treturn self.m_adValues\n+\t\n+\tdef _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):\n+\t\t\n+\t\tfHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) )\n+\t\tiDepth += 1\n+\t\tsetiRet = set()\n+\t\tif self.m_hashChildren:\n+\t\t\tfor strChild, pChild in self.m_hashChildren.items( ):\n+\t\t\t\tsetiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves )\n+\t\t\tsetiRet = set( ( i + 1 ) for i in setiRet )\n+\t\tel'..b'ax][j] for j in ( range( i ) + range( i + 1, len( astrHeaders ) ) ) )\n+\t\t\tif dMaxUs < ( dMin * dMaxThem ):\n+\t\t\t\tsys.stderr.write( "Omitting: %s\\n" % astrHeaders[i] )\n+\t\t\t\tafOmit[i] = True\n+\t\n+\tif astrExclude:\n+\t\tsetstrExclude = set(s.strip( ) for s in astrExclude)\n+\t\tfor i in range( len( astrHeaders ) ):\n+\t\t\tif ( not afOmit[i] ) and ( astrHeaders[i] in setstrExclude 
):\n+\t\t\t\tafOmit[i] = True\n+\t\n+\tadMult = [( ( c_dTarget / d ) if ( fNormalize and ( d > 0 ) ) else 1 ) for d in adSeqs]\n+\tfor strFeature, adCounts in hashFeatures.items( ):\n+\t\tfor i in range( len( adCounts ) ):\n+\t\t\tif adCounts[i]:\n+\t\t\t\tadCounts[i] *= adMult[i]\n+\t\t\t\tif c_fRound:\n+\t\t\t\t\tadCounts[i] = round( adCounts[i] )\n+\tfor strFeature, adCounts in hashFeatures.items( ):\n+\t\tastrFeature = strFeature.strip( ).split( "|" )\n+\t\twhile len( astrFeature ) > 1:\n+\t\t\tastrFeature = astrFeature[:-1]\n+\t\t\tstrParent = "|".join( astrFeature )\n+\t\t\tadParent = hashFeatures.get( strParent )\n+\t\t\tif adParent == adCounts:\n+\t\t\t\tdel hashFeatures[strParent]\n+\t\t\t\tsetstrFeatures.remove( strParent )\n+\t\n+\tif astrMetadata:\n+\t\tfor i in range( len( astrMetadata ) ):\n+\t\t\thashFeatures[astrMetadata[i]] = astrCur = []\n+\t\t\tfor strSubject in astrHeaders:\n+\t\t\t\tastrSubject = hashMetadata.get( strSubject )\n+\t\t\t\tif not astrSubject:\n+\t\t\t\t\tstrSubject = re.sub( \'_.*$\', "", strSubject )\n+\t\t\t\t\tastrSubject = hashMetadata.get( strSubject, [] )\n+\t\t\t\tastrCur.append( astrSubject[i] if ( i < len( astrSubject ) ) else "" )\n+\t\n+\tastrFeatures = sorted( astrMetadata or [] ) + sorted( setstrFeatures )\n+\taiHeaders = filter( lambda i: not afOmit[i], range( len( astrHeaders ) ) )\n+\tcsvw = csv.writer( sys.stdout, csv.excel_tab )\n+\tcsvw.writerow( ["sample"] + [astrHeaders[i] for i in aiHeaders] )\n+\tfor iFeature in range( len( astrFeatures ) ):\n+\t\tstrFeature = astrFeatures[iFeature]\n+\t\tadFeature = hashFeatures[strFeature]\n+\t\tastrValues = [adFeature[i] for i in aiHeaders]\n+\t\tfor i in range( len( astrValues ) ):\n+\t\t\tstrValue = astrValues[i]\n+\t\t\tif type( strValue ) in (int, float):\n+\t\t\t\tastrValues[i] = "%g" % astrValues[i]\n+\t\t\telif ( not strValue ) or ( ( type( strValue ) == str ) and\n+\t\t\t\t( len( strValue ) == 0 ) ):\n+\t\t\t\tastrValues[i] = strMissing\n+\t\tcsvw.writerow( 
[strFeature] + astrValues )\n+\n+\tfor astrRaw in aastrRaw:\n+\t\tcsvw.writerow( [astrRaw[i] for i in aiHeaders] )\n+\n+argp = argparse.ArgumentParser( prog = "merge_metadata.py",\n+\tdescription = "Join a data matrix with a metadata matrix, optionally normalizing and filtering it.\\n\\n" +\n+\t"A pipe-delimited taxonomy hierarchy can also be dynamically added or removed." )\n+argp.add_argument( "-n",\t\tdest = "fNormalize",\taction = "store_false",\n+\thelp = "Don\'t normalize data values by column sums" )\n+argp.add_argument( "-s",\t\tdest = "strMissing",\tmetavar = "missing",\n+\ttype = str,\t\tdefault = " ",\n+\thelp = "String representing missing metadata values" )\n+argp.add_argument( "-m",\t\tdest = "dMin",\t\t\tmetavar = "min",\n+\ttype = float,\tdefault = 0.01,\n+\thelp = "Per-column quality control, minimum fraction of maximum value" )\n+argp.add_argument( "-t",\t\tdest = "iTaxa",\t\t\tmetavar = "taxa",\n+\ttype = int,\t\tdefault = -1,\n+\thelp = "Depth of taxonomy to be computed, negative = from right, 0 = no change" )\n+argp.add_argument( "-l",\t\tdest = "fLeaves",\t\taction = "store_true",\n+\thelp = "Output only leaves, not complete taxonomy" )\n+argp.add_argument( "-x",\t\tdest = "istmExclude",\tmetavar = "exclude.txt",\n+\ttype = file,\n+\thelp = "File from which sample IDs to exclude are read" )\n+argp.add_argument( "istmMetadata",\tmetavar = "metadata.txt",\n+\ttype = file,\tnargs = "?",\n+\thelp = "File from which metadata is read" )\n+__doc__ = "::\\n\\n\\t" + argp.format_help( ).replace( "\\n", "\\n\\t" ) + __doc__\n+\n+def _main( ):\n+\targs = argp.parse_args( )\n+\tmerge_metadata( args.istmMetadata and csv.reader( args.istmMetadata, csv.excel_tab ),\n+\t\tcsv.reader( sys.stdin, csv.excel_tab ), sys.stdout, args.fNormalize, args.strMissing,\n+\t\t\targs.istmExclude, args.dMin, args.iTaxa, args.fLeaves )\n+\t\n+if __name__ == "__main__":\n+\t_main( )\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-AnalysisModules/test-AnalysisModules.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-AnalysisModules/test-AnalysisModules.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,788 @@\n+c_strDir <- file.path(getwd( ),"..")\n+\n+source(file.path(c_strDir,"lib","Constants.R"))\n+source(file.path(c_strDir,"lib","Utility.R"))\n+\n+#Test Utilities\n+context("Test funcGetLMResults")\n+context("Test funcGetStepPredictors")\n+\n+context("Test funcMakeContrasts")\n+covX1 = c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)\n+covX2 = c(144.4, 245.9, 141.9, 253.3, 144.7, 244.1, 150.7, 245.2, 160.1)\n+covX3 = as.factor(c(1,2,3,1,2,3,1,2,3))\n+covX4 = as.factor(c(1,1,1,1,2,2,2,2,2))\n+covX5 = as.factor(c(1,2,1,2,1,2,1,2,1))\n+covY = c(.26,  .31,  .25,  .50,  .36,  .40,  .52,  .28,  .38)\n+frmeTmp = data.frame(Covariate1=covX1, Covariate2=covX2, Covariate3=covX3, Covariate4=covX4, Covariate5=covX5, adCur= covY)\n+iTaxon = 6\n+#Add in updating QC errors\n+#Add in random covariates\n+strFormula = "adCur ~ Covariate1"\n+strRandomFormula = NULL\n+lsSig = list()\n+lsSig[[1]] = list()\n+lsSig[[1]]$name = "Covariate1"\n+lsSig[[1]]$orig = "Covariate1"\n+lsSig[[1]]$taxon = "adCur"\n+lsSig[[1]]$data = covY\n+lsSig[[1]]$factors = "Covariate1"\n+lsSig[[1]]$metadata = covX1\n+vdCoef = c(Covariate1=0.6)\n+lsSig[[1]]$value = vdCoef\n+lsSig[[1]]$std = sd(covX1)\n+lsSig[[1]]$allCoefs = vdCoef\n+ret1 = funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,\n+    functionContrast=function(x,adCur,dfData)\n+    {\n+      retList = list()\n+      ret = cor.test(as.formula(paste("~",x,"+ adCur")), data=dfData, method="spearman", na.action=c_strNA_Action)\n+      #Returning rho for the coef in a named vector\n+      vdCoef = c()\n+      vdCoef[[x]]=ret$estimate\n+      retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)\n+      return(retList)\n+    }, lsQCCounts=list())\n+ret1$adP = round(ret1$adP,5)\n+test_that("1. 
Test that the funcMakeContrasts works on a continuous variable.",{\n+  expect_equal(ret1,list(adP=round(c(0.09679784),5),lsSig=lsSig,lsQCCounts=list()))})\n+\n+strFormula = "adCur ~ Covariate1 + Covariate2"\n+strRandomFormula = NULL\n+lsSig = list()\n+lsSig[[1]] = list()\n+lsSig[[1]]$name = "Covariate1"\n+lsSig[[1]]$orig = "Covariate1"\n+lsSig[[1]]$taxon = "adCur"\n+lsSig[[1]]$data = covY\n+lsSig[[1]]$factors = "Covariate1"\n+lsSig[[1]]$metadata = covX1\n+vdCoef = c(Covariate1=0.6)\n+lsSig[[1]]$value = vdCoef\n+lsSig[[1]]$std = sd(covX1)\n+lsSig[[1]]$allCoefs = vdCoef\n+lsSig[[2]] = list()\n+lsSig[[2]]$name = "Covariate2"\n+lsSig[[2]]$orig = "Covariate2"\n+lsSig[[2]]$taxon = "adCur"\n+lsSig[[2]]$data = covY\n+lsSig[[2]]$factors = "Covariate2"\n+lsSig[[2]]$metadata = covX2\n+vdCoef = c(Covariate2=0.46666667)\n+lsSig[[2]]$value = vdCoef\n+lsSig[[2]]$std = sd(covX2)\n+lsSig[[2]]$allCoefs = vdCoef\n+ret1 = funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,\n+    functionContrast=function(x,adCur,dfData)\n+    {\n+      retList = list()\n+      ret = cor.test(as.formula(paste("~",x,"+ adCur")), data=dfData, method="spearman", na.action=c_strNA_Action)\n+      #Returning rho for the coef in a named vector\n+      vdCoef = c()\n+      vdCoef[[x]]=ret$estimate\n+      retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)\n+      return(retList)\n+    }, lsQCCounts=list())\n+ret1$adP = round(ret1$adP,5)\n+test_that("Test that the funcMakeContrasts works on 2 continuous variables.",{\n+  expect_equal(ret1,list(adP=round(c(0.09679784,0.21252205),5),lsSig=lsSig,lsQCCounts=list()))})\n+\n+strFormula = "adCur ~ Covariate4"\n+strRandomFormula = NULL\n+lsSig = list()\n+lsSig[[1]] = list()\n+lsSig[[1]]$name = "Covariate4"\n+lsSig[[1]]$orig = "Covariate42"\n+lsSig[[1]]$taxon = "adCur"\n+lsSig[[1]]$data = covY\n+lsSig[[1]]$factors = "Covariate4"\n+lsSig[[1]]$metadata = covX4 #update\n+vdCoef = 
c(Covariate42=NA)\n+lsSig[[1]]$value = vdCoef\n+lsSig[[1]]$std = sd(covX4) #update\n+lsSig[[1]]$allCoefs = vdCoef\n+# Get return\n+rets = funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,\n+    functionContrast=function(x,adCur,'..b'om=as.formula(strRandomFormula), family=binomial(link=logit), data=frmeTmp)\n+#test_that("Test that the lm has the correct results for 2 random and 1 fixed covariates.",{\n+#  expect_equal(funcBinomialMult(strFormula=strFormula,frmeTmp=frmeTmp,iTaxon=iTaxon,lsHistory=lsHistory,strRandomFormula=strRandomFormula),lmRet)\n+#})\n+\n+\n+context("Test funcQuasiMult")\n+strFormula = "adCur ~ Covariate1"\n+strRandomFormula = NULL\n+x = c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1,44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)\n+x2 = c(34.2, 32.5, 22.4, 43, 3.25, 6.4, 7, 87, 9,34.2, 32.5, 22.4, 43, 3.25, 6.4, 7, 87, 9)\n+xf1 = c(1,1,2,2,1,1,2,2,2,1,1,2,2,1,1,2,2,2)\n+xf2 = c(1,1,1,1,2,2,2,2,2,1,1,1,1,2,2,2,2,2)\n+frmeTmp = data.frame(Covariate1=x, Covariate2=x2, FCovariate3=xf1, FCovariate4=xf2, adCur=adCur)\n+iTaxon = 5\n+lmRet = glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action)\n+test_that("Test that the quasi poisson has the correct results for 1 covariate.",{\n+  expect_equal(funcQuasiMult(strFormula=strFormula,frmeTmp=frmeTmp,iTaxon=iTaxon,lsHistory=lsHistory,strRandomFormula=strRandomFormula),lmRet)\n+})\n+#Test for correct call for 2 covariates\n+strFormula = "adCur ~ Covariate1 + Covariate2"\n+iTaxon = 5\n+lmRet = glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action)\n+test_that("Test that the quasi poisson has the correct results for 2 covariates.",{\n+  expect_equal(funcQuasiMult(strFormula=strFormula,frmeTmp=frmeTmp,iTaxon=iTaxon,lsHistory=lsHistory,strRandomFormula=strRandomFormula),lmRet)\n+})\n+##Test for correct call with 1 random and one fixed covariate\n+#strFormula = "adCur ~ 
Covariate1"\n+#strRandomFormula = "~1|FCovariate3"\n+#lmRet = glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=quasipoisson, data=frmeTmp)\n+#test_that("Test that the lm has the correct results for 1 random and one fixed covariate.",{\n+#  expect_equal(funcQuasiMult(strFormula=strFormula,frmeTmp=frmeTmp,iTaxon=iTaxon,lsHistory=lsHistory,strRandomFormula=strRandomFormula),lmRet)\n+#})\n+##Test for correct call with 1 random and 2 fixed covariates\n+#strFormula = "adCur ~ Covariate1 + Covariate2"\n+#strRandomFormula = "~1|FCovariate3"\n+#lmRet = glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=quasipoisson, data=frmeTmp)\n+#test_that("Test that the lm has the correct results for 1 random and 2 fixed covariates.",{\n+#  expect_equal(funcQuasiMult(strFormula=strFormula,frmeTmp=frmeTmp,iTaxon=iTaxon,lsHistory=lsHistory,strRandomFormula=strRandomFormula),lmRet)\n+#})\n+##Test for correct call with 2 random and 1 fixed covariates\n+#strFormula = "adCur ~ Covariate1"\n+#strRandomFormula = "~1|FCovariate4+1|FCovariate3"\n+#lmRet = glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=quasipoisson, data=frmeTmp)\n+#test_that("Test that the lm has the correct results for 2 random and 1 fixed covariates.",{\n+#  expect_equal(funcQuasiMult(strFormula=strFormula,frmeTmp=frmeTmp,iTaxon=iTaxon,lsHistory=lsHistory,strRandomFormula=strRandomFormula),lmRet)\n+#})\n+\n+\n+#Test transforms\n+context("Test funcNoTransform")\n+aTest1 = c(NA)\n+aTest2 = c(NULL)\n+aTest3 = c(0.5,1.4,2.4,3332.4,0.0,0.0000003)\n+aTest4 = c(0.1)\n+test_that("Test that no transform does not change the data.",{\n+  expect_equal(funcNoTransform(aTest1), aTest1)\n+  expect_equal(funcNoTransform(aTest2), aTest2)\n+  expect_equal(funcNoTransform(aTest3), aTest3)\n+  expect_equal(funcNoTransform(aTest4), aTest4)\n+})\n+\n+\n+context("Test funcArcsinSqrt")\n+aTest1 = c(NA)\n+aTest2 = 
c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0)\n+aTest3 = c(0.000001)\n+test_that("Test that funcArcsinSqrt performs the transform correctly.",{\n+  expect_equal(funcArcsinSqrt(NA), as.numeric(NA))\n+  expect_equal(funcArcsinSqrt(aTest1), asin(sqrt(aTest1)))\n+  expect_equal(funcArcsinSqrt(aTest2), asin(sqrt(aTest2)))\n+  expect_equal(funcArcsinSqrt(aTest3), asin(sqrt(aTest3)))\n+})\n\\ No newline at end of file\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-BoostGLM/test-BoostGLM.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-BoostGLM/test-BoostGLM.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,307 @@\n+c_strDir <- file.path(getwd( ),"..")\n+\n+source(file.path(c_strDir,"lib","Constants.R"))\n+source(file.path(c_strDir,"lib","Utility.R"))\n+source(file.path(c_strDir,"lib","AnalysisModules.R"))\n+\n+# General setup\n+covX1 = c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)\n+covX2 = c(144.4, 245.9, 141.9, 253.3, 144.7, 244.1, 150.7, 245.2, 160.1)\n+covX3 = as.factor(c(1,2,3,1,2,3,1,2,3))\n+covX4 = as.factor(c(1,1,1,1,2,2,2,2,2))\n+covX5 = as.factor(c(1,2,1,2,1,2,1,2,1))\n+covY = c(.26,  .31,  .25,  .50,  .36,  .40,  .52,  .28,  .38)\n+frmeTmp = data.frame(Covariate1=covX1, Covariate2=covX2, Covariate3=covX3, Covariate4=covX4, Covariate5=covX5, adCur= covY)\n+iTaxon = 6\n+\n+lsCov1 = list()\n+lsCov1$name = "Covariate1"\n+lsCov1$orig = "Covariate1"\n+lsCov1$taxon = "adCur"\n+lsCov1$data = covY\n+lsCov1$factors = "Covariate1"\n+lsCov1$metadata = covX1\n+vdCoef = c()\n+vdCoef["(Intercept)"]=round(0.0345077486,5)\n+vdCoef["Covariate1"]= round(0.0052097355,5)\n+vdCoef["Covariate2"]= round(0.0005806568,5)\n+vdCoef["Covariate32"]=round(-0.1333421874,5)\n+vdCoef["Covariate33"]=round(-0.1072006419,5)\n+vdCoef["Covariate42"]=round(0.0849198280,5)\n+lsCov1$value = c(Covariate1=round(0.005209736,5))\n+lsCov1$std = round(0.0063781728,5)\n+lsCov1$allCoefs = vdCoef\n+lsCov2 = list()\n+lsCov2$name = "Covariate2"\n+lsCov2$orig = "Covariate2"\n+lsCov2$taxon = "adCur"\n+lsCov2$data = covY\n+lsCov2$factors = "Covariate2"\n+lsCov2$metadata = covX2\n+lsCov2$value = c(Covariate2=round(0.0005806568,5))\n+lsCov2$std = round(0.0006598436,5)\n+lsCov2$allCoefs = vdCoef\n+lsCov3 = list()\n+lsCov3$name = "Covariate3"\n+lsCov3$orig = "Covariate32"\n+lsCov3$taxon = "adCur"\n+lsCov3$data = covY\n+lsCov3$factors = "Covariate3"\n+lsCov3$metadata = covX3\n+lsCov3$value = c(Covariate32=round(-0.1333422,5))\n+lsCov3$std = round(0.0895657826,5)\n+lsCov3$allCoefs = vdCoef\n+lsCov4 = list()\n+lsCov4$name = "Covariate3"\n+lsCov4$orig = "Covariate33"\n+lsCov4$taxon = 
"adCur"\n+lsCov4$data = covY\n+lsCov4$factors = "Covariate3"\n+lsCov4$metadata = covX3\n+lsCov4$value = c(Covariate33=round(-0.1072006,5))\n+lsCov4$std = round(0.0792209541,5)\n+lsCov4$allCoefs = vdCoef\n+lsCov5 = list()\n+lsCov5$name = "Covariate4"\n+lsCov5$orig = "Covariate42"\n+lsCov5$taxon = "adCur"\n+lsCov5$data = covY\n+lsCov5$factors = "Covariate4"\n+lsCov5$metadata = covX4\n+lsCov5$value = c(Covariate42=round(0.08491983,5))\n+lsCov5$std = round(0.0701018621,5)\n+lsCov5$allCoefs = vdCoef\n+\n+context("Test funcClean")\n+\n+context("Test funcBugHybrid")\n+# multiple covariates, one call lm\n+aiMetadata = c(1:5)\n+aiData = c(iTaxon)\n+dFreq = 0.5 / length( aiMetadata )\n+dSig = 0.25\n+dMinSamp = 0.1\n+adP = c()\n+lsSig = list()\n+funcReg = NA\n+funcAnalysis = funcLM\n+funcGetResult = funcGetLMResults\n+lsData = list(frmeData=frmeTmp, aiMetadata=aiMetadata, aiData=aiData, lsQCCounts=list())\n+lsData$astrMetadata = names(frmeTmp)[aiMetadata]\n+\n+adPExpected = round(c(0.4738687,0.4436566,0.4665972,0.5378693,0.3124672),5)\n+QCExpected = list(iLms=numeric(0))\n+lsSigExpected = list()\n+lsSigExpected[[1]] = lsCov1\n+lsSigExpected[[2]] = lsCov2 \n+lsSigExpected[[3]] = lsCov3\n+lsSigExpected[[4]] = lsCov4\n+lsSigExpected[[5]] = lsCov5\n+expectedReturn = list(adP=adPExpected,lsSig=lsSigExpected,lsQCCounts=QCExpected)\n+receivedReturn = funcBugHybrid(iTaxon=iTaxon,frmeData=frmeTmp,lsData=lsData,aiMetadata=aiMetadata,dFreq=dFreq,dSig=dSig,dMinSamp=dMinSamp,adP=adP,lsSig=lsSig, strLog=NA,funcReg=funcReg,lsNonPenalizedPredictors=NULL,funcAnalysis=funcAnalysis,lsRandomCovariates=NULL,funcGetResult=funcGetResult)\n+receivedReturn$adP = 
round(receivedReturn$adP,5)\n+\n+vCoefs=receivedReturn$lsSig[[1]]$allCoefs\n+vCoefs[1]=round(vCoefs[1],5)\n+vCoefs[2]=round(vCoefs[2],5)\n+vCoefs[3]=round(vCoefs[3],5)\n+vCoefs[4]=round(vCoefs[4],5)\n+vCoefs[5]=round(vCoefs[5],5)\n+vCoefs[6]=round(vCoefs[6],5)\n+receivedReturn$lsSig[[1]]$allCoefs=vCoefs\n+receivedReturn$lsSig[[2]]$allCoefs=vCoefs\n+receivedReturn$lsSig[[3]]$allCoefs=vCoefs\n+receivedReturn$lsSig[[4]]$allCoefs=vCoefs\n+receivedReturn$lsSig[[5]]$allCoefs='..b'Y\n+lsCov4$factors = "Covariate2"\n+lsCov4$metadata = frmeTmp[["Covariate2"]]\n+vdCoef = c(Covariate2=0.46666667)\n+lsCov4$value = vdCoef\n+lsCov4$std = sd(frmeTmp[["Covariate2"]])\n+lsCov4$allCoefs = vdCoef\n+\n+lsSigExpected = list()\n+lsSigExpected[[1]] = lsCov1\n+lsSigExpected[[2]] = lsCov2\n+lsSigExpected[[3]] = lsCov3\n+lsSigExpected[[4]] = lsCov4\n+\n+expectedReturn = list(adP=adPExpected,lsSig=lsSigExpected,lsQCCounts=QCExpected)\n+receivedReturn = funcBugHybrid(iTaxon=iTaxon,frmeData=frmeTmp,lsData=lsData,aiMetadata=aiMetadata,dFreq=dFreq,dSig=dSig,dMinSamp=dMinSamp,adP=adP,lsSig=lsSig, strLog=NA,funcReg=funcReg,lsNonPenalizedPredictors=NULL,funcAnalysis=funcAnalysis,lsRandomCovariates=NULL,funcGetResult=funcGetResult)\n+receivedReturn$adP = round(receivedReturn$adP,5)\n+test_that("funcBugHybrid works with the univariate option with 3 covariates.",{expect_equal(receivedReturn,expectedReturn)})\n+\n+\n+# single covariate, single call univariate\n+funcReg = NA\n+funcAnalysis = funcDoUnivariate\n+funcGetResult = NA\n+aiMetadata = c(1)\n+dFreq = 0.5 / length( aiMetadata )\n+lsData$astrMetadata = names(frmeTmp)[aiMetadata]\n+adPExpected = round(c(0.09679784),5)\n+QCExpected = list(iLms=numeric(0))\n+lsSigExpected = list()\n+lsSigExpected[[1]] = lsCov3\n+\n+expectedReturn = list(adP=adPExpected,lsSig=lsSigExpected,lsQCCounts=QCExpected)\n+receivedReturn = 
funcBugHybrid(iTaxon=iTaxon,frmeData=frmeTmp,lsData=lsData,aiMetadata=aiMetadata,dFreq=dFreq,dSig=dSig,dMinSamp=dMinSamp,adP=adP,lsSig=lsSig, strLog=NA,funcReg=funcReg,lsNonPenalizedPredictors=NULL,funcAnalysis=funcAnalysis,lsRandomCovariates=NULL,funcGetResult=funcGetResult)\n+receivedReturn$adP = round(receivedReturn$adP,5)\n+test_that("funcBugHybrid works with the univariate option with 1 covariates.",{expect_equal(receivedReturn,expectedReturn)})\n+\n+\n+context("Test funcBugs")\n+#One LM run\n+frmeData=frmeTmp\n+aiMetadata=c(1)\n+aiData=c(iTaxon)\n+strData=NA\n+dFreq= 0.5 / length( aiMetadata )\n+dSig=0.25\n+dMinSamp=0.1\n+strDirOut=NA\n+funcReg=NA\n+lsNonPenalizedPredictors=NULL\n+lsRandomCovariates=NULL\n+funcAnalysis=funcLM\n+funcGetResults=funcGetLMResults\n+fDoRPlot=FALSE\n+lsData = list(frmeData=frmeData, aiMetadata=aiMetadata, aiData=aiData, lsQCCounts=list())\n+lsData$astrMetadata = names(frmeTmp)[aiMetadata]\n+QCExpected = list(iLms=numeric(0))\n+\n+expectedReturn = list(aiReturnBugs=aiData,lsQCCounts=QCExpected)\n+receivedReturn = funcBugs(frmeData=frmeData, lsData=lsData, aiMetadata=aiMetadata, aiData=aiData, strData=strData, dFreq=dFreq, dSig=dSig, dMinSamp=dMinSamp,strDirOut=strDirOut, funcReg=funcReg,lsNonPenalizedPredictors=lsNonPenalizedPredictors,funcAnalysis=funcAnalysis,lsRandomCovariates=lsRandomCovariates,funcGetResults=funcGetResults,fDoRPlot=fDoRPlot)\n+\n+test_that("funcBugs works with the lm option with 1 covariate.",{expect_equal(receivedReturn,expectedReturn)})\n+\n+#multiple LM run\n+frmeData=frmeTmp\n+aiMetadata=c(1:5)\n+aiData=c(iTaxon)\n+strData=NA\n+dFreq= 0.5 / length( aiMetadata )\n+dSig=0.25\n+dMinSamp=0.1\n+strDirOut=NA\n+funcReg=NA\n+lsNonPenalizedPredictors=NULL\n+lsRandomCovariates=NULL\n+funcAnalysis=funcLM\n+funcGetResults=funcGetLMResults\n+fDoRPlot=FALSE\n+lsData = list(frmeData=frmeData, aiMetadata=aiMetadata, aiData=aiData, lsQCCounts=list())\n+lsData$astrMetadata = names(frmeTmp)[aiMetadata]\n+QCExpected = 
list(iLms=numeric(0))\n+\n+expectedReturn = list(aiReturnBugs=aiData,lsQCCounts=QCExpected)\n+receivedReturn = funcBugs(frmeData=frmeData, lsData=lsData, aiMetadata=aiMetadata, aiData=aiData, strData=strData, dFreq=dFreq, dSig=dSig, dMinSamp=dMinSamp,strDirOut=strDirOut, funcReg=funcReg,lsNonPenalizedPredictors=lsNonPenalizedPredictors,funcAnalysis=funcAnalysis,lsRandomCovariates=lsRandomCovariates,funcGetResults=funcGetResults,fDoRPlot=fDoRPlot)\n+\n+print("START START")\n+print(expectedReturn)\n+print("RECEIVED")\n+print(receivedReturn)\n+print("STOP STOP")\n+\n+test_that("funcBugs works with the lm option with multiple covariates.",{expect_equal(receivedReturn,expectedReturn)})\n\\ No newline at end of file\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-IO/test-IO.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-IO/test-IO.R Sun Feb 08 23:39:43 2015 -0500
[
b'@@ -0,0 +1,162 @@\n+c_strDir <- file.path(getwd( ),"..")\n+\n+source(file.path(c_strDir,"lib","Constants.R"))\n+source(file.path(c_strDir,"lib","ValidateData.R"))\n+strTestingDirectory = file.path(c_strDir,c_strTestingDirectory)\n+\n+expect_equal(funcParseIndexSlices("1",cNames),c(1))\n+\n+cNames = c("One","Two","Three","Four","Five","Six","Seven","Eight","Nine","Ten","Eleven",\n+  "Twelve","Thirteen","Fourteen","Fifteen")\n+\n+test_that("Just Numerics are parsed",{\n+  expect_equal(funcParseIndexSlices("1",cNames),c(1))\n+  expect_equal(funcParseIndexSlices("8,10",cNames),c(8,10))\n+  expect_equal(funcParseIndexSlices("2-6",cNames), c(2,3,4,5,6))\n+  expect_equal(funcParseIndexSlices("3,7,10-12",cNames), c(3,7,10,11,12))\n+})\n+\n+test_that("Missing numbers are parsed",{\n+  expect_equal(funcParseIndexSlices("-",cNames), c(2:15))\n+  expect_equal(funcParseIndexSlices("-4",cNames), c(2,3,4))\n+  expect_equal(funcParseIndexSlices("3-",cNames), c(3:15))\n+})\n+\n+test_that("Words are parsed correctly",{\n+  expect_equal(funcParseIndexSlices("One",cNames), c(1))\n+  expect_equal(funcParseIndexSlices("Eight,Ten",cNames), c(8,10))\n+  expect_equal(funcParseIndexSlices("Two-Six",cNames), c(2,3,4,5,6))\n+  expect_equal(funcParseIndexSlices("Three,Seven,Ten-Twelve",cNames), c(3,7,10,11,12)) \n+})\n+\n+test_that("Missing words are parsed",{\n+  expect_equal(funcParseIndexSlices("-Four",cNames), c(2:4))\n+  expect_equal(funcParseIndexSlices("Three-",cNames), c(3:15))\n+})\n+\n+test_that("Words and numbers are parsed correctly",{\n+  expect_equal(funcParseIndexSlices("Eight,10",cNames), c(8,10))\n+  expect_equal(funcParseIndexSlices("2-Six",cNames), c(2,3,4,5,6))\n+  expect_equal(funcParseIndexSlices("Three,7,10-Twelve",cNames), c(3,7,10,11,12))\n+})\n+\n+\n+context("Test funcWriteMatrixToReadConfigFile")\n+# File to temporarily write to\n+strWriteMatrixRCTestFile = file.path(strTestingDirectory,c_strTemporaryFiles,"FuncWriteMatrixToReadConfigFileTemp.read.config")\n+# 
Files that hold answers\n+strFileSimpleRCFileAnswer = file.path(strTestingDirectory,c_strCorrectAnswers,"FuncWriteMatrixToReadConfigFile_SimpleAnswer.read.config")\n+strFileUseAllRCFileAnswer = file.path(strTestingDirectory,c_strCorrectAnswers,"FuncWriteMatrixToReadConfigFile_AllAnswer.read.config")\n+strFileAppendTrueRCFileAnswer = file.path(strTestingDirectory,c_strCorrectAnswers,"FuncWriteMatrixToReadConfigFile_AppendAnswer.read.config")\n+#Input matrix file\n+strFileMatrix = file.path(strTestingDirectory,c_strTestingInput,"TestMatrix.tsv")\n+\n+#Get read config files in different scenarios\n+funcWriteMatrixToReadConfigFile(strWriteMatrixRCTestFile,"SimpleMatrix")\n+strSimpleInterface = readLines(strWriteMatrixRCTestFile)\n+funcWriteMatrixToReadConfigFile(strWriteMatrixRCTestFile,"AllMatrix",strRowIndices="1,2,3,4,5", strColIndices="10,11,12",acharDelimiter=" ")\n+strUseAllParametersInterface = readLines(strWriteMatrixRCTestFile)\n+funcWriteMatrixToReadConfigFile(strWriteMatrixRCTestFile,"SimpleMatrix")\n+funcWriteMatrixToReadConfigFile(strWriteMatrixRCTestFile,"SimpleMatrix")\n+strAppendFalseInterface = readLines(strWriteMatrixRCTestFile)\n+funcWriteMatrixToReadConfigFile(strWriteMatrixRCTestFile,"SimpleMatrix")\n+funcWriteMatrixToReadConfigFile(strWriteMatrixRCTestFile,"SimpleMatrix",fAppend=TRUE)\n+strAppendTrueInterface = readLines(strWriteMatrixRCTestFile)\n+\n+test_that("Correct config file is written",{\n+  expect_equal(strSimpleInterface,readLines(strFileSimpleRCFileAnswer))\n+  expect_equal(strUseAllParametersInterface,readLines(strFileUseAllRCFileAnswer))\n+  expect_equal(strAppendFalseInterface,readLines(strFileSimpleRCFileAnswer))\n+  expect_equal(strAppendTrueInterface,readLines(strFileAppendTrueRCFileAnswer))\n+})\n+\n+context("Test funcReadConfigFile")\n+lsSimpleRC = funcReadConfigFile(strFileSimpleRCFileAnswer,strFileMatrix)\n+lsAllRC = funcReadConfigFile(strFileUseAllRCFileAnswer,strFileMatrix)\n+\n+lsSimpleListAnswer = 
list()\n+lsSimpleListAnswer[[1]]=c("SimpleMatrix",strFileMatrix,"\\t","-","-")\n+lsAllListAnswer = list()\n+lsAll'..b'ure5")\n+colnames(dfUseAllReadCorrect) = c("Sample1", "Sample2", "Sample3")\n+\n+test_that("Matrix file is read correctly.",{\n+  expect_equal(dfSimpleRead,dfSimpleReadCorrect)\n+  expect_equal(dfUseAllParametersRead,dfUseAllReadCorrect)\n+})\n+\n+context("Test funcReadMatrices")\n+\n+sConfigureFile1Matrix = file.path(strTestingDirectory,c_strTestingInput,"1Matrix.read.config")\n+mtxOne = as.data.frame(as.matrix(rbind(c(11,12,13,14,15),c(21,22,23,24,25),c(31,32,33,34,35),c(41,42,43,44,45),\n+                                                        c(51,52,53,54,55),c(61,62,63,64,65),c(71,72,73,74,75),c(81,82,83,84,85),\n+                                                        c(91,92,93,94,95),c(101,102,103,104,105),c(111,112,113,114,115),c(121,122,123,124,125),\n+                                                        c(131,132,133,134,135),c(141,142,143,144,145),c(151,152,153,154,155))))\n+rownames(mtxOne) = c("Feature1","Feature2","Feature3","Feature4","Feature5","Feature6","Feature7","Feature8","Feature9","Feature10",\n+                     "Feature11","Feature12","Feature13","Feature14","Feature15")\n+colnames(mtxOne) = c("Sample1","Sample2","Sample3","Sample4","Sample5")\n+sConfigureFile2Matrix = file.path(strTestingDirectory,c_strTestingInput,"2Matrix.read.config")\n+mtxTwo = as.data.frame(as.matrix(rbind(c(11,12,13),c(21,22,23),c(31,32,33))))\n+rownames(mtxTwo) = c("Feature1","Feature2","Feature3")\n+colnames(mtxTwo) = c("Sample1","Sample2","Sample3")\n+\n+sConfigureFile3Matrix = file.path(strTestingDirectory,c_strTestingInput,"3Matrix.read.config")\n+mtxThree = as.data.frame(as.matrix(rbind(c(11,12,14),c(21,22,24),c(31,32,34),c(41,42,44),\n+                                         c(51,52,54),c(61,62,64),c(71,72,74),c(81,82,84),c(91,92,94))))\n+rownames(mtxThree) = 
c("Feature1","Feature2","Feature3","Feature4","Feature5","Feature6","Feature7","Feature8","Feature9")\n+colnames(mtxThree) = c("Sample1","Sample2","Sample4")\n+\n+#Read one matrix\n+ldfRet1 = funcReadMatrices(configureFile=sConfigureFile1Matrix,strFileMatrix)\n+ldfRet1Answer = list( "Matrix1" = mtxOne)\n+\n+#Read two matrices\n+ldfRet2 = funcReadMatrices(configureFile=sConfigureFile2Matrix,strFileMatrix)\n+ldfRet2Answer = list( "Matrix1" = mtxOne,\n+                      "Matrix2" = mtxTwo)\n+\n+#Read three matrices from two different files\n+ldfRet3 = funcReadMatrices(configureFile=sConfigureFile3Matrix,strFileMatrix)\n+ldfRet3Answer = list( "Matrix1" = mtxOne,\n+                      "Matrix2" = mtxTwo,\n+                      "Matrix3" = mtxThree)\n+\n+test_that("Test funcReadMatrices read in the correct matrices not matter the number or source",{\n+  expect_equal(ldfRet1,ldfRet1Answer)\n+  expect_equal(ldfRet2,ldfRet2Answer)\n+  expect_equal(ldfRet3,ldfRet3Answer)\n+})\n+\n+context("Test funcWriteMatrices")\n+strFuncWriteMatricesMatrix1 = file.path(strTestingDirectory,c_strTemporaryFiles,"FuncWriteMatrices1.tsv")\n+strFuncWriteMatricesMatrix2 = file.path(strTestingDirectory,c_strTemporaryFiles,"FuncWriteMatrices2.tsv")\n+strFuncWriteMatricesMatrix1Answer = file.path(strTestingDirectory, c_strCorrectAnswers,"FuncWriteMatrices1.tsv")\n+strFuncWriteMatricesMatrix2Answer = file.path(strTestingDirectory, c_strCorrectAnswers,"FuncWriteMatrices2.tsv")\n+strFuncWriteMatricesRCFile = file.path(strTestingDirectory,c_strTemporaryFiles,"FuncWriteMatrices.read.config")\n+strFuncWriteMatricesRCFileAnswer = file.path(strTestingDirectory, c_strCorrectAnswers,"FuncWriteMatrices.read.config")\n+funcWriteMatrices(list("1"=mtxOne, "2"=mtxThree),c(strFuncWriteMatricesMatrix1, strFuncWriteMatricesMatrix2), strFuncWriteMatricesRCFile)\n+\n+test_that("Test that writing to a file occurs correctly, for both matrix and configure file.",{\n+  
expect_equal(readLines(strFuncWriteMatricesMatrix1Answer),readLines(strFuncWriteMatricesMatrix1))\n+  expect_equal(readLines(strFuncWriteMatricesMatrix2Answer),readLines(strFuncWriteMatricesMatrix2))\n+  expect_equal(readLines(strFuncWriteMatricesRCFileAnswer),readLines(strFuncWriteMatricesRCFile))\n+})\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-Maaslin/test-Maaslin.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-Maaslin/test-Maaslin.R Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,41 @@
+c_strDir <- file.path(getwd( ),"..")
+
+source(file.path(c_strDir,"lib","Constants.R"))
+strTestingDirectory = file.path(c_strDir,c_strTestingDirectory)
+sScriptMaaslin = file.path( c_strDir, "Maaslin.R" )
+
+context("Test Run From Commandline")
+
+#Input Files
+sTestReadConfig = file.path(strTestingDirectory, c_strTestingInput, "TestMaaslin.read.config")
+sTestCustomR = file.path(strTestingDirectory, c_strTestingInput, "TestMaaslin.R")
+sTestMaaslinDirectory = file.path(strTestingDirectory, c_strTemporaryFiles, "testMaaslin")
+sTestOutput = file.path(sTestMaaslinDirectory,"TestMaaslin_Summary.txt")
+sTestTSV = file.path(strTestingDirectory, c_strTestingInput, "TestMaaslin.tsv")
+#Test file answers
+sTestOutputAnswer = file.path(strTestingDirectory, c_strCorrectAnswers, "TestMaaslin.tsv")
+
+#Delete any existing test MaAsLin output directory
+unlink(sTestMaaslinDirectory, recursive=TRUE)
+#Make necessary directories
+dir.create(sTestMaaslinDirectory)
+dir.create(file.path(sTestMaaslinDirectory,"QC"))
+
+sCommand = paste(sScriptMaaslin, "-v", "ERROR", "-d", "0.25", "-r", "0.0001", "-p", "0.1", sTestOutput, sTestTSV, sTestReadConfig, sTestCustomR, sep=" ")
+print(sCommand)
+system(sCommand)
+
+sExpectedTitle = "\tVariable\tFeature\tValue\tCoefficient\tN\tN.not.0\tP.value\tQ.value"
+iExpectedNumberOfLines = 3
+lsOutputSummaryFile = readLines(sTestOutput)
+
+test_that("Make sure that the summary output file is what is expected (generally).",{
+  expect_equal(lsOutputSummaryFile[1], sExpectedTitle)
+  expect_equal(length(lsOutputSummaryFile),iExpectedNumberOfLines)
+})
+
+lsDirectoryStructure = list.files(sTestMaaslinDirectory)
+lsDirectoryStructureAnswer = c(basename(sTestOutput),"QC","TestMaaslin-age.pdf","TestMaaslin-age.txt","TestMaaslin-dx.txt","TestMaaslin.pdf","TestMaaslin.txt")
+test_that("Make sure the expected directory structure is created.",{
+  expect_equal(sort(lsDirectoryStructure), sort(lsDirectoryStructureAnswer))
+})
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-SummarizeMaaslin/test-SummarizeMaaslin.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-SummarizeMaaslin/test-SummarizeMaaslin.R Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,72 @@
+c_strDir <- file.path(getwd( ),"..")
+
+source(file.path(c_strDir,"lib","Constants.R"))
+source(file.path(c_strDir,"lib","SummarizeMaaslin.R"))
+source(file.path(c_strDir,"lib","Utility.R"))
+
+context("Test funcSummarizeDirectory")
+strDirectoryNone = file.path(c_strDir,c_strTestingDirectory,c_strTestingInput,"funcSummarizeDirectory","None")
+strDirectory1 = file.path(c_strDir,c_strTestingDirectory,c_strTestingInput,"funcSummarizeDirectory","1")
+strDirectory3 = file.path(c_strDir,c_strTestingDirectory,c_strTestingInput,"funcSummarizeDirectory","3")
+strFileBase1 = "FileBase1.txt"
+strFileBase2 = "FileBase2.txt"
+
+sKeyword = "Q.value"
+sAltKeyword = "P.value"
+sAltSignificance = "0.35"
+
+sBaseName = "FuncSummarizeDirectory"
+
+#Output and answer files
+sNoFileResult = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-NoFileResult.txt")
+sNoFileResultAltKeyword = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-NoFileAltKeyResult.txt")
+sNoFileResultAltSig = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-NoFileAltSigResult.txt")
+sNoFileResultAnswer = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-NoFileAnswer.txt")
+sNoFileResultAnswerAltKeyword = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-NoFileAltKeyAnswer.txt")
+sNoFileResultAnswerAltSig = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-NoFileAltSigAnswer.txt")
+unlink(sNoFileResult)
+sCorrectResults1File = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-1FileResult.txt")
+sCorrectResults1FileAltKeyword = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-1FileAltKeyResult.txt")
+sCorrectResults1FileAltSig = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-1FileAltSigResult.txt")
+sCorrectResults1FileAnswer = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-1FileResult.txt")
+sCorrectResults1FileAnswerAltKeyword = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-1FileAltKeyResult.txt")
+sCorrectResults1FileAnswerAltSig = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-1FileAltSigResult.txt")
+unlink(sCorrectResults1File)
+sCorrectResults3Files = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-3FileResult.txt")
+sCorrectResults3FilesAltKeyword = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-3FileAltKeyResult.txt")
+sCorrectResults3FilesAltSig = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncSummarizeDirectory-3FileAltSigResult.txt")
+sCorrectResults3FilesAnswer = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-3FileResult.txt")
+sCorrectResults3FilesAnswerAltKeyword = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-3FileAltKeyResult.txt")
+sCorrectResults3FilesAnswerAltSig = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncSummarizeDirectory-3FileAltSigResult.txt")
+unlink(sCorrectResults3Files)
+
+#Generate the summary files for each directory, keyword, and significance level
+funcSummarizeDirectory(astrOutputDirectory=strDirectoryNone, strBaseName=sBaseName, astrSummaryFileName=sNoFileResult, astrKeyword=sKeyword, afSignificanceLevel="0.25")
+funcSummarizeDirectory(astrOutputDirectory=strDirectory1, strBaseName=sBaseName, astrSummaryFileName=sCorrectResults1File, astrKeyword=sKeyword, afSignificanceLevel="0.25")
+funcSummarizeDirectory(astrOutputDirectory=strDirectory3, strBaseName=sBaseName, astrSummaryFileName=sCorrectResults3Files, astrKeyword=sKeyword, afSignificanceLevel="0.25")
+
+funcSummarizeDirectory(astrOutputDirectory=strDirectoryNone, strBaseName=sBaseName, astrSummaryFileName=sNoFileResultAltKeyword, astrKeyword=sAltKeyword, afSignificanceLevel="0.25")
+funcSummarizeDirectory(astrOutputDirectory=strDirectory1, strBaseName=sBaseName, astrSummaryFileName=sCorrectResults1FileAltKeyword, astrKeyword=sAltKeyword, afSignificanceLevel="0.25")
+funcSummarizeDirectory(astrOutputDirectory=strDirectory3, strBaseName=sBaseName, astrSummaryFileName=sCorrectResults3FilesAltKeyword, astrKeyword=sAltKeyword, afSignificanceLevel="0.25")
+
+funcSummarizeDirectory(astrOutputDirectory=strDirectoryNone, strBaseName=sBaseName, astrSummaryFileName=sNoFileResultAltSig, astrKeyword= sKeyword, afSignificanceLevel=sAltSignificance)
+funcSummarizeDirectory(astrOutputDirectory=strDirectory1, strBaseName=sBaseName, astrSummaryFileName=sCorrectResults1FileAltSig, astrKeyword= sKeyword, afSignificanceLevel=sAltSignificance)
+funcSummarizeDirectory(astrOutputDirectory=strDirectory3, strBaseName=sBaseName, astrSummaryFileName=sCorrectResults3FilesAltSig, astrKeyword= sKeyword, afSignificanceLevel=sAltSignificance)
+
+#Default keyword and significance: each written summary must match its stored answer line for line
+test_that("Check the cases where no, and real summary files exist.",{
+  #Directory without result files
+  expect_equal( readLines( sNoFileResult ), readLines( sNoFileResultAnswer ) )
+  #Directory with one result file
+  expect_equal( readLines( sCorrectResults1File ), readLines( sCorrectResults1FileAnswer ) )
+  #Directory with three result files
+  expect_equal( readLines( sCorrectResults3Files ), readLines( sCorrectResults3FilesAnswer ) )
+})
+
+#Alternate keyword: each written summary must match its stored answer line for line
+test_that("Check changing the keyword.",{
+  #Directory without result files
+  expect_equal( readLines( sNoFileResultAltKeyword ), readLines( sNoFileResultAnswerAltKeyword ) )
+  #Directory with one result file
+  expect_equal( readLines( sCorrectResults1FileAltKeyword ), readLines( sCorrectResults1FileAnswerAltKeyword ) )
+  #Directory with three result files
+  expect_equal( readLines( sCorrectResults3FilesAltKeyword ), readLines( sCorrectResults3FilesAnswerAltKeyword ) )
+})
+
+#Alternate significance level: each written summary must match its stored answer line for line
+test_that("Check that changing the significance threshold effects inclusion.",{
+  #Directory without result files
+  expect_equal( readLines( sNoFileResultAltSig ), readLines( sNoFileResultAnswerAltSig ) )
+  #Directory with one result file
+  expect_equal( readLines( sCorrectResults1FileAltSig ), readLines( sCorrectResults1FileAnswerAltSig ) )
+  #Directory with three result files
+  expect_equal( readLines( sCorrectResults3FilesAltSig ), readLines( sCorrectResults3FilesAnswerAltSig ) )
+})
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-Utility/test-Utility.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-Utility/test-Utility.R Sun Feb 08 23:39:43 2015 -0500
b
b'@@ -0,0 +1,159 @@\n+c_strDir <- file.path(getwd( ),"..")\n+\n+source(file.path(c_strDir,"lib","Constants.R"))\n+source(file.path(c_strDir,"lib","Utility.R"))\n+\n+context("Test funcRename")\n+test_that("Test that unclassified and none otus are represented as 2 terminal clades and others are 1",{\n+  expect_equal(funcRename(paste("A","B","C","D",c_strUnclassified, sep=c_cFeatureDelim)),paste("D",c_strUnclassified, sep=c_cFeatureDelim))\n+  expect_equal(funcRename(paste("A","B","C","D","101", sep=c_cFeatureDelim)),paste("D","101", sep=c_cFeatureDelim))\n+  expect_equal(funcRename(paste("A","B","C","D", sep=c_cFeatureDelim)),paste("D", sep=c_cFeatureDelim))\n+  expect_equal(funcRename(paste("A", sep=c_cFeatureDelim)),paste("A", sep=c_cFeatureDelim))\n+  expect_equal(funcRename(paste(c_strUnclassified, sep=c_cFeatureDelim)),paste(c_strUnclassified, sep=c_cFeatureDelim))\n+  expect_equal(funcRename(paste("101", sep=c_cFeatureDelim)),paste("101", sep=c_cFeatureDelim))\n+})\n+\n+context("Test funcColorHelper")\n+test_that("Test that min is min and max is max and average is average even if given as NA",{\n+  expect_equal(funcColorHelper( dMax = 1, dMin = 1, dMed = NA ), list( dMin = 1, dMax = 1, dMed = 1))\n+  expect_equal(funcColorHelper( dMax = -3, dMin = 10, dMed = NA ), list( dMin = -3, dMax = 10, dMed = 3.5))\n+  expect_equal(funcColorHelper( dMax = 1, dMin = 11, dMed = NA ), list( dMin = 1, dMax = 11, dMed = 6))\n+  expect_equal(funcColorHelper( dMax = 4, dMin = 10, dMed = 5 ), list( dMin = 4, dMax = 10, dMed = 5))\n+  expect_equal(funcColorHelper( dMax = 10, dMin = 4, dMed = 5 ), list( dMin = 4, dMax = 10, dMed = 5))\n+})\n+\n+context("Test funcTrim")\n+test_that("Test that white spaces at the beginning and end of s string are removed",{\n+  expect_equal(funcTrim("TRIM"),"TRIM")\n+  expect_equal(funcTrim(" TRIM"),"TRIM")\n+  expect_equal(funcTrim("  TRIM"),"TRIM")\n+  expect_equal(funcTrim(" TRIM "),"TRIM")\n+  expect_equal(funcTrim("TRIM "),"TRIM")\n+  
expect_equal(funcTrim("      TRIM          "),"TRIM")\n+  expect_equal(funcTrim("TR IM"),"TR IM")\n+  expect_equal(funcTrim(" TR IM"),"TR IM")\n+  expect_equal(funcTrim("  TR I M"),"TR I M")\n+  expect_equal(funcTrim(" TR IM "),"TR IM")\n+  expect_equal(funcTrim("T R IM "),"T R IM")\n+  expect_equal(funcTrim("      T RIM          "),"T RIM")\n+})\n+\n+#TODO currently the capture versio of this does not produce a tabbed table (or default table delim) which is not consistent with the rest of the code base.\n+context("Test funcWrite")\n+#Answer files\n+c_sAnswerWriteFile1 = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncWriteTemp1.txt")\n+c_sAnswerWriteFile2 = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncWriteTemp2.txt")\n+print("c_sAnswerWriteFile2")\n+print(c_sAnswerWriteFile2)\n+c_sAnswerWriteDFFile1 = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncWriteTempDF1.txt")\n+c_sAnswerWriteDFFile2 = file.path(c_strDir,c_strTestingDirectory,c_strCorrectAnswers,"FuncWriteTempDF2.txt")\n+\n+#Working files\n+c_sTempWriteFile1 = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncWriteTemp1.txt")\n+c_sTempWriteFile2 = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncWriteTemp2.txt")\n+c_sTempWriteDFFile1 = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncWriteTempDF1.txt")\n+c_sTempWriteDFFile2 = file.path(c_strDir,c_strTestingDirectory,c_strTemporaryFiles,"FuncWriteTempDF2.txt")\n+dfTest = as.data.frame(as.matrix(cbind(c(1,11,111),c(2,22,222),c(3,33,333))))\n+sWriteString = "Testing, 1,2,3 anything but 
that."\n+unlink(c_sTempWriteFile1)\n+unlink(c_sTempWriteFile2)\n+unlink(c_sTempWriteDFFile1)\n+unlink(c_sTempWriteDFFile2)\n+funcWrite(sWriteString,c_sTempWriteFile1)\n+funcWrite(sWriteString,c_sTempWriteFile2)\n+funcWrite(sWriteString,c_sTempWriteFile2)\n+funcWrite(dfTest,c_sTempWriteDFFile1)\n+funcWrite(dfTest,c_sTempWriteDFFile2)\n+funcWrite(dfTest,c_sTempWriteDFFile2)\n+\n+test_that("Test that a test file is written and appended to for strings and d'..b'ithFactors, aiColumnIndicesToSearch=NULL),"D")\n+  expect_equal(funcMFAValue2Col(xValue=2.0,dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(1,3)),NULL)\n+  expect_equal(funcMFAValue2Col(xValue=6,dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(2,3)),NULL)\n+  expect_equal(funcMFAValue2Col(xValue="one",dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(1,2)),NULL)\n+  expect_equal(funcMFAValue2Col(xValue="two",dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(1,2)),NULL)\n+  expect_equal(funcMFAValue2Col(xValue=2.0,dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(2)),"B")\n+  expect_equal(funcMFAValue2Col(xValue=6,dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(1)),"A")\n+  expect_equal(funcMFAValue2Col(xValue="one",dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(3)),"C")\n+  expect_equal(funcMFAValue2Col(xValue=paste("D","2",sep=c_sMFANameSep1),dfData=dfTestWithFactors, aiColumnIndicesToSearch=c(4)),"D")\n+})\n+\n+context("Test funcFormulaStrToList")\n+test_that("List of covariates are given, from lm or mixed model formulas",{\n+  expect_equal(funcFormulaStrToList("adCur ~ `1Covariate`"),c("1Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ `1Covariate` + `2Covariate`"),c("1Covariate","2Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ `1Covariate` + `2Covariate` + `3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ 1|`1Covariate`"),c("1Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ 1|`1Covariate` + 
1|`2Covariate`"),c("1Covariate","2Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ 1|`1Covariate` + 1|`2Covariate` + 1|`3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ 1|`1Covariate` + `2Covariate` + 1|`3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ 1|`1Covariate` + 1|`2Covariate` + `3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ `1Covariate` + 1|`2Covariate` + 1|`3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ `1Covariate` + `2Covariate` + 1|`3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ 1|`1Covariate` + `2Covariate` + `3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+  expect_equal(funcFormulaStrToList("adCur ~ `1Covariate` + 1|`2Covariate` + `3Covariate`"),c("1Covariate","2Covariate","3Covariate"))\n+})\n+\n+context("Test funcFormulaListToString")\n+test_that("The correct string formula for a lm or mixed model is created from a list of covariates",{\n+  expect_equal(funcFormulaListToString(NULL),c(NA,NA))\n+  expect_equal(funcFormulaListToString(c("1Covariate")),c("adCur ~ `1Covariate`",NA))\n+  expect_equal(funcFormulaListToString(c("1Covariate","2Covariate")),c("adCur ~ `1Covariate` + `2Covariate`",NA))\n+  expect_equal(funcFormulaListToString(c("1Covariate","2Covariate","3Covariate")),c("adCur ~ `1Covariate` + `2Covariate` + `3Covariate`",NA))\n+  expect_equal(funcFormulaListToString(c("1Covariate","2Covariate"),c("3Covariate")),c(NA,"adCur ~ `1Covariate` + `2Covariate` + 1|`3Covariate`"))\n+  expect_equal(funcFormulaListToString(c("1Covariate","3Covariate"),c("2Covariate")),c(NA,"adCur ~ `1Covariate` + `3Covariate` + 1|`2Covariate`"))\n+  expect_equal(funcFormulaListToString(c("2Covariate","3Covariate"),c("1Covariate")),c(NA,"adCur ~ `2Covariate` + 
`3Covariate` + 1|`1Covariate`"))\n+  expect_equal(funcFormulaListToString(c("2Covariate"),c("1Covariate","3Covariate")),c(NA,"adCur ~ `2Covariate` + 1|`1Covariate` + 1|`3Covariate`"))\n+  expect_equal(funcFormulaListToString(c("1Covariate"),c("2Covariate","3Covariate")),c(NA,"adCur ~ `1Covariate` + 1|`2Covariate` + 1|`3Covariate`"))\n+  expect_equal(funcFormulaListToString(c("3Covariate"),c("1Covariate","2Covariate")),c(NA,"adCur ~ `3Covariate` + 1|`1Covariate` + 1|`2Covariate`"))\n+})\n\\ No newline at end of file\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-ValidateData/test-ValidateData.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/test-ValidateData/test-ValidateData.R Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,53 @@
+c_strDir <- file.path(getwd( ),"..")
+
+source(file.path(c_strDir,"lib","Constants.R"))
+source(file.path(c_strDir,"lib","ValidateData.R"))
+
+#funcIsValid is expected to reject only NA and NULL and to accept every other value tried here
+context("Test funcIsValid")
+test_that("NA and NUll are false, all others are true",{
+  #Rejected: missing values
+  expect_equal( funcIsValid( NA ), FALSE )
+  expect_equal( funcIsValid( NULL ), FALSE )
+  #Accepted: numbers and strings, including vectors, a vector holding an NA and the empty string
+  expect_equal( funcIsValid( 1 ), TRUE )
+  expect_equal( funcIsValid( "3" ), TRUE )
+  expect_equal( funcIsValid( c( "3", "4" ) ), TRUE )
+  expect_equal( funcIsValid( c( 3, NA ) ), TRUE )
+  expect_equal( funcIsValid( "" ), TRUE )
+  #Accepted: an empty list, a decimal number, both logical values and a factor
+  expect_equal( funcIsValid( list( ) ), TRUE )
+  expect_equal( funcIsValid( 2.3 ), TRUE )
+  expect_equal( funcIsValid( TRUE ), TRUE )
+  expect_equal( funcIsValid( FALSE ), TRUE )
+  expect_equal( funcIsValid( as.factor( 3 ) ), TRUE )
+})
+
+#funcIsValidString is expected to accept a single character string only (the empty string included)
+context("Test funcIsValidString")
+test_that("Test only strings are true",{
+  #Rejected: missing values and a number
+  expect_equal( funcIsValidString( NA ), FALSE )
+  expect_equal( funcIsValidString( NULL ), FALSE )
+  expect_equal( funcIsValidString( 1 ), FALSE )
+  #Accepted: a single string; rejected: a vector of two strings
+  expect_equal( funcIsValidString( "3" ), TRUE )
+  expect_equal( funcIsValidString( c( "3", "4" ) ), FALSE )
+  #Accepted: the empty string
+  expect_equal( funcIsValidString( "" ), TRUE )
+  #Rejected: an empty list, a decimal number and both logical values
+  expect_equal( funcIsValidString( list( ) ), FALSE )
+  expect_equal( funcIsValidString( 2.3 ), FALSE )
+  expect_equal( funcIsValidString( TRUE ), FALSE )
+  expect_equal( funcIsValidString( FALSE ), FALSE )
+})
+
+#funcIsValidFileName is expected to accept only a string that names an existing file
+context("Test funcIsValidFileName")
+#Two answer files from the testing directory serve as paths that are expected to exist
+strFileSimpleRCFileAnswer = file.path( c_strDir, c_strTestingDirectory, c_strCorrectAnswers, "FuncWriteMatrixToReadConfigFile_SimpleAnswer.read.config" )
+strFileUseAllRCFileAnswer = file.path( c_strDir, c_strTestingDirectory, c_strCorrectAnswers, "FuncWriteMatrixToReadConfigFile_AllAnswer.read.config" )
+
+test_that("Test only strings pointing to existing files are true",{
+  #Rejected: missing values and a number
+  expect_equal( funcIsValidFileName( NA ), FALSE )
+  expect_equal( funcIsValidFileName( NULL ), FALSE )
+  expect_equal( funcIsValidFileName( 1 ), FALSE )
+  #Rejected: strings that do not name an existing file, a vector of such strings and the empty string
+  expect_equal( funcIsValidFileName( "3" ), FALSE )
+  expect_equal( funcIsValidFileName( c( "3", "4" ) ), FALSE )
+  expect_equal( funcIsValidFileName( "" ), FALSE )
+  #Rejected: an empty list, a decimal number and both logical values
+  expect_equal( funcIsValidFileName( list( ) ), FALSE )
+  expect_equal( funcIsValidFileName( 2.3 ), FALSE )
+  expect_equal( funcIsValidFileName( TRUE ), FALSE )
+  expect_equal( funcIsValidFileName( FALSE ), FALSE )
+  #Accepted: paths to the two answer files
+  expect_equal( funcIsValidFileName( strFileSimpleRCFileAnswer ), TRUE )
+  expect_equal( funcIsValidFileName( strFileUseAllRCFileAnswer ), TRUE )
+})
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileAltKeyResult.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileAltKeyResult.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,8 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+2 V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+3 V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+4 V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+5 V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+6 V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+7 V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileAltSigResult.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileAltSigResult.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,9 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+2 V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+3 V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+4 V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+5 V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+6 V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+7 V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
+8 V1 Bacteria|8 V1 -0.000133233647710018 228 28 0.301159271814143 0.315830056496576
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileResult.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-1FileResult.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,8 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+2 V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+3 V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+4 V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+5 V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+6 V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+7 V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileAltKeyResult.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileAltKeyResult.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+2 V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+3 V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+4 V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+5 V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+6 V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+7 V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
+8 V2 Bacteria|1 V2 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+9 V2 Bacteria|2 V2 -0.000172924087271453 228 39 0.100173356878955 0.104501595020731
+10 V2 Bacteria|3 V2 0.000176541929148173 228 50 0.200213541203878 0.204974253925626
+11 V3 Bacteria|1 V3 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+12 V3 Bacteria|2 V3 -0.000172924087271453 228 39 0.0100173356878955 0.0104501595020731
+13 V3 Bacteria|3 V3 0.000176541929148173 228 50 0.0200213541203878 0.0204974253925626
+14 V3 Bacteria|4 V3 0.000233041055999211 228 54 0.0300309255350078 0.0306531472993643
+15 V3 Bacteria|5 V3 0.000170023412983991 228 28 0.140055803225588 0.144098213677034
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileAltSigResult.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileAltSigResult.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,19 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+2 V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+3 V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+4 V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+5 V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+6 V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+7 V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
+8 V1 Bacteria|8 V1 -0.000133233647710018 228 28 0.301159271814143 0.315830056496576
+9 V2 Bacteria|1 V2 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+10 V2 Bacteria|2 V2 -0.000172924087271453 228 39 0.100173356878955 0.104501595020731
+11 V2 Bacteria|3 V2 0.000176541929148173 228 50 0.200213541203878 0.204974253925626
+12 V2 Bacteria|4 V2 0.000233041055999211 228 54 0.300309255350078 0.306531472993643
+13 V3 Bacteria|1 V3 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+14 V3 Bacteria|2 V3 -0.000172924087271453 228 39 0.0100173356878955 0.0104501595020731
+15 V3 Bacteria|3 V3 0.000176541929148173 228 50 0.0200213541203878 0.0204974253925626
+16 V3 Bacteria|4 V3 0.000233041055999211 228 54 0.0300309255350078 0.0306531472993643
+17 V3 Bacteria|5 V3 0.000170023412983991 228 28 0.140055803225588 0.144098213677034
+18 V3 Bacteria|6 V3 -0.000129327171064622 228 29 0.250062525713049 0.251031674265311
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileResult.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-3FileResult.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+2 V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+3 V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+4 V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+5 V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+6 V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+7 V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
+8 V2 Bacteria|1 V2 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+9 V2 Bacteria|2 V2 -0.000172924087271453 228 39 0.100173356878955 0.104501595020731
+10 V2 Bacteria|3 V2 0.000176541929148173 228 50 0.200213541203878 0.204974253925626
+11 V3 Bacteria|1 V3 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+12 V3 Bacteria|2 V3 -0.000172924087271453 228 39 0.0100173356878955 0.0104501595020731
+13 V3 Bacteria|3 V3 0.000176541929148173 228 50 0.0200213541203878 0.0204974253925626
+14 V3 Bacteria|4 V3 0.000233041055999211 228 54 0.0300309255350078 0.0306531472993643
+15 V3 Bacteria|5 V3 0.000170023412983991 228 28 0.140055803225588 0.144098213677034
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAltKeyAnswer.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAltKeyAnswer.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,1 @@
+No significant data found.
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAltSigAnswer.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAltSigAnswer.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,1 @@
+No significant data found.
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAnswer.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncSummarizeDirectory-NoFileAnswer.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,1 @@
+No significant data found.
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+Matrix: 1
+Delimiter: TAB
+Name_Row_Number: 1
+Name_Column_Number: 1
+Read_TSV_Rows: 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
+Read_TSV_Columns: 2,3,4,5,6
+
+
+Matrix: 2
+Delimiter: TAB
+Name_Row_Number: 1
+Name_Column_Number: 1
+Read_TSV_Rows: 2,3,4,5,6,7,8,9,10
+Read_TSV_Columns: 2,3,4
+
+
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices1.tsv Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+ Sample1 Sample2 Sample3 Sample4 Sample5
+Feature1 11 12 13 14 15
+Feature2 21 22 23 24 25
+Feature3 31 32 33 34 35
+Feature4 41 42 43 44 45
+Feature5 51 52 53 54 55
+Feature6 61 62 63 64 65
+Feature7 71 72 73 74 75
+Feature8 81 82 83 84 85
+Feature9 91 92 93 94 95
+Feature10 101 102 103 104 105
+Feature11 111 112 113 114 115
+Feature12 121 122 123 124 125
+Feature13 131 132 133 134 135
+Feature14 141 142 143 144 145
+Feature15 151 152 153 154 155
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrices2.tsv Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,10 @@
+ Sample1 Sample2 Sample4
+Feature1 11 12 14
+Feature2 21 22 24
+Feature3 31 32 34
+Feature4 41 42 44
+Feature5 51 52 54
+Feature6 61 62 64
+Feature7 71 72 74
+Feature8 81 82 84
+Feature9 91 92 94
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_AllAnswer.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_AllAnswer.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,8 @@
+Matrix: AllMatrix
+Delimiter: SPACE
+Name_Row_Number: 1
+Name_Column_Number: 1
+Read_TSV_Rows: 1,2,3,4,5
+Read_TSV_Columns: 10,11,12
+
+
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_AppendAnswer.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_AppendAnswer.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+Matrix: SimpleMatrix
+Delimiter: TAB
+Name_Row_Number: 1
+Name_Column_Number: 1
+Read_TSV_Rows: -
+Read_TSV_Columns: -
+
+
+Matrix: SimpleMatrix
+Delimiter: TAB
+Name_Row_Number: 1
+Name_Column_Number: 1
+Read_TSV_Rows: -
+Read_TSV_Columns: -
+
+
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_SimpleAnswer.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteMatrixToReadConfigFile_SimpleAnswer.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,8 @@
+Matrix: SimpleMatrix
+Delimiter: TAB
+Name_Row_Number: 1
+Name_Column_Number: 1
+Read_TSV_Rows: -
+Read_TSV_Columns: -
+
+
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTableTempDF1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTableTempDF1.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,4 @@
+ V1 V2 V3
+1 1 2 3
+2 11 22 33
+3 111 222 333
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTableTempDF2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTableTempDF2.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,8 @@
+ V1 V2 V3
+1 1 2 3
+2 11 22 33
+3 111 222 333
+ V1 V2 V3
+1 1 2 3
+2 11 22 33
+3 111 222 333
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTemp1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTemp1.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,1 @@
+Testing, 1,2,3 anything but that.
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTemp2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTemp2.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,2 @@
+Testing, 1,2,3 anything but that.
+Testing, 1,2,3 anything but that.
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTempDF1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTempDF1.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,4 @@
+   V1  V2  V3
+1   1   2   3
+2  11  22  33
+3 111 222 333
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTempDF2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/answers/FuncWriteTempDF2.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,8 @@
+   V1  V2  V3
+1   1   2   3
+2  11  22  33
+3 111 222 333
+   V1  V2  V3
+1   1   2   3
+2  11  22  33
+3 111 222 333
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/1Matrix.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/1Matrix.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,4 @@
+Matrix: Matrix1
+Delimiter: TAB
+Read_TSV_Rows: -
+Read_TSV_Columns: -
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/2Matrix.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/2Matrix.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,9 @@
+Matrix: Matrix1
+Delimiter: TAB
+Read_TSV_Rows: -
+Read_TSV_Columns: -
+
+Matrix: Matrix2
+Delimiter: TAB
+Read_TSV_Rows: 2-4
+Read_TSV_Columns: 2-4
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/3Matrix.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/3Matrix.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,14 @@
+Matrix: Matrix1
+Delimiter: TAB
+Read_TSV_Rows: -
+Read_TSV_Columns: -
+
+Matrix: Matrix2
+Delimiter: TAB
+Read_TSV_Rows: 2-4
+Read_TSV_Columns: 2-4
+
+Matrix: Matrix3
+Delimiter: TAB
+Read_TSV_Rows: 2-10
+Read_TSV_Columns: 2,3,5
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMaaslin.read.config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMaaslin.read.config Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,7 @@
+Matrix: Metadata 
+Delimiter: TAB
+Read_TSV_Columns: 2,3,4 
+
+Matrix: Abundance 
+Delimiter: TAB
+Read_TSV_Columns: 5-14
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMaaslin.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMaaslin.tsv Sun Feb 08 23:39:43 2015 -0500
b
b'@@ -0,0 +1,251 @@\n+sample\tactivity\tage\tdx\tArchaea|Euryarchaeota|Methanobacteria|Methanobacteriales|Methanobacteriaceae\tArchaea|Euryarchaeota|Methanobacteria|Methanobacteriales|Methanobacteriaceae|Methanobrevibacter\tArchaea|Euryarchaeota|Methanobacteria|Methanobacteriales|Methanobacteriaceae|Methanosphaera\tBacteria\tBacteria|Actinobacteria|Actinobacteria\tBacteria|Actinobacteria|Actinobacteria|Actinomycetales\tBacteria|Actinobacteria|Actinobacteria|Actinomycetales|Actinomycetaceae\tBacteria|Actinobacteria|Actinobacteria|Actinomycetales|Actinomycetaceae|Actinomyces\tBacteria|Actinobacteria|Actinobacteria|Actinomycetales|Actinomycetaceae|Varibaculum\tBacteria|Actinobacteria|Actinobacteria|Actinomycetales|Actinomycetaceae|unclassified\r\n+100001\t \t19\tCD\t0\t0\t0\t1\t0.177746\t0.00108382\t0.00108382\t0.00108382\t0\t0\r\n+100003\t \t26\tCD\t0\t0\t0\t1\t0.0213904\t0.000822707\t0.000822707\t0.000822707\t0\t0\r\n+100009\t \t55\tUC\t0\t0\t0\t1\t0.000457666\t0\t0\t0\t0\t0\r\n+100015\t \t57\tCD\t0.000457038\t0.000457038\t0\t0.999543\t0.0393053\t0.000457038\t0\t0\t0\t0\r\n+100016\t \t46\t \t0\t0\t0\t1\t0.0344828\t0\t0\t0\t0\t0\r\n+100043\t \t21\t \t0\t0\t0\t1\t0.0133779\t0\t0\t0\t0\t0\r\n+100046\t \t61\tCD\t0\t0\t0\t1\t0.0552083\t0.00260417\t0.00260417\t0.00260417\t0\t0\r\n+100047\t \t31\tUC\t0\t0\t0\t1\t0.0135002\t0\t0\t0\t0\t0\r\n+100048\t \t50\tUC\t0\t0\t0\t0.99944\t0.242093\t0.000279877\t0.000279877\t0.000279877\t0\t0\r\n+100049\t \t46\tUC\t0.00139909\t0.00104932\t0.000349773\t0.998601\t0.0255334\t0\t0\t0\t0\t0\r\n+100051\t \t36\tUC\t0\t0\t0\t1\t0.0533784\t0\t0\t0\t0\t0\r\n+100052\t \t23\tCD\t0\t0\t0\t1\t0.00985793\t0\t0\t0\t0\t0\r\n+100058\t \t21\tCD\t0\t0\t0\t1\t0.0308166\t0.000513611\t0.000513611\t0.000513611\t0\t0\r\n+100060\t \t60\tCD\t0\t0\t0\t1\t0.00132406\t0\t0\t0\t0\t0\r\n+100062\t \t12\t \t0\t0\t0\t1\t0.0115964\t0.000386548\t0.000386548\t0.000386548\t0\t0\r\n+100065\t \t20\tCD\t0\t0\t0\t1\t0.0241899\t0.00136924\t0\t0\t0\t0\r\n+100068\t \t33\t 
\t0\t0\t0\t1\t0.0114533\t0.000254518\t0\t0\t0\t0\r\n+100070\t \t12\tUC\t0\t0\t0\t1\t0.00128991\t0\t0\t0\t0\t0\r\n+100071\t \t43\tUC\t0\t0\t0\t1\t0.00809444\t0\t0\t0\t0\t0\r\n+100072\t \t18\tCD\t0\t0\t0\t1\t0.0171569\t0.00122549\t0.00122549\t0.00122549\t0\t0\r\n+100074\t \t54\tCD\t0\t0\t0\t1\t0.0289179\t0\t0\t0\t0\t0\r\n+100075\t \t19\tCD\t0\t0\t0\t1\t0.00620767\t0\t0\t0\t0\t0\r\n+100077\t \t15\tCD\t0\t0\t0\t1\t0.0122675\t0.00118718\t0.00118718\t0.00118718\t0\t0\r\n+100078\t \t31\t \t0\t0\t0\t1\t0.00114635\t0\t0\t0\t0\t0\r\n+100080\t \t74\tUC\t0\t0\t0\t1\t0.242624\t0.00188324\t0.00125549\t0.00125549\t0\t0\r\n+100083\t \t37\tCD\t0\t0\t0\t1\t0.0517868\t0.000302847\t0\t0\t0\t0\r\n+100084\t \t12\tCD\t0\t0\t0\t1\t0.00522718\t0.000402091\t0.000402091\t0.000402091\t0\t0\r\n+100085\t \t34\tCD\t0\t0\t0\t1\t0.0114848\t0\t0\t0\t0\t0\r\n+100086\t \t80\tUC\t0\t0\t0\t1\t0.0117933\t0\t0\t0\t0\t0\r\n+100087\t \t8\tCD\t0\t0\t0\t1\t0.00256598\t0\t0\t0\t0\t0\r\n+100088\t \t29\tCD\t0\t0\t0\t1\t0.0107383\t0\t0\t0\t0\t0\r\n+100089\t \t22\tUC\t0\t0\t0\t1\t0.0128598\t0\t0\t0\t0\t0\r\n+100090\t \t13\t \t0\t0\t0\t1\t0.0109111\t0.000272777\t0\t0\t0\t0\r\n+100091\t \t23\tCD\t0\t0\t0\t1\t0.0398703\t0.000324149\t0.000324149\t0.000324149\t0\t0\r\n+100092\t \t62\tCD\t0\t0\t0\t1\t0.061049\t0.000429923\t0.000429923\t0\t0.000429923\t0\r\n+100095\t \t41\tUC\t0\t0\t0\t1\t0.105263\t0\t0\t0\t0\t0\r\n+100096\t \t18\tUC\t0\t0\t0\t1\t0.0884817\t0.00026178\t0.00026178\t0.00026178\t0\t0\r\n+100099\t \t83\tUC\t0\t0\t0\t1\t0.0463725\t0\t0\t0\t0\t0\r\n+100100\t \t53\tCD\t0\t0\t0\t1\t0.0272953\t0.00330852\t0.00330852\t0.00330852\t0\t0\r\n+100101\t \t58\tCD\t0\t0\t0\t1\t0.00897724\t0.000320616\t0.000320616\t0.000320616\t0\t0\r\n+100102\t \t62\tCD\t0\t0\t0\t1\t0.0607595\t0.00101266\t0\t0\t0\t0\r\n+100104\t \t44\tUC\t0\t0\t0\t1\t0.0655521\t0.00307963\t0.00307963\t0.00307963\t0\t0\r\n+100105\t \t55\tCD\t0\t0\t0\t1\t0.00692259\t0\t0\t0\t0\t0\r\n+100106\t \t26\tUC\t0\t0\t0\t1\t0.0195765\t0\t0\t0\t0\t0\r\n+100107\t 
\t12\tCD\t0\t0\t0\t1\t0.0693363\t0.000741565\t0.000741565\t0.000741565\t0\t0\r\n+100109\t \t48\tUC\t0\t0\t0\t1\t0.00874636\t0.000416493\t0.000416493\t0.000416493\t0\t0\r\n+100115\t \t15\tCD\t0\t0\t0\t1\t0.000385802\t0.000385802\t0.000385802\t0.000385802\t0\t0\r\n+100117\t \t33\tUC\t0\t0\t0\t1\t0.0198915\t0\t0\t0\t0\t0\r\n+100128\t \t8\t \t0\t0\t0\t1\t0.176075\t0.00100806\t0.00100806\t0.00100806\t0\t0\r\n+100143\t \t40\tUC\t0\t0\t0\t1\t0.0766177\t0.0023819\t0.00198491\t0.00198491\t0\t0\r\n+100144\t \t66\tCD\t0\t0\t0\t1\t0.0162602\t0.000706964\t0.000706964\t0.000706964\t0\t0\r\n+100150\t \t9\tCD\t0\t0\t0\t1\t0.0476627\t0\t0\t0\t0\t0\r\n+100151\t \t32\tCD\t0\t0\t0\t1\t0.00895857\t0\t0\t0\t0\t0\r\n+100154\t \t44\tCD\t0\t0\t0\t1\t0.0187793\t0.000521648\t0.000521648\t0.000521648\t0\t0\r\n+100155\t \t11\tUC\t0\t0\t0\t1\t0.0318149\t0.000723066\t0.'..b'7265\t \t57\tUC\t0.00043911\t0.00043911\t0\t0.915544\t0.00102459\t0.00014637\t0.00014637\t0.00014637\t0\t0\r\n+7267\t \t37\tCD\t0\t0\t0\t0.956056\t0.00406204\t0\t0\t0\t0\t0\r\n+7284\t \t38\tCD\t0\t0\t0\t0.973013\t0.0009995\t0\t0\t0\t0\t0\r\n+7287\t \t51\tUC\t0\t0\t0\t0.601478\t0.00564972\t0\t0\t0\t0\t0\r\n+7295\t \t35\tCD\t0\t0\t0\t0.825761\t0.0052249\t0\t0\t0\t0\t0\r\n+7298\t \t57\tHealthy\t0\t0\t0\t0.996292\t0.00370828\t0\t0\t0\t0\t0\r\n+7313\t \t42\tHealthy\t0\t0\t0\t0.994539\t0.00204778\t0\t0\t0\t0\t0\r\n+7325\t \t44\tCD\t0\t0\t0\t0.767925\t0.00283019\t0.00188679\t0\t0\t0\t0\r\n+7347\t \t36\tUC\t0\t0\t0\t1\t0.00273224\t0\t0\t0\t0\t0\r\n+7352\t \t46\tCD\t0\t0\t0\t0.834951\t0.00970874\t0.00970874\t0\t0\t0\t0\r\n+7355\t \t26\tUC\t0\t0\t0\t0.935735\t0.00389484\t0\t0\t0\t0\t0\r\n+7356\t \t50\tHealthy\t0\t0\t0\t0.994746\t0.0140105\t0\t0\t0\t0\t0\r\n+7360\t \t60\tHealthy\t0\t0\t0\t0.871642\t0.00298507\t0.00298507\t0\t0\t0\t0\r\n+7361\t \t58\tHealthy\t0\t0\t0\t0.975364\t0.00447928\t0\t0\t0\t0\t0\r\n+7365\t \t52\tHealthy\t0\t0\t0\t0.992902\t0.00177462\t0\t0\t0\t0\t0\r\n+7385\t 
\t60\tHealthy\t0\t0\t0\t0.947635\t0.00168919\t0\t0\t0\t0\t0\r\n+7429\t \t31\tCD\t0\t0\t0\t0.806763\t0.000536769\t0\t0\t0\t0\t0\r\n+7454\t \t37\tCD\t0\t0\t0\t0.873016\t0.00907029\t0.00226757\t0\t0\t0\t0\r\n+7584\t \t44\tUC\t0\t0\t0\t0.939158\t0.00234009\t0\t0\t0\t0\t0\r\n+7594\t \t28\tCD\t0\t0\t0\t0.993401\t0\t0\t0\t0\t0\t0\r\n+7610\t \t69\tHealthy\t0\t0\t0\t0.982582\t0.00102459\t0.00102459\t0\t0\t0\t0\r\n+7614\t \t60\tCD\t0\t0\t0\t0.996437\t0\t0\t0\t0\t0\t0\r\n+7615\t \t24\tCD\t0\t0\t0\t0.870656\t0\t0\t0\t0\t0\t0\r\n+7621\t \t64\tUC\t0\t0\t0\t0.987464\t0.000569801\t0\t0\t0\t0\t0\r\n+7624\t \t27\tCD\t0\t0\t0\t0.965767\t0.00048216\t0\t0\t0\t0\t0\r\n+7632\t \t25\tCD\t0\t0\t0\t0.977591\t0.00280112\t0.000933707\t0\t0\t0\t0\r\n+7662\t \t31\tHealthy\t0\t0\t0\t1\t0.00213828\t0\t0\t0\t0\t0\r\n+7664\t \t51\tHealthy\t0\t0\t0\t0.987382\t0.00757098\t0\t0\t0\t0\t0\r\n+7749\t \t44\tUC\t0\t0\t0\t0.954764\t0.000962464\t0\t0\t0\t0\t0\r\n+7775\t \t56\tUC\t0\t0\t0\t0.989747\t0.0150376\t0\t0\t0\t0\t0\r\n+7844\t \t31\tHealthy\t0\t0\t0\t1\t0.0167845\t0.000441696\t0.000441696\t0.000441696\t0\t0\r\n+7848\t \t24\tHealthy\t0\t0\t0\t1\t0.0254777\t0.00106157\t0.00106157\t0.00106157\t0\t0\r\n+7855\t \t24\tHealthy\t0\t0\t0\t1\t0.00314465\t0.000628931\t0.000628931\t0.000628931\t0\t0\r\n+7858\t \t26\tHealthy\t0\t0\t0\t1\t0.0264447\t0\t0\t0\t0\t0\r\n+7859\t \t53\tUC\t0\t0\t0\t0.986404\t0.00407886\t0\t0\t0\t0\t0\r\n+7860\t \t22\tHealthy\t0\t0\t0\t1\t0.0560376\t0\t0\t0\t0\t0\r\n+7861\t \t23\tHealthy\t0\t0\t0\t1\t0.0905612\t0\t0\t0\t0\t0\r\n+7862\t \t26\tHealthy\t0\t0\t0\t1\t0.0062819\t0\t0\t0\t0\t0\r\n+7870\t \t26\tHealthy\t0\t0\t0\t1\t0.0276699\t0\t0\t0\t0\t0\r\n+7871\t \t27\tUC\t0\t0\t0\t0.52233\t0.00194175\t0.00194175\t0\t0\t0\t0\r\n+7879\t \t29\tHealthy\t0\t0\t0\t1\t0.0378979\t0\t0\t0\t0\t0\r\n+7899\t \t23\tHealthy\t0\t0\t0\t1\t0.00107875\t0\t0\t0\t0\t0\r\n+7904\t \t23\tHealthy\t0\t0\t0\t1\t0.0112933\t0.000364299\t0\t0\t0\t0\r\n+7906\t 
\t23\tHealthy\t0\t0\t0\t1\t0.00332717\t0\t0\t0\t0\t0\r\n+7908\t \t23\tHealthy\t0\t0\t0\t1\t0.0453906\t0.00070373\t0.00070373\t0.00070373\t0\t0\r\n+7909\t \t24\tHealthy\t0\t0\t0\t1\t0.0708354\t0\t0\t0\t0\t0\r\n+7910\t \t23\tHealthy\t0\t0\t0\t1\t0.0865063\t0\t0\t0\t0\t0\r\n+7911\t \t25\tHealthy\t0\t0\t0\t1\t0.100825\t0.00030553\t0.00030553\t0.00030553\t0\t0\r\n+7912\t \t24\tHealthy\t0\t0\t0\t1\t0.125173\t0.00138568\t0.000923788\t0.000923788\t0\t0\r\n+MGH100512\t \t \t\t0\t0\t0\t1\t0.00465942\t0.000221877\t0.000221877\t0.000221877\t0\t0\r\n+MGH101598\t \t \t\t0\t0\t0\t1\t0.00281796\t0\t0\t0\t0\t0\r\n+MGH101635\t \t \t\t0\t0\t0\t1\t0.00827316\t0.000300842\t0.000300842\t0.000300842\t0\t0\r\n+MGH101746\t \t \t\t0\t0\t0\t1\t0.00923206\t0\t0\t0\t0\t0\r\n+MGH102376\t \t \t\t0\t0\t0\t0.948276\t0.00229885\t0\t0\t0\t0\t0\r\n+MGH102691\t \t \t\t0\t0\t0\t1\t0.00034002\t0.00034002\t0\t0\t0\t0\r\n+MGH102692\t \t \t\t0\t0\t0\t1\t0.00077101\t0.000192753\t0\t0\t0\t0\r\n+MGH102725\t \t \t\t0\t0\t0\t1\t0.00114443\t0.000228885\t0\t0\t0\t0\r\n+MGH102806\t \t \t\t0\t0\t0\t0.815789\t0.00657895\t0.00657895\t0.00328947\t0.00328947\t0\t0\r\n+MGH103070\t \t \t\t0\t0\t0\t1\t0.0003861\t0\t0\t0\t0\t0\r\n+MGH103120\t \t \t \t0\t0\t0\t1\t0.00148258\t0.000185322\t0\t0\t0\t0\r\n+MGH103121\t \t \t \t0\t0\t0\t1\t0.00103869\t0\t0\t0\t0\t0\r\n+MGH103405\t \t \t\t0.000157406\t0.000157406\t0\t0.999843\t0.00629624\t0.0053518\t0\t0\t0\t0\r\n+MGH103562\t \t \t\t0\t0\t0\t1\t0.000701508\t0\t0\t0\t0\t0\r\n+MGH103629\t \t \t\t0\t0\t0\t1\t0.00153846\t0.00153846\t0\t0\t0\t0\r\n+MGH103803\t \t \t\t0\t0\t0\t0.988064\t0.00132626\t0.00132626\t0\t0\t0\t0\r\n+MGH103909\t \t \t\t0\t0\t0\t1\t0.00116356\t0\t0\t0\t0\t0\r\n+MGH103963\t \t \t\t0\t0\t0\t0.885949\t0.00547445\t0.00456204\t0\t0\t0\t0\r\n+MGH104169\t \t \t\t0\t0\t0\t1\t0.00047672\t0\t0\t0\t0\t0\r\n+MGH104504\t \t \t\t0\t0\t0\t1\t0.000837521\t0.000279174\t0\t0\t0\t0\r\n+MGH104890\t \t 
\t\t0\t0\t0\t1\t0.00117233\t0.00104207\t0\t0\t0\t0\r\n+MGH105371\tNA\tNA\tNA\t0\t0\t0\t1\t0.000169895\t0\t0\t0\t0\t0\r\n'
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMatrix.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/TestMatrix.tsv Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,16 @@
+ Sample1 Sample2 Sample3 Sample4 Sample5
+Feature1 11 12 13 14 15
+Feature2 21 22 23 24 25
+Feature3 31 32 33 34 35
+Feature4 41 42 43 44 45
+Feature5 51 52 53 54 55
+Feature6 61 62 63 64 65
+Feature7 71 72 73 74 75
+Feature8 81 82 83 84 85
+Feature9 91 92 93 94 95
+Feature10 101 102 103 104 105
+Feature11 111 112 113 114 115
+Feature12 121 122 123 124 125
+Feature13 131 132 133 134 135
+Feature14 141 142 143 144 145
+Feature15 151 152 153 154 155
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/1/FuncSummarizeDirectory-1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/1/FuncSummarizeDirectory-1.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,11 @@
+Variable Feature Value Coefficient N N not 0 P-value Q-value
+V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.201094523365195 0.215761136458804
+V1 Bacteria|8 V1 -0.000133233647710018 228 28 0.30115927181414323 0.3158300564965765
+V1 Bacteria|9 V1 -0.000390699948289467 228 127 0.40153721372588625 0.4190230198578424
+V1 Bacteria|10 V1 0.000260009506485308 228 110 0.50180198778634843 0.5211433233598216
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-1.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,11 @@
+Variable Feature Value Coefficient N N not 0 P-value Q-value
+V1 Bacteria|1 V1 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+V1 Bacteria|2 V1 -0.000172924087271453 228 39 0.00173356878954918 0.04501595020731
+V1 Bacteria|3 V1 0.000176541929148173 228 50 0.00213541203877865 0.0497425392562556
+V1 Bacteria|4 V1 0.000233041055999211 228 54 0.00309255350077782 0.0653147299364275
+V1 Bacteria|5 V1 0.000170023412983991 228 28 0.0055803225587723 0.0982136770343924
+V1 Bacteria|6 V1 -0.000129327171064622 228 29 0.00625257130491581 0.103167426531111
+V1 Bacteria|7 V1 -0.00246205053294096 228 227 0.20109452336519472 0.215761136458804
+V1 Bacteria|8 V1 -0.000133233647710018 228 28 0.30115927181414323 0.3158300564965765
+V1 Bacteria|9 V1 -0.000390699948289467 228 127 0.40153721372588625 0.4190230198578424
+V1 Bacteria|10 V1 0.000260009506485308 228 110 0.50180198778634843 0.5211433233598216
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-2.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,11 @@
+Variable Feature Value Coefficient N N not 0 P-value Q-value
+V2 Bacteria|1 V2 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+V2 Bacteria|2 V2 -0.000172924087271453 228 39 0.100173356878954918 0.104501595020731
+V2 Bacteria|3 V2 0.000176541929148173 228 50 0.200213541203877865 0.20497425392562556
+V2 Bacteria|4 V2 0.000233041055999211 228 54 0.300309255350077782 0.30653147299364275
+V2 Bacteria|5 V2 0.000170023412983991 228 28 0.40055803225587723 0.440982136770343924
+V2 Bacteria|6 V2 -0.000129327171064622 228 29 0.500625257130491581 0.5103167426531111
+V2 Bacteria|7 V2 -0.00246205053294096 228 227 0.620109452336519472 0.6215761136458804
+V2 Bacteria|8 V2 -0.000133233647710018 228 28 0.730115927181414323 0.73158300564965765
+V2 Bacteria|9 V2 -0.000390699948289467 228 127 0.840153721372588625 0.84190230198578424
+V2 Bacteria|10 V2 0.000260009506485308 228 110 0.950180198778634843 0.95211433233598216
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-3.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/testing/input/funcSummarizeDirectory/3/FuncSummarizeDirectory-3.txt Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,11 @@
+Variable Feature Value Coefficient N N not 0 P-value Q-value
+V3 Bacteria|1 V3 0.000376948962983632 228 78 0.000337563617350514 0.0140710728916636
+V3 Bacteria|2 V3 -0.000172924087271453 228 39 0.0100173356878954918 0.0104501595020731
+V3 Bacteria|3 V3 0.000176541929148173 228 50 0.0200213541203877865 0.020497425392562556
+V3 Bacteria|4 V3 0.000233041055999211 228 54 0.0300309255350077782 0.030653147299364275
+V3 Bacteria|5 V3 0.000170023412983991 228 28 0.140055803225587723 0.1440982136770343924
+V3 Bacteria|6 V3 -0.000129327171064622 228 29 0.2500625257130491581 0.25103167426531111
+V3 Bacteria|7 V3 -0.00246205053294096 228 227 0.3620109452336519472 0.36215761136458804
+V3 Bacteria|8 V3 -0.000133233647710018 228 28 0.4730115927181414323 0.473158300564965765
+V3 Bacteria|9 V3 -0.000390699948289467 228 127 0.5840153721372588625 0.584190230198578424
+V3 Bacteria|10 V3 0.000260009506485308 228 110 0.6950180198778634843 0.695211433233598216
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/transpose.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/src/transpose.py Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#######################################################################################
+# This file is provided under the Creative Commons Attribution 3.0 license.
+#
+# You are free to share, copy, distribute, transmit, or adapt this work
+# PROVIDED THAT you attribute the work to the authors listed below.
+# For more information, please see the following web page:
+# http://creativecommons.org/licenses/by/3.0/
+#
+# This file is a component of the SflE Scientific workFLow Environment for reproducible 
+# research, authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Curtis Huttenhower, chuttenh@hsph.harvard.edu).
+#
+# If you use this environment, the included scripts, or any related code in your work,
+# please let us know, sign up for the SflE user's group (sfle-users@googlegroups.com),
+# pass along any issues or feedback, and we'll let you know as soon as a formal citation
+# is available.
+#######################################################################################
+
+"""
+Examples
+~~~~~~~~
+
+``data.pcl``::
+
+ a b
+ c d
+ e f
+
+``Examples``::
+
+ $ transpose.py < data.pcl
+ a c e
+ b d f
+
+ $ echo "a b c" | transpose.py
+ a
+ b
+ c
+
+.. testsetup::
+
+ from transpose import *
+"""
+
+import argparse
+import csv
+import sys
+
def transpose( aastrIn, ostm ):
    """
    Outputs the matrix transpose of the input tab-delimited rows.

    Rows may have differing lengths: the output has as many rows as the
    widest input row, and any cell an input row does not supply is written
    as an empty string.  Empty input produces no output at all.

    :param aastrIn: Split lines from which data are read.
    :type aastrIn: collection of string collections
    :param ostm: Output stream to which transposed rows are written.
    :type ostm: output stream

    >>> aastrIn = [list(s) for s in ("ab", "cd", "ef")]
    >>> transpose( aastrIn, sys.stdout ) #doctest: +NORMALIZE_WHITESPACE
    a c e
    b d f

    >>> transpose( [list("abc")], sys.stdout ) #doctest: +NORMALIZE_WHITESPACE
    a
    b
    c

    >>> transpose( [["a", "b"], ["c"]], sys.stdout ) #doctest: +NORMALIZE_WHITESPACE
    a c
    b
    """

    # Materialize the input once: it may be a one-shot iterator (e.g. csv.reader)
    # and every row is visited once per output line.
    aastrLines = list( aastrIn )
    # Size the output by the widest row rather than the first one, so that short
    # rows are padded instead of raising IndexError and long rows are not
    # truncated.  The "or [0]" keeps max() valid when there are no rows.
    iWidth = max( [len( astrLine ) for astrLine in aastrLines] or [0] )
    csvw = csv.writer( ostm, csv.excel_tab )
    for iRow in range( iWidth ):
        csvw.writerow( [( astrLine[iRow] if iRow < len( astrLine ) else "" )
            for astrLine in aastrLines] )
+
# Module-level command-line parser.  No options are declared here, so it only
# supplies the automatic -h/--help handling and the usage text reused below.
argp = argparse.ArgumentParser( prog = "transpose.py",
    description = """Transposes a tab-delimited text matrix.

The transposition process is robust to missing elements and rows of differing lengths.""" )
# Prepend the generated help, indented as a reST literal block, to the module
# docstring.  __doc__ is None when docstrings are stripped (python -OO), so fall
# back to an empty string instead of failing with a TypeError at import time.
__doc__ = "::\n\n\t" + argp.format_help( ).replace( "\n", "\n\t" ) + ( __doc__ or "" )
+
def _main( ):
    """
    Command-line entry point: transposes tab-delimited rows read from standard
    input and writes the result to standard output.
    """

    # The parser declares no options, so the parsed namespace is not needed; the
    # call is kept because it handles -h/--help and rejects unexpected arguments.
    argp.parse_args( )
    transpose( csv.reader( sys.stdin, csv.excel_tab ), sys.stdout )

if __name__ == "__main__":
    _main( )
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/test-data/maaslin_input
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/test-data/maaslin_input Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,30 @@
+sample Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8
+Age 87 78 3 2 32 10 39 96
+Cohort Healthy Healthy Healthy Healthy IBD IBD IBD IBD
+Favorite_color Yellow Blue Green Yellow Green Blue Green Blue
+Height 60 72 63 67 71 65 61 64
+Sex 0 1 0 1 1 0 1 0
+Smoking 0 0 1 0 1 1 1 0
+Star_Trek_Fan 1 1 0 0 1 0 0 1
+Weight 151 258 195 172 202 210 139 140
+Bacteria 1 1 1 1 1 1 1 1
+Bacteria|Actinobacteria|Actinobacteria 0.0507585 0.252153 0.161725 0.0996769 0.144075 0.00592628 0.0399472 0.0663809
+Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium|1 0.0507585 0.0861117 0.00168464 0.0011966 0.0164305 0.00592628 0.0367439 0.0663809
+Bacteria|Actinobacteria|Actinobacteria|Coriobacteriales|Coriobacteriaceae|1008 0 0.166041 0.16004 0.0984803 0.127644 0 0.00320332 0
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales 0.210385 0.0229631 0.154874 0.212157 0.044465 0.0861681 0.349727 0.29982
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales|Bacteroidaceae|Bacteroides|101 0.0110852 0.0229631 0.019991 0.0329065 0.044465 0.020979 0 0.0450837
+Bacteria|Bacteroidetes|Bacteroidia|Bacteroidales|Prevotellaceae|1010 0.1993 0 0.134883 0.179251 0 0.065189 0.349727 0.254737
+Bacteria|Firmicutes 0.738856 0.719806 0.67655 0.668541 0.663381 0.730117 0.417939 0.443231
+Bacteria|Firmicutes|Bacilli|Lactobacillales 0.37713 0.0119232 0.0982704 0.102549 0.45307 0.13903 0.0192199 0
+Bacteria|Firmicutes|Bacilli|Lactobacillales|Enterococcaceae|1023 0.290198 0.0119232 0 0.00538471 0.351818 0.0321204 0.0192199 0
+Bacteria|Firmicutes|Bacilli|Lactobacillales|Unclassified|1013 0.0869312 0 0.0982704 0.0971641 0.101253 0.10691 0 0
+Bacteria|Firmicutes|Clostridia|Clostridiales 0.29755 0.562817 0.503145 0.388656 0.143561 0.142349 0.271528 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae 0.233372 0.41157 0.423967 0.329065 0.142226 0.142349 0.266817 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Anaerostipes|1026 0 0 0.143194 0 0.131957 0.142349 0.228754 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Roseburia|1032 0.233372 0.41157 0.280773 0.329065 0.010269 0 0.0380629 0
+Bacteria|Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|1156 0.0641774 0.151248 0.0791779 0.0595908 0.00133498 0 0.00471076 0
+Bacteria|Firmicutes|Erysipelotrichi|Erysipelotrichales|Erysipelotrichaceae|Coprobacillus|1179 0 0.00971517 0.0049416 0.123489 0 0.380586 0 0.380998
+Bacteria|Firmicutes|Unclassified|1232 0.0641774 0.13535 0.0701932 0.0538471 0.0667488 0.0681522 0.127191 0.0622321
+Bacteria|Proteobacteria 0 0.00507838 0.00685085 0.0196243 0.14808 0.177788 0.192387 0.190568
+Bacteria|Proteobacteria|Betaproteobacteria|Burkholderiales|Alcaligenaceae|Parasutterella|1344 0 0.00507838 0.0012354 0.00167524 0.0351201 0 0.00395704 0
+Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia/Shigella|1532 0 0 0.00561545 0.017949 0.11296 0.177788 0.18843 0.190568
\ No newline at end of file
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/test-data/maaslin_output
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/test-data/maaslin_output Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,6 @@
+ Variable Feature Value Coefficient N N.not.0 P.value Q.value
+1 Age Bacteria|Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium|1 Age 0.00247925731553718 8 8 0.000443046842141386 0.0236291649142073
+2 Cohort Bacteria|Proteobacteria CohortIBD 0.361202359969779 8 7 8.29695122618112e-05 0.0132751219618898
+3 Cohort Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia/Shigella|1532 CohortIBD 0.368439847775899 8 6 0.000282569701775158 0.0226055761420126
+4 Cohort Bacteria|Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Roseburia|1032 CohortIBD -0.517733343029902 8 6 0.000628473175503113 0.0251389270201245
+5 Cohort Bacteria|Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|1156 CohortIBD -0.271131332905165 8 6 0.00121369709195569 0.0388383069425819
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/tool_dependencies.xml Sun Feb 08 23:39:43 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <set_environment version="1.0">
+        <environment_variable name="maaslin_SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>   
+    </set_environment>
+</tool_dependency>
b
diff -r 232e262654eb -r ca61989bc3b4 maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/transpose.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maaslin-4450aa4ecc84/maaslin-4450aa4ecc84/transpose.py Sun Feb 08 23:39:43 2015 -0500
[
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#######################################################################################
+# This file is provided under the Creative Commons Attribution 3.0 license.
+#
+# You are free to share, copy, distribute, transmit, or adapt this work
+# PROVIDED THAT you attribute the work to the authors listed below.
+# For more information, please see the following web page:
+# http://creativecommons.org/licenses/by/3.0/
+#
+# This file is a component of the SflE Scientific workFLow Environment for reproducible 
+# research, authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Curtis Huttenhower, chuttenh@hsph.harvard.edu).
+#
+# If you use this environment, the included scripts, or any related code in your work,
+# please let us know, sign up for the SflE user's group (sfle-users@googlegroups.com),
+# pass along any issues or feedback, and we'll let you know as soon as a formal citation
+# is available.
+#######################################################################################
+
+"""
+Examples
+~~~~~~~~
+
+``data.pcl``::
+
+ a b
+ c d
+ e f
+
+``Examples``::
+
+ $ transpose.py < data.pcl
+ a c e
+ b d f
+
+ $ echo "a b c" | transpose.py
+ a
+ b
+ c
+
+.. testsetup::
+
+ from transpose import *
+"""
+
+import argparse
+import csv
+import sys
+
def transpose( aastrIn, ostm ):
    """
    Outputs the matrix transpose of the input tab-delimited rows.

    Rows may have differing lengths: the output has as many rows as the
    widest input row, and any cell an input row does not supply is written
    as an empty string.  Empty input produces no output at all.

    :param aastrIn: Split lines from which data are read.
    :type aastrIn: collection of string collections
    :param ostm: Output stream to which transposed rows are written.
    :type ostm: output stream

    >>> aastrIn = [list(s) for s in ("ab", "cd", "ef")]
    >>> transpose( aastrIn, sys.stdout ) #doctest: +NORMALIZE_WHITESPACE
    a c e
    b d f

    >>> transpose( [list("abc")], sys.stdout ) #doctest: +NORMALIZE_WHITESPACE
    a
    b
    c

    >>> transpose( [["a", "b"], ["c"]], sys.stdout ) #doctest: +NORMALIZE_WHITESPACE
    a c
    b
    """

    # Materialize the input once: it may be a one-shot iterator (e.g. csv.reader)
    # and every row is visited once per output line.
    aastrLines = list( aastrIn )
    # Size the output by the widest row rather than the first one, so that short
    # rows are padded instead of raising IndexError and long rows are not
    # truncated.  The "or [0]" keeps max() valid when there are no rows.
    iWidth = max( [len( astrLine ) for astrLine in aastrLines] or [0] )
    csvw = csv.writer( ostm, csv.excel_tab )
    for iRow in range( iWidth ):
        csvw.writerow( [( astrLine[iRow] if iRow < len( astrLine ) else "" )
            for astrLine in aastrLines] )
+
# Module-level command-line parser.  No options are declared here, so it only
# supplies the automatic -h/--help handling and the usage text reused below.
argp = argparse.ArgumentParser( prog = "transpose.py",
    description = """Transposes a tab-delimited text matrix.

The transposition process is robust to missing elements and rows of differing lengths.""" )
# Prepend the generated help, indented as a reST literal block, to the module
# docstring.  __doc__ is None when docstrings are stripped (python -OO), so fall
# back to an empty string instead of failing with a TypeError at import time.
__doc__ = "::\n\n\t" + argp.format_help( ).replace( "\n", "\n\t" ) + ( __doc__ or "" )
+
def _main( ):
    """
    Command-line entry point: transposes tab-delimited rows read from standard
    input and writes the result to standard output.
    """

    # The parser declares no options, so the parsed namespace is not needed; the
    # call is kept because it handles -h/--help and rejects unexpected arguments.
    argp.parse_args( )
    transpose( csv.reader( sys.stdin, csv.excel_tab ), sys.stdout )

if __name__ == "__main__":
    _main( )