Galaxy |

Changeset 0:2f4f6f08c8c4 (2014-05-13)

Next changeset 1:cd71e90abfab (2015-02-08)

Commit message:
Uploaded

diff -r 000000000000 -r 2f4f6f08c8c4 HMPStool10PCoA.png

Binary file HMPStool10PCoA.png has changed

diff -r 000000000000 -r 2f4f6f08c8c4 MicroPITA.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MicroPITA.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,1145 @@\n+#!/usr/bin/env python\n+"""\n+Author: Timothy Tickle\n+Description: Class to Run analysis for the microPITA paper\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+import sys\n+import argparse\n+from src.breadcrumbs.src.AbundanceTable import AbundanceTable\n+from src.breadcrumbs.src.ConstantsBreadCrumbs import ConstantsBreadCrumbs\n+from src.breadcrumbs.src.Metric import Metric\n+from src.breadcrumbs.src.KMedoids import Kmedoids\n+from src.breadcrumbs.src.MLPYDistanceAdaptor import MLPYDistanceAdaptor\n+from src.breadcrumbs.src.SVM import SVM\n+from src.breadcrumbs.src.UtilityMath import UtilityMath\n+\n+from src.ConstantsMicropita import ConstantsMicropita\n+import csv\n+import logging\n+import math\n+import mlpy\n+import numpy as np\n+import operator\n+import os\n+import random\n+import scipy.cluster.hierarchy as hcluster\n+import scipy.spatial.distance\n+from types import *\n+\n+class MicroPITA:\n+\t"""\n+\tSelects samples from a first tier of a multi-tiered study to be used in a second tier.\n+\tDifferent methods can be used for selection.\n+\tThe expected input is an abundance table (and potentially a text file of targeted features,\n+\tif using the targeted features option). 
Output is a list of samples exhibiting the\n+\tcharacteristics of interest.\n+\t"""\n+\n+\t#Constants\n+\t#Diversity metrics Alpha\n+\tc_strInverseSimpsonDiversity = Metric.c_strInvSimpsonDiversity\n+\tc_strChao1Diversity = Metric.c_strChao1Diversity\n+\n+\t#Diversity metrics Beta\n+\tc_strBrayCurtisDissimilarity = Metric.c_strBrayCurtisDissimilarity\n+\n+\t#Additive inverses of diversity metrics beta\n+\tc_strInvBrayCurtisDissimilarity = Metric.c_strInvBrayCurtisDissimilarity\n+\n+\t#Technique Names\n+\tConstantsMicropita.c_strDiversity2 = ConstantsMicropita.c_strDiversity+"_C"\n+\n+\t#Targeted feature settings\n+\tc_strTargetedRanked = ConstantsMicropita.c_strTargetedRanked\n+\tc_strTargetedAbundance = ConstantsMicropita.c_strTargetedAbundance\n+\n+\t#Technique groupings\n+#\tc_lsDiversityMethods = [ConstantsMicropita.c_strDiversity,ConstantsMicropita.c_strDiversity2]\n+\n+\t#Converts ecology metrics into standardized method selection names\n+\tdictConvertAMetricDiversity = {c_strInverseSimpsonDiversity:ConstantsMicropita.c_strDiversity, c_strChao1Diversity:ConstantsMicropita.c_strDiversity2}\n+#\tdictConvertMicroPITAToAMetric = {ConstantsMicropita.c_strDiversity:c_strInverseSimpsonDiversity, ConstantsMicropita.c_strDiversity2:c_strChao1Diversity}\n+\tdictConvertBMetricToMethod = {c_strBrayCurtisDissimilarity:ConstantsMicropita.c_strRepresentative}\n+\tdictConvertInvBMetricToMethod = {c_strBrayCurtisDi'..b' formatting", "Rarely modified file formatting options" )\n+args.add_argument("-j",ConstantsMicropita.c_strFileDelimiterArgument, dest="cFileDelimiter", metavar="column_delimiter", default="\\t", help=ConstantsMicropita.c_strFileDelimiterHelp) \n+args.add_argument("-k",ConstantsMicropita.c_strFeatureNameDelimiterArgument, dest="cFeatureNameDelimiter", metavar="taxonomy_delimiter", default="|", help=ConstantsMicropita.c_strFeatureNameDelimiterHelp) \n+\n+args = argp.add_argument_group( "Debugging", "Debugging options - modify at your own risk!" 
)\n+args.add_argument("-v",ConstantsMicropita.c_strLoggingArgument, dest="strLogLevel", metavar = "log_level", default="WARNING", \n+\t\t\t\t choices=ConstantsMicropita.c_lsLoggingChoices, help= ConstantsMicropita.c_strLoggingHelp)\n+args.add_argument("-c",ConstantsMicropita.c_strCheckedAbundanceFileArgument, dest="ostmCheckedFile", metavar = "output_qc", type = argparse.FileType("w"), help = ConstantsMicropita.c_strCheckedAbundanceFileHelp)\n+args.add_argument("-g",ConstantsMicropita.c_strLoggingFileArgument, dest="ostmLoggingFile", metavar = "output_log", type = argparse.FileType("w"), help = ConstantsMicropita.c_strLoggingFileHelp)\n+args.add_argument("-u",ConstantsMicropita.c_strSupervisedInputFile, dest="ostmInputPredictFile", metavar = "output_scaled", type = argparse.FileType("w"), help = ConstantsMicropita.c_strSupervisedInputFileHelp)\n+args.add_argument("-p",ConstantsMicropita.c_strSupervisedPredictedFile, dest="ostmPredictFile", metavar = "output_labels", type = argparse.FileType("w"), help = ConstantsMicropita.c_strSupervisedPredictedFileHelp)\n+\n+argp.add_argument("istmInput", metavar = "input.pcl/biome", type = argparse.FileType("rU"), help = ConstantsMicropita.c_strAbundanceFileHelp,\n+\tdefault = sys.stdin)\n+argp.add_argument("ostmOutput", metavar = "output.txt", type = argparse.FileType("w"), help = ConstantsMicropita.c_strGenericOutputDataFileHelp,\n+\tdefault = sys.stdout)\n+\n+__doc__ = "::\\n\\n\\t" + argp.format_help( ).replace( "\\n", "\\n\\t" ) + __doc__\n+\n+def _main( ):\n+\targs = argp.parse_args( )\n+\n+\t#Set up logger\n+\tiLogLevel = getattr(logging, args.strLogLevel.upper(), None)\n+\tlogging.basicConfig(stream = args.ostmLoggingFile if args.ostmLoggingFile else sys.stderr, filemode = \'w\', level=iLogLevel)\n+\n+\t#Run micropita\n+\tlogging.info("MicroPITA:: Start microPITA")\n+\tmicroPITA = MicroPITA()\n+\n+\t#Argparse will append to the default but will not remove the default so I do this here\n+\tif not 
len(args.lstrMethods):\n+\t\targs.lstrMethods = [ConstantsMicropita.c_strRepresentative]\n+\n+\tdictSelectedSamples = microPITA.funcRun(\n+\t\tstrIDName\t\t= args.strIDName,\n+\t\tstrLastMetadataName\t= args.strLastMetadataName,\n+\t\tistmInput\t\t= args.istmInput,\n+\t\tostmInputPredictFile\t= args.ostmInputPredictFile,\n+\t\tostmPredictFile\t\t= args.ostmPredictFile,\n+\t\tostmCheckedFile\t\t= args.ostmCheckedFile,\n+\t\tostmOutput\t\t= args.ostmOutput,\n+\t\tcDelimiter\t\t= args.cFileDelimiter,\n+\t\tcFeatureNameDelimiter\t= args.cFeatureNameDelimiter,\n+\t\tistmFeatures\t\t= args.istmFeatures,\n+\t\tstrFeatureSelection\t= args.strFeatureSelection,\n+\t\tiCount\t\t\t= args.iCount,\n+\t\tstrLastRowMetadata\t= args.strLastFeatureMetadata,\n+\t\tstrLabel\t\t= args.strLabel,\n+\t\tstrStratify\t\t= args.strUnsupervisedStratify,\n+\t\tstrCustomAlpha\t\t= args.strAlphaDiversity,\n+\t\tstrCustomBeta\t\t= args.strBetaDiversity,\n+\t\tstrAlphaMetadata\t= args.strAlphaMetadata,\n+\t\tistmBetaMatrix\t\t= args.istmBetaMatrix,\n+\t\tistrmTree\t\t= args.istrmTree,\n+\t\tistrmEnvr\t\t= args.istrmEnvr,\n+\t\tlstrMethods\t\t= args.lstrMethods,\n+\t\tfInvertDiversity\t= args.fInvertDiversity\n+\t)\n+\n+\tif not dictSelectedSamples:\n+\t\tlogging.error("MicroPITA:: Error, did not get a result from analysis.")\n+\t\treturn -1\n+\tlogging.info("End microPITA")\n+\n+\t#Log output for debugging\n+\tlogging.debug("MicroPITA:: Returned the following samples:"+str(dictSelectedSamples))\n+\n+\t#Write selection to file\n+\tmicroPITA.funcWriteSelectionToFile(dictSelection=dictSelectedSamples, xOutputFilePath=args.ostmOutput)\n+\n+if __name__ == "__main__":\n+\t_main( )\n'

diff -r 000000000000 -r 2f4f6f08c8c4 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,185 @@\n+# Using microPITA commandline #\n+\n+These common commands can be used on the default data set obtained when downloading microPITA, simply cut and paste them into a commandline in the downloaded microPITA directory.\n+\n+\n+## Expected input file. ##\n+\n+I. PCL file or BIOM file\n+\n+BIOM file definition:\n+For BIOM file definition please see http://biom-format.org/\n+\n+PCL file definition:\n+Although some defaults can be changed, microPITA expects a PCL file as an input file. Several PCL files are supplied by default in the input directory. A PCL file is a TEXT delimited file similar to an excel spread sheet with the following characteristics.\n+\n+1. Rows represent metadata and features (bugs), columns represent samples.\n+2. The first row by default should be the sample ids.\n+3. Metadata rows should be next.\n+4. Lastly, rows containing features (bugs) measurements (like abundance) should be after metadata rows.\n+5. The first column should contain the ID describing the column. For metadata this may be, for example, "Age" for a row containing the age of the patients donating the samples. For measurements, this should be the feature name (bug name).\n+5. By default the file is expected to be TAB delimited.\n+6. If a consensus lineage or hierarchy of taxonomy is contained in the feature name, the default delimiter between clades is the pipe ("|").\n+\n+II. Targeted feature file\n+If using the targeted feature methodology, you will need to provide a txt file listing the feature(s) of interest. Each feature should be on it\'s own line and should be written as found in the input PCL file.\n+\n+\n+## Basic unsupervised methods ##\n+Please note, all calls to microPITA should work interchangeably with PCL or BIOM files. 
BIOM files do not require the --lastmeta or --id arguments.\n+\n+There are four unsupervised methods which can be performed:\n+diverse (maximum diversity), extreme (most dissimilar), representative (representative dissimilarity) and features (targeted feature).\n+\n+The first three methods are performed as follows (selecting a default 10 samples):\n+\n+> python MicroPITA.py --lastmeta Label -m representative input/Test.pcl output.txt\n+> python MicroPITA.py -m representative input/Test.biom output.txt\n+\n+> python MicroPITA.py --lastmeta Label -m diverse input/Test.pcl output.txt\n+> python MicroPITA.py -m diverse input/Test.biom output.txt\n+\n+> python MicroPITA.py --lastmeta Label -m extreme input/Test.pcl output.txt\n+> python MicroPITA.py -m extreme input/Test.biom output.txt\n+\n+Each of the previous methods are made up of the following pieces:\n+1. python MicroPITA.py to call the microPITA script.\n+2. --lastmeta which indicates the keyword (first column value) of the last row that contains metadata (PCL input only).\n+3. -m which indicates the method to use in selection.\n+4. input/Test.pcl or input/Test.biom which is the first positional argument indicating an input file\n+5. output.txt which is the second positional argument indicating the location to write to the output file.\n+\n+Selecting specific features has additional arguments to consider --targets (required) and --feature_method (optional).\n+\n+> python MicroPITA.py --lastmeta Label -m features --targets input/TestFeatures.taxa input/Test.pcl output.txt\n+> python MicroPITA.py -m features --targets input/TestFeatures.taxa input/Test.biom output.txt\n+\n+> python MicroPITA.py --lastmeta Label -m features --feature_method abundance --targets input/TestFeatures.taxa input/Test.pcl output.txt\n+> python MicroPITA.py -m features --feature_method abundance --targets input/TestFeatures.taxa input/Test.biom output.txt\n+\n+These additional arguments are described as:\n+1. 
--targets The path to the file that has the features (bugs or clades) of interest. Make sure they are written as they appear in your input file!\n+2. --feature_method is the method of selection used and can be based on ranked abundance ("rank") or abundance ("abundance"). The default value is rank.\n+To differentiate the meth'..b'+\n+Sample Selection:\n+To change the number of selected samples for any method use the -n argument. This example selects 6 representative samples instead of the default 10.\n+\n+> python MicroPITA.py --lastmeta Label -m representative -n 6 input/Test.pcl output.txt\n+> python MicroPITA.py -m representative -n 6 input/Test.biom output.txt\n+\n+When using a supervised method this indicates how many samples will be selected per class of sample. For example if you are performing supervised selection of 6 samples (-n 6) on a dataset with 2 classes (values) in it\'s label row, you will get 6 x 2 = 12 samples. If a class does not have 6 samples in it, you will get the max possible for that class. In a scenario where you are selecting 6 samples (-n 6) and have two classes but one class has only 3 samples then you will get 6 + 3 = 9 selected samples.\n+\n+Stratification:\n+To stratify any method use the --stratify argument which is the first column keyword of the metadata row used to stratify samples before selection occurs. (Selection will occur independently within each strata). This example stratifies diverse selection by the "Label".\n+\n+> python MicroPITA.py --lastmeta Label --stratify Label -m representative input/Test.pcl output.txt\n+> python MicroPITA.py --stratify Label -m representative input/Test.biom output.txt\n+\n+> python MicroPITA.py --lastmeta Label --label Label --stratify StratifyLabel -m distinct input/Test.pcl output.txt\n+> python MicroPITA.py --label Label --stratify StratifyLabel -m distinct input/Test2.biom output.txt \n+\n+Changing PCL file defaults:\n+Some PCL files have feature metadata. 
These are columns of data that comment on bug features (rows) in the file. An example of this could be a certain taxonomy clade for different bug features. If this type of data exists please use -w or --lastFeatureMetadata to indicate the last column of feature metadata before the first column which is a sample. For an example please look in docs for PCL-Description.txt. This only applies to PCL files.\n+\n+> python MicroPITA.py --lastmeta Label -m representative -w taxonomy_5 input/FeatureMetadata.pcl output.txt\n+\n+MicroPITA assumes the first row of the input file is the sample IDs; if it is not, you may use --id to indicate the row.\n+--id expects the entry in the first column of your input file that matches the row used as Sample Ids. See the input file and the following command as an example.\n+This only applies to PCL files.\n+\n+> python MicroPITA.py --id Sample --lastmeta Label -m representative input/Test.pcl output.txt\n+\n+MicroPITA assumes the input file is TAB delimited; we strongly recommend you use this convention. If not, you can use --delim to change the delimiter used to read in the file.\n+Here is an example of reading the comma delimited file micropita/input/CommaDelim.pcl\n+This only applies to PCL files.\n+\n+> python MicroPITA.py --delim , --lastmeta Label -m representative input/CommaDelim.pcl output.txt\n+\n+MicroPITA assumes the input file has feature names in which, if the name contains the consensus lineage or full taxonomic hierarchy, it is delimited with a pipe "|". We strongly recommend you use this default. The delimiter of the feature name can be changed using --featdelim. Here is an example of reading in a file with periods as the delimiter.\n+This only applies to PCL files.\n+\n+> python MicroPITA.py --featdelim . --lastmeta Label -m representative input/PeriodDelim.pcl output.txt\n+\n+\n+## Dependencies ##\n+Please note the following dependencies need to be installed for micropita to run.\n+1. 
Python 2.x\t\thttp://www.python.org/download/\n+2. blist\t\thttp://pypi.python.org/pypi/blist/\n+3. NumPy\t\thttp://numpy.scipy.org/\n+4. SciPy\t\thttp://www.scipy.org/\n+5. PyCogent\t\thttp://pycogent.sourceforge.net/install.html\n+6. mlpy\t\t\thttp://mlpy.sourceforge.net/\n+7. mpi4py\t\thttp://mpi4py.scipy.org/\n+8. BIOM support \thttp://biom-format.org/\n+\n+This covers how to use microPITA. Thank you for using this software and good luck with all your endeavors!\n'

diff -r 000000000000 -r 2f4f6f08c8c4 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<datatypes>
+    <registration>
+        <datatype extension="micropita" type="galaxy.datatypes.data:Text" subclass="true" display_in_upload="true"/>
+    </registration>
+</datatypes>

diff -r 000000000000 -r 2f4f6f08c8c4 input/CommaDelim.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/CommaDelim.pcl Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,229 @@\n+ID,Sample_0_D,Sample_1_D,Sample_2_D,Sample_3_D,Sample_4_D,Sample_5_D,Sample_6_D,Sample_7_D,Sample_8_D,Sample_9_D,Sample_10_D,Sample_11_D,Sample_12_D,Sample_13_D,Sample_14_D,Sample_15_D,Sample_16_R,Sample_17_R,Sample_18_R,Sample_19_R,Sample_20_R,Sample_21_R,Sample_22_R,Sample_23_R,Sample_24_R,Sample_25_R,Sample_26_R,Sample_27_R,Sample_28_R,Sample_29_R,Sample_30_E,Sample_31_E,Sample_32_E,Sample_33_E,Sample_34_E,Sample_35_E,Sample_36_E,Sample_37_E,Sample_38_E,Sample_39_E,Sample_40_E,Sample_41_E,Sample_42_E,Sample_43_E,Sample_44_T,Sample_45_T,Sample_46_T,Sample_47_T\n+Sample,Sample_0_D,Sample_1_D,Sample_2_D,Sample_3_D,Sample_4_D,Sample_5_D,Sample_6_D,Sample_7_D,Sample_8_D,Sample_9_D,Sample_10_D,Sample_11_D,Sample_12_D,Sample_13_D,Sample_14_D,Sample_15_D,Sample_16_R,Sample_17_R,Sample_18_R,Sample_19_R,Sample_20_R,Sample_21_R,Sample_22_R,Sample_23_R,Sample_24_R,Sample_25_R,Sample_26_R,Sample_27_R,Sample_28_R,Sample_29_R,Sample_30_E,Sample_31_E,Sample_32_E,Sample_33_E,Sample_34_E,Sample_35_E,Sample_36_E,Sample_37_E,Sample_38_E,Sample_39_E,Sample_40_E,Sample_41_E,Sample_42_E,Sample_43_E,Sample_44_T,Sample_45_T,Sample_46_T,Sample_47_T\n+Group,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Complex,Moderate_Dissimilarity_Feature,Moderate_Dissimilarity_Feature,Moderate_Dissimilarity_Feature,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,Moderate_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity_Feature,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,High_Dissimilarity,Targeted_Feature,Targeted_Feature,Targeted_Feature,Targeted_Feature\n+Stratify
Label,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2\n+Label,Class-Two,Class-One,Class-Two,Class-One,Class-One,Class-One,Class-One,Class-Two,Class-Two,Class-One,Class-Two,Class-One,Class-One,Class-Two,Class-Two,Class-Two,Class-Two,Class-Two,Class-Two,Class-Two,Class-One,Class-One,Class-One,Class-One,Class-One,Class-Two,Class-One,Class-Two,Class-Two,Class-One,Class-Two,Class-Two,Class-Two,Class-Two,Class-One,Class-One,Class-One,Class-One,Class-One,Class-Two,Class-One,Class-Two,Class-Two,Class-One,Class-Two,Class-Two,Class-Two,Class-Two\n+Root|Taxa_0,0,0.132639108,51.67205155,0,0,0,51.7444670649,0,0,0,0,0,51.2083349844,52.1495033914,0,54.2809813981,51.6829297536,0,0.3123676392,0,0,0,0,0,0.2166953032,0,0.2411828448,0,0,0,0,0,0,0.3122296644,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1772302872\n+Root|Taxa_1,54.0993873098,0,0.441962075,0,0,0,50.6647838005,53.447223597,50.5817481168,0,53.5412967792,0,0,0,0,0,54.5327122192,0,0,0,0,0,0,0,0,0,0,0,0.3066743566,0,0.4312838574,0,0.3731209223,0,0,0,0,0,0,0,0,0,0,0,0,0.1274865184,0,0\n+Root|Taxa_2,51.461026897,0,0,0,0,53.0265676376,0,0,0,0,54.047444278,0,52.5264375555,0,0,0,54.9402852499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4223345514,0,0,0,0,0.4607446961,0,0.3442454432,0,0,0,0,0,0,0,0\n+Root|Taxa_3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,53.7506605469,0,0.4148157079,0,0,0,0.2685767173,0,0,0,0,0,0.3663062251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50.5269874981,51.8543446876,53.7681811219,52.6344247487\n+Root|Taxa_4,50.2405694418,51.7777654529,0,54.8458795552,0,0,0,0,0,0,0,53.8808302665,0,0,0,0,50.0282264237,0,0,0,0.0980723897,0,0.3886813644,0,0,0,0,0,0.4286598254,0,0,0,0.4731642927,0,0,0,0,0.1568392012,0,0,0,0,0,0,0,0,0,0\n+Root|Taxa_5,0,0,0,0,0,51.9510168529,0.2296159024,52.9698629485,0,0,0,52.1974377835,0,0,0.2252690679,0,53.653338634,0,0,0,0,0,0,0,0,0.3963056725,0,0,0.0678330435,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\n+Root|Taxa_6,0,0,0,0,0,54.5673895399,0,0,0,0,51.1909575326,0.2522232281,0,52.4007159288,54.53719991
5,0,51.323920'..b'3.8938971864,52.5574477678,53.8098522525,0,0,0,0,0,0,0,0,0,0,0,0,54.2401876661,0.2242680732,0.115238483,0,0,0,0,0.4881061169,0,0,0,0,0,0,102.186344705,0,0,0,0,0\n+Root|Taxa_207,51.2183210273,51.1563774994,0,0,54.1762942805,0,0,0,51.2704897195,52.8968592985,0,52.288966104,0,0,0,54.3906412249,0,0,0,0,0.0048042154,0,0,0,0,0,0,0,51.2408250309,0,0,0,0,0,0,0,0,0,0,0,0,0,103.110597368,0,0,0,0,0\n+Root|Taxa_208,0,0,0,0.0546805022,0,0.1599409596,50.0472074245,0.4099746803,51.4051352965,0,0,0,0,53.2887290406,0,0.0848935397,0,0,0,0,0,0,0,0.1555916655,0,0,0,0,0,53.0135385478,0.1497035187,0,0.0691993821,0,0.3373772825,0,0,0,0,0,0,0,0,0,0.2522945018,0,0,0\n+Root|Taxa_209,0,50.1030172793,0,0,0,0,54.1650217307,52.2230603121,0,0,0,0,0,0,0,0.1143171628,0,0,0,0,0,0,0,0,0,0,0,0,0,51.8871962649,0.299430337,0,0,0,0,0,0,0.4798221512,0,0,0,0,0,0,0,0,0,0\n+Root|Taxa_210,0,0,0,0,0,0,50.5549695516,52.1576796275,0,0,0,0,0,0,0,52.3992524271,0,0,0,0.1706714754,0,0,0,0,0.3811782646,0.0008517706,0,0,0,51.0207565229,0,0,0,0,0,0,0,0,0,0,0.0102807436,0,0,0,0,0.0085181585,0,0\n+Root|Taxa_211,50.536257303,0,0,0,0,0,0,0,0,0,51.9813477573,0,51.5911214861,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,53.1296406718,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4077664043,0\n+Root|Taxa_212,0,0,0,0,0.4699813783,0,0,0,0,54.5530559822,0,50.743119663,51.3155881447,0,0,53.2941828392,0,0,0,0,0,0,0,0,0,0,0.0342022552,0,0,54.1038020638,0,0,0,0.040960994,0,0,0,0.4947938425,0,0,0,0,0.3862038599,0,0,0,0,0\n+Root|Taxa_213,0.3399472909,0,0,0,0,0,54.6671775034,0.050131803,50.2999962265,0,51.8713213189,0,0,0,53.2943170335,51.1243603164,0,0,0,0,0,0,0,0,0,0,0,0,0,50.6101294432,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\n+Root|Taxa_214,50.0115213059,0,0,0,0,50.8371273498,0,0,0,54.4139864126,0,0,0,0,0,0,0,0,0,0.3015435222,0,0,0,0,0,0,0,0.0872919265,0.2777439996,50.3179817753,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4557640334,0,0,0\n+Root|Taxa_215,0,0.4561298327,0,53.5216413012,54.8891010063,50.9129555637,0,0,0,0,0,0,0,0,53.4880926895,0,0,0,0,0,0,0,0,
0,0,0,0,0.1940475073,0.2803421878,50.3585027491,0,0,0,0.345883306,0.3534255574,0,0,0,0,0,0.2075798899,0,0.4493876473,0,0,0,0,0\n+Root|Taxa_216,0,52.664567111,54.9286304319,0,0,0.3528460271,0,0,0,53.8271900954,53.2864160864,0,0,53.8916659856,0,0,0.0390396631,0,0,0,0,0,0,0,0,0,0,0,0.0501403367,53.9106611035,0,0,0,0,0,0,0,0,0,0,0,0,0,102.503416561,0,0,0.2323723816,0\n+Root|Taxa_217,0,0,0,53.8189770697,0,0,0,0,0,0,51.0739213403,51.9325727684,53.6586894371,0,50.8848681615,0,0,0,0,0,0,0.4134750657,0.2984209586,0,0,0,0,0.3984011824,0,50.0637489202,0.1745273923,0,0,0,0,0,0,0,0,0,0,0,0.0970082737,102.133370457,0,0,0,0\n+Root|Taxa_218,53.6309027759,0,0,54.1993531611,0,53.8609164351,0,0,0,0,0,0,0,0,0,0,0,0,0.4230445366,0,0,0,0,0,0,0.3460879368,0,0,0,54.0328089965,0,0,0,0,0,0,0,0,0,0,0.1875089533,0,0,102.742579014,0,0,0,0\n+Root|Taxa_219,0,0,50.6959593123,0,0,0,0,0,52.9583336932,51.2063060177,0,0,0,0,0.1839092432,0.1624310372,0,0,0.2217437424,0,0,0,0,0,0,0,0,0,0,51.3989543421,0.1469196015,0,0,0.4515484111,0,0,0,0,0,0,0.362398058,0,0,103.426555217,0,0,0,0\n+Root|Taxa_220,53.138860148,54.260252989,50.1122213715,0,53.7052604162,0,0.4760342302,0,51.9385050693,0.191283382,0,0,51.217538964,0,0.2580800578,50.1174597734,0,0,0,0,0,0,0.115889335,0,0,0,0,0,0,53.3111145776,0,0,0,0,0,0,0,0,0,0,0,0,0,104.430682236,0,0,0,0\n+Root|Taxa_221,0,0.3384535892,0.4106893002,52.1411487055,50.5241190477,0,0,53.9774121283,0,0,0,52.1082021353,0,0,0,0,0.4172253944,0,0,0,0,0,0,0,0,0,0,0,0,50.9817139231,0,0,0,0,0,0,0,0,0,0.4224842988,0,0,0,102.376952212,0.2260951816,0,0.3067861355,0\n+Root|Taxa_222,0,52.3172435891,52.6471280765,0,51.567115685,0,0,53.6724555185,0,0,0,0,0,54.2428549188,51.6482654065,0,0,0,0,0,0,0.2425898966,0,0,0,0,0,0,0,51.1118820396,0,0,0,0,0,0,0,0,0,0,0.2951844288,0,0,102.047792613,0,0,0,0\n+Root|Taxa_223,0,0,0.2600381862,0,0,50.2577764475,0.4992511416,0,0,0,0,51.3720073928,0,53.1526569712,0.3864806128,0,0,0.0519771654,0,0,0,0,0,0.3896198936,0,0,0,0.4447746424,0,51.736707456,0,0,0,0,0,0,0,
0,0,0,0,0,0,103.397909634,0,0,0,0\n'

diff -r 000000000000 -r 2f4f6f08c8c4 input/FeatureMetadata.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/FeatureMetadata.pcl Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,8 @@
+ID taxonomy_0 taxonomy_1 taxonomy_2 taxonomy_3 taxonomy_4 taxonomy_5 70009986 70009894 70008980 70009988 70003470 70007472 70003744 70007476 70003274 70476
+TID NA NA NA NA NA NA CohortA CohortA CohortB CohortB CohortB CohortA CohortA CohortB CohortA CohortB
+Label NA NA NA NA NA NA L_Antecubital_fossa R_Retroauricular_crease Subgingival_plaque R_Antecubital_fossa L_Retroauricular_crease R_Retroauricular_crease L_Antecubital_fossa R_Antecubital_fossa L_Antecubital_fossa R_Antecubital_fossa
+72 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 1.23 0 12 0 6 0 2 1 2 1
+4904 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 0 10 43 6 0 23 0 6 5 53
+1361 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 3 0 29 0 45 0 1 1 1 3
+3417 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 0 45 34 3 0 0 0 4 0 3
+1368 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 5 0 2 0 6 0 1 1 3 1

diff -r 000000000000 -r 2f4f6f08c8c4 input/PeriodDelim.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/PeriodDelim.pcl Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,229 @@\n+ID\tSample_0_D\tSample_1_D\tSample_2_D\tSample_3_D\tSample_4_D\tSample_5_D\tSample_6_D\tSample_7_D\tSample_8_D\tSample_9_D\tSample_10_D\tSample_11_D\tSample_12_D\tSample_13_D\tSample_14_D\tSample_15_D\tSample_16_R\tSample_17_R\tSample_18_R\tSample_19_R\tSample_20_R\tSample_21_R\tSample_22_R\tSample_23_R\tSample_24_R\tSample_25_R\tSample_26_R\tSample_27_R\tSample_28_R\tSample_29_R\tSample_30_E\tSample_31_E\tSample_32_E\tSample_33_E\tSample_34_E\tSample_35_E\tSample_36_E\tSample_37_E\tSample_38_E\tSample_39_E\tSample_40_E\tSample_41_E\tSample_42_E\tSample_43_E\tSample_44_T\tSample_45_T\tSample_46_T\tSample_47_T\n+Sample\tSample_0_D\tSample_1_D\tSample_2_D\tSample_3_D\tSample_4_D\tSample_5_D\tSample_6_D\tSample_7_D\tSample_8_D\tSample_9_D\tSample_10_D\tSample_11_D\tSample_12_D\tSample_13_D\tSample_14_D\tSample_15_D\tSample_16_R\tSample_17_R\tSample_18_R\tSample_19_R\tSample_20_R\tSample_21_R\tSample_22_R\tSample_23_R\tSample_24_R\tSample_25_R\tSample_26_R\tSample_27_R\tSample_28_R\tSample_29_R\tSample_30_E\tSample_31_E\tSample_32_E\tSample_33_E\tSample_34_E\tSample_35_E\tSample_36_E\tSample_37_E\tSample_38_E\tSample_39_E\tSample_40_E\tSample_41_E\tSample_42_E\tSample_43_E\tSample_44_T\tSample_45_T\tSample_46_T\tSample_47_T\n+Group\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tModerate_Dissimilarity_Feature\tModerate_Dissimilarity_Feature\tModerate_Dissimilarity_Feature\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity_Feature\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarit
y\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tTargeted_Feature\tTargeted_Feature\tTargeted_Feature\tTargeted_Feature\n+StratifyLabel\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\n+Label\tClass-Two\tClass-One\tClass-Two\tClass-One\tClass-One\tClass-One\tClass-One\tClass-Two\tClass-Two\tClass-One\tClass-Two\tClass-One\tClass-One\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-One\tClass-One\tClass-One\tClass-One\tClass-One\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-One\tClass-One\tClass-One\tClass-One\tClass-One\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-Two\tClass-Two\n+Root.Taxa_0\t0\t0.132639108\t51.67205155\t0\t0\t0\t51.7444670649\t0\t0\t0\t0\t0\t51.2083349844\t52.1495033914\t0\t54.2809813981\t51.6829297536\t0\t0.3123676392\t0\t0\t0\t0\t0\t0.2166953032\t0\t0.2411828448\t0\t0\t0\t0\t0\t0\t0.3122296644\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1772302872\n+Root.Taxa_1\t54.0993873098\t0\t0.441962075\t0\t0\t0\t50.6647838005\t53.447223597\t50.5817481168\t0\t53.5412967792\t0\t0\t0\t0\t0\t54.5327122192\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.3066743566\t0\t0.4312838574\t0\t0.3731209223\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1274865184\t0\t0\n+Root.Taxa_2\t51.461026897\t0\t0\t0\t0\t53.0265676376\t0\t0\t0\t0\t54.047444278\t0\t52.5264375555\t0\t0\t0\t54.9402852499\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4223345514\t0\t0\t0\t0\t0.4607446961\t0\t0.3442454432\t0\t0\t0\t0\t0\t0\t0\t0\n+Root.Taxa_3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t53.7506605469\t0\t0.4148157079\t0\t0\t0\t0.2685767173\t0\t0\t0\t0\t0\t0.3663062251\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t50.5269874981\t51.8543446876\t53.7681811219\t52.6344247487\n+Root.Taxa_4\t50.2405694418\t51.7777654529\t0\t54.8458795552\t0\t0\t0\t0\t0\t0\t0\t53.8808302
665\t0\t0\t0\t0\t50.0282264237\t0\t0\t0\t0.0980723897\t0\t0.3886813644\t0\t0\t0\t0\t0\t0.4286598254\t0\t0\t0\t0.4731642927\t0\t0\t0\t0\t0.1568392012\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root.Taxa_5\t0\t0\t0\t0\t0\t51.9510168529\t0.2296159024\t52.9698629485\t0\t0\t0\t52.1974377835\t0\t0\t0.2252690679\t0\t53.653338634\t0\t0\t0\t0\t0\t0\t0\t0\t0.3963056725\t0\t0\t0.0678330435\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root.Taxa_6\t0\t0\t0\t0\t0\t54.5673895399\t0\t0\t0\t0\t51.1909575326\t0.2522232281\t0\t52.4007159288\t54.537199915\t0\t51.323920'..b'3.8938971864\t52.5574477678\t53.8098522525\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t54.2401876661\t0.2242680732\t0.115238483\t0\t0\t0\t0\t0.4881061169\t0\t0\t0\t0\t0\t0\t102.186344705\t0\t0\t0\t0\t0\n+Root.Taxa_207\t51.2183210273\t51.1563774994\t0\t0\t54.1762942805\t0\t0\t0\t51.2704897195\t52.8968592985\t0\t52.288966104\t0\t0\t0\t54.3906412249\t0\t0\t0\t0\t0.0048042154\t0\t0\t0\t0\t0\t0\t0\t51.2408250309\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t103.110597368\t0\t0\t0\t0\t0\n+Root.Taxa_208\t0\t0\t0\t0.0546805022\t0\t0.1599409596\t50.0472074245\t0.4099746803\t51.4051352965\t0\t0\t0\t0\t53.2887290406\t0\t0.0848935397\t0\t0\t0\t0\t0\t0\t0\t0.1555916655\t0\t0\t0\t0\t0\t53.0135385478\t0.1497035187\t0\t0.0691993821\t0\t0.3373772825\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.2522945018\t0\t0\t0\n+Root.Taxa_209\t0\t50.1030172793\t0\t0\t0\t0\t54.1650217307\t52.2230603121\t0\t0\t0\t0\t0\t0\t0\t0.1143171628\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t51.8871962649\t0.299430337\t0\t0\t0\t0\t0\t0\t0.4798221512\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root.Taxa_210\t0\t0\t0\t0\t0\t0\t50.5549695516\t52.1576796275\t0\t0\t0\t0\t0\t0\t0\t52.3992524271\t0\t0\t0\t0.1706714754\t0\t0\t0\t0\t0.3811782646\t0.0008517706\t0\t0\t0\t51.0207565229\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0102807436\t0\t0\t0\t0\t0.0085181585\t0\t0\n+Root.Taxa_211\t50.536257303\t0\t0\t0\t0\t0\t0\t0\t0\t0\t51.9813477573\t0\t51.5911214861\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t53.1296406718\t
0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4077664043\t0\n+Root.Taxa_212\t0\t0\t0\t0\t0.4699813783\t0\t0\t0\t0\t54.5530559822\t0\t50.743119663\t51.3155881447\t0\t0\t53.2941828392\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0342022552\t0\t0\t54.1038020638\t0\t0\t0\t0.040960994\t0\t0\t0\t0.4947938425\t0\t0\t0\t0\t0.3862038599\t0\t0\t0\t0\t0\n+Root.Taxa_213\t0.3399472909\t0\t0\t0\t0\t0\t54.6671775034\t0.050131803\t50.2999962265\t0\t51.8713213189\t0\t0\t0\t53.2943170335\t51.1243603164\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t50.6101294432\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root.Taxa_214\t50.0115213059\t0\t0\t0\t0\t50.8371273498\t0\t0\t0\t54.4139864126\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.3015435222\t0\t0\t0\t0\t0\t0\t0\t0.0872919265\t0.2777439996\t50.3179817753\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4557640334\t0\t0\t0\n+Root.Taxa_215\t0\t0.4561298327\t0\t53.5216413012\t54.8891010063\t50.9129555637\t0\t0\t0\t0\t0\t0\t0\t0\t53.4880926895\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1940475073\t0.2803421878\t50.3585027491\t0\t0\t0\t0.345883306\t0.3534255574\t0\t0\t0\t0\t0\t0.2075798899\t0\t0.4493876473\t0\t0\t0\t0\t0\n+Root.Taxa_216\t0\t52.664567111\t54.9286304319\t0\t0\t0.3528460271\t0\t0\t0\t53.8271900954\t53.2864160864\t0\t0\t53.8916659856\t0\t0\t0.0390396631\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0501403367\t53.9106611035\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t102.503416561\t0\t0\t0.2323723816\t0\n+Root.Taxa_217\t0\t0\t0\t53.8189770697\t0\t0\t0\t0\t0\t0\t51.0739213403\t51.9325727684\t53.6586894371\t0\t50.8848681615\t0\t0\t0\t0\t0\t0\t0.4134750657\t0.2984209586\t0\t0\t0\t0\t0.3984011824\t0\t50.0637489202\t0.1745273923\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0970082737\t102.133370457\t0\t0\t0\t0\n+Root.Taxa_218\t53.6309027759\t0\t0\t54.1993531611\t0\t53.8609164351\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4230445366\t0\t0\t0\t0\t0\t0\t0.3460879368\t0\t0\t0\t54.0328089965\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1875089533\t0\t0\t102.742579014\t0\t0\t0\t0\n+Root.Taxa_219\t0\t0\t
50.6959593123\t0\t0\t0\t0\t0\t52.9583336932\t51.2063060177\t0\t0\t0\t0\t0.1839092432\t0.1624310372\t0\t0\t0.2217437424\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t51.3989543421\t0.1469196015\t0\t0\t0.4515484111\t0\t0\t0\t0\t0\t0\t0.362398058\t0\t0\t103.426555217\t0\t0\t0\t0\n+Root.Taxa_220\t53.138860148\t54.260252989\t50.1122213715\t0\t53.7052604162\t0\t0.4760342302\t0\t51.9385050693\t0.191283382\t0\t0\t51.217538964\t0\t0.2580800578\t50.1174597734\t0\t0\t0\t0\t0\t0\t0.115889335\t0\t0\t0\t0\t0\t0\t53.3111145776\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t104.430682236\t0\t0\t0\t0\n+Root.Taxa_221\t0\t0.3384535892\t0.4106893002\t52.1411487055\t50.5241190477\t0\t0\t53.9774121283\t0\t0\t0\t52.1082021353\t0\t0\t0\t0\t0.4172253944\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t50.9817139231\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4224842988\t0\t0\t0\t102.376952212\t0.2260951816\t0\t0.3067861355\t0\n+Root.Taxa_222\t0\t52.3172435891\t52.6471280765\t0\t51.567115685\t0\t0\t53.6724555185\t0\t0\t0\t0\t0\t54.2428549188\t51.6482654065\t0\t0\t0\t0\t0\t0\t0.2425898966\t0\t0\t0\t0\t0\t0\t0\t51.1118820396\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.2951844288\t0\t0\t102.047792613\t0\t0\t0\t0\n+Root.Taxa_223\t0\t0\t0.2600381862\t0\t0\t50.2577764475\t0.4992511416\t0\t0\t0\t0\t51.3720073928\t0\t53.1526569712\t0.3864806128\t0\t0\t0.0519771654\t0\t0\t0\t0\t0\t0.3896198936\t0\t0\t0\t0.4447746424\t0\t51.736707456\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t103.397909634\t0\t0\t0\t0\n'

diff -r 000000000000 -r 2f4f6f08c8c4 input/Test-env.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/Test-env.txt Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,10083 @@\n+Taxa_0\tSample_0_D\t0\n+Taxa_1\tSample_0_D\t54\n+Taxa_2\tSample_0_D\t51\n+Taxa_3\tSample_0_D\t0\n+Taxa_4\tSample_0_D\t50\n+Taxa_5\tSample_0_D\t0\n+Taxa_6\tSample_0_D\t0\n+Taxa_7\tSample_0_D\t0\n+Taxa_8\tSample_0_D\t50\n+Taxa_9\tSample_0_D\t0\n+Taxa_10\tSample_0_D\t0\n+Taxa_11\tSample_0_D\t0\n+Taxa_12\tSample_0_D\t0\n+Taxa_13\tSample_0_D\t0\n+Taxa_14\tSample_0_D\t0\n+Taxa_15\tSample_0_D\t0\n+Taxa_16\tSample_0_D\t51\n+Taxa_18\tSample_0_D\t0\n+Taxa_19\tSample_0_D\t0\n+Taxa_20\tSample_0_D\t0\n+Taxa_21\tSample_0_D\t50\n+Taxa_22\tSample_0_D\t0\n+Taxa_23\tSample_0_D\t55\n+Taxa_24\tSample_0_D\t53\n+Taxa_25\tSample_0_D\t0\n+Taxa_26\tSample_0_D\t0\n+Taxa_27\tSample_0_D\t0\n+Taxa_28\tSample_0_D\t0\n+Taxa_29\tSample_0_D\t0\n+Taxa_30\tSample_0_D\t0\n+Taxa_31\tSample_0_D\t0\n+Taxa_32\tSample_0_D\t0\n+Taxa_33\tSample_0_D\t0\n+Taxa_34\tSample_0_D\t53\n+Taxa_35\tSample_0_D\t52\n+Taxa_36\tSample_0_D\t0\n+Taxa_37\tSample_0_D\t0\n+Taxa_38\tSample_0_D\t0\n+Taxa_40\tSample_0_D\t0\n+Taxa_41\tSample_0_D\t51\n+Taxa_42\tSample_0_D\t0\n+Taxa_43\tSample_0_D\t0\n+Taxa_44\tSample_0_D\t0\n+Taxa_45\tSample_0_D\t0\n+Taxa_46\tSample_0_D\t51\n+Taxa_47\tSample_0_D\t0\n+Taxa_48\tSample_0_D\t52\n+Taxa_49\tSample_0_D\t0\n+Taxa_50\tSample_0_D\t0\n+Taxa_51\tSample_0_D\t0\n+Taxa_52\tSample_0_D\t55\n+Taxa_53\tSample_0_D\t0\n+Taxa_54\tSample_0_D\t0\n+Taxa_55\tSample_0_D\t0\n+Taxa_56\tSample_0_D\t55\n+Taxa_57\tSample_0_D\t0\n+Taxa_58\tSample_0_D\t0\n+Taxa_59\tSample_0_D\t54\n+Taxa_60\tSample_0_D\t0\n+Taxa_61\tSample_0_D\t0\n+Taxa_62\tSample_0_D\t0\n+Taxa_63\tSample_0_D\t0\n+Taxa_64\tSample_0_D\t0\n+Taxa_65\tSample_0_D\t0\n+Taxa_66\tSample_0_D\t0\n+Taxa_67\tSample_0_D\t53\n+Taxa_68\tSample_0_D\t51\n+Taxa_69\tSample_0_D\t51\n+Taxa_70\tSample_0_D\t0\n+Taxa_71\tSample_0_D\t0\n+Taxa_72\tSample_0_D\t0\n+Taxa_73\tSample_0_D\t0\n+Taxa_74\tSample_0_D\t0\n+Taxa_75\tSample_0_D\t0\n+Taxa_76\tSample_0_D\t0\n+Taxa_77\tSample_0_D\t0\n+Taxa_78\tSample_0_D\t0\n+Taxa_79\tSample_0_D\t53\n+Taxa_80\tSample_
0_D\t55\n+Taxa_81\tSample_0_D\t0\n+Taxa_82\tSample_0_D\t0\n+Taxa_83\tSample_0_D\t0\n+Taxa_84\tSample_0_D\t0\n+Taxa_85\tSample_0_D\t0\n+Taxa_87\tSample_0_D\t54\n+Taxa_88\tSample_0_D\t0\n+Taxa_89\tSample_0_D\t53\n+Taxa_90\tSample_0_D\t0\n+Taxa_91\tSample_0_D\t0\n+Taxa_92\tSample_0_D\t0\n+Taxa_93\tSample_0_D\t0\n+Taxa_95\tSample_0_D\t51\n+Taxa_96\tSample_0_D\t0\n+Taxa_97\tSample_0_D\t54\n+Taxa_98\tSample_0_D\t0\n+Taxa_99\tSample_0_D\t0\n+Taxa_100\tSample_0_D\t50\n+Taxa_101\tSample_0_D\t0\n+Taxa_102\tSample_0_D\t53\n+Taxa_103\tSample_0_D\t0\n+Taxa_104\tSample_0_D\t0\n+Taxa_105\tSample_0_D\t0\n+Taxa_106\tSample_0_D\t0\n+Taxa_107\tSample_0_D\t55\n+Taxa_108\tSample_0_D\t0\n+Taxa_109\tSample_0_D\t0\n+Taxa_110\tSample_0_D\t0\n+Taxa_111\tSample_0_D\t0\n+Taxa_112\tSample_0_D\t53\n+Taxa_113\tSample_0_D\t0\n+Taxa_114\tSample_0_D\t0\n+Taxa_115\tSample_0_D\t0\n+Taxa_116\tSample_0_D\t0\n+Taxa_117\tSample_0_D\t54\n+Taxa_118\tSample_0_D\t0\n+Taxa_119\tSample_0_D\t52\n+Taxa_120\tSample_0_D\t0\n+Taxa_121\tSample_0_D\t0\n+Taxa_122\tSample_0_D\t51\n+Taxa_123\tSample_0_D\t0\n+Taxa_124\tSample_0_D\t0\n+Taxa_125\tSample_0_D\t0\n+Taxa_126\tSample_0_D\t0\n+Taxa_127\tSample_0_D\t0\n+Taxa_129\tSample_0_D\t0\n+Taxa_130\tSample_0_D\t0\n+Taxa_131\tSample_0_D\t52\n+Taxa_132\tSample_0_D\t0\n+Taxa_133\tSample_0_D\t0\n+Taxa_136\tSample_0_D\t50\n+Taxa_137\tSample_0_D\t0\n+Taxa_138\tSample_0_D\t0\n+Taxa_139\tSample_0_D\t0\n+Taxa_140\tSample_0_D\t55\n+Taxa_141\tSample_0_D\t0\n+Taxa_142\tSample_0_D\t0\n+Taxa_143\tSample_0_D\t0\n+Taxa_144\tSample_0_D\t0\n+Taxa_145\tSample_0_D\t0\n+Taxa_146\tSample_0_D\t0\n+Taxa_147\tSample_0_D\t0\n+Taxa_149\tSample_0_D\t54\n+Taxa_150\tSample_0_D\t0\n+Taxa_151\tSample_0_D\t0\n+Taxa_152\tSample_0_D\t53\n+Taxa_153\tSample_0_D\t55\n+Taxa_154\tSample_0_D\t0\n+Taxa_155\tSample_0_D\t0\n+Taxa_156\tSample_0_D\t0\n+Taxa_157\tSample_0_D\t51\n+Taxa_158\tSample_0_D\t0\n+Taxa_159\tSample_0_D\t0\n+Taxa_160\tSample_0_D\t0\n+Taxa_161\tSample_0_D\t0\n+Taxa_162\tSample_0_D\t53\n+Taxa_164\tSa
mple_0_D\t0\n+Taxa_165\tSample_0_D\t52\n+Taxa_166\tSample_0_D\t53\n+Taxa_168\tSample_0_D\t0\n+Taxa_169\tSample_0_D\t0\n+Taxa_170\tSample_0_D\t0\n+Taxa_171\tSample_0_D\t0\n+Taxa_172\tSample_0_D\t51\n+Taxa_173\tSample_0_D\t0\n+Taxa_174\tSample_0_D\t0\n+Taxa_175\tSample_0_D\t0\n+Taxa_176\tSample_0_D\t0\n+Taxa_177\tSample_0_D\t50\n+Taxa_178\tSample_0_D\t51\n+Taxa_179\tSample_0_D\t0\n+Taxa_181\tSample_0_D\t0\n+Taxa_183\tSample_0_D\t0\n+Taxa_184\tSample_0_D\t0\n+Taxa_185\tSample_0_D\t0\n+Taxa_186\tSample_0_D\t0\n+Taxa_187\tSampl'..b'Sample_47_T\t51\n+Taxa_44\tSample_47_T\t0\n+Taxa_45\tSample_47_T\t0\n+Taxa_46\tSample_47_T\t0\n+Taxa_47\tSample_47_T\t0\n+Taxa_48\tSample_47_T\t0\n+Taxa_49\tSample_47_T\t0\n+Taxa_50\tSample_47_T\t0\n+Taxa_51\tSample_47_T\t0\n+Taxa_52\tSample_47_T\t0\n+Taxa_53\tSample_47_T\t0\n+Taxa_54\tSample_47_T\t0\n+Taxa_55\tSample_47_T\t0\n+Taxa_56\tSample_47_T\t0\n+Taxa_57\tSample_47_T\t0\n+Taxa_58\tSample_47_T\t0\n+Taxa_59\tSample_47_T\t0\n+Taxa_60\tSample_47_T\t0\n+Taxa_61\tSample_47_T\t0\n+Taxa_62\tSample_47_T\t0\n+Taxa_63\tSample_47_T\t0\n+Taxa_64\tSample_47_T\t0\n+Taxa_65\tSample_47_T\t0\n+Taxa_66\tSample_47_T\t0\n+Taxa_67\tSample_47_T\t0\n+Taxa_68\tSample_47_T\t0\n+Taxa_69\tSample_47_T\t0\n+Taxa_70\tSample_47_T\t0\n+Taxa_71\tSample_47_T\t0\n+Taxa_72\tSample_47_T\t0\n+Taxa_73\tSample_47_T\t0\n+Taxa_74\tSample_47_T\t0\n+Taxa_75\tSample_47_T\t0\n+Taxa_76\tSample_47_T\t0\n+Taxa_77\tSample_47_T\t0\n+Taxa_78\tSample_47_T\t0\n+Taxa_79\tSample_47_T\t0\n+Taxa_80\tSample_47_T\t0\n+Taxa_81\tSample_47_T\t0\n+Taxa_82\tSample_47_T\t0\n+Taxa_83\tSample_47_T\t0\n+Taxa_84\tSample_47_T\t0\n+Taxa_85\tSample_47_T\t0\n+Taxa_87\tSample_47_T\t0\n+Taxa_88\tSample_47_T\t0\n+Taxa_89\tSample_47_T\t0\n+Taxa_90\tSample_47_T\t0\n+Taxa_91\tSample_47_T\t0\n+Taxa_92\tSample_47_T\t0\n+Taxa_93\tSample_47_T\t0\n+Taxa_95\tSample_47_T\t0\n+Taxa_96\tSample_47_T\t0\n+Taxa_97\tSample_47_T\t0\n+Taxa_98\tSample_47_T\t0\n+Taxa_99\tSample_47_T\t0\n+Taxa_100\tSample_47_T\t0\n+Taxa_101\tSample_4
7_T\t0\n+Taxa_102\tSample_47_T\t0\n+Taxa_103\tSample_47_T\t0\n+Taxa_104\tSample_47_T\t0\n+Taxa_105\tSample_47_T\t0\n+Taxa_106\tSample_47_T\t0\n+Taxa_107\tSample_47_T\t0\n+Taxa_108\tSample_47_T\t0\n+Taxa_109\tSample_47_T\t0\n+Taxa_110\tSample_47_T\t0\n+Taxa_111\tSample_47_T\t0\n+Taxa_112\tSample_47_T\t0\n+Taxa_113\tSample_47_T\t0\n+Taxa_114\tSample_47_T\t0\n+Taxa_115\tSample_47_T\t0\n+Taxa_116\tSample_47_T\t0\n+Taxa_117\tSample_47_T\t0\n+Taxa_118\tSample_47_T\t0\n+Taxa_119\tSample_47_T\t0\n+Taxa_120\tSample_47_T\t0\n+Taxa_121\tSample_47_T\t0\n+Taxa_122\tSample_47_T\t0\n+Taxa_123\tSample_47_T\t0\n+Taxa_124\tSample_47_T\t0\n+Taxa_125\tSample_47_T\t0\n+Taxa_126\tSample_47_T\t0\n+Taxa_127\tSample_47_T\t0\n+Taxa_129\tSample_47_T\t0\n+Taxa_130\tSample_47_T\t0\n+Taxa_131\tSample_47_T\t0\n+Taxa_132\tSample_47_T\t0\n+Taxa_133\tSample_47_T\t0\n+Taxa_136\tSample_47_T\t0\n+Taxa_137\tSample_47_T\t0\n+Taxa_138\tSample_47_T\t0\n+Taxa_139\tSample_47_T\t0\n+Taxa_140\tSample_47_T\t0\n+Taxa_141\tSample_47_T\t0\n+Taxa_142\tSample_47_T\t0\n+Taxa_143\tSample_47_T\t0\n+Taxa_144\tSample_47_T\t0\n+Taxa_145\tSample_47_T\t0\n+Taxa_146\tSample_47_T\t0\n+Taxa_147\tSample_47_T\t0\n+Taxa_149\tSample_47_T\t0\n+Taxa_150\tSample_47_T\t0\n+Taxa_151\tSample_47_T\t0\n+Taxa_152\tSample_47_T\t0\n+Taxa_153\tSample_47_T\t0\n+Taxa_154\tSample_47_T\t0\n+Taxa_155\tSample_47_T\t0\n+Taxa_156\tSample_47_T\t0\n+Taxa_157\tSample_47_T\t0\n+Taxa_158\tSample_47_T\t0\n+Taxa_159\tSample_47_T\t0\n+Taxa_160\tSample_47_T\t0\n+Taxa_161\tSample_47_T\t0\n+Taxa_162\tSample_47_T\t0\n+Taxa_164\tSample_47_T\t0\n+Taxa_165\tSample_47_T\t0\n+Taxa_166\tSample_47_T\t0\n+Taxa_168\tSample_47_T\t0\n+Taxa_169\tSample_47_T\t0\n+Taxa_170\tSample_47_T\t0\n+Taxa_171\tSample_47_T\t0\n+Taxa_172\tSample_47_T\t0\n+Taxa_173\tSample_47_T\t0\n+Taxa_174\tSample_47_T\t0\n+Taxa_175\tSample_47_T\t0\n+Taxa_176\tSample_47_T\t0\n+Taxa_177\tSample_47_T\t0\n+Taxa_178\tSample_47_T\t0\n+Taxa_179\tSample_47_T\t0\n+Taxa_181\tSample_47_T\t0\n+Taxa_183\tSample_47_
T\t0\n+Taxa_184\tSample_47_T\t0\n+Taxa_185\tSample_47_T\t0\n+Taxa_186\tSample_47_T\t0\n+Taxa_187\tSample_47_T\t0\n+Taxa_188\tSample_47_T\t0\n+Taxa_189\tSample_47_T\t0\n+Taxa_190\tSample_47_T\t0\n+Taxa_191\tSample_47_T\t0\n+Taxa_192\tSample_47_T\t0\n+Taxa_193\tSample_47_T\t0\n+Taxa_194\tSample_47_T\t0\n+Taxa_195\tSample_47_T\t0\n+Taxa_196\tSample_47_T\t0\n+Taxa_198\tSample_47_T\t0\n+Taxa_199\tSample_47_T\t0\n+Taxa_200\tSample_47_T\t0\n+Taxa_202\tSample_47_T\t0\n+Taxa_203\tSample_47_T\t0\n+Taxa_204\tSample_47_T\t0\n+Taxa_205\tSample_47_T\t0\n+Taxa_206\tSample_47_T\t0\n+Taxa_207\tSample_47_T\t0\n+Taxa_208\tSample_47_T\t0\n+Taxa_209\tSample_47_T\t0\n+Taxa_210\tSample_47_T\t0\n+Taxa_211\tSample_47_T\t0\n+Taxa_212\tSample_47_T\t0\n+Taxa_213\tSample_47_T\t0\n+Taxa_214\tSample_47_T\t0\n+Taxa_215\tSample_47_T\t0\n+Taxa_216\tSample_47_T\t0\n+Taxa_217\tSample_47_T\t0\n+Taxa_218\tSample_47_T\t0\n+Taxa_219\tSample_47_T\t0\n+Taxa_220\tSample_47_T\t0\n+Taxa_221\tSample_47_T\t0\n+Taxa_222\tSample_47_T\t0\n+Taxa_223\tSample_47_T\t0\n+\n+\n+\n'

diff -r 000000000000 -r 2f4f6f08c8c4 input/Test.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/Test.biom Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,1 @@\n+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "BreadCrumbs","date": "2013-10-07T16:48:52.647220","matrix_type": "sparse","matrix_element_type": "float","shape": [224, 48],"data": [[0,1,0.13263911008834839],[0,2,51.672050476074219],[0,6,51.744468688964844],[0,12,51.208335876464844],[0,13,52.149501800537109],[0,15,54.280982971191406],[0,16,51.682929992675781],[0,18,0.31236764788627625],[0,24,0.21669530868530273],[0,26,0.2411828488111496],[0,33,0.31222966313362122],[0,47,0.17723028361797333],[1,0,54.099388122558594],[1,2,0.44196206331253052],[1,6,50.664783477783203],[1,7,53.447223663330078],[1,8,50.581748962402344],[1,10,53.541297912597656],[1,16,54.532711029052734],[1,28,0.30667436122894287],[1,30,0.4312838613986969],[1,32,0.37312093377113342],[1,45,0.12748651206493378],[2,0,51.461025238037109],[2,5,53.026569366455078],[2,10,54.047443389892578],[2,12,52.526435852050781],[2,16,54.940284729003906],[2,32,0.42233455181121826],[2,37,0.460744708776474],[2,39,0.34424543380737305],[3,16,53.750659942626953],[3,18,0.41481569409370422],[3,22,0.26857671141624451],[3,28,0.36630621552467346],[3,44,50.526988983154297],[3,45,51.854343414306641],[3,46,53.768180847167969],[3,47,52.634426116943359],[4,0,50.240570068359375],[4,1,51.777767181396484],[4,3,54.845878601074219],[4,11,53.880828857421875],[4,16,50.028224945068359],[4,20,0.098072387278079987],[4,22,0.38868135213851929],[4,28,0.4286598265171051],[4,32,0.47316429018974304],[4,37,0.15683920681476593],[5,5,51.951015472412109],[5,6,0.22961589694023132],[5,7,52.969863891601562],[5,11,52.197437286376953],[5,14,0.22526906430721283],[5,16,53.653339385986328],[5,25,0.39630568027496338],[5,28,0.06783304363489151],[6,5,54.567390441894531],[6,10,51.190956115722656],[6,11,0.25222322344779968],[6,13,52.400714874267578],[6,14,54.537200927734375],[6,16,51.323921203613281],[6,29,0.17403842508792877],[6,41,0.052579630166292191],[7,2,54.88
9991760253906],[7,3,50.592502593994141],[7,7,54.676334381103516],[7,8,0.11892234534025192],[7,9,50.23931884765625],[7,16,54.696922302246094],[7,19,0.26401954889297485],[7,25,0.093931779265403748],[7,29,0.0068426746875047684],[7,33,0.18845303356647491],[8,0,50.167209625244141],[8,2,52.330379486083984],[8,7,0.26085031032562256],[8,8,51.234970092773438],[8,9,53.837917327880859],[8,16,50.158138275146484],[8,17,0.00067086971830576658],[8,30,102.35082244873047],[8,37,0.25920611619949341],[8,38,0.27336984872817993],[9,2,54.295417785644531],[9,3,0.39004826545715332],[9,4,53.497718811035156],[9,6,52.853614807128906],[9,13,53.070518493652344],[9,14,53.997322082519531],[9,15,52.567996978759766],[9,16,52.433670043945312],[9,18,0.44207152724266052],[9,26,0.20625883340835571],[9,28,0.22174246609210968],[9,30,102.36073303222656],[10,1,52.030044555664062],[10,4,50.834751129150391],[10,6,54.687831878662109],[10,8,52.774848937988281],[10,10,0.17284990847110748],[10,13,0.34940233826637268],[10,15,53.292324066162109],[10,16,52.094409942626953],[10,30,103.40880584716797],[10,39,0.2798474133014679],[11,5,50.226642608642578],[11,10,54.800552368164062],[11,13,0.26145485043525696],[11,14,54.634014129638672],[11,16,53.305400848388672],[11,30,104.29749298095703],[11,31,0.41477712988853455],[11,43,0.2095019668340683],[11,44,0.2636646032333374],[11,46,0.14962244033813477],[12,1,51.245124816894531],[12,4,52.189498901367188],[12,8,0.14854021370410919],[12,11,51.457836151123047],[12,15,53.714405059814453],[12,16,52.488780975341797],[12,18,0.49128276109695435],[12,21,0.040838729590177536],[12,23,0.12988743185997009],[12,28,0.40605998039245605],[12,30,100.32480621337891],[12,45,0.11192221939563751],[13,0,0.30497664213180542],[13,5,0.25770536065101624],[13,7,0.033960685133934021],[13,8,54.769084930419922],[13,9,50.697971343994141],[13,12,53.864070892333984],[13,14,52.003578186035156],[13,16,53.642868041992188],[13,24,0.42540961503982544],[13,25,0.22494150698184967],[13,27,0.18784832954406738],[13,30,
10'..b'Class-Two"}},{"id": "Sample_18_R", "metadata": {"Group": "Moderate_Dissimilarity_Feature", "Old": "Class-Two", "ID": "Sample_18_R", "Label": "Class-Two"}},{"id": "Sample_19_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-Two", "ID": "Sample_19_R", "Label": "Class-Two"}},{"id": "Sample_20_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_20_R", "Label": "Class-One"}},{"id": "Sample_21_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_21_R", "Label": "Class-One"}},{"id": "Sample_22_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_22_R", "Label": "Class-One"}},{"id": "Sample_23_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_23_R", "Label": "Class-One"}},{"id": "Sample_24_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_24_R", "Label": "Class-One"}},{"id": "Sample_25_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-Two", "ID": "Sample_25_R", "Label": "Class-Two"}},{"id": "Sample_26_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_26_R", "Label": "Class-One"}},{"id": "Sample_27_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-Two", "ID": "Sample_27_R", "Label": "Class-Two"}},{"id": "Sample_28_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-Two", "ID": "Sample_28_R", "Label": "Class-Two"}},{"id": "Sample_29_R", "metadata": {"Group": "Moderate_Dissimilarity", "Old": "Class-One", "ID": "Sample_29_R", "Label": "Class-One"}},{"id": "Sample_30_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-Two", "ID": "Sample_30_E", "Label": "Class-Two"}},{"id": "Sample_31_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-Two", "ID": "Sample_31_E", "Label": "Class-Two"}},{"id": "Sample_32_E", "metadata": {"Group": "High_Dissimilarity_Feature", "Old": "Class-Two", "ID": 
"Sample_32_E", "Label": "Class-Two"}},{"id": "Sample_33_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-Two", "ID": "Sample_33_E", "Label": "Class-Two"}},{"id": "Sample_34_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_34_E", "Label": "Class-One"}},{"id": "Sample_35_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_35_E", "Label": "Class-One"}},{"id": "Sample_36_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_36_E", "Label": "Class-One"}},{"id": "Sample_37_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_37_E", "Label": "Class-One"}},{"id": "Sample_38_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_38_E", "Label": "Class-One"}},{"id": "Sample_39_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-Two", "ID": "Sample_39_E", "Label": "Class-Two"}},{"id": "Sample_40_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_40_E", "Label": "Class-One"}},{"id": "Sample_41_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-Two", "ID": "Sample_41_E", "Label": "Class-Two"}},{"id": "Sample_42_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-Two", "ID": "Sample_42_E", "Label": "Class-Two"}},{"id": "Sample_43_E", "metadata": {"Group": "High_Dissimilarity", "Old": "Class-One", "ID": "Sample_43_E", "Label": "Class-One"}},{"id": "Sample_44_T", "metadata": {"Group": "Targeted_Feature", "Old": "Class-Two", "ID": "Sample_44_T", "Label": "Class-Two"}},{"id": "Sample_45_T", "metadata": {"Group": "Targeted_Feature", "Old": "Class-Two", "ID": "Sample_45_T", "Label": "Class-Two"}},{"id": "Sample_46_T", "metadata": {"Group": "Targeted_Feature", "Old": "Class-Two", "ID": "Sample_46_T", "Label": "Class-Two"}},{"id": "Sample_47_T", "metadata": {"Group": "Targeted_Feature", "Old": "Class-Two", "ID": "Sample_47_T", "Label": "Class-Two"}}]}\n\\ No newline at 
end of file\n'

diff -r 000000000000 -r 2f4f6f08c8c4 input/Test.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/Test.pcl Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,230 @@\n+ID\tSample_0_D\tSample_1_D\tSample_2_D\tSample_3_D\tSample_4_D\tSample_5_D\tSample_6_D\tSample_7_D\tSample_8_D\tSample_9_D\tSample_10_D\tSample_11_D\tSample_12_D\tSample_13_D\tSample_14_D\tSample_15_D\tSample_16_R\tSample_17_R\tSample_18_R\tSample_19_R\tSample_20_R\tSample_21_R\tSample_22_R\tSample_23_R\tSample_24_R\tSample_25_R\tSample_26_R\tSample_27_R\tSample_28_R\tSample_29_R\tSample_30_E\tSample_31_E\tSample_32_E\tSample_33_E\tSample_34_E\tSample_35_E\tSample_36_E\tSample_37_E\tSample_38_E\tSample_39_E\tSample_40_E\tSample_41_E\tSample_42_E\tSample_43_E\tSample_44_T\tSample_45_T\tSample_46_T\tSample_47_T\n+alpha_custom\t1\t0.99\t0.98\t0.97\t0.96\t0.95\t0.94\t0.93\t0.92\t0.91\t0.9\t0.89\t0.88\t0.87\t0.86\t0.85\t0.84\t0.83\t0.82\t0.81\t0.8\t0.79\t0.78\t0.77\t0.76\t0.75\t0.74\t0.73\t0.72\t0.71\t0.7\t0.69\t0.68\t0.67\t0.66\t0.65\t0.64\t0.63\t0.62\t0.61\t0.6\t0.59\t0.58\t0.57\t0.56\t0.55\t0.54\t0.53\n+Sample\tSample_0_D\tSample_1_D\tSample_2_D\tSample_3_D\tSample_4_D\tSample_5_D\tSample_6_D\tSample_7_D\tSample_8_D\tSample_9_D\tSample_10_D\tSample_11_D\tSample_12_D\tSample_13_D\tSample_14_D\tSample_15_D\tSample_16_R\tSample_17_R\tSample_18_R\tSample_19_R\tSample_20_R\tSample_21_R\tSample_22_R\tSample_23_R\tSample_24_R\tSample_25_R\tSample_26_R\tSample_27_R\tSample_28_R\tSample_29_R\tSample_30_E\tSample_31_E\tSample_32_E\tSample_33_E\tSample_34_E\tSample_35_E\tSample_36_E\tSample_37_E\tSample_38_E\tSample_39_E\tSample_40_E\tSample_41_E\tSample_42_E\tSample_43_E\tSample_44_T\tSample_45_T\tSample_46_T\tSample_47_T\n+Group\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tComplex\tModerate_Dissimilarity_Feature\tModerate_Dissimilarity_Feature\tModerate_Dissimilarity_Feature\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tM
oderate_Dissimilarity\tModerate_Dissimilarity\tModerate_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity_Feature\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tHigh_Dissimilarity\tTargeted_Feature\tTargeted_Feature\tTargeted_Feature\tTargeted_Feature\n+StratifyLabel\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\t1\t2\n+Label\tClass-Two\tClass-One\tClass-Two\tClass-One\tClass-One\tClass-One\tClass-One\tClass-Two\tClass-Two\tClass-One\tClass-Two\tClass-One\tClass-One\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-One\tClass-One\tClass-One\tClass-One\tClass-One\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-Two\tClass-Two\tClass-One\tClass-One\tClass-One\tClass-One\tClass-One\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-One\tClass-Two\tClass-Two\tClass-Two\tClass-Two\n+Root|Taxa_0\t0\t0.132639108\t51.67205155\t0\t0\t0\t51.7444670649\t0\t0\t0\t0\t0\t51.2083349844\t52.1495033914\t0\t54.2809813981\t51.6829297536\t0\t0.3123676392\t0\t0\t0\t0\t0\t0.2166953032\t0\t0.2411828448\t0\t0\t0\t0\t0\t0\t0.3122296644\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1772302872\n+Root|Taxa_1\t54.0993873098\t0\t0.441962075\t0\t0\t0\t50.6647838005\t53.447223597\t50.5817481168\t0\t53.5412967792\t0\t0\t0\t0\t0\t54.5327122192\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.3066743566\t0\t0.4312838574\t0\t0.3731209223\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1274865184\t0\t0\n+Root|Taxa_2\t51.461026897\t0\t0\t0\t0\t53.0265676376\t0\t0\t0\t0\t54.047444278\t0\t52.5264375555\t0\t0\t0\t54.9402852499\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4223345514\t0\t0\t0\t0\t0.4607446961\t0\t0.3442454432\t0\t0\t0\t0\t0\t0\t0\t0\n+Root|Taxa_3\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0
\t0\t0\t0\t0\t53.7506605469\t0\t0.4148157079\t0\t0\t0\t0.2685767173\t0\t0\t0\t0\t0\t0.3663062251\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t50.5269874981\t51.8543446876\t53.7681811219\t52.6344247487\n+Root|Taxa_4\t50.2405694418\t51.7777654529\t0\t54.8458795552\t0\t0\t0\t0\t0\t0\t0\t53.8808302665\t0\t0\t0\t0\t50.0282264237\t0\t0\t0\t0.0980723897\t0\t0.3886813644\t0\t0\t0\t0\t0\t0.4286598254\t0\t0\t0\t0.4731642927\t0\t0\t0\t0\t0.1568392012\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root|Taxa_5\t0\t0\t0\t0\t0\t51.9510168529\t0.2296159024\t52.9698629485\t0'..b'3.8938971864\t52.5574477678\t53.8098522525\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t54.2401876661\t0.2242680732\t0.115238483\t0\t0\t0\t0\t0.4881061169\t0\t0\t0\t0\t0\t0\t102.186344705\t0\t0\t0\t0\t0\n+Root|Taxa_207\t51.2183210273\t51.1563774994\t0\t0\t54.1762942805\t0\t0\t0\t51.2704897195\t52.8968592985\t0\t52.288966104\t0\t0\t0\t54.3906412249\t0\t0\t0\t0\t0.0048042154\t0\t0\t0\t0\t0\t0\t0\t51.2408250309\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t103.110597368\t0\t0\t0\t0\t0\n+Root|Taxa_208\t0\t0\t0\t0.0546805022\t0\t0.1599409596\t50.0472074245\t0.4099746803\t51.4051352965\t0\t0\t0\t0\t53.2887290406\t0\t0.0848935397\t0\t0\t0\t0\t0\t0\t0\t0.1555916655\t0\t0\t0\t0\t0\t53.0135385478\t0.1497035187\t0\t0.0691993821\t0\t0.3373772825\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.2522945018\t0\t0\t0\n+Root|Taxa_209\t0\t50.1030172793\t0\t0\t0\t0\t54.1650217307\t52.2230603121\t0\t0\t0\t0\t0\t0\t0\t0.1143171628\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t51.8871962649\t0.299430337\t0\t0\t0\t0\t0\t0\t0.4798221512\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root|Taxa_210\t0\t0\t0\t0\t0\t0\t50.5549695516\t52.1576796275\t0\t0\t0\t0\t0\t0\t0\t52.3992524271\t0\t0\t0\t0.1706714754\t0\t0\t0\t0\t0.3811782646\t0.0008517706\t0\t0\t0\t51.0207565229\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0102807436\t0\t0\t0\t0\t0.0085181585\t0\t0\n+Root|Taxa_211\t50.536257303\t0\t0\t0\t0\t0\t0\t0\t0\t0\t51.9813477573\t0\t51.5911214861\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t53.1296406718\t0\t0\t0\
t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4077664043\t0\n+Root|Taxa_212\t0\t0\t0\t0\t0.4699813783\t0\t0\t0\t0\t54.5530559822\t0\t50.743119663\t51.3155881447\t0\t0\t53.2941828392\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0342022552\t0\t0\t54.1038020638\t0\t0\t0\t0.040960994\t0\t0\t0\t0.4947938425\t0\t0\t0\t0\t0.3862038599\t0\t0\t0\t0\t0\n+Root|Taxa_213\t0.3399472909\t0\t0\t0\t0\t0\t54.6671775034\t0.050131803\t50.2999962265\t0\t51.8713213189\t0\t0\t0\t53.2943170335\t51.1243603164\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t50.6101294432\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Root|Taxa_214\t50.0115213059\t0\t0\t0\t0\t50.8371273498\t0\t0\t0\t54.4139864126\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.3015435222\t0\t0\t0\t0\t0\t0\t0\t0.0872919265\t0.2777439996\t50.3179817753\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4557640334\t0\t0\t0\n+Root|Taxa_215\t0\t0.4561298327\t0\t53.5216413012\t54.8891010063\t50.9129555637\t0\t0\t0\t0\t0\t0\t0\t0\t53.4880926895\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1940475073\t0.2803421878\t50.3585027491\t0\t0\t0\t0.345883306\t0.3534255574\t0\t0\t0\t0\t0\t0.2075798899\t0\t0.4493876473\t0\t0\t0\t0\t0\n+Root|Taxa_216\t0\t52.664567111\t54.9286304319\t0\t0\t0.3528460271\t0\t0\t0\t53.8271900954\t53.2864160864\t0\t0\t53.8916659856\t0\t0\t0.0390396631\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0501403367\t53.9106611035\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t102.503416561\t0\t0\t0.2323723816\t0\n+Root|Taxa_217\t0\t0\t0\t53.8189770697\t0\t0\t0\t0\t0\t0\t51.0739213403\t51.9325727684\t53.6586894371\t0\t50.8848681615\t0\t0\t0\t0\t0\t0\t0.4134750657\t0.2984209586\t0\t0\t0\t0\t0.3984011824\t0\t50.0637489202\t0.1745273923\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0970082737\t102.133370457\t0\t0\t0\t0\n+Root|Taxa_218\t53.6309027759\t0\t0\t54.1993531611\t0\t53.8609164351\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4230445366\t0\t0\t0\t0\t0\t0\t0.3460879368\t0\t0\t0\t54.0328089965\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.1875089533\t0\t0\t102.742579014\t0\t0\t0\t0\n+Root|Taxa_219\t0\t0\t50.69595
93123\t0\t0\t0\t0\t0\t52.9583336932\t51.2063060177\t0\t0\t0\t0\t0.1839092432\t0.1624310372\t0\t0\t0.2217437424\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t51.3989543421\t0.1469196015\t0\t0\t0.4515484111\t0\t0\t0\t0\t0\t0\t0.362398058\t0\t0\t103.426555217\t0\t0\t0\t0\n+Root|Taxa_220\t53.138860148\t54.260252989\t50.1122213715\t0\t53.7052604162\t0\t0.4760342302\t0\t51.9385050693\t0.191283382\t0\t0\t51.217538964\t0\t0.2580800578\t50.1174597734\t0\t0\t0\t0\t0\t0\t0.115889335\t0\t0\t0\t0\t0\t0\t53.3111145776\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t104.430682236\t0\t0\t0\t0\n+Root|Taxa_221\t0\t0.3384535892\t0.4106893002\t52.1411487055\t50.5241190477\t0\t0\t53.9774121283\t0\t0\t0\t52.1082021353\t0\t0\t0\t0\t0.4172253944\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t50.9817139231\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.4224842988\t0\t0\t0\t102.376952212\t0.2260951816\t0\t0.3067861355\t0\n+Root|Taxa_222\t0\t52.3172435891\t52.6471280765\t0\t51.567115685\t0\t0\t53.6724555185\t0\t0\t0\t0\t0\t54.2428549188\t51.6482654065\t0\t0\t0\t0\t0\t0\t0.2425898966\t0\t0\t0\t0\t0\t0\t0\t51.1118820396\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.2951844288\t0\t0\t102.047792613\t0\t0\t0\t0\n+Root|Taxa_223\t0\t0\t0.2600381862\t0\t0\t50.2577764475\t0.4992511416\t0\t0\t0\t0\t51.3720073928\t0\t53.1526569712\t0.3864806128\t0\t0\t0.0519771654\t0\t0\t0\t0\t0\t0.3896198936\t0\t0\t0\t0.4447746424\t0\t51.736707456\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t103.397909634\t0\t0\t0\t0\n'

diff -r 000000000000 -r 2f4f6f08c8c4 input/Test.tree
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/Test.tree Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,1 @@
+(Taxa_Root:1.0,(Taxa_0:1.0,Taxa_1:1.0,Taxa_2:1.0,(Taxa_3:1.0),Taxa_4:1.0,Taxa_5:1.0,Taxa_6:1.0,Taxa_7:1.0,(Taxa_8:1.0,Taxa_9:1.0,Taxa_10:1.0,Taxa_11:1.0,Taxa_12:1.0,Taxa_13:1.0,Taxa_14:1.0,Taxa_15:1.0)),(Taxa_16:1.0,Taxa_18:1.0,(Taxa_19:1.0),Taxa_20:1.0,Taxa_21:1.0,Taxa_22:1.0,Taxa_23:1.0,(Taxa_24:1.0,Taxa_25:1.0,Taxa_26:1.0,Taxa_27:1.0,Taxa_28:1.0,Taxa_29:1.0,Taxa_30:1.0,Taxa_31:1.0)),(Taxa_32:1.0,Taxa_33:1.0,Taxa_34:1.0,Taxa_35:1.0,Taxa_36:1.0,Taxa_37:1.0,Taxa_38:1.0,(Taxa_40:1.0,Taxa_41:1.0,Taxa_42:1.0,(Taxa_43:1.0),Taxa_44:1.0,Taxa_45:1.0,Taxa_46:1.0,Taxa_47:1.0)),(Taxa_48:1.0,Taxa_49:1.0,Taxa_50:1.0,Taxa_51:1.0,Taxa_52:1.0,Taxa_53:1.0,Taxa_54:1.0,Taxa_55:1.0,(Taxa_56:1.0,Taxa_57:1.0,Taxa_58:1.0,Taxa_59:1.0,Taxa_60:1.0,Taxa_61:1.0,Taxa_62:1.0,Taxa_63:1.0)),(Taxa_64:1.0,Taxa_65:1.0,Taxa_66:1.0,Taxa_67:1.0,Taxa_68:1.0,Taxa_69:1.0,Taxa_70:1.0,Taxa_71:1.0,(Taxa_72:1.0,Taxa_73:1.0,Taxa_74:1.0,Taxa_75:1.0,Taxa_76:1.0,Taxa_77:1.0,Taxa_78:1.0,Taxa_79:1.0)),(Taxa_80:1.0,Taxa_81:1.0,Taxa_82:1.0,Taxa_83:1.0,Taxa_84:1.0,Taxa_85:1.0,Taxa_87:1.0,(Taxa_88:1.0,Taxa_89:1.0,Taxa_90:1.0,Taxa_91:1.0,Taxa_92:1.0,Taxa_93:1.0,Taxa_95:1.0)),(Taxa_96:1.0,Taxa_97:1.0,Taxa_98:1.0,Taxa_99:1.0,Taxa_100:1.0,Taxa_101:1.0,Taxa_102:1.0,Taxa_103:1.0,(Taxa_104:1.0,Taxa_105:1.0,Taxa_106:1.0,Taxa_107:1.0,Taxa_108:1.0,Taxa_109:1.0,Taxa_110:1.0,Taxa_111:1.0)),(Taxa_112:1.0,Taxa_113:1.0,Taxa_114:1.0,Taxa_115:1.0,Taxa_116:1.0,Taxa_117:1.0,Taxa_118:1.0,Taxa_119:1.0,(Taxa_120:1.0,Taxa_121:1.0,Taxa_122:1.0,Taxa_123:1.0,Taxa_124:1.0,Taxa_125:1.0,Taxa_126:1.0,Taxa_127:1.0)),(Taxa_129:1.0,Taxa_130:1.0,Taxa_131:1.0,Taxa_132:1.0,Taxa_133:1.0,(Taxa_136:1.0,Taxa_137:1.0,Taxa_138:1.0,Taxa_139:1.0,Taxa_140:1.0,Taxa_141:1.0,Taxa_142:1.0,Taxa_143:1.0)),(Taxa_144:1.0,Taxa_145:1.0,Taxa_146:1.0,Taxa_147:1.0,Taxa_149:1.0,Taxa_150:1.0,Taxa_151:1.0,(Taxa_152:1.0,Taxa_153:1.0,Taxa_154:1.0,Taxa_155:1.0,Taxa_156:1.0,Taxa_157:1.0,Taxa_158:1.0,Taxa_159:1.0)),(Taxa_160:1.0,Taxa_161:1.0,Taxa_162:1.0,Taxa_164:1.0,Taxa_165:1.0,Ta
xa_166:1.0,(Taxa_168:1.0,Taxa_169:1.0,Taxa_170:1.0,Taxa_171:1.0,Taxa_172:1.0,Taxa_173:1.0,Taxa_174:1.0,Taxa_175:1.0)),(Taxa_176:1.0,Taxa_177:1.0,Taxa_178:1.0,Taxa_179:1.0,Taxa_181:1.0,Taxa_183:1.0,(Taxa_184:1.0,Taxa_185:1.0,Taxa_186:1.0,Taxa_187:1.0,Taxa_188:1.0,Taxa_189:1.0,Taxa_190:1.0,Taxa_191:1.0)),(Taxa_192:1.0,Taxa_193:1.0,Taxa_194:1.0,Taxa_195:1.0,Taxa_196:1.0,Taxa_198:1.0,Taxa_199:1.0,(Taxa_200:1.0,Taxa_202:1.0,Taxa_203:1.0,Taxa_204:1.0,Taxa_205:1.0,Taxa_206:1.0,Taxa_207:1.0)),(Taxa_208:1.0,Taxa_209:1.0,Taxa_210:1.0,Taxa_211:1.0,Taxa_212:1.0,Taxa_213:1.0,Taxa_214:1.0,Taxa_215:1.0,(Taxa_216:1.0,Taxa_217:1.0,Taxa_218:1.0,Taxa_219:1.0,Taxa_220:1.0,Taxa_221:1.0,Taxa_222:1.0,Taxa_223:1.0)))

diff -r 000000000000 -r 2f4f6f08c8c4 input/Test2.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/Test2.biom Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,1 @@\n+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "BreadCrumbs","date": "2013-10-08T12:59:54.028525","matrix_type": "sparse","matrix_element_type": "float","shape": [224, 48],"data": [[0,1,0.13263911008834839],[0,2,51.672050476074219],[0,6,51.744468688964844],[0,12,51.208335876464844],[0,13,52.149501800537109],[0,15,54.280982971191406],[0,16,51.682929992675781],[0,18,0.31236764788627625],[0,24,0.21669530868530273],[0,26,0.2411828488111496],[0,33,0.31222966313362122],[0,47,0.17723028361797333],[1,0,54.099388122558594],[1,2,0.44196206331253052],[1,6,50.664783477783203],[1,7,53.447223663330078],[1,8,50.581748962402344],[1,10,53.541297912597656],[1,16,54.532711029052734],[1,28,0.30667436122894287],[1,30,0.4312838613986969],[1,32,0.37312093377113342],[1,45,0.12748651206493378],[2,0,51.461025238037109],[2,5,53.026569366455078],[2,10,54.047443389892578],[2,12,52.526435852050781],[2,16,54.940284729003906],[2,32,0.42233455181121826],[2,37,0.460744708776474],[2,39,0.34424543380737305],[3,16,53.750659942626953],[3,18,0.41481569409370422],[3,22,0.26857671141624451],[3,28,0.36630621552467346],[3,44,50.526988983154297],[3,45,51.854343414306641],[3,46,53.768180847167969],[3,47,52.634426116943359],[4,0,50.240570068359375],[4,1,51.777767181396484],[4,3,54.845878601074219],[4,11,53.880828857421875],[4,16,50.028224945068359],[4,20,0.098072387278079987],[4,22,0.38868135213851929],[4,28,0.4286598265171051],[4,32,0.47316429018974304],[4,37,0.15683920681476593],[5,5,51.951015472412109],[5,6,0.22961589694023132],[5,7,52.969863891601562],[5,11,52.197437286376953],[5,14,0.22526906430721283],[5,16,53.653339385986328],[5,25,0.39630568027496338],[5,28,0.06783304363489151],[6,5,54.567390441894531],[6,10,51.190956115722656],[6,11,0.25222322344779968],[6,13,52.400714874267578],[6,14,54.537200927734375],[6,16,51.323921203613281],[6,29,0.17403842508792877],[6,41,0.052579630166292191],[7,2,54.88
9991760253906],[7,3,50.592502593994141],[7,7,54.676334381103516],[7,8,0.11892234534025192],[7,9,50.23931884765625],[7,16,54.696922302246094],[7,19,0.26401954889297485],[7,25,0.093931779265403748],[7,29,0.0068426746875047684],[7,33,0.18845303356647491],[8,0,50.167209625244141],[8,2,52.330379486083984],[8,7,0.26085031032562256],[8,8,51.234970092773438],[8,9,53.837917327880859],[8,16,50.158138275146484],[8,17,0.00067086971830576658],[8,30,102.35082244873047],[8,37,0.25920611619949341],[8,38,0.27336984872817993],[9,2,54.295417785644531],[9,3,0.39004826545715332],[9,4,53.497718811035156],[9,6,52.853614807128906],[9,13,53.070518493652344],[9,14,53.997322082519531],[9,15,52.567996978759766],[9,16,52.433670043945312],[9,18,0.44207152724266052],[9,26,0.20625883340835571],[9,28,0.22174246609210968],[9,30,102.36073303222656],[10,1,52.030044555664062],[10,4,50.834751129150391],[10,6,54.687831878662109],[10,8,52.774848937988281],[10,10,0.17284990847110748],[10,13,0.34940233826637268],[10,15,53.292324066162109],[10,16,52.094409942626953],[10,30,103.40880584716797],[10,39,0.2798474133014679],[11,5,50.226642608642578],[11,10,54.800552368164062],[11,13,0.26145485043525696],[11,14,54.634014129638672],[11,16,53.305400848388672],[11,30,104.29749298095703],[11,31,0.41477712988853455],[11,43,0.2095019668340683],[11,44,0.2636646032333374],[11,46,0.14962244033813477],[12,1,51.245124816894531],[12,4,52.189498901367188],[12,8,0.14854021370410919],[12,11,51.457836151123047],[12,15,53.714405059814453],[12,16,52.488780975341797],[12,18,0.49128276109695435],[12,21,0.040838729590177536],[12,23,0.12988743185997009],[12,28,0.40605998039245605],[12,30,100.32480621337891],[12,45,0.11192221939563751],[13,0,0.30497664213180542],[13,5,0.25770536065101624],[13,7,0.033960685133934021],[13,8,54.769084930419922],[13,9,50.697971343994141],[13,12,53.864070892333984],[13,14,52.003578186035156],[13,16,53.642868041992188],[13,24,0.42540961503982544],[13,25,0.22494150698184967],[13,27,0.18784832954406738],[13,30,
10'..b'data": {"Sample": "Sample_26_R", "alpha_custom": "0.74", "StratifyLabel": "1", "Group": "Moderate_Dissimilarity", "ID": "Sample_26_R", "Label": "Class-One"}},{"id": "Sample_27_R", "metadata": {"Sample": "Sample_27_R", "alpha_custom": "0.73", "StratifyLabel": "2", "Group": "Moderate_Dissimilarity", "ID": "Sample_27_R", "Label": "Class-Two"}},{"id": "Sample_28_R", "metadata": {"Sample": "Sample_28_R", "alpha_custom": "0.72", "StratifyLabel": "1", "Group": "Moderate_Dissimilarity", "ID": "Sample_28_R", "Label": "Class-Two"}},{"id": "Sample_29_R", "metadata": {"Sample": "Sample_29_R", "alpha_custom": "0.71", "StratifyLabel": "2", "Group": "Moderate_Dissimilarity", "ID": "Sample_29_R", "Label": "Class-One"}},{"id": "Sample_30_E", "metadata": {"Sample": "Sample_30_E", "alpha_custom": "0.7", "StratifyLabel": "1", "Group": "High_Dissimilarity", "ID": "Sample_30_E", "Label": "Class-Two"}},{"id": "Sample_31_E", "metadata": {"Sample": "Sample_31_E", "alpha_custom": "0.69", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_31_E", "Label": "Class-Two"}},{"id": "Sample_32_E", "metadata": {"Sample": "Sample_32_E", "alpha_custom": "0.68", "StratifyLabel": "1", "Group": "High_Dissimilarity_Feature", "ID": "Sample_32_E", "Label": "Class-Two"}},{"id": "Sample_33_E", "metadata": {"Sample": "Sample_33_E", "alpha_custom": "0.67", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_33_E", "Label": "Class-Two"}},{"id": "Sample_34_E", "metadata": {"Sample": "Sample_34_E", "alpha_custom": "0.66", "StratifyLabel": "1", "Group": "High_Dissimilarity", "ID": "Sample_34_E", "Label": "Class-One"}},{"id": "Sample_35_E", "metadata": {"Sample": "Sample_35_E", "alpha_custom": "0.65", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_35_E", "Label": "Class-One"}},{"id": "Sample_36_E", "metadata": {"Sample": "Sample_36_E", "alpha_custom": "0.64", "StratifyLabel": "1", "Group": "High_Dissimilarity", "ID": "Sample_36_E", "Label": 
"Class-One"}},{"id": "Sample_37_E", "metadata": {"Sample": "Sample_37_E", "alpha_custom": "0.63", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_37_E", "Label": "Class-One"}},{"id": "Sample_38_E", "metadata": {"Sample": "Sample_38_E", "alpha_custom": "0.62", "StratifyLabel": "1", "Group": "High_Dissimilarity", "ID": "Sample_38_E", "Label": "Class-One"}},{"id": "Sample_39_E", "metadata": {"Sample": "Sample_39_E", "alpha_custom": "0.61", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_39_E", "Label": "Class-Two"}},{"id": "Sample_40_E", "metadata": {"Sample": "Sample_40_E", "alpha_custom": "0.6", "StratifyLabel": "1", "Group": "High_Dissimilarity", "ID": "Sample_40_E", "Label": "Class-One"}},{"id": "Sample_41_E", "metadata": {"Sample": "Sample_41_E", "alpha_custom": "0.59", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_41_E", "Label": "Class-Two"}},{"id": "Sample_42_E", "metadata": {"Sample": "Sample_42_E", "alpha_custom": "0.58", "StratifyLabel": "1", "Group": "High_Dissimilarity", "ID": "Sample_42_E", "Label": "Class-Two"}},{"id": "Sample_43_E", "metadata": {"Sample": "Sample_43_E", "alpha_custom": "0.57", "StratifyLabel": "2", "Group": "High_Dissimilarity", "ID": "Sample_43_E", "Label": "Class-One"}},{"id": "Sample_44_T", "metadata": {"Sample": "Sample_44_T", "alpha_custom": "0.56", "StratifyLabel": "1", "Group": "Targeted_Feature", "ID": "Sample_44_T", "Label": "Class-Two"}},{"id": "Sample_45_T", "metadata": {"Sample": "Sample_45_T", "alpha_custom": "0.55", "StratifyLabel": "2", "Group": "Targeted_Feature", "ID": "Sample_45_T", "Label": "Class-Two"}},{"id": "Sample_46_T", "metadata": {"Sample": "Sample_46_T", "alpha_custom": "0.54", "StratifyLabel": "1", "Group": "Targeted_Feature", "ID": "Sample_46_T", "Label": "Class-Two"}},{"id": "Sample_47_T", "metadata": {"Sample": "Sample_47_T", "alpha_custom": "0.53", "StratifyLabel": "2", "Group": "Targeted_Feature", "ID": "Sample_47_T", "Label": 
"Class-Two"}}]}\n\\ No newline at end of file\n'

diff -r 000000000000 -r 2f4f6f08c8c4 input/TestFeatures.taxa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/TestFeatures.taxa Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,3 @@
+Root|Taxa_43
+Root|Taxa_19
+Root|Taxa_3

diff -r 000000000000 -r 2f4f6f08c8c4 input/Test_Matrix.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/input/Test_Matrix.txt Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,49 @@\n+\tSample_0_D\tSample_1_D\tSample_2_D\tSample_3_D\tSample_4_D\tSample_5_D\tSample_6_D\tSample_7_D\tSample_8_D\tSample_9_D\tSample_10_D\tSample_11_D\tSample_12_D\tSample_13_D\tSample_14_D\tSample_15_D\tSample_16_R\tSample_17_R\tSample_18_R\tSample_19_R\tSample_20_R\tSample_21_R\tSample_22_R\tSample_23_R\tSample_24_R\tSample_25_R\tSample_26_R\tSample_27_R\tSample_28_R\tSample_29_R\tSample_30_E\tSample_31_E\tSample_32_E\tSample_33_E\tSample_34_E\tSample_35_E\tSample_36_E\tSample_37_E\tSample_38_E\tSample_39_E\tSample_40_E\tSample_41_E\tSample_42_E\tSample_43_E\tSample_44_T\tSample_45_T\tSample_46_T\tSample_47_T\r\n+Sample_0_D\t0.0\t0.3504150231223735\t0.3774532814755867\t0.3498523857145207\t0.38460427869111374\t0.3684573878701818\t0.3942884495138759\t0.38442113907082526\t0.35972509349143367\t0.3503313862645051\t0.33248801726485966\t0.31498656531755315\t0.3942341294900376\t0.37691872196965986\t0.38506803012912605\t0.40343673351243525\t0.4642329552936743\t0.46327757133691333\t0.46381907488716906\t0.4618600444490561\t0.4642002980737147\t0.4630109221246633\t0.4632154661879316\t0.4637283539991759\t0.46308382425028444\t0.46301385613996987\t0.46375984183730984\t0.4634953549046846\t0.4635688923034126\t0.4641219606524568\t0.4907356279586906\t0.49025021878085095\t0.48094572139780023\t0.4800344179041085\t0.489881012077767\t0.4818947274226439\t0.48952425845281133\t0.4905534755560824\t0.48135203631818196\t0.4718635215269368\t0.4898600014443216\t0.4813349137840063\t0.48154364575925396\t0.4806321203597953\t0.49391802273735197\t0.4986688133323061\t0.4953322195388267\t0.49596318917365195\r\n+Sample_1_D\t0.3504150231223735\t0.0\t0.34990687415642924\t0.37804056462059465\t0.3320503081927189\t0.3402915420230556\t0.37791685442214407\t0.35943689445783744\t0.3596328925657078\t0.376858373865497\t0.3758152883772704\t0.33350908005263896\t0.41167572457511786\t0.37678662660456774\t0.40303268494031574\t0.3681904907503879\t0.46331866507608044\t0.462779422729878\t0.463582636940422
34\t0.4625806743587285\t0.463277690990464\t0.4630525876856449\t0.46298383477193006\t0.4642561244897644\t0.4645217075941841\t0.4639369713582843\t0.46329945469077466\t0.4639146441137947\t0.463007318770675\t0.463307176055478\t0.4732100717071456\t0.47198337234234367\t0.4812253017531194\t0.4810195560904611\t0.4816792295812404\t0.4898468979768662\t0.4899712925253329\t0.4808663981919258\t0.48167238628476633\t0.4721589058941818\t0.47221426569213504\t0.47331038683295035\t0.4904559533167224\t0.4718986049801861\t0.4926062109512548\t0.49621983236350575\t0.49659966327963156\t0.49709402570559835\r\n+Sample_2_D\t0.3774532814755867\t0.34990687415642924\t0.0\t0.4113389591418409\t0.378165235951646\t0.42062021435447655\t0.3675904747461084\t0.3747978922512954\t0.40297643061235044\t0.36752276282523993\t0.39423416141602996\t0.41151692300869547\t0.3683202817867162\t0.36681975316646004\t0.34158481204797986\t0.3860843947020536\t0.46252881559166836\t0.4642433915384347\t0.4622482749043408\t0.4641850558917974\t0.4623806469198768\t0.4628682242557616\t0.46369378341314976\t0.4636171590257385\t0.46389357257556946\t0.46392455885481554\t0.46312296201964426\t0.46423797566225483\t0.4635117693026363\t0.46348179210419205\t0.48137564708846353\t0.47305507177773737\t0.4805352268992408\t0.4898495343596132\t0.4805601079311763\t0.48122697697777655\t0.48106800576956765\t0.4725933812043244\t0.48182229639868557\t0.48139329533548064\t0.48056443059065074\t0.47256741970552285\t0.49049948497199125\t0.46333490991372994\t0.4939241197438564\t0.49406487910982544\t0.4933576629571626\t0.49125724041072816\r\n+Sample_3_D\t0.3498523857145207\t0.37804056462059465\t0.4113389591418409\t0.0\t0.35024175605832153\t0.3596738717040975\t0.341498293743435\t0.3420939770008637\t0.4038025007649029\t0.38565118250681873\t0.36813367547302267\t0.3316298213886089\t0.3676114464189733\t0.35145674807254823\t0.3594847704053428\t0.37692034582164535\t0.4628690756420117\t0.462295311277525\t0.4641679853319643\t0.4627293636562284\t0.4646914421283104\t
0.462602276291508\t0.4634107138030018\t0.46433757250421087\t0.4639770329016334\t0.46344745530023335\t0.46400331369113484\t0.4622017367604051\t0.46322063044770395\t0.4627493339884753\t0.4819967281571807\t0.4813260288089169\t0.4817536847'..b'625643\t0.4997258772765634\t0.49955179453951154\t0.499898365707218\t0.4999567965494394\t0.4999598687415415\t0.4997211365030237\t0.4989880580659021\t0.49973992062518047\t0.4999412943085991\t0.0\t0.4986592239527547\t0.5000000025204372\t0.4977004863530355\t0.4994265813220846\r\n+Sample_44_T\t0.49391802273735197\t0.4926062109512548\t0.4939241197438564\t0.49692113665299764\t0.49418502370670253\t0.4944075285089823\t0.4976302876701483\t0.49670044259466894\t0.4942650391688446\t0.4925678556528127\t0.49628087294452605\t0.49318894132223307\t0.49661303879086155\t0.4953764977478303\t0.4945418457589582\t0.4965499858653412\t0.4670524394015869\t0.46793657431726066\t0.4684230009977462\t0.497566580602127\t0.49805815076277976\t0.49954412998879455\t0.49839906660231603\t0.49753865870647024\t0.4985811328211148\t0.49801897149358243\t0.49916100765734833\t0.49826071996424265\t0.4972487991550752\t0.49659067050210093\t0.49890613345395624\t0.49959390784657803\t0.4364859288331077\t0.4976257770187389\t0.49788329615478827\t0.49942954583913984\t0.49819494236130957\t0.49877858445228734\t0.49975304204953674\t0.49802318566509585\t0.49934533171019463\t0.4997154953368182\t0.49969904645974317\t0.4986592239527547\t0.0\t0.019584678413384585\t0.020572419678914527\t0.02382744868352471\r\n+Sample_45_T\t0.4986688133323061\t0.49621983236350575\t0.49406487910982544\t0.49783002594076065\t0.4970222833076462\t0.4952002287934291\t0.49765492003134354\t0.4981610773730664\t0.4964175261360027\t0.496442400774936\t0.4978436632305417\t0.497377235319802\t0.4964069635868935\t0.4986785579560372\t0.49473118907375635\t0.495233084378049\t0.46732178603743507\t0.46724354578487626\t0.4696246471483272\t0.49593341432011817\t0.4964941161641117\t0.4993833044435624\t0.4995040216845463\t0.498820294028
9412\t0.499908277431166\t0.4973860553951744\t0.4997221989939481\t0.49828181911358993\t0.4980380222908761\t0.49959986957986746\t0.49939175447204953\t0.4990023316156439\t0.43556768580546923\t0.49755901009070364\t0.4978790310222312\t0.49929361488564705\t0.4998206572826379\t0.499442427992359\t0.49990948004085867\t0.49926592510115103\t0.4991406739662497\t0.4981141526178374\t0.4987431983000706\t0.5000000025204372\t0.019584678413384585\t0.0\t0.020356500840554154\t0.020309528543168948\r\n+Sample_46_T\t0.4953322195388267\t0.49659966327963156\t0.4933576629571626\t0.49519013298288145\t0.49716533470266455\t0.4989462356864437\t0.4983055113560354\t0.49567804492401724\t0.4972533400557505\t0.49644981377647496\t0.4928555186148247\t0.4960232379787571\t0.49620522086151514\t0.4951086811322987\t0.4941348960989179\t0.49878851161198384\t0.4670757372073602\t0.467930468554186\t0.46819776485750303\t0.49985427400841653\t0.4993705286897671\t0.49909074870966713\t0.4983854480199241\t0.49795596744384046\t0.49898766380725507\t0.4969531539886237\t0.4987068621605247\t0.4985573219900368\t0.4977489693773089\t0.49657609124345886\t0.4992563697754623\t0.49926086637911105\t0.43560702453330485\t0.49934652717746425\t0.4994211879727363\t0.49886573177148286\t0.49989804200971566\t0.4997290105615935\t0.49999999965393727\t0.4987021921154233\t0.4999999985971044\t0.49982093992442694\t0.49863624237062043\t0.4977004863530355\t0.020572419678914527\t0.020356500840554154\t0.0\t0.014540004047772032\r\n+Sample_47_T\t0.49596318917365195\t0.49709402570559835\t0.49125724041072816\t0.49743189664426457\t0.49548571290643806\t0.4975408880817702\t0.4936213729748551\t0.4943693831078025\t0.49719971864918266\t0.4999149825758584\t0.4935259454443311\t0.49512951123187854\t0.49531466612293806\t0.4919950885273459\t0.4911072716826435\t0.4989787474050167\t0.4677233683078083\t0.4667142330613921\t0.46965345679206993\t0.4983230565959039\t0.4981250418324548\t0.4996342401688223\t0.4966731958009985\t0.4981792735047705\t0.4962047228769835\t0.496
62604625923457\t0.4994405709575792\t0.4992935205600046\t0.49690019579876427\t0.49930936218645006\t0.4998597054553685\t0.4995959216444699\t0.43580100088133855\t0.4981129536704402\t0.4984689832743117\t0.4996988322235001\t0.49954149725920505\t0.4987908150358317\t0.49725522677241296\t0.4970767440747246\t0.4998721277887906\t0.49901987021286887\t0.49854474068122046\t0.4994265813220846\t0.02382744868352471\t0.020309528543168948\t0.014540004047772032\t0.0\r\n'

diff -r 000000000000 -r 2f4f6f08c8c4 micropita.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/micropita.xml Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,195 @@\n+<tool id="micropita" name="Run" version="1.0.1">\r\n+<code file="micropita_format_input_selector.py"/> \r\n+<description>micropita</description>\r\n+<command interpreter="python">micropita_prepare.py \r\n+--lastmeta $cls_x \r\n+-m $cond.method_sel\r\n+-n $selected_samples\r\n+--input $inp_data\r\n+--output $out_file1\r\n+--stratify_value $cls_s\r\n+\r\n+#if $cond.method_sel == "features":\r\n+\t--feature_method $cond.feature_method\r\n+\t--targets $cond.cls_f\r\n+#end if\r\n+#if $cond.method_sel == "distinct" or $cond.method_sel == "discriminant" :\r\n+\t--label_value $cond.cls_L\r\n+#end if\r\n+\r\n+</command>\r\n+ <inputs>\r\n+\t<param format="micropita" name="inp_data" type="data" label="Input file"/>\r\n+\r\n+\t<param name="cls_x" type="select" label="Last metadata row (Use \'Label\' for demo data)" multiple="False" size ="70" dynamic_options="get_cols(inp_data,\'0\')"/>\r\n+\r\n+\r\n+\t<param name="cond" type="data_column" data_ref="inp_data" accept_default="true" /> \r\n+\t\t<conditional name="cond" type="data_column" data_ref="inp_data" accept_default="true">\r\n+\t\t \t<param name="method_sel" type="select" data_ref="inp_data" label="Select method">\r\n+\t\t \t<option value="representative" selected="True">Representative</option>\r\n+\t\t\t\t<option value="diverse">Diverse</option>\r\n+\t\t\t\t<option value="extreme" >Extreme</option>\r\n+\t\t\t\t<option value="features" >Features</option>\r\n+\t\t\t\t<option value="distinct" >Distinct (Supervised)</option>\r\n+\t\t\t\t<option value="discriminant" >Discriminant (Supervised)</option>\r\n+\t\t\t</param>\r\n+\t\t \t<when value="representative">\r\n+\t\t\t</when>\r\n+\t\t \t<when value="diverse">\r\n+\t\t \t</when>\r\n+\t\t \t<when value="extreme">\r\n+\t\t \t</when>\r\n+\t\t \t<when value="features">\r\n+\t\t\t\t<param name="cls_f" type="select" label="Targeted feature(s)" multiple="True" size ="70" dynamic_options="get_cols_features(inp_data,\'0\',cls_x)"/>\r\n+\t\t\t\t<param 
name="feature_method" type="select" format="text">\r\n+\t\t\t\t\t<label>Selection type</label>\r\n+\t\t\t\t\t\t<option value="rank">Rank</option>\r\n+\t\t\t\t\t\t<option value="abundance">Abundance</option>\r\n+\t\t\t\t</param>\r\n+\t\t \t</when>\r\n+\t\t \t<when value="distinct">\r\n+\t\t\t\t<param name="cls_L" type="select" label="Label (Use \'Group\' for demo data)" multiple="False" size ="70" dynamic_options="get_cols_add_line(inp_data,\'0\',cls_x)"/>\r\n+\t\t \t</when>\r\n+\t\r\n+\t\t \t<when value="discriminant">\r\n+\t\t\t\t<param name="cls_L" type="select" label="Label (Use \'Group\' for demo data)" multiple="False" size ="70" dynamic_options="get_cols_add_line(inp_data,\'0\',cls_x)"/>\r\n+\t\t \t</when>\r\n+\r\n+ \t</conditional>\r\n+\r\n+\t\r\n+\t<param name="cls_s" type="select" label="Stratify by (optional) (Use \'StratifyLabel\' for demo data)" multiple="False" size ="70" dynamic_options="get_cols_add_line(inp_data,\'0\',cls_x)"/>\r\n+\r\n+\t<param name="selected_samples" type="integer" size="4" value="10" label="Number of samples to select"/>\r\n+\r\n+\t\r\n+\r\n+ </inputs>\r\n+\r\n+ <outputs>\r\n+ <data format="text" name="out_file1" />\r\n+ </outputs>\r\n+ <requirements>\r\n+ <requirement type="set_environment">micropita_SCRIPT_PATH</requirement>\r\n+ </requirements>\r\n+ <help>\r\n+\t \r\n+\r\n+microbiome: Picking Interesting Taxonomic Abundance\r\n+---------------------------------------------------\r\n+\r\n+\r\n+\r\n+microPITA is a computational tool enabling sample selection in tiered studies. Using tiered-study designs can more efficiently allocate resources, reducing study costs, and maximizing the use of samples. From a survey study, selection of samples can be performed to target various microbial communities including:\r\n+\r\n+1. Samples with the most diverse community (maximum diversity);\r\n+2. Samples dominated by specific microbes (targeted feature);\r\n+3. 
Samples with microbial communities representative of the survey (representative dissimilarity);\r\n+4. Samples with the most extreme microbial communities in the survey (most dissimilar);\r\n+5. Given a phenotype (like disease state), samples at the border of phenotypes (discriminant) or sa'..b'stly, rows containing features (bugs) measurements (like abundance) should be after metadata rows.\r\n+5. The **first column** should contain the ID describing the column. For metadata this may be, for example, "Age" for a row containing the age of the patients donating the samples. For measurements, this should be the feature name (bug name).\r\n+6. The file is expected to be TAB delimited.\r\n+7. If a consensus lineage or hierarchy of taxonomy is contained in the feature name, the default delimiter between clades is the pipe ("|").\r\n+\r\n+**Note** MAC users, please save file as windows formatted text.\r\n+\r\n+.. image:: https://bytebucket.org/biobakery/galaxy_micropita/wiki/pcl_diagram.png\r\n+ :height: 500 \r\n+ :width: 600 \r\n+\r\n+Outputs\r\n+-------\r\n+\r\n+The Run MicroPITA module will create one output text file. The output will consist of one line starting with a key word for the selection method and then followed by selected samples delimited by tabs. An example of 6 samples selected by the representative:\r\n+\r\n+representative\tsample_1\tsample_2\tsample_3\tsample_4\tsample_5\tsample_6\r\n+\r\n+\r\n+\t \r\n+\t \r\n+Run microPITA\r\n+-------------\r\n+\r\n+A brief description of the Run micropita module.\r\n+\r\n+**Input file:**\r\n+This should be populated by the Load microPITA module.\r\n+\r\n+**Last metadata row:**\r\n+The row on the input pcl file that is the last metadata. All microbial measurements should follow this row.\r\n+\r\n+**Select method:**\r\n+Select which method to use for sample selection. Selection methods include:\r\n+\r\n+1. Representative. 
Samples with microbial communities representative of the survey (representative dissimilarity);\r\n+2. Diverse. Samples with the most diverse community (maximum diversity);\r\n+3. Extreme. Samples with the most extreme microbial communities in the survey (most dissimilar);\r\n+4. Features. Samples dominated by specific microbes (targeted feature);\r\n+5. Distinct. Given a phenotype (like disease state), samples typical of each phenotype (Distinct). \r\n+6. Discriminant. Given a phenotype (like disease state), samples at the border of phenotypes (Discriminant). \r\n+\r\n+**Targeted feature(s):** (visible with Features method selection only)\r\n+Select 1 or more features to target in sample selection.\r\n+\r\n+**Selection type:** (visible with Features method selection only)\r\n+Rank or Abundance. \r\n+\r\n+1. Rank indicates selecting samples that have the highest rank of the Targeted features(s), this tends to select sample in which these feature dominant the sample.\r\n+2. Abundance indicates selecting samples that have the highest average abundance of the Targeted features(s), this selects samples where features are most abundant but not necessarily dominant in the community.\r\n+\r\n+**Label:** (visible with supervised method selection only)\r\n+The row which contains the label used to classify the samples from supervised methods.\r\n+\r\n+**Stratify by (optional):**\r\n+The row which contains the groupings the samples will first be placed in before running the selection method on each group. If no grouping is selected, selection methods will be performed on the data set as a whole.\r\n+\r\n+**Number of samples to select:**\r\n+The number of samples to select. If samples are stratified, this is per stratification (or group). 
If supervised methods are used, this is the number of samples selected per classification group (as defined by the label).\r\n+\r\n+For more information please visit http://huttenhower.sph.harvard.edu/micropita\r\n+\r\n+\r\n+Acknowledgments\r\n+---------------\r\n+Special thanks to Eric Franzosa for developing the above PCL figure!\r\n+\r\n+Citation and Contacts\r\n+---------------------\r\n+\r\n+For more information please visit http://huttenhower.sph.harvard.edu/micropita\r\n+When using MicroPITA please cite:\r\n+Tickle T, Segata N, Waldron L, Weingart G, Huttenhower C. Two-stage microbial community experimental design. (Under review)\r\n+\r\n+Please feel free to contact us at ttickle@hsph.harvard.edu for any questions or comments!\r\n+ \r\n+\t \r\n+ </help>\r\n+</tool>\r\n'

diff -r 000000000000 -r 2f4f6f08c8c4 micropita_format_input_selector.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/micropita_format_input_selector.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+
+"""
+Author: George Weingart
+Description: Dynamically read columns from input file for UI
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "George Weingart"
+__copyright__ = "Copyright 2012"
+__credits__ = ["George Weingart"]
+__license__ = "MIT"
+__maintainer__ = "George Weingart"
+__email__ = "george.weingart@gmail.com"
+__status__ = "Development"
+
+import sys,string,time
+from pprint import pprint
+
+def red(st,l):
+    """
+    Shorten a string for display by keeping only its two ends.
+
+    st -- the string to shorten.
+    l  -- the longest length that is returned untouched.
+
+    Returns st unchanged when len(st) <= l. Otherwise returns the first l//2
+    characters, "..", and the last l//2 characters of st (so the result can be
+    up to l + 2 characters long).
+    """
+    if len(st) <= l: return st
+    # Floor division keeps the slice bounds integral on Python 3 as well as on
+    # Python 2; plain "/" yields a float there, which cannot index a string.
+    half = l // 2
+    return st[:half] + ".." + st[len(st) - half:]
+
+def get_cols(data,full_names):
+    """
+    Build the selector options for every row of an input table.
+
+    data       -- "" when no dataset is available; otherwise an object whose
+                  data.dataset.file_name is the path of a tab-delimited file.
+    full_names -- not used by this function.
+
+    Returns a list of (label, value, selected) tuples, one per line whose first
+    column is non-empty. label is the first whitespace-delimited word of that
+    column passed through red() with a limit of 32, value is the 1-based line
+    number as a string, and selected is always False.
+    If the labels cannot be built (for example when a first column holds only
+    whitespace), the single fallback option ['*ALL*', 1, False] is returned.
+    """
+    if data == "": return []
+    max_len = 32
+    fname = data.dataset.file_name
+    # 'rU' asks Python 2 for universal newlines; Python 3 does that by default
+    # and recent versions reject the 'U' flag.
+    mode = 'rU' if sys.version_info[0] < 3 else 'r'
+    # The with-statement closes the file even if reading fails.
+    with open(fname,mode) as input_file:
+        input_lines = input_file.readlines()
+    # Only the first tab-delimited column (the row name) is of interest.
+    table_lines = [x.split('\t')[0] for x in input_lines]
+
+    try:
+        lines = [(red(v.split()[0],max_len),'%d' % (i+1),False) for i,v in enumerate(table_lines) if v]
+    except Exception:
+        # Best effort: offer one catch-all entry rather than failing the UI.
+        lines = [['*ALL*',1,False]]
+    return lines
+
+def get_cols_add_line(data,full_names,lastmeta):
+    """
+    Build selector options for the leading rows of a table, preceded by a
+    '-' placeholder entry.
+
+    data       -- "" when no dataset is available; otherwise an object whose
+                  data.dataset.file_name is the path of a tab-delimited file.
+    full_names -- not used by this function.
+    lastmeta   -- value convertible to int giving how many rows to offer after
+                  the '-' entry. When it cannot be converted, or equals 1, the
+                  list is not truncated and every row is offered.
+
+    Returns a list of (label, value, selected) tuples: label is the first word
+    of the row's first column passed through red() with a limit of 32, value
+    is the 1-based position as a string (the '-' entry has value '1'), and
+    selected is always False.
+    If the labels cannot be built (for example when a first column holds only
+    whitespace), the single fallback option ['*ALL*', 1, False] is returned.
+    """
+    if data == "": return []
+    display_to = 1
+    try:
+        display_to = int(lastmeta)
+    except Exception:
+        # Best effort: an unusable lastmeta keeps the default of 1.
+        pass
+
+    max_len = 32
+    fname = data.dataset.file_name
+    # 'rU' asks Python 2 for universal newlines; Python 3 does that by default
+    # and recent versions reject the 'U' flag.
+    mode = 'rU' if sys.version_info[0] < 3 else 'r'
+    # The with-statement closes the file even if reading fails.
+    with open(fname,mode) as input_file:
+        input_lines = input_file.readlines()
+    # Only the first tab-delimited column (the row name) is of interest.
+    table_lines = [x.split('\t')[0] for x in input_lines]
+    # The placeholder takes position 1, shifting every real row down by one.
+    table_lines.insert(0,'-')
+    if display_to != 1:
+        # Keep the placeholder plus the first display_to rows.
+        del table_lines[display_to + 1:]
+
+    try:
+        lines = [(red(v.split()[0],max_len),'%d' % (i+1),False) for i,v in enumerate(table_lines) if v]
+    except Exception:
+        # Best effort: offer one catch-all entry rather than failing the UI.
+        lines = [['*ALL*',1,False]]
+    return lines
+
+def get_cols_features(data,full_names,lastmeta):
+    """
+    Build selector options for the rows that follow the leading rows of a table.
+
+    data       -- "" when no dataset is available; otherwise an object whose
+                  data.dataset.file_name is the path of a tab-delimited file.
+    full_names -- not used by this function.
+    lastmeta   -- value convertible to int giving how many leading rows to
+                  skip. When it cannot be converted, 1 row is skipped.
+
+    Returns a list of (label, value, selected) tuples for the remaining rows:
+    label is the first word of the row's first column passed through red()
+    with a limit of 32, value is the 1-based position counted after the
+    skipped rows (as a string), and selected is always False.
+    If the labels cannot be built (for example when a first column holds only
+    whitespace), the single fallback option ['*ALL*', 1, False] is returned.
+    """
+    if data == "": return []
+    display_from = 1
+    try:
+        display_from = int(lastmeta)
+    except Exception:
+        # Best effort: an unusable lastmeta keeps the default of 1.
+        pass
+    max_len = 32
+    fname = data.dataset.file_name
+    # 'rU' asks Python 2 for universal newlines; Python 3 does that by default
+    # and recent versions reject the 'U' flag.
+    mode = 'rU' if sys.version_info[0] < 3 else 'r'
+    # The with-statement closes the file even if reading fails.
+    with open(fname,mode) as input_file:
+        input_lines = input_file.readlines()
+    # Only the first tab-delimited column (the row name) is of interest.
+    table_lines = [x.split('\t')[0] for x in input_lines]
+
+    # Drop the leading rows so that numbering restarts at the first kept row.
+    del table_lines[:display_from]
+    try:
+        lines = [(red(v.split()[0],max_len),'%d' % (i+1),False) for i,v in enumerate(table_lines) if v]
+    except Exception:
+        # Best effort: offer one catch-all entry rather than failing the UI.
+        lines = [['*ALL*',1,False]]
+    return lines

diff -r 000000000000 -r 2f4f6f08c8c4 micropita_galaxy_ReadMe.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/micropita_galaxy_ReadMe.txt Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,34 @@
+#Installation instructions for microPITA in a galaxy environment.
+These instructions require the Mercurial versioning system, galaxy, and an internet connection.
+
+#For general information about microPITA please refer to:
+```
+https://bitbucket.org/biobakery/micropita
+```
+
+
+
+
+#Installation Instructions
+In the  "galaxy-dist/tools" directory install micropita by typing in a terminal:
+```
+hg clone https://bitbucket.org/biobakery/micropita
+```
+
+
+Update the file tool_conf.xml in the galaxy directory by adding the following:
+```
+  <section name="micropita" id="micropita">
+    <tool file="micropita/galaxy/micropita.xml"/>
+  </section>
+```
+
+Update the file datatypes_conf.xml in the galaxy directory by adding the following:
+```
+ <datatype extension="micropita" type="galaxy.datatypes.data:Text" subclass="true" display_in_upload="true"/>
+```
+
+Copy the 2 *.png files to /galaxy/static/images
+
+Restart galaxy
+

diff -r 000000000000 -r 2f4f6f08c8c4 micropita_prepare.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/micropita_prepare.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+"""
+Author: George Weingart
+Description: Prepare parameters to call micropita
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "George Weingart"
+__copyright__ = "Copyright 2012"
+__credits__ = ["George Weingart"]
+__license__ = "MIT"
+__maintainer__ = "George Weingart"
+__email__ = "george.weingart@gmail.com"
+__status__ = "Development"
+
+import argparse
+from cStringIO import StringIO
+import sys,string,time
+import os
+from time import gmtime, strftime
+from pprint import pprint
+import subprocess
+import blist
+import shlex
+import tempfile
+
+##################################################################################
+#   Modification by George Weingart    5/6/2014                                  #
+#   Using subprocess to invoke the calls to Micropita                            #
+#   and allocating the temporary file using tempfile                             #
+##################################################################################
+
+
+
+
+##################################################################################
+#  Decode Parms                                                                  #
+##################################################################################
+def read_params(x):
+    """
+    Create the command line parser for this wrapper script.
+
+    x -- not used; the returned parser reads the command line when its
+         parse_args() method is called.
+
+    Returns an argparse.ArgumentParser. Every option stores a single string
+    and is None when absent from the command line.
+    """
+    parser = argparse.ArgumentParser(description='Micropita Annotate Argparser')
+    # (flag, attribute name on the parsed result), in help-output order.
+    options = (
+        ('--input', 'inputname'),
+        ('--output', 'outputname'),
+        ('-m', 'MParameter'),
+        ('-n', 'NSamples'),
+        ('--lastmeta', 'lastmeta'),
+        ('--stratify_value', 'stratify_value'),
+        ('--feature_method', 'feature_method'),
+        ('--targets', 'targets'),
+        ('--label_value', 'label_value'),
+    )
+    # No try/except here: add_argument only fails on a programming error
+    # (such as a duplicated flag), which should not be silently ignored.
+    for flag, dest in options:
+        parser.add_argument(flag, action="store", dest=dest)
+    return parser
+
+
+##################################################################################
+#  Main Program                                                                  #
+##################################################################################
+parser = read_params( sys.argv )
+results = parser.parse_args()
+# Directory that holds MicroPITA.py, taken from the environment.
+root_dir = os.environ.get('micropita_SCRIPT_PATH')
+
+
+# Read the first (tab-delimited) column of every line of the input table;
+# the numeric parameters below are 1-based positions in this list.
+fname = results.inputname
+with open(fname,'rU') as input_file:
+    input_lines = input_file.readlines()
+table_lines = [x.split('\t')[0] for x in input_lines]
+
+
+def is_given(value):
+    """Tell whether an optional parameter carries a real value (not empty, not the text "None")."""
+    return bool(value) and not value == "None"
+
+
+# --lastmeta and --label_value are 1-based positions in the table.
+LastMetaInt = 0
+if is_given(results.lastmeta):
+    LastMetaInt = int(results.lastmeta) - 1
+
+# --stratify_value is offset by two, and the value '1' means "do not stratify".
+# NOTE(review): presumably because the selector list starts with a '-' entry
+# (see get_cols_add_line in micropita_format_input_selector.py) - confirm.
+StratifyValueInt = 0
+if is_given(results.stratify_value):
+    StratifyValueInt = int(results.stratify_value) - 2
+
+# Fall back to the last-metadata row when no label row was selected.
+LabelValueInt = LastMetaInt
+if is_given(results.label_value):
+    LabelValueInt = int(results.label_value) - 1
+
+# Only stratify when a real row was selected; an absent or "None" value used
+# to end up stratifying by the first row of the table.
+stratify_args = []
+if is_given(results.stratify_value) and not results.stratify_value == '1':
+    stratify_args = ["--stratify", table_lines[StratifyValueInt].strip()]
+
+
+def run_micropita(method_args):
+    """
+    Run MicroPITA.py exactly once and stop with exit status 1 if it fails.
+
+    method_args -- list of extra arguments placed right after --lastmeta.
+    """
+    if not root_dir:
+        sys.stderr.write("The environment variable micropita_SCRIPT_PATH is not set\n")
+        sys.exit(1)
+    # An argument list (rather than a command string that is split again)
+    # keeps names and paths containing spaces intact.
+    command = ["python", root_dir + "/MicroPITA.py",
+               "--lastmeta", table_lines[LastMetaInt].strip()] + \
+              method_args + \
+              ["-m", results.MParameter, "-n", results.NSamples] + \
+              stratify_args + \
+              [results.inputname, results.outputname]
+    try:
+        subprocess.check_call(command, shell=False)
+    except (subprocess.CalledProcessError, OSError):
+        sys.stderr.write("The call to micropita failed=============\n")
+        # A non-zero status lets the caller see that the run failed.
+        sys.exit(1)
+
+
+if results.MParameter == "features":
+    if not results.targets or not results.feature_method:
+        sys.stderr.write("The features method requires --targets and --feature_method\n")
+        sys.exit(1)
+    # Selected targets are positions counted after the metadata rows;
+    # adding LastMetaInt turns them into indices of table_lines.
+    TBTargets = set(int(t) + LastMetaInt for t in results.targets.split(','))
+
+    # MicroPITA reads the targeted feature names from a file, one per line.
+    OutTargetsFile = tempfile.NamedTemporaryFile('w', delete=False )
+    TempTargetsFileName = OutTargetsFile.name
+    try:
+        try:
+            for indx, row_name in enumerate(table_lines):
+                if indx in TBTargets:
+                    OutTargetsFile.write(row_name + "\n")
+        finally:
+            OutTargetsFile.close()
+        # NOTE(review): ConstantsMicropita declares this flag as "--targets";
+        # "--target" presumably relies on argparse prefix matching - confirm.
+        run_micropita(["--feature_method", results.feature_method,
+                       "--target", TempTargetsFileName])
+    finally:
+        # The file was created with delete=False, so remove it explicitly.
+        os.remove(TempTargetsFileName)
+
+elif results.MParameter in ("representative", "diverse", "extreme"):
+    run_micropita([])
+
+elif results.MParameter in ("distinct", "discriminant"):
+    # Supervised methods are labelled by the row chosen with --label_value.
+    run_micropita(["--label", table_lines[LabelValueInt].strip()])

diff -r 000000000000 -r 2f4f6f08c8c4 pcl_diagram.png

Binary file pcl_diagram.png has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/ConstantsMicropita.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ConstantsMicropita.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,208 @@\n+"""\n+Author: Timothy Tickle\n+Description: Constants.\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+from breadcrumbs.src.Metric import Metric\n+\n+class ConstantsMicropita():\n+ """\n+ Class to hold project constants.\n+ """\n+\n+ #Character Constants\n+ COLON = ":"\n+ COMMA = ","\n+ FASTA_ID_LINE_START = ">"\n+ QUOTE = "\\""\n+ TAB = \'\\t\'\n+ WHITE_SPACE = " "\n+ PIPE = "|"\n+ c_outputFileDelim = \'\\t\'\n+\n+ c_sEmptyPredictFileValue = \'NA\'\n+\n+ #Used to stop divide by zero errors\n+ c_smallNumber = 0.00000000001\n+\n+ #SVM related\n+ c_COST_RANGE_KEY = "range"\n+ c_lCostRange = [-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10]\n+ c_SCALED_FILE_EXT = ".scaled"\n+ c_intScaleLowerBound = 0\n+ #LIBSVM file extensions\n+ c_SCALING_PARAMETERS = ".range"\n+ c_CV_FILE_EXT = ".cv.out"\n+ c_CV_IMAGE_EXT = ".cv.png"\n+ c_MODEL_FILE_EXT = ".model"\n+ c_PREDICT_FILE_EXT = ".predict"\n+ c_fProbabilitistic = True\n+ c_SCALED_FOR_PREDICTION_FILE_EXT = ".scaledForpredict"\n+\n+ #SVM output Dictionary keywords for files\n+ c_strKeywordInputFile = "INPUT"\n+ c_strKeywordScaledFile = "SCALED"\n+ c_strKeywordRangeFile = "RANGE"\n+ c_strKeywordCVOutFile = "CV_OUT"\n+ c_strKeywordModelFile = "MODEL"\n+ c_strKeywordScaledPredFile = "SCALED_FOR_PREDICTION"\n+ c_strKeywordPredFile = "PREDICTION"\n+ c_strKeywordCostValue = "C"\n+ c_strKeywordAccuracy = "ACCURACY"\n+\n+ #Default values for missing data in the Abundance Table\n+ c_strEmptyAbundanceData = "0"\n+ c_strEmptyDataMetadata = "NA"\n+ lNAs = 
list(set(["NA","na","Na","nA",c_strEmptyDataMetadata]))\n+\n+ #Occurence filter [min abundance, min samples occuring in]\n+ #To turn off make == [0,0]\n+ c_liOccurenceFilter = [0,0]\n+\n+ #Break ties in targeted feature with diversity\n+ c_fBreakRankTiesByDiversity = False\n+\n+ ####Commandline arguments\n+ #a Custom diversity metrics found in cogent\n+ c_strCustomAlphaDiversityHelp = "A key word for any PyCogent supplied alpha diveristy metric (Richness, evenness, or diversity). Please supply an unnormalized (counts) abundance table for these metrics. Metrics include "+" ".join(Metric.setAlphaDiversities)+"."\n+\n+ #b Custom diversity metrics found in cogent\n+ c_strCustomBetaDiversityHelp = "A key word for any PyCogent supplied beta diversity metric. Metrics include "+" ".join(list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted])+"."\n+\n+ #c,checked Checked abundance file\n+ c_strCheckedAbundanceFileAr'..b'#l,lastmeta The name of the last metadata\n+ c_strLastMetadataNameArgument = "--lastmeta"\n+ c_strLastMetadataNameHelp = "The row in the abundance file that is the sample name/id row. Should be the metadata name/Id in first column of the metadta row."\n+\n+ #m,method\n+ c_strSelectionTechniquesHelp = "Select techniques listed one after another."\n+\n+ #n,num The Number of unsupervised sample selection\n+ c_strCountArgument = "-n"\n+ c_strCountHelp = "The number of samples to select with unsupervised methodology. 
(An integer greater than 0.)."\n+\n+ #o,tree\n+ c_strCustomPhylogeneticTreeHelp = "Tree for phylogenetic when selecting custom beta-diversities in the representative sampling criteria."\n+\n+ #p,suppredfile File path fo the predict file for the supervised methods\n+ c_strSupervisedPredictedFile = "--suppredfile"\n+ c_strSupervisedPredictedFileHelp = "The file path for the predict file."\n+\n+ #q,alphameta\n+ c_strCustomAlphaDiversityMetadataHelp = "Metric in the pcl file which has custom alpha diversity measurements to use with the highest diversity sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most diverse."\n+\n+ #r,targetmethod Taxa selection method\n+ c_strTargetedFeatureMethodArgument = "--feature_method"\n+ c_strTargetedFeatureMethodHelp = "The ranking method used to select targeted features."\n+\n+ #s,stratify Unsupervised stratify metadata\n+ c_strUnsupervisedStratifyMetadataArgument = "--stratify"\n+ c_strUnsupervisedStratifyMetadataHelp = "The metatdata to stratify unsupervised analysis."\n+\n+ #t,target Targeted feature file\n+ c_strTargetedSelectionFileArgument = "--targets"\n+ c_strTargetedSelectionFileHelp = "A file containing taxa/OTUs/clades to be used in targeted feature sampling criteria."\n+\n+ #u,supinputfile File path for the input file for the supervised methods\n+ c_strSupervisedInputFile = "--supinputfile"\n+ c_strSupervisedInputFileHelp = "The file path for the input file for supervised methods."\n+\n+ #v,logging String for logging level\n+ c_strLoggingArgument = "--logging"\n+ c_strLoggingHelp = "".join(["Logging level which will be logged to a .log file with the",\n+ " same name as the strOutFile (but with a .log extension). 
Valid values are DEBUG, INFO, WARNING, ERROR, or CRITICAL."])\n+ c_lsLoggingChoices = ["DEBUG","INFO","WARNING","ERROR","CRITICAL"]\n+\n+ #w, Last Feature Metadata indicator\n+ c_strFeatureMetadataArgument = "--lastFeatureMetadata"\n+ c_strFeatureMetadataHelp = "The last metadata describing a (bug) feature (not sample). Not all studies have feature metadata, if so this can be ignored and not used. See doc for PCL-Description.txt"\n+\n+ #x,betamatrix\n+ c_strCustomBetaDiversityMatrixHelp = "Precalculated beta-diversity matrix to be used in the representative sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most dissimilar."\n+\n+ #Order is important, the first is the default\n+ c_strTargetedRanked = "rank"\n+ c_strTargetedAbundance = "abundance"\n+ lsTargetedFeatureMethodValues = [c_strTargetedRanked, c_strTargetedAbundance]\n+\n+ #Selection methods\n+ c_strDiversity = "diverse"\n+ c_strExtreme = "extreme"\n+ c_strDiscriminant = "discriminant"\n+ c_strDistinct = "distinct"\n+ c_strRandom = "random"\n+ c_strRepresentative = "representative"\n+ c_strFeature = "features"\n+ c_custom = "custom"\n+ c_lsAllUnsupervisedMethods = [c_strRepresentative,c_strDiversity,c_strExtreme,c_strFeature,c_strRandom]\n+ c_lsAllSupervisedMethods = [c_strDiscriminant,c_strDistinct]\n+ c_lsAllMethods = c_lsAllUnsupervisedMethods + c_lsAllSupervisedMethods\n+\n+ #Technique Names\n+ c_strDiversity2 = c_strDiversity+"_C"\n+\n+ ####################################\n+ #Arguments without commandline flags\n+ c_strAbundanceFileHelp = "Input file as either a PCL or Biome file."\n+ c_strGenericOutputDataFileHelp = "The generated output data file."\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/ConstantsMicropita.pyc

Binary file src/ConstantsMicropita.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/__init__.pyc

Binary file src/__init__.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/README.md Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,106 @@
+# BreadCrumbs #
+
+BreadCrumbs is an unofficial collection of scripts and code intended to consolidate functions for tool development and contain scripts for command line access to commonly used functions. Breadcrumbs tends to include functionality associated with metagenomics analysis but you never know what you will find!
+
+
+## Dependencies: ##
+
+1. Cogent https://pypi.python.org/pypi/cogent
+2. MatplotLib http://matplotlib.org/downloads.html
+3. Mercurial http://mercurial.selenic.com/ (optional for downloading)
+4. Numpy http://www.numpy.org/
+5. Python 2.x http://www.python.org/download/
+6. SciPy http://www.scipy.org/install.html
+7. biom support http://biom-format.org/
+
+
+## How to download ##
+
+To download BreadCrumbs from BitBucket use the command:
+
+> hg clone https://bitbucket.org/timothyltickle/breadcrumbs
+
+To update BreadCrumbs, in the BreadCrumbs directory use the 2 commands sequentially:
+
+> hg pull
+> hg update
+
+
+## Scripts: ##
+
+Scripts are included to expose core functionality through the command line. Currently these scripts center on manipulating and visualizing abundance tables.
+Quick descriptions of the scripts follow:
+
+* *Hclust.py* Flexible script to create a visualization of hierarchical clustering of abundance tables (or other matrices).
+
+* *scriptBiplotTSV.R* Allows one to plot a tsv file as a biplot using nonmetric multidimensional scaling.
+
+* *scriptPlotFeature.py* Allows one to plot a histogram, boxplot, or scatter plot of a bug or metadata in an abundance table. Will work on any row in a matrix.
+
+* *scriptManipulateTable.py* Allows one to perform common functions on an abundance table, including summing, normalizing, filtering, and stratifying tables.
+
+* *scriptPcoa.py* Allows one to plot a principal coordinates analysis (PCoA) plot of an abundance table.
+
+* *scriptConvertBetweenBIOMAndPCL.py* Allows one to convert between BIOM and PCL file formats.
+
+
+## Programming Classes: ##
+
+Brief descriptions of classes are as follows. More detailed descriptions are given in the classes themselves.
+
+* *AbundanceTable* Data structure to contain and perform operations on an abundance table.
+
+* *BoxPlot* Wrapper to plot box plots.
+
+* *CClade* Helper object used in hierarchical summing and normalization
+
+* *Cladogram* Object that manipulated an early dendrogram visualization. Deprecated, should use the GraPhlan visualization tool on bitbucket instead.
+
+* *CommandLine* Collection of code to work with command line. Deprecated. Should use sfle calls.
+
+* *ConstantsBreadCrumbs* Contains generic constants.
+
+* *ConstantsFiguresBreadCrumbs* Contains constants associated with formatting figures.
+
+* *KMedoids* Code from MLPY which performs KMedoids sample selection.
+
+* *MLPYDistanceAdaptor* Used to allow custom distance matrices to be used by KMedoids.
+
+* *Metric* Difference functions associated with distance and diversity metrics.
+
+* *PCoA* Functionality surrounding the plotting of a PCoA
+
+* *PlotMatrix* Allows one to plot a matrix of numbers.
+
+* *SVM* Support Vector Machine associated scripts.
+
+* *Utility* Generic functions
+
+* *UtilityMath* Generic math related functions
+
+* *ValidateData* Collection of functions to validate data types when needed.
+
+
+## Demo input files: ##
+
+* *fastunifrac_Ley_et_al_NRM_2_sample_id_map.txt* Example Unifrac Id mapping file (source http://bmf2.colorado.edu/fastunifrac/tutorial.psp)
+
+* *GreenGenesCore-May09.ref.tre* Example Greengenes core set reference for Unifrac demo (source http://bmf2.colorado.edu/fastunifrac/tutorial.psp)
+
+* *Test.pcl* Example file / Test PCL file to run scripts on.
+
+* *Test.biom* Example file / Test BIOM file to run scripts on.
+
+* *Test_no_metadata.pcl* Example file / Test PCL file to run scripts on which does not have metadata.
+
+* *Test_no_metadata.biom* Example file / Test BIOM file to run scripts on which does not have metadata.
+
+* *Test-Biplot.tsv* Example file / Test file for the scriptBiplotTSV.R
+
+
+## Contributing Authors: ##
+Timothy Tickle, George Weingart, Nicola Segata, Curtis Huttenhower
+
+
+## Contact: ##
+Please feel free to contact ttickle@hsph.harvard.edu with questions.

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/__init__.pyc

Binary file src/breadcrumbs/__init__.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/GreenGenesCore-May09.ref.tre
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/GreenGenesCore-May09.ref.tre Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,2 @@\n+((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((185648:0.04227,190891:0.04565):0.03317,198806:0.03807):0.03385,(189521:0.03381,194180:0.04431):0.00527)adhufec43:0.00520,(107329:0.05109,134265:0.08790):0.00857):0.00412,108139:0.05816):0.01745,(((114813:0.06855,56320:0.05242):0.00845,55854:0.05484):0.00743,115051:0.07172):0.00335):0.00164,2200:0.04835):0.01242,68416:0.04600):0.00247,(((102007:0.06112,104511:0.04475)RC11:0.04600,(229211:0.07581,36896:0.03548):0.00677):0.01000,(114946:0.04569,130804:0.05242):0.01849):0.00330):0.00984,(((102469:0.04858,106618:0.03644):0.00503,2170:0.02916)Prevotella:0.04830,(25004:0.05331,31856:0.06384):0.00761):0.00166):0.02376,192222:0.08949):0.00764,157387:0.06134):0.01165,((((((225558:0.03560,28952:0.02736)DO039:0.03401,73937:0.03641):0.00416,(106015:0.02958,157425:0.07769):0.00759):0.01233,(20534:0.04248,83456:0.03320):0.01333):0.00412,30384:0.08333):0.01347,160648:0.06392):0.03408):0.00167,((((((100912:0.09780,211356:0.08210):0.00434,209576:0.06371):0.03422,83963:0.07264):0.00929,((208212:0.06053,26888:0.06634):0.00847,103876:0.07195):0.02700):0.00417,((((204894:0.09617,234111:0.06063):0.02667,68356:0.08158):0.01185,234668:0.06877):0.00168,(167734:0.03160,189330:0.09330):0.01108):0.00991):0.00990,34809:0.07579):0.00334)Prevotellaceae:0.02712,(((((((((186688:0.03732,195634:0.04082):0.00873,183721:0.04661)RL303_aal70c09:0.02605,(187087:0.05165,191786:0.05178):0.03723):0.00349,(184646:0.12544,187452:0.03393):0.00453)RL304_aal76g04:0.04040,199514:0.04153):0.05699,((162684:0.08071,183870:0.06864):0.00696,198706:0.06107):0.00417):0.00413,190309:0.02792):0.00082,((((((((102620:0.05353,203004:0.03966):0.00172,102407:0.03312):0.01396,114500:0.04604):0.00332,(137514:0.02744,30861:0.06366):0.01596):0.00408,149504:0.01612):0.00489,(175182:0.06277,182569:0.10687):0.01103):0.00490,139167:0.01616):0.00817,2045:0.01942):0.00164):0.01867,((((((((158207:0.07593,158624:0.06624)CFT112E11:0.00689,172821:0.031
48):0.00491,(157565:0.05250,158580:0.08165):0.04193):0.01333,210534:0.02421):0.00576,133416:0.03632):0.00905,28056:0.05224):0.01157,174109:0.10602):0.00427,197735:0.10746):0.00677):0.00576):0.00992,(142859:0.04996,142871:0.05771):0.02254):0.02487,102227:0.06388)Bacteroidaceae:0.00587,153657:0.07229):0.00167,((((((((((((((((((((((177371:0.03581,185012:0.04263):0.01242,133805:0.03038):0.00259,191536:0.09941):0.00268,(184028:0.04855,193506:0.05200):0.05775)M1_e11_3:0.01571,97196:0.04784):0.00336,((((179616:0.06356,196985:0.03396):0.00792,179958:0.06706):0.01322,98683:0.03943):0.00500,227615:0.08420):0.01528):0.00581,(157824:0.10914,234795:0.08891):0.02264):0.00501,38438:0.05008):0.02161,99034:0.05578):0.01755,163328:0.03393):0.00579,45604:0.04988):0.01158,((132011:0.03388,135872:0.03365):0.00176,179000:0.09822):0.00356):0.00329,(175763:0.04068,186820:0.05593):0.11838):0.00337,(176174:0.05419,18983:0.03955):0.00671):0.00408,98087:0.04839):0.00248,(((((177014:0.04996,38834:0.01130):0.00746,135808:0.04526):0.00662,(130860:0.06831,131443:0.02448):0.02548)rc5-47:0.00411,(178045:0.08213,182458:0.02957):0.00978):0.01149,(187947:0.05433,197891:0.04576)C13_G12:0.02747):0.00165):0.00408,((((((183750:0.04399,194828:0.02961):0.00439,44489:0.00888):0.02267,212685:0.08555)F5:0.01949,191820:0.12437):0.01716,183477:0.05838):0.01417,(100296:0.06618,97151:0.03759):0.00676):0.00497):0.00491,190369:0.03396):0.01884,188851:0.07022):0.01263,209806:0.10743):0.00513,(45529:0.09465,84125:0.06624):0.00949):0.00664,49015:0.08678)p-184-o5:0.01688):0.00417,((202464:0.08190,32849:0.07173):0.01985,77707:0.07102):0.00332):0.00747,(((((((175210:0.04983,44678:0.06525):0.00980,135739:0.06014):0.00613,148038:0.05417):0.00957,114529:0.05077):0.00662,(((146492:0.03952,149037:0.04725):0.01775,77973:0.07651):0.03110,173598:0.06282):0.00972):0.00413,(149604:0.05082,38316:0.06525):0.00798):0.00412,148392:0.06793)BCf7-02:0.00584):0.00247,45562:0.06149):0.00250,(((((106462:0.03062,224633:0.05560):0.01170,219368:
0.'..b':0.02903,100020:0.12347):0.01097):0.00793,(211317:0.12422,52:0.13273):0.02180):0.00701,(((1019:0.05822,237:0.08247):0.01263,16284:0.10092)pMC2A36:0.09553,772:0.08438)pMC2A209:0.05495):0.00349,((((203065:0.13973,223034:0.12170):0.05869,200845:0.15524):0.00470,7:0.11604):0.00635,((115256:0.09829,203276:0.09225):0.07057,151811:0.10325)HV1:0.01789)pMC2A384:0.02179):0.00609):0.01036,((((((((140100:0.06865,140261:0.05289):0.01484,24840:0.03802):0.01016,141088:0.06115):0.00342,34194:0.03220):0.02017,111614:0.07279):0.00604,43427:0.04218)SAGMEG-1:0.06397,(131861:0.03742,132875:0.04592)MSBL1:0.08715):0.04464,(128710:0.07967,142642:0.05237)Methanococci_Eury:0.04739):0.00260):0.00000,((((((((((((((139526:0.08382,199554:0.08897):0.00533,(200010:0.07315,711:0.04561):0.02789):0.00341,((209248:0.06634,63282:0.06470)ARCP1-30:0.02188,22294:0.06836):0.00604):0.00424,((52339:0.07155,62881:0.06589)WCHD3-02:0.02188,140032:0.08562):0.01390):0.01529,130029:0.09896):0.01136,137883:0.11111):0.00790,(201596:0.08017,204898:0.07357):0.00360)terrestrial_group:0.00938,((((((109524:0.04908,146447:0.05031)CTD005-13A:0.02677,104364:0.05473):0.02317,112246:0.07250)marine_group_II:0.06502,190786:0.06983):0.02109,141005:0.09860):0.01058,(109663:0.08056,124193:0.05148)marine_group_III:0.03332)E3:0.01986):0.00600,(219191:0.05408,428:0.04246)pMC2A33:0.10254)E2:0.01129,(132046:0.11171,54:0.07827):0.01615):0.00172,((((((104684:0.04971,47977:0.04722):0.01036,161666:0.05329):0.00340,71237:0.05560)Thermoplasmataceae:0.06309,32461:0.07645):0.02896,161417:0.05638)Thermoplasmatales:0.02759,13:0.07721):0.01045)Thermoplasmata_Eury:0.03596,144490:0.13228):0.02507,(((108925:0.08674,202396:0.08197):0.07165,(200115:0.12257,202746:0.11926):0.03705)pMC2A15:0.03934,((137541:0.08229,62886:0.07807):0.01425,39716:0.08236)WSA2:0.01819):0.01147):0.00439,((((245:0.07204,249:0.08326):0.03100,49:0.06754):0.01562,123021:0.05061)pMC1A4:0.02314,188272:0.06162)pMC1:0.03374):0.00602):0.00595,((((130:0.04888,150461:0.05302)Meth
anobrevibacter_cuticularis:0.00952,194137:0.06186)Methanobrevibacter:0.02651,223:0.02320):0.07546,46:0.03065)Methanobacteria_Eury:0.04903):0.01712,(137113:0.06053,743:0.09310):0.00795):0.00598,((125020:0.06468,77902:0.10741)xNanoarchaeota:0.11191,(223234:0.01369,64372:0.02642)Methanopyri:0.07265):0.00357):0.03674,142674:0.11519):0.02585,((((((((1029:0.07158,108956:0.09699)Cenarchaeum:0.00887,49412:0.07404)Cenarchaeales:0.03873,101095:0.05494):0.01478,71074:0.04406):0.03588,204290:0.04992)Thaumarchaeota:0.02245,39230:0.05800):0.01727,(((((108942:0.09425,111933:0.09353):0.00892,213389:0.08909):0.01897,115168:0.06250):0.01561,1006:0.07092)Sd-NA:0.01726,127052:0.04541):0.00770):0.02056,((43314:0.05877,53714:0.03660)A14:0.08127,888:0.06839):0.00525):0.01706):0.00862,((((((105220:0.04665,835:0.08706):0.01404,(140627:0.04481,201321:0.05932):0.02081):0.00597,138956:0.07600):0.00687,((112707:0.08088,177633:0.03172):0.02442,(126957:0.07800,140689:0.04333):0.02283):0.01031)C2:0.00683,((((193033:0.09039,198509:0.07699):0.01960,142115:0.11287):0.00462,(112651:0.13277,839:0.09609):0.03185):0.01563,(119867:0.04291,122677:0.06486):0.02386):0.00946):0.00845,126878:0.07699):0.01388):0.00341,(((((((((19329:0.05079,978:0.04651)Acidianus:0.01990,54467:0.05653):0.00602,(35548:0.06847,946:0.06934):0.01687)Sulfolobales:0.04344,144945:0.03552):0.00692,(910:0.05083,916:0.02991):0.00086):0.00842,186470:0.02919):0.03197,(((23289:0.03648,872:0.06078):0.00688,877:0.06385)Thermoproteaceae:0.02822,(181142:0.04754,73046:0.05430)Thermofilaceae:0.01653)Thermoproteales:0.00257)Thermoprotei_Cren:0.00085,(179805:0.06119,836:0.07463):0.01662):0.00594,((190468:0.12200,34207:0.11399):0.02507,51122:0.11130)YNPFFA:0.01440):0.00861):0.00591,((136293:0.05106,141677:0.04602)pISA7:0.02551,(137080:0.07756,838:0.06403)pSL4:0.05968):0.00260):0.03304,((200378:0.01500,202338:0.00887)SBAK-mid-13:0.26400,771:0.10870):0.09135):0.01246,883:0.13561):0.00444,(160320:0.03987,51224:0.03904)Korarchaeota:0.05918)Archaea:0.1346
4);\n+\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test-Biplot.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test-Biplot.tsv Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test-BiplotNA.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test-BiplotNA.tsv Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test-comma.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test-comma.biom Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,1 @@
+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "BreadCrumbs","date": "2013-09-08T17:12:24.307906","matrix_type": "sparse","matrix_element_type": "float","shape": [5, 8],"data": [[0,0,1.0],[0,2,12.0],[0,4,6.0],[0,6,2.0],[0,7,1.0],[1,1,10.0],[1,2,43.0],[1,3,6.0],[1,5,23.0],[1,7,1.0],[2,0,3.0],[2,2,29.0],[2,4,45.0],[2,6,1.0],[2,7,1.0],[3,1,45.0],[3,2,34.0],[3,3,3.0],[3,7,1.0],[4,0,5.0],[4,2,2.0],[4,4,6.0],[4,6,1.0],[4,7,1.0]],"rows": [{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|72", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|4904", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|1361", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|3417", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|1368", "metadata": null}],"columns": [{"id": "700098986", "metadata": {"TID": "700098986", "STSite": "L_Antecubital_fossa", "ID": "700098986"}},{"id": "700098984", "metadata": {"TID": "700098984", "STSite": "R_Retroauricular_crease", "ID": "700098984"}},{"id": "700098980", "metadata": {"TID": "700098980", "STSite": "Subgingival_plaque", "ID": "700098980"}},{"id": "700098988", "metadata": {"TID": "700098988", "STSite": "R_Antecubital_fossa", "ID": "700098988"}},{"id": "700037470", "metadata": {"TID": "700037470", "STSite": "L_Retroauricular_crease", "ID": "700037470"}},{"id": "700037472", "metadata": {"TID": "700037472", "STSite": "R_Retroauricular_crease", "ID": "700037472"}},{"id": "700037474", "metadata": {"TID": "700037474", "STSite": "L_Antecubital_fossa", "ID": "700037474"}},{"id": "700037476", "metadata": {"TID": "700037476", "STSite": "R_Antecubital_fossa", "ID": "700037476"}}]}
\ No newline at end of file

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test-comma.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test-comma.pcl Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test.biom Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,1 @@
+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "BreadCrumbs","date": "2013-09-04T12:40:01.356447","matrix_type": "sparse","matrix_element_type": "float","shape": [5, 8],"data": [[0,0,1.2300000190734863],[0,2,12.0],[0,4,6.0],[0,6,2.0],[0,7,1.0],[1,1,10.0],[1,2,43.0],[1,3,6.0],[1,5,23.0],[1,7,1.0],[2,0,3.0],[2,2,29.0],[2,4,45.0],[2,6,1.0],[2,7,1.0],[3,1,45.0],[3,2,34.0],[3,3,3.0],[3,7,1.0],[4,0,5.0],[4,2,2.0],[4,4,6.0],[4,6,1.0],[4,7,1.0]],"rows": [{"id": "72", "metadata": {"taxonomy": ["Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Bacillaceae", "unclassified"]}},{"id": "4904", "metadata": {"taxonomy": ["Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Bacillaceae", "unclassified"]}},{"id": "1361", "metadata": {"taxonomy": ["Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Bacillaceae", "unclassified"]}},{"id": "3417", "metadata": {"taxonomy": ["Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Bacillaceae", "unclassified"]}},{"id": "1368", "metadata": {"taxonomy": ["Bacteria", "Firmicutes", "Bacilli", "Bacillales", "Bacillaceae", "unclassified"]}}],"columns": [{"id": "700098986", "metadata": {"TID": "700098986", "STSite": "L_Antecubital_fossa", "ID": "700098986"}},{"id": "700098984", "metadata": {"TID": "700098984", "STSite": "R_Retroauricular_crease", "ID": "700098984"}},{"id": "700098980", "metadata": {"TID": "700098980", "STSite": "Subgingival_plaque", "ID": "700098980"}},{"id": "700098988", "metadata": {"TID": "700098988", "STSite": "R_Antecubital_fossa", "ID": "700098988"}},{"id": "700037470", "metadata": {"TID": "700037470", "STSite": "L_Retroauricular_crease", "ID": "700037470"}},{"id": "700037472", "metadata": {"TID": "700037472", "STSite": "R_Retroauricular_crease", "ID": "700037472"}},{"id": "700037474", "metadata": {"TID": "700037474", "STSite": "L_Antecubital_fossa", "ID": "700037474"}},{"id": "700037476", "metadata": {"TID": "700037476", "STSite": 
"R_Antecubital_fossa", "ID": "700037476"}}]}
\ No newline at end of file

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test.pcl Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test.tsv Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/TestForConversion.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/TestForConversion.biom Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,1 @@
+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "BreadCrumbs","date": "2013-09-17T04:25:03.670076","matrix_type": "sparse","matrix_element_type": "float","shape": [5, 8],"data": [[0,0,1.2300000190734863],[0,2,12.0],[0,4,6.0],[0,6,2.0],[0,7,1.0],[1,1,10.0],[1,2,43.0],[1,3,6.0],[1,5,23.0],[1,7,1.0],[2,0,3.0],[2,2,29.0],[2,4,45.0],[2,6,1.0],[2,7,1.0],[3,1,45.0],[3,2,34.0],[3,3,3.0],[3,7,1.0],[4,0,5.0],[4,2,2.0],[4,4,6.0],[4,6,1.0],[4,7,1.0]],"rows": [{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|72", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|4904", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|1361", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|3417", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|1368", "metadata": null}],"columns": [{"id": "700098986", "metadata": {"TID": "700098986", "STSite": "L_Antecubital_fossa", "ID": "700098986"}},{"id": "700098984", "metadata": {"TID": "700098984", "STSite": "R_Retroauricular_crease", "ID": "700098984"}},{"id": "700098980", "metadata": {"TID": "700098980", "STSite": "Subgingival_plaque", "ID": "700098980"}},{"id": "700098988", "metadata": {"TID": "700098988", "STSite": "R_Antecubital_fossa", "ID": "700098988"}},{"id": "700037470", "metadata": {"TID": "700037470", "STSite": "L_Retroauricular_crease", "ID": "700037470"}},{"id": "700037472", "metadata": {"TID": "700037472", "STSite": "R_Retroauricular_crease", "ID": "700037472"}},{"id": "700037474", "metadata": {"TID": "700037474", "STSite": "L_Antecubital_fossa", "ID": "700037474"}},{"id": "700037476", "metadata": {"TID": "700037476", "STSite": "R_Antecubital_fossa", "ID": "700037476"}}]}
\ No newline at end of file

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test_no_metadata.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test_no_metadata.biom Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,1 @@
+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","type": "OTU table","generated_by": "BreadCrumbs","date": "2013-09-08T21:45:51.197864","matrix_type": "sparse","matrix_element_type": "float","shape": [5, 8],"data": [[0,0,1.2300000190734863],[0,2,12.0],[0,4,6.0],[0,6,2.0],[0,7,1.0],[1,1,10.0],[1,2,43.0],[1,3,6.0],[1,5,23.0],[1,7,1.0],[2,0,3.0],[2,2,29.0],[2,4,45.0],[2,6,1.0],[2,7,1.0],[3,1,45.0],[3,2,34.0],[3,3,3.0],[3,7,1.0],[4,0,5.0],[4,2,2.0],[4,4,6.0],[4,6,1.0],[4,7,1.0]],"rows": [{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|72", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|4904", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|1361", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|3417", "metadata": null},{"id": "Bacteria|Firmicutes|Bacilli|Bacillales|Bacillaceae|unclassified|1368", "metadata": null}],"columns": [{"id": "700098986", "metadata": {"ID": "700098986"}},{"id": "700098984", "metadata": {"ID": "700098984"}},{"id": "700098980", "metadata": {"ID": "700098980"}},{"id": "700098988", "metadata": {"ID": "700098988"}},{"id": "700037470", "metadata": {"ID": "700037470"}},{"id": "700037472", "metadata": {"ID": "700037472"}},{"id": "700037474", "metadata": {"ID": "700037474"}},{"id": "700037476", "metadata": {"ID": "700037476"}}]}
\ No newline at end of file

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/Test_no_metadata.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/Test_no_metadata.pcl Tue May 13 21:58:57 2014 -0400

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map-colors.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map-colors.txt Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,8 @@
+Group1:Nse#L#150,Nse#L#151,Nse#L#152,Nse#L#153,Nse#L#98
+Group2:Nso#106,Nso#107,Nso#108,Nso#1209,Nso#65
+Group3:Nw#L#119,Nw#L#160,Nw#L#5,Nw#R#189,Nw#R#50
+Group4:Sse#1224,Sse#M#14,Sse#M#169,Sse#M#62,Sse#M#63,Sse#M#64,Sse#M#75
+Group5:Swb#M#137,Swb#M#154,Swb#M#155,Swb#M#156,Swb#M#157
+Group6:Sws#M#1227,Sws#M#1230,Sws#M#1234,Sws#M#163,Sws#M#83
+Group7:Tg#1238,Tg#1249,Tg#1251,Tg#1252
+Group8:Vg#h#1038,Vg#h#1039,Vg#h#1043,Vg#h#1061,Vg#h#1104,Vg#o#1124,Vg#o#1128,Vg#o#1132,Vg#o#1153,Vg#o#1160,Vg#oh#1051,Vg#oh#1055,Vg#oh#1070

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map.txt Tue May 13 21:58:57 2014 -0400

b'@@ -0,0 +1,4844 @@\n+150394\tTg#1249\t1\n+150394\tTg#1251\t2\n+215260\tNso#65\t1\n+215260\tNso#1209\t4\n+16073\tVg#h#1061\t1\n+16072\tVg#h#1104\t1\n+16076\tVg#o#1128\t2\n+16076\tVg#oh#1070\t84\n+16076\tVg#oh#1055\t1\n+16076\tVg#o#1132\t2\n+16076\tTg#1249\t2\n+16076\tVg#o#1153\t2\n+35238\tNse#L#151\t1\n+35238\tNw#R#50\t2\n+35238\tNso#106\t1\n+16074\tVg#h#1043\t2\n+16074\tVg#oh#1070\t1\n+16074\tVg#oh#1051\t1\n+16074\tVg#h#1038\t6\n+16074\tVg#o#1132\t2\n+101767\tNw#L#5\t1\n+101767\tNso#108\t1\n+114078\tNso#65\t1\n+131443\tVg#h#1039\t1\n+144338\tTg#1249\t1\n+111532\tNso#108\t1\n+111532\tNso#107\t7\n+169721\tVg#oh#1055\t26\n+169721\tVg#oh#1051\t33\n+103892\tSse#M#62\t1\n+214891\tNso#65\t1\n+214891\tNso#1209\t1\n+170442\tSse#M#62\t1\n+58397\tTg#1252\t1\n+58397\tNse#L#150\t1\n+82191\tTg#1238\t2\n+93780\tNw#L#5\t1\n+61204\tSwb#M#156\t42\n+61204\tSwb#M#157\t118\n+61204\tSwb#M#154\t10\n+61204\tSwb#M#155\t2\n+61204\tSws#M#1227\t10\n+61204\tSws#M#163\t276\n+61204\tNso#108\t1\n+61204\tSws#M#83\t44\n+61204\tSws#M#1234\t15\n+61204\tSwb#M#137\t2\n+61204\tSws#M#1230\t27\n+130882\tNso#65\t1\n+130882\tNw#L#5\t1\n+130882\tNw#L#119\t1\n+130882\tSse#M#169\t1\n+130882\tNso#107\t1\n+130882\tNw#R#50\t1\n+6155\tNw#R#50\t1\n+6155\tNso#108\t1\n+6155\tNso#106\t2\n+167734\tVg#o#1132\t2\n+7624\tSws#M#1234\t1\n+7624\tSse#M#62\t1\n+7624\tSse#M#63\t1\n+7624\tSse#M#75\t1\n+7624\tSws#M#83\t1\n+182229\tVg#o#1153\t1\n+131207\tSse#M#169\t1\n+131207\tSse#M#63\t1\n+131207\tSwb#M#156\t2\n+112460\tNse#L#151\t1\n+112460\tSse#M#64\t1\n+192175\tVg#o#1124\t3\n+57724\tSwb#M#154\t8\n+57724\tSws#M#163\t1\n+57724\tNso#108\t2\n+57724\tNso#106\t1\n+166488\tNse#L#151\t1\n+155255\tNso#1209\t1\n+55589\tNso#65\t1\n+220301\tVg#o#1124\t2\n+97151\tVg#h#1039\t4\n+97151\tVg#h#1104\t8\n+97151\tVg#h#1043\t1\n+166485\tSse#M#62\t1\n+154652\tSse#M#62\t1\n+154652\tSse#M#63\t2\n+60136\tVg#oh#1055\t2\n+6294\tSwb#M#156\t1\n+6294\tSwb#M#155\t1\n+6294\tNse#L#98\t1\n+6294\tNw#R#50\t1\n+6294\tSse#M#75\t1\n+6294\tNso#108\t2\n+6294\tSse#M#
64\t1\n+191465\tNse#L#98\t1\n+191465\tNw#L#160\t1\n+191465\tNso#106\t1\n+152550\tSws#M#1230\t26\n+152550\tSws#M#1227\t56\n+152550\tSws#M#1234\t6\n+152550\tSse#M#64\t2\n+152550\tSws#M#83\t1\n+106352\tSwb#M#154\t1\n+106352\tSws#M#163\t2\n+106352\tSse#M#62\t1\n+106352\tSse#M#75\t1\n+152557\tSws#M#83\t5\n+105395\tNw#L#5\t1\n+105395\tNw#L#160\t2\n+105395\tNw#L#119\t5\n+105395\tNw#R#189\t2\n+105004\tNso#1209\t3\n+105004\tNso#107\t1\n+105004\tNw#R#50\t1\n+81674\tVg#oh#1070\t127\n+81674\tVg#oh#1055\t10\n+81674\tVg#oh#1051\t137\n+81674\tVg#o#1132\t3\n+140328\tNso#1209\t1\n+101380\tNw#L#119\t1\n+101380\tNso#107\t1\n+227238\tNso#65\t1\n+227238\tNse#L#150\t1\n+227238\tNso#106\t1\n+199059\tVg#o#1124\t5\n+43051\tVg#h#1043\t1\n+43051\tVg#o#1124\t4\n+43051\tVg#o#1128\t1\n+43051\tVg#oh#1070\t1\n+43051\tVg#o#1132\t1\n+43051\tVg#o#1153\t1\n+161296\tSse#M#63\t1\n+101018\tNse#L#98\t1\n+159156\tSse#M#75\t1\n+200413\tSse#M#62\t5\n+200413\tSse#M#63\t2\n+10709\tSse#M#169\t1\n+10709\tNso#108\t2\n+2446\tSse#M#14\t1\n+2446\tSws#M#1227\t1\n+2446\tSws#M#83\t4\n+2446\tNw#R#189\t1\n+2446\tSwb#M#137\t1\n+2446\tSws#M#1230\t12\n+2446\tSse#M#62\t1\n+2446\tSse#M#64\t4\n+99206\tNse#L#150\t1\n+95791\tTg#1238\t2\n+95791\tTg#1249\t1\n+128299\tNse#L#151\t1\n+128299\tNso#1209\t1\n+102326\tNso#1209\t2\n+102326\tSws#M#163\t1\n+102326\tNso#108\t1\n+150608\tSse#M#63\t1\n+38438\tVg#o#1124\t11\n+167514\tVg#o#1124\t8\n+113170\tNso#108\t1\n+2045\tVg#o#1124\t1\n+2045\tVg#h#1061\t1\n+2045\tVg#oh#1055\t44\n+2045\tVg#h#1038\t1\n+2045\tVg#h#1039\t1\n+2045\tVg#o#1132\t1\n+84125\tVg#h#1104\t7\n+84125\tVg#h#1061\t9\n+190826\tSws#M#83\t1\n+102448\tSws#M#163\t1\n+102448\tSwb#M#157\t1\n+102448\tSws#M#83\t1\n+34809\tVg#h#1061\t2\n+154272\tSse#M#62\t1\n+128618\tNso#1209\t1\n+21292\tNw#L#119\t3\n+21292\tNso#106\t1\n+105718\tVg#h#1038\t1\n+105718\tVg#h#1104\t3\n+105718\tVg#o#1132\t1\n+153802\tNw#R#50\t1\n+80115\tSse#M#62\t1\n+1783\tNso#108\t1\n+1783\tNso#1209\t1\n+1783\tNso#107\t10\n+1783\tNso#106\t13\n+45661\tSse#M#75\t1\n+90000\
tNse#L#98\t1\n+14135\tSws#M#83\t1\n+164968\tVg#h#1104\t1\n+164968\tVg#o#1124\t3\n+164968\tVg#oh#1070\t9\n+164968\tVg#oh#1055\t39\n+164968\tVg#oh#1051\t33\n+164968\tVg#h#1039\t3\n+164968\tVg#o#1132\t1\n+164968\tVg#o#1153\t2\n+63116\tSse#M#169\t1\n+63116\tNw#R#50\t1\n+200704\tSse#M#169\t1\n+46867\tVg#o#1132\t1\n+142023\tTg#1251\t1\n+70396\tNso#108\t1\n+70396\tNso#106\t1\n+198122\tVg#oh#1070\t4\n+198122\tVg#oh#1055\t2\n+198122\tVg#oh#1051\t3\n+163720\tSwb#M#155\t13\n+48540\tNw#L#160\t2\n+48540\tNw#R#50\t1\n+108713\tTg#1249\t1\n+108713\tSws#M#163\t1\n+108713\tNso#108\t3\n+108713\tSws#M#83\t2\n+108713\tNw#R#189\t3\n+220892\tNso#108\t2'..b'L#150\t1\n+1563\tNso#108\t2\n+1563\tNso#106\t1\n+1563\tSws#M#83\t1\n+99302\tNso#107\t1\n+204742\tNse#L#151\t1\n+204742\tNso#1209\t1\n+9349\tVg#o#1153\t1\n+52759\tVg#o#1132\t1\n+52759\tVg#o#1124\t1\n+167146\tTg#1249\t1\n+167146\tNso#108\t2\n+105079\tNso#108\t1\n+105079\tSse#M#63\t1\n+25580\tSwb#M#137\t2\n+161280\tNse#L#151\t1\n+161280\tNse#L#152\t1\n+199686\tVg#o#1128\t1\n+17020\tNso#106\t1\n+17020\tTg#1252\t1\n+124743\tSwb#M#157\t1\n+208234\tNso#108\t1\n+144247\tTg#1252\t1\n+25420\tSwb#M#157\t7\n+25420\tSwb#M#155\t1\n+25420\tSws#M#163\t8\n+25420\tSws#M#1234\t1\n+25420\tSwb#M#137\t12\n+25420\tSws#M#1230\t5\n+25420\tSse#M#62\t1\n+25420\tSse#M#63\t2\n+25420\tSse#M#64\t1\n+25420\tSws#M#83\t4\n+113166\tNse#L#152\t2\n+113166\tNse#L#98\t1\n+113166\tSse#M#169\t1\n+12396\tNw#R#50\t1\n+109586\tVg#o#1124\t1\n+109586\tVg#h#1061\t1\n+190451\tVg#oh#1070\t57\n+190451\tVg#oh#1055\t45\n+190451\tVg#oh#1051\t30\n+155077\tNw#L#160\t1\n+155077\tNw#R#50\t1\n+134601\tVg#o#1124\t23\n+135739\tTg#1238\t1\n+135739\tTg#1252\t2\n+203425\tTg#1238\t1\n+203425\tTg#1251\t1\n+203425\tTg#1252\t1\n+14128\tVg#o#1132\t1\n+63107\tSse#M#169\t3\n+39689\tTg#1251\t1\n+39689\tSse#M#169\t2\n+39689\tSse#M#75\t1\n+39689\tSws#M#1230\t1\n+39689\tNso#108\t3\n+39689\tNso#65\t1\n+39689\tSse#1224\t1\n+63104\tSse#M#169\t3\n+63104\tSse#M#75\t4\n+205757\tSwb#M#155\t3\n+205757\tVg#o#1160\t138
\n+205757\tVg#oh#1070\t3\n+205757\tVg#oh#1055\t7\n+205757\tVg#oh#1051\t1\n+205757\tVg#o#1132\t5\n+205757\tVg#o#1153\t244\n+142033\tTg#1249\t4\n+142033\tTg#1252\t1\n+147741\tTg#1251\t5\n+37034\tNso#108\t1\n+37034\tNso#107\t1\n+37034\tNso#106\t1\n+163284\tNw#L#160\t2\n+163284\tNso#1209\t2\n+163284\tNso#106\t1\n+148790\tNso#106\t1\n+68416\tVg#h#1038\t1\n+68416\tVg#o#1132\t3\n+193755\tVg#h#1038\t1\n+66994\tNso#108\t1\n+184850\tVg#oh#1070\t3\n+184850\tVg#oh#1055\t1\n+184850\tVg#oh#1051\t8\n+184850\tVg#o#1132\t1\n+160555\tSse#M#75\t1\n+195325\tVg#o#1124\t1\n+71875\tNso#107\t2\n+62679\tSse#1224\t2\n+222035\tNso#1209\t1\n+112960\tNso#108\t2\n+62675\tSse#1224\t1\n+132912\tSse#M#169\t2\n+106633\tNso#65\t1\n+106633\tNse#L#150\t1\n+182108\tVg#o#1124\t6\n+182108\tVg#h#1039\t8\n+182108\tVg#o#1132\t3\n+162867\tVg#o#1128\t1\n+137271\tSws#M#1230\t1\n+109541\tTg#1251\t1\n+109541\tVg#oh#1055\t24\n+69764\tSse#M#62\t1\n+115032\tNw#L#5\t1\n+2818\tSse#M#62\t1\n+2818\tSse#M#63\t4\n+133193\tVg#h#1039\t1\n+133193\tVg#o#1124\t13\n+128453\tSwb#M#155\t1\n+128453\tNse#L#152\t1\n+128453\tSws#M#163\t1\n+128453\tNso#108\t1\n+128453\tNw#R#50\t1\n+175099\tVg#h#1038\t1\n+210079\tSws#M#1230\t1\n+175094\tVg#o#1124\t3\n+175650\tVg#h#1039\t1\n+216781\tNso#65\t1\n+216781\tNso#1209\t1\n+29792\tNse#L#152\t1\n+29792\tNse#L#98\t1\n+136708\tSse#M#63\t1\n+78074\tTg#1249\t1\n+129193\tNso#108\t2\n+129193\tNso#1209\t8\n+1740\tNse#L#151\t1\n+1740\tNse#L#153\t1\n+1740\tNso#108\t1\n+1740\tNso#107\t1\n+1740\tNso#65\t4\n+1740\tNso#1209\t4\n+1743\tSwb#M#154\t6\n+1743\tNse#L#151\t1\n+1743\tNso#1209\t2\n+114327\tNso#108\t1\n+114327\tNso#1209\t7\n+8178\tSse#M#75\t1\n+169294\tNw#R#50\t1\n+169294\tSws#M#83\t5\n+169777\tSws#M#1227\t1\n+86555\tVg#h#1043\t26\n+113259\tNso#108\t2\n+113125\tNso#65\t2\n+113125\tSse#M#64\t1\n+113125\tNso#108\t6\n+113125\tNw#R#50\t1\n+114578\tTg#1238\t1\n+114816\tTg#1249\t3\n+114816\tTg#1238\t3\n+129226\tSwb#M#155\t1\n+129226\tSws#M#1234\t1\n+129226\tSws#M#163\t2\n+114813\tVg#h#1038\t1\n+114813\tVg#o
#1128\t1\n+99342\tNse#L#150\t1\n+110824\tVg#h#1104\t26\n+110824\tVg#h#1061\t1\n+97946\tVg#o#1153\t3\n+97946\tVg#oh#1070\t6\n+97946\tVg#o#1132\t3\n+97946\tVg#o#1124\t1\n+110829\tTg#1251\t1\n+110829\tVg#h#1061\t3\n+110829\tVg#h#1039\t3\n+110829\tVg#o#1132\t1\n+144881\tTg#1252\t1\n+48088\tVg#h#1104\t5\n+48088\tVg#o#1132\t1\n+5798\tNw#L#119\t1\n+5798\tSse#M#14\t1\n+5798\tNse#L#152\t1\n+5798\tNw#R#50\t3\n+5798\tSws#M#163\t3\n+5798\tSse#M#62\t2\n+5798\tSse#M#64\t1\n+136672\tSws#M#163\t6\n+136672\tSwb#M#137\t2\n+111849\tNse#L#98\t2\n+111849\tSse#M#62\t1\n+111849\tSse#M#169\t1\n+1295\tSse#M#62\t1\n+1295\tSse#M#63\t2\n+1295\tSse#M#64\t1\n+155851\tVg#h#1104\t2\n+137507\tNso#1209\t2\n+137507\tNso#106\t1\n+154145\tSwb#M#154\t4\n+154145\tSws#M#163\t2\n+154145\tSws#M#1234\t1\n+154145\tSws#M#83\t2\n+104638\tSws#M#1230\t1\n+104638\tSse#M#75\t1\n+104638\tSws#M#83\t6\n+132008\tVg#o#1124\t10\n+76891\tNse#L#153\t1\n+2588\tSse#M#62\t1\n+216714\tNso#107\t1\n+105813\tVg#o#1153\t9\n+105813\tVg#oh#1070\t15\n+105813\tVg#h#1104\t1\n+105813\tVg#h#1061\t1\n+105813\tVg#o#1132\t2\n+105813\tVg#o#1124\t1\n+28192\tVg#h#1104\t1\n+7461\tSse#1224\t2\n+7461\tSse#M#62\t2\n+7461\tNso#107\t1\n+91272\tSwb#M#156\t9\n+91272\tSwb#M#157\t1\n+91272\tSse#M#62\t1\n+91272\tSws#M#83\t2\n+103090\tNse#L#98\t1\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/demo_input/testFeatureMetadata.pcl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/demo_input/testFeatureMetadata.pcl Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,8 @@
+ID taxonomy_0 taxonomy_1 taxonomy_2 taxonomy_3 taxonomy_4 taxonomy_5 700098986 700098984 700098980 700098988 700037470 700037472 700037474 700037476
+TID NA NA NA NA NA NA 700098986 700098984 700098980 700098988 700037470 700037472 700037474 700037476
+STSite NA NA NA NA NA NA L_Antecubital_fossa R_Retroauricular_crease Subgingival_plaque R_Antecubital_fossa L_Retroauricular_crease R_Retroauricular_crease L_Antecubital_fossa R_Antecubital_fossa
+72 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 1.23 0 12 0 6 0 2 1
+4904 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 0 10 43 6 0 23 0 1
+1361 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 3 0 29 0 45 0 1 1
+3417 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 0 45 34 3 0 0 0 1
+1368 Bacteria Firmicutes Bacilli Bacillales Bacillaceae unclassified 5 0 2 0 6 0 1 1

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/docs/PCL-Description.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/docs/PCL-Description.txt Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,17 @@
+Although this example is delimited with spaces for consistent visualization, in a PCL file each element would be delimited by one tab.
+
+Here is an example of a very small PCL file.
+
+ID      Kingdom   Genus        Sample 1  Sample 2
+Cohort  NA        NA           Test      Control
+Age     NA        NA           34        43
+1232    Bacteria  Bacteroides  .23       .16
+543     Bacteria  Dorea        .001      .0021
+
+These are the different parts of the PCL file.
+
+ID                Feature metadata ID     Last feature metadata ID    sample ID          sample ID
+Metadata ID       NA                      NA                          sample metadata    sample metadata
+Last metadata ID  NA                      NA                          sample metadata    sample metadata
+Feature ID        Feature (row) metadata  Feature (row) metadata      Data measurement   Data measurement
+Feature ID        Feature (row) metadata  Feature (row) metadata      Data measurement   Data measurement

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/docs/Tutorial-BreadCrumbs.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/docs/Tutorial-BreadCrumbs.md Tue May 13 21:58:57 2014 -0400

b"@@ -0,0 +1,306 @@\n+# BreadCrumbs Tutorial #\n+\n+This is a brief tutorial to get you acquainted with the scripts provided in breadcrumbs. This tutorial is oragnized by script and task. Examples are given using files in the demo_input folder which is included in the BreadCrumbs package. Each of these commands should work from the command line in the breadcrumbs directory.\n+\n+Please note all of the following calls expect you to be in the breadcrumbs directory and to have both the ./breadcrumbs/src and ./breadcrumbs/scripts in your path and or python path.\n+\n+Enjoy and happy research!\n+\n+## Contents: ##\n+1. scriptPCoA \n+2. scriptManipulateTable.py \n+I. Manipulating the measurements \n+II. Filtering \n+III. Filtering with knowledge of feature hierarchical relationship\n+IV. Manipulate samples by metadata\n+V. Manipulate the feature names\n+3. scriptPlotFeature.py\n+4. scriptBiplotTSV.R\n+5. scriptConvertBetweenBIOMAndPCL.py\n+\n+## scriptPCoA.py ##\n+This script allows one to plot a PCoA of an abundance table. In the plot each sample is one marker. The marker shape and color is determined by a metadata (of your choice). The distances between each sample is determined by a specific beta-diversity distance metric. By default Bray-curtis distance is used. This can be changed as needed. You will notice for every call you must give it the sample id (-i) and the last metadata which should be the row before your first data (-l). This helps the scripts understand what is a data measurement and what is a metadata.\n+\n+A. How do I make a PCoA of an abundance table, painting (coloring) it by a specific metadata?\n+\n+> scripts/scriptPcoa.py -i TID -l STSite -p STSite demo_input/Test.pcl\n+\n+B. How do I make a series of PCoAs of an abundance table, one PCoA for every metadata?\n+\n+If nothing is specified with -p then all metadata are painted. 
Note there are a max of 9 shapes to use, a metadata will be skipped if it has more than 9 levels (specific values which can be used many times). Don't worry, the script will let you know if this happens and will just skip to the next metadata.\n+\n+> scripts/scriptPcoa.py -i TID -l STSite demo_input/Test.pcl\n+\n+C. How do I use a different beta-diversity distance metric instead of Bray-curtis distance?\n+The following metrics can be choosen: braycurtis, canberra, chebyshev, cityblock, correlation, cosine, euclidean, hamming, sqeuclidean, unifrac_unweighted, unifrac_weighted\n+\n+> scripts/scriptPcoa.py -i TID -l STSite -m sqeuclidean demo_input/Test.pcl\n+\n+D. How do I get the coordinates of the points in the PCoA plot? Use -C and give a file path to which to write.\n+\n+> scripts/scriptPcoa.py -i TID -l STSite -C coordinates.txt demo_input/Test.pcl\n+\n+E. How do I get the distance matrix represented by the PCoA plot? Use -D and give a file path to which to write.\n+\n+> scripts/scriptPcoa.py -i TID -l STSite -D distances.txt demo_input/Test.pcl\n+\n+F. How do I make a PCoA using unifrac type metrics.\n+\n+> scripts/scriptPcoa.py -m unifrac_weighted -t demo_input/GreenGenesCore-May09.ref.tre -e demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map.txt -c demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map-colors.txt\n+> scripts/scriptPcoa.py -m unifrac_unweighted -t demo_input/GreenGenesCore-May09.ref.tre -e demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map.txt -c demo_input/fastunifrac_Ley_et_al_NRM_2_sample_id_map-colors.txt\n+\n+There already exists a collection of functionality surrounding unifrac distances in Qiime and related software. We support these metrics here for completeness, if your need is not met here, please look into Qiime and related software for a solutions with a more rich collection of functionality.\n+\n+## scriptManipulateTable.py ##\n+Abundance tables can be difficult to manipulate. 
This script captures frequent tasks that may be important to manipulating an abundance table including normalization, summing, filtering, stratifying the tables into subsets (for instance breaking up a large HMP table into tables, "..b'4stem.org/r-colors.html This requires you to be coloring the plot by a metadata (option -c).\n+\n+> ./scripts/scriptBiplotTSV.R -n grey -c STSite STSite demo_input/Test-BiplotNA.tsv\n+\n+P. How do I scale arrows in the plot. Use -z and a number to weight how much the metadata influences the rotation (number between 0 and very large).\n+\n+> ./scripts/scriptBiplotTSV.R -z 2 STSite demo_input/Test-Biplot.tsv\n+\n+Q. How do I plot metadata labels without the arrows?\n+\n+> ./scripts/scriptBiplotTSV.R -A STSite demo_input/Test-Biplot.tsv\n+\n+R. How do I plot the biplot without metadata?\n+\n+> ./scripts/scriptBiplotTSV.R -m "" STSite demo_input/Test-Biplot.tsv\n+\n+## scriptConvertBetweenBIOMAndPCL.py ##\n+The script allows one to convert between PCL and BIOM file formats. ID, last feature (row) metadata, and last sample metadata are optional information in the script call (when converting from PCL to BIOM). These are used to dictate placement of certain key sample metadata in the PCL file. Typically, it is helpful to set these arguments. This aids in the consistent and reliable manipulation of these files. If the are not given, a guess will be made to the ID and it will be assumed no metadata exist.\n+\n+A quick definition:\n+*ID or sample id* - typically your first row in the PCL file (the Ids of all your samples) in the example below "ID"\n+*Feature (row) metadata* - columns in your PCL file which describe your features. These come after your feature IDs but before your measurements.\n+*Sample metadata* - rows in your PCL file which come before your measurements and describe your samples\n+\n+For a description of a PCL and it\'s parts please look in the docs folder for PCL-Description.txt\n+\n+A. 
The minimal call to convert from BIOM file to a PCL file or vice versa. This call indicates the sample metadata entry which is the sample id and which is the last listed metadata in a pcl file (before the data measurements). When converting a PCL file, if there are no metadata and only a metadata id, -l and -i are not required. If there are multiple metadata in a pcl file the -l (last metadata) field is required. Neither of these fields is required for biom file conversion to pcl.\n+\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py demo_input/Test_no_metadata.pcl example1.biom\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py demo_input/Test.biom example2.pcl\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -l STSite demo_input/Test.pcl example3.biom\n+\n+B. Specifying ID and lastmetadata\n+\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i TID -l STSite demo_input/Test.pcl example4.biom\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i TID -l STSite demo_input/Test.biom example5.pcl\n+\n+C. The case where there are no sample metadata, just sample IDs. Indicate the ID and if no last metadata is indicated (-l) it is assumed no sample metadata exist.\n+\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i ID demo_input/Test_no_metadata.pcl example6.biom\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i ID demo_input/Test_no_metadata.biom example7.pcl\n+\n+D. The case when converting a PCL file with Feature (row) metadata (for example taxonomy_5). Include the last column with feature metadata.\n+\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i ID -r taxonomy_5 -l STSite ./demo_input/testFeatureMetadata.pcl testFeatureMetadata.biom\n+\n+E. Although the output file name can be automatically generated, the output file name can be given if needed.\n+\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i TID -l STSite demo_input/Test.biom CustomFileName.pcl\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i TID -l STSite demo_input/Test.pcl CustomFileName.biom\n+\n+F. 
Indicate the use of a pcl file using a delimiter that is not tab or indicate the creation of a pcl file using a delimiter that is not tab.\n+\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i TID -l STSite -f , demo_input/Test-comma.pcl\n+> ./scripts/scriptConvertBetweenBIOMAndPCL.py -i TID -l STSite -f , demo_input/Test-comma.biom\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/hclust/hclust.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/hclust/hclust.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,603 @@\n+#!/usr/bin/env python\n+\n+import sys\n+import numpy as np \n+import matplotlib\n+matplotlib.use(\'Agg\')\n+import scipy\n+import pylab\n+import scipy.cluster.hierarchy as sch\n+import scipy.spatial.distance as dis \n+from scipy import stats\n+\n+# User defined color maps (in addition to matplotlib ones)\n+bbcyr = {\'red\': ( (0.0, 0.0, 0.0),\n+ (0.25, 0.0, 0.0),\n+ (0.50, 0.0, 0.0),\n+ (0.75, 1.0, 1.0),\n+ (1.0, 1.0, 1.0)),\n+ \'green\': ( (0.0, 0.0, 0.0),\n+ (0.25, 0.0, 0.0),\n+ (0.50, 1.0, 1.0),\n+ (0.75, 1.0, 1.0),\n+ (1.0, 0.0, 1.0)),\n+ \'blue\': ( (0.0, 0.0, 0.0),\n+ (0.25, 1.0, 1.0),\n+ (0.50, 1.0, 1.0),\n+ (0.75, 0.0, 0.0),\n+ (1.0, 0.0, 1.0))}\n+\n+bbcry = {\'red\': ( (0.0, 0.0, 0.0),\n+ (0.25, 0.0, 0.0),\n+ (0.50, 0.0, 0.0),\n+ (0.75, 1.0, 1.0),\n+ (1.0, 1.0, 1.0)),\n+ \'green\': ( (0.0, 0.0, 0.0),\n+ (0.25, 0.0, 0.0),\n+ (0.50, 1.0, 1.0),\n+ (0.75, 0.0, 0.0),\n+ (1.0, 1.0, 1.0)),\n+ \'blue\': ( (0.0, 0.0, 0.0),\n+ (0.25, 1.0, 1.0),\n+ (0.50, 1.0, 1.0),\n+ (0.75, 0.0, 0.0),\n+ (1.0, 0.0, 1.0))}\n+my_colormaps = [ (\'bbcyr\',bbcyr),\n+ (\'bbcry\',bbcry)]\n+\n+\n+\n+def read_params(args):\n+ import argparse as ap\n+ import textwrap\n+\n+ p = ap.ArgumentParser( description= "TBA" )\n+ \n+ p.add_argument( \'--in\', \'--inp\', metavar=\'INPUT_FILE\', type=str, \n+ nargs=\'?\', default=sys.stdin,\n+ help= "the input archive " )\n+\n+ p.add_argument( \'--out\', metavar=\'OUTPUT_FILE\', type=str, \n+ nargs = \'?\', default=None,\n+ help= " the output file, image on screen"\n+ " if not specified. 
" )\n+\n+ p.add_argument( \'-m\', metavar=\'method\', type=str,\n+ choices=[ "single","complete","average",\n+ "weighted","centroid","median",\n+ "ward" ],\n+ default="average" )\n+\n+ dist_funcs = [ "euclidean","minkowski","cityblock","seuclidean",\n+ "sqeuclidean","cosine","correlation","hamming",\n+ "jaccard","chebyshev","canberra","braycurtis",\n+ "mahalanobis","yule","matching","dice",\n+ "kulsinski","rogerstanimoto","russellrao","sokalmichener",\n+ "sokalsneath","wminkowski","ward"]\n+ p.add_argument( \'-d\', metavar=\'distance function\', type=str,\n+ choices=dist_funcs,\n+ default="euclidean" )\n+ p.add_argument( \'-f\', metavar=\'distance function for features\', type=str,\n+ choices=dist_funcs,\n+ default="d" )\n+\n+ p.add_argument( \'--dmf\', metavar=\'distance matrix for features\', type=str,\n+ default = None )\n+ p.add_argument( \'--dms\', metavar=\'distance matrix for samples\', type=str,\n+ default = None )\n+\n+\n+ p.add_argument( \'-l\', metavar=\'sample label\', type=str,\n+ default = None )\n+\n+ p.add_argument( \'-s\', metavar=\'scale norm\', type=str,\n+ default = \'lin\', choices = [\'log\',\'lin\'])\n+\n+ p.add_argument( \'-x\', metavar=\'x cell width\', type=float,\n+ default = 0.1)\n+ p.add_argument( \'-y\', metavar=\'y cell width\', type=float,\n+ default = 0.1 )\n+\n+ p.add_argument( \'--minv\', metavar=\'min value\', type=float,\n+ default = 0.0 )\n+ p.add_argument( \'--maxv\', metavar=\'max value\', type=float,\n+ default = None )\n+\n+ p.add_argument( \'--xstart\', metavar=\'x coordinate of'..b"nc == 'd':\n+ feat_dist_func = dist_func\n+\n+ D, feat_labels, sample_labels = read_table(fin,xstart,xstop,ystart,ystop,percentile,top,norm)\n+\n+ ylen,xlen = D[:].shape\n+ Dt = D.transpose() \n+\n+ size_cx, size_cy = xcw, ycw\n+ \n+ xsize, ysize = max(xlen*size_cx,2.0), max(ylen*size_cy,2.0)\n+ ydend_offset = 0.025*8.0/ysize if s2l else 0.0\n+\n+ fig = init_fig(xsize,ysize,clust_ncols)\n+\n+ nfeats, nsamples = len(D), len(Dt) \n+ \n+ if 
dmf:\n+ p1 = read_dm( dmf, nfeats )\n+ Y1 = sch.linkage( p1, method=method )\n+ else:\n+ p1 = dis.pdist( D, feat_dist_func )\n+ Y1 = sch.linkage( p1, method=method ) # , metric=feat_dist_func )\n+ #Y1 = sch.linkage( D, method=method, metric=feat_dist_func )\n+ Z1 = sch.dendrogram(Y1, no_plot=True, color_threshold=feat_dend_col_th) \n+ \n+ if fdend_out:\n+ exp_newick( Y1, feat_labels, fdend_out )\n+\n+ if dms:\n+ p2 = read_dm( dms, nsamples )\n+ Y2 = sch.linkage( p2, method=method )\n+ else:\n+ p2 = dis.pdist( Dt, dist_func )\n+ Y2 = sch.linkage( p2, method=method ) # , metric=dist_func )\n+ #Y2 = sch.linkage( Dt, method=method, metric=dist_func )\n+ Z2 = sch.dendrogram(Y2, no_plot=True, color_threshold=sample_dend_col_th) \n+\n+ if sdend_out:\n+ exp_newick( Y2, sample_labels, sdend_out )\n+\n+ if fdend_w > 0.0:\n+ features_dend_panel(fig, Y1, Z1, fdend_w*8.0/xsize, clust_line_w ) \n+ if sdend_h > 0.0: \n+ samples_dend_panel(fig, Y2, Z2, ydend_offset, sdend_h*8.0/ysize, clust_line_w)\n+ \n+ idx1, idx2 = Z1['leaves'], Z2['leaves']\n+ D = D[idx1,:][:,idx2]\n+ \n+ if s2l:\n+ samples2classes_panel( fig, sample_labels, s2l, idx1, idx2, 0.025*8.0/ysize, xsize, label_cols, legendon, legend_font_size, label2cols, legend_ncol )\n+ heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, sample_labels, feat_labels, label_font_size, -cm_h*8.0/ysize, cm_h*0.8*8.0/ysize, flabelon, slabelon, cm_ticks, gridon, ydend_offset+sdend_h*8.0/ysize )\n+ \n+ fig.savefig( fout, bbox_inches='tight', \n+ pad_inches = pad_inches, \n+ dpi=300) if fout else pylab.show()\n+\n+if __name__ == '__main__':\n+ pars = read_params( sys.argv )\n+ \n+ hclust( fin = pars['in'],\n+ fout = pars['out'],\n+ method = pars['m'],\n+ dist_func = pars['d'],\n+ feat_dist_func = pars['f'],\n+ xcw = pars['x'],\n+ ycw = pars['y'],\n+ scale = pars['s'],\n+ minv = pars['minv'],\n+ maxv = pars['maxv'],\n+ xstart = pars['xstart'],\n+ ystart = pars['ystart'],\n+ xstop = pars['xstop'],\n+ ystop = pars['ystop'],\n+ 
percentile = pars['perc'],\n+ top = pars['top'],\n+ norm = pars['norm'],\n+ cm_name = pars['c'],\n+ s2l = pars['l'],\n+ label_font_size = pars['font_size'],\n+ feat_dend_col_th = pars['feat_dend_col_th'],\n+ sample_dend_col_th = pars['sample_dend_col_th'],\n+ clust_ncols = pars['clust_ncols'],\n+ clust_line_w = pars['clust_line_w'],\n+ label_cols = pars['label_cols'],\n+ sdend_h = pars['sdend_h'],\n+ fdend_w = pars['fdend_w'],\n+ cm_h = pars['cm_h'],\n+ dmf = pars['dmf'],\n+ dms = pars['dms'],\n+ legendon = pars['legend'],\n+ label2cols = pars['label2cols'],\n+ flabelon = pars['flabel'],\n+ slabelon = pars['slabel'],\n+ cm_ticks = pars['cm_ticks'],\n+ legend_ncol = pars['legend_ncol'],\n+ pad_inches = pars['pad_inches'],\n+ legend_font_size = pars['legend_font_size'],\n+ gridon = pars['grid'],\n+ sdend_out = pars['sdend_out'],\n+ fdend_out = pars['fdend_out'],\n+ )\n+\n"

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/scripts/scriptBiplotTSV.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptBiplotTSV.R Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,496 @@\n+#!/usr/bin/env Rscript\n+\n+library(vegan)\n+library(optparse)\n+\n+funcGetCentroidForMetadatum <- function(\n+### Given a binary metadatum, calculate the centroid of the samples associated with the metadata value of 1\n+# 1. Get all samples that have the metadata value of 1\n+# 2. Get the x and y coordinates of the selected samples\n+# 3. Get the median value for the x and ys\n+# 4. Return those coordinates as the centroid\'s X and Y value\n+vfMetadata,\n+### Logical or integer (0,1) vector, TRUE or 1 values indicate correspoinding samples in the\n+### mSamplePoints which will be used to define the centroid\n+mSamplePoints\n+### Coordinates (columns;n=2) of samples (rows) corresponding to the vfMetadata\n+){\n+ # Check the lengths which should be equal\n+ if(length(vfMetadata)!=nrow(mSamplePoints))\n+ {\n+ print(paste("funcGetCentroidForMetadata::Error: Should have received metadata and samples of the same length, received metadata length ",length(vfMetadata)," and sample ",nrow(mSamplePoints)," length.",sep=""))\n+ return( FALSE )\n+ }\n+\n+ # Get all the samples that have the metadata value of 1\n+ viMetadataSamples = which(as.integer(vfMetadata)==1)\n+\n+ # Get the x and y coordinates for the selected samples\n+ mSelectedPoints = mSamplePoints[viMetadataSamples,]\n+\n+ # Get the median value for the x and the ys\n+ if(!is.null(nrow(mSelectedPoints)))\n+ {\n+ return( list(x=median(mSelectedPoints[,1],na.rm = TRUE),y=median(mSelectedPoints[,2],na.rm = TRUE)) )\n+ } else {\n+ return( list(x=mSelectedPoints[1],y=mSelectedPoints[2]) )\n+ }\n+}\n+\n+funcGetMaximumForMetadatum <- function(\n+### Given a continuous metadata\n+### 1. Use the x and ys from mSamplePoints for coordinates and the metadata value as a height (z)\n+### 2. Use lowess to smooth the landscape\n+### 3. Take the maximum of the landscape\n+### 4. 
Return the coordiantes for the maximum as the centroid\n+vdMetadata,\n+### Continuous (numeric or integer) metadata\n+mSamplePoints\n+### Coordinates (columns;n=2) of samples (rows) corresponding to the vfMetadata\n+){\n+ # Work with data frame\n+ if(class(mSamplePoints)=="matrix")\n+ {\n+ mSamplePoints = data.frame(mSamplePoints)\n+ }\n+ # Check the lengths of the dataframes and the metadata\n+ if(length(vdMetadata)!=nrow(mSamplePoints))\n+ {\n+ print(paste("funcGetMaximumForMetadatum::Error: Should have received metadata and samples of the same length, received metadata length ",length(vdMetadata)," and sample ",nrow(mSamplePoints)," length.",sep=""))\n+ return( FALSE )\n+ }\n+\n+ # Add the metadata value to the points\n+ mSamplePoints[3] = vdMetadata\n+ names(mSamplePoints) = c("x","y","z") \n+\n+ # Create lowess to smooth the surface\n+ # And calculate the fitted heights\n+ # x = sample coordinate 1\n+ # y = sample coordinate 2\n+ # z = metadata value\n+ loessSamples = loess(z~x*y, data=mSamplePoints, degree = 1, normalize = FALSE, na.action=na.omit)\n+\n+ # Naively get the max\n+ vdCoordinates = loessSamples$x[which(loessSamples$y==max(loessSamples$y)),]\n+ return(list(lsmod = loessSamples, x=vdCoordinates[1],y=vdCoordinates[2]))\n+}\n+\n+funcMakeShapes <- function(\n+### Takes care of defining shapes for the plot\n+dfInput,\n+### Data frame of metadata measurements\n+sShapeBy,\n+### The metadata to shape by\n+sShapes,\n+### List of custom metadata (per level if factor).\n+### Should correspond to the number of levels in shapeBy; the format is level:shape,level:shape for example HighLuminosity:14,LowLuminosity:2,HighPH:10,LowPH:18 \n+cDefaultShape\n+### Shape to default to if custom shapes are not used\n+){\n+ lShapes = list()\n+ vsShapeValues = c()\n+ vsShapeShapes = c()\n+ vsShapes = c()\n+ sMetadataId = sShapeBy\n+\n+ # Set default shape, color, and color ranges \n+ if(!is.null(cDefaultShape))\n+ {\n+ # Default shape should be an int for the int pch 
options\n+ if(!is.na(as.integer(cDefaultShape)))\n+ {\n+ cDefaultShape = as.integer(cDefaultShape)\n+ }\n+ } else {\n+ cDefaultShape = 16\n+ }\n+\n+ # Make shapes\n+ vs'..b'ch(sInputFileName==".")\n+ if(length(viPeriods)>0)\n+ {\n+ sOutputFileName = paste(OutputFileName[1:viPeriods[length(viPeriods)]],"pdf",sep=".")\n+ } else {\n+ sOutputFileName = paste(sInputFileName,"pdf",sep=".")\n+ }\n+ }\n+\n+ pdf(sOutputFileName,useDingbats=FALSE)\n+ plot(mNMDSData$points, xlab=paste("NMDS1","Stress=",mNMDSData$stress), ylab="NMDS2", pch=vsShapes, col=vsColors)\n+ title(sTitle,line=3)\n+ # Plot Bugs\n+ mPlotBugs = mNMDSData$species[viBugsToPlot,]\n+ if(length(viBugsToPlot)==1)\n+ {\n+ text(x=mPlotBugs[1],y=mPlotBugs[2],labels=row.names(mNMDSData$species)[viBugsToPlot],col=sTextColor)\n+ } else if(length(viBugsToPlot)>1){\n+ text(x=mPlotBugs[,1],y=mPlotBugs[,2],labels=row.names(mNMDSData$species)[viBugsToPlot],col=sTextColor)\n+ }\n+\n+ # Add alternative axes\n+ axis(3, col=sArrowColor)\n+ axis(4, col=sArrowColor)\n+ box(col = "black")\n+\n+ # Plot Metadata\n+ if(length(viMetadataDummy)>0)\n+ {\n+ if(fPlotArrow)\n+ {\n+ # Plot arrows\n+ for(i in viMetadataDummy)\n+ {\n+ curCoordinates = mMetadataCoordinates[i,]\n+ curCoordinates = curCoordinates * dResizeArrow\n+ # Plot Arrow\n+ arrows(0,0, curCoordinates[1] * 0.8, curCoordinates[2] * 0.8, col=sArrowColor, length=0.1 )\n+ }\n+ }\n+ # Plot text\n+ if(length(viMetadataDummy)==1)\n+ {\n+ text(x=mMetadataCoordinates[viMetadataDummy,][1]*dResizeArrow*0.8, y=mMetadataCoordinates[viMetadataDummy,][2]*dResizeArrow*0.8, labels=row.names(mMetadataCoordinates)[viMetadataDummy],col=sArrowTextColor)\n+ } else {\n+ text(x=mMetadataCoordinates[viMetadataDummy,1]*dResizeArrow*0.8, y=mMetadataCoordinates[viMetadataDummy,2]*dResizeArrow*0.8, labels=row.names(mMetadataCoordinates)[viMetadataDummy],col=sArrowTextColor)\n+ }\n+ }\n+\n+ sLegendText = c(paste(vsColorValues,sColorBy,sep="_"),paste(vsShapeValues,sMetadataShape,sep="_"))\n+ 
sLegendShapes = c(rep(cDefaultShape,length(vsColorValues)),vsShapeShapes)\n+ sLegendColors = c(vsColorRBG,rep(cDefaultColor,length(vsShapeValues)))\n+ if(length(sLegendText)>0)\n+ {\n+ legend("topright",legend=sLegendText,pch=sLegendShapes,col=sLegendColors)\n+ }\n+\n+ # Original biplot call if you want to check the custom ploting of the script\n+ # There will be one difference where the biplot call scales an axis, this one does not. In relation to the axes, the points, text and arrows should still match.\n+ # Axes to the top and right are for the arrow, otherse are for markers and bug names.\n+ #biplot(mNMDSData$points,mMetadataCoordinates[viMetadataDummy,],xlabs=vsShapes,xlab=paste("MDS1","Stress=",mNMDSData$stress),main="Biplot function Bugs and Sampes - Metadata Plotted with Centroids")\n+ dev.off()\n+}\n+\n+# This is the equivalent of __name__ == "__main__" in Python.\n+# That is, if it\'s true we\'re being called as a command line script;\n+# if it\'s false, we\'re being sourced or otherwise included, such as for\n+# library or inlinedocs.\n+if( identical( environment( ), globalenv( ) ) &&\n+\t!length( grep( "^source\\\\(", sys.calls( ) ) ) )\n+{\n+ lsArgs <- parse_args( pArgs, positional_arguments=TRUE )\n+\n+ print("lsArgs")\n+ print(lsArgs)\n+\n+ funcDoBiplot(\n+ sBugs = lsArgs$options$sBugs,\n+ sMetadata = lsArgs$options$sMetadata,\n+ sColorBy = lsArgs$options$sColorBy,\n+ sColorRange = lsArgs$options$sColorRange,\n+ sTextColor = lsArgs$options$sTextColor,\n+ sArrowColor = lsArgs$options$sArrowColor,\n+ sArrowTextColor = lsArgs$options$sArrowTextColor,\n+ sPlotNAColor = lsArgs$options$sPlotNAColor,\n+ sShapeBy = lsArgs$options$sShapeBy,\n+ sShapes = lsArgs$options$sShapes,\n+ sDefaultMarker = lsArgs$options$sDefaultMarker,\n+ sRotateByMetadata = lsArgs$options$sRotateByMetadata,\n+ dResizeArrow = lsArgs$options$dResizeArrow,\n+ fPlotArrow = !lsArgs$options$fNoPlotMetadataArrows,\n+ sTitle = lsArgs$options$sTitle,\n+ sInputFileName = lsArgs$args[2],\n+ 
sLastMetadata = lsArgs$args[1],\n+ sOutputFileName = lsArgs$options$sOutputFileName)\n+}\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/scripts/scriptConvertBetweenBIOMAndPCL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptConvertBetweenBIOMAndPCL.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+"""
+Author: Timothy Tickle
+Description: Converts between BIOM and PCL files. If a PCL file is read, an equivalent BIOM file will be written; if a BIOM file is read, an equivalent pcl file will be written.
+"""
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2013"
+__credits__ = ["Timothy Tickle","George Weingart"]
+__license__ = ""
+__version__ = ""
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@hsph.harvard.edu"
+__status__ = "Development"
+
+from AbundanceTable import AbundanceTable
+import argparse
+from ConstantsBreadCrumbs import ConstantsBreadCrumbs
+import os
+import sys
+
+
+#Set up arguments reader
+argp = argparse.ArgumentParser( prog = "convertBetweenBIOMAndPCL.py",
+    description = """Converts a PCL file to a BIOM file and visa versa.""" )
+
+#Arguments
+#For table
+argp.add_argument("-i","--id", dest="sID", default = None, metavar= "Sample ID", help="The metadata indicating the sample ID.")
+argp.add_argument("-l","--meta", dest = "sLastMetadataName", default = None, metavar= "Last Metadata Name", help="The last listed metadata before the first data measurement in the pcl file or to be in the pcl file.")
+argp.add_argument("-r","--rowMetadataID", dest = "sLastMetadataRow", default = None,  metavar = "Last Row Metadata Column", help = "If row metadata is present in a PCL file, what is the id of the last row metadata column (most right column that contains row metadata). PCL file only.")
+argp.add_argument("-f","--delim", dest = "cFileDelimiter", action= "store", metavar="File Delimiter", default="\t", help="File delimiter, default tab")
+argp.add_argument("strFileAbund", metavar = "Abundance file", help ="Input data file")
+argp.add_argument("strOutputFile", default = "", nargs="?", metavar = "Selection Output File", help ="Output file")
+
+args = argp.parse_args( )
+
+# Make the output file name (if not given) and get the type of output file name
+# Change the extension from BIOM to pcl
+lsFilePieces = os.path.splitext(args.strFileAbund)
+strOutputFileType = ConstantsBreadCrumbs.c_strPCLFile if lsFilePieces[-1]=="."+ConstantsBreadCrumbs.c_strBiomFile else ConstantsBreadCrumbs.c_strBiomFile
+
+if not args.strOutputFile:
+  args.strOutputFile = lsFilePieces[0] + "." + strOutputFileType
+
+# Set the last metadata to the id if not given.
+if not args.sLastMetadataName:
+  args.sLastMetadataName = args.sID
+
+# Read in abundance table
+abndTable = AbundanceTable.funcMakeFromFile(args.strFileAbund, cDelimiter=args.cFileDelimiter, sMetadataID=args.sID, sLastMetadataRow = args.sLastMetadataRow, sLastMetadata=args.sLastMetadataName, xOutputFile=args.strOutputFile)
+if not abndTable:
+  print("Could not create an abundance table from the given file and settings.")
+else:
+  abndTable.funcWriteToFile(args.strOutputFile, cDelimiter=args.cFileDelimiter, cFileType=strOutputFileType)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/scripts/scriptEnvToTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptEnvToTable.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+"""
+Author: Timothy Tickle
+Description: Convert Env file to table
+"""
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = ""
+__version__ = ""
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import sys
+import argparse
+import csv
+
+
+#Set up arguments reader
+argp = argparse.ArgumentParser( prog = "scriptEnvToTable.py",
+    description = """Convert Env file to table""" )
+
+#Arguments
+#For table
+argp.add_argument("strEnvFile", metavar = "EnvFile", help ="EnvFile data file")
+argp.add_argument("strOutputFile", metavar = "OutputFile", help ="Output File")
+args = argp.parse_args( )
+
+hndlReader = csv.reader(open(args.strEnvFile,'rU'), delimiter="\t")
+
+lsListOfIDs = []
+lsListOfFeatures = []
+dictValues = {}
+for lsLine in hndlReader:
+  print(lsLine)
+  lsListOfIDs.append(lsLine[1])
+  lsListOfFeatures.append(lsLine[0])
+  tpleKey = tuple([lsLine[1],lsLine[0]])
+  if tpleKey in dictValues:
+    print("Error:: Duplicate key entries found")
+    exit(1)
+  dictValues[tpleKey] = lsLine[2]
+
+lsListOfIDs = list(set(lsListOfIDs))
+lsListOfFeatures = list(set(lsListOfFeatures))
+print(lsListOfIDs)
+print(lsListOfFeatures)
+hndlWrite = csv.writer(open(args.strOutputFile,'w'), delimiter="\t")
+hndlWrite.writerow(["ID"]+lsListOfIDs)
+for sFeature in lsListOfFeatures:
+  lsFeatureLine = [sFeature]
+  for sSample in lsListOfIDs:
+    lsFeatureLine.append(dictValues.get(tuple([sSample,sFeature]),0))
+  hndlWrite.writerow(lsFeatureLine)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/scripts/scriptManipulateTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptManipulateTable.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,295 @@\n+#!/usr/bin/env python\n+"""\n+Author: Timothy Tickle\n+Description: Performs common manipulations on tables\n+"""\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = ""\n+__version__ = ""\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+import argparse\n+import csv\n+import sys\n+import re\n+import os\n+import numpy as np\n+from src.AbundanceTable import AbundanceTable\n+#from src.PCA import PCA\n+from src.ValidateData import ValidateData\n+\n+#Set up arguments reader\n+argp = argparse.ArgumentParser( prog = "scriptManipulateTable.py",\n+ description = """Performs common manipulations on tables.\\nExample: python scriptManipulateTable.py -i TID -l STSite Test.pcl""" )\n+\n+#Arguments\n+#Describe table\n+argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID")\n+argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name")\n+argp.add_argument("-d","--fileDelim", dest= "cFileDelimiter", action= "store", default="\\t", help="File delimiter, default tab")\n+argp.add_argument("-f","--featureDelim", dest= "cFeatureDelimiter", action= "store", default="|", help="Feature (eg. 
bug or function) delimiter, default \'|\'")\n+\n+#Checked x 2\n+argp.add_argument("-n","--doNorm", dest="fNormalize", action="store_true", default=False, help="Flag to turn on normalization")\n+argp.add_argument("-s","--doSum", dest="fSum", action="store_true", default=False, help="Flag to turn on summation")\n+\n+#Unsupervised filtering\n+argp.add_argument("-A","--doFilterAbundance", dest="strFilterAbundance", action="store", default=None, help="Turns on filtering by abundance (remove features that do not have a minimum abundance in a minimum number of samples); Should be a real number and an integer in the form \'minAbundance,minSamples\', (should be performed on a normalized file).")\n+argp.add_argument("-P","--doFilterPercentile", dest="strFilterPercentile", action="store", default=None, help="Turns on filtering by percentile Should be two numbers between 0 and 1 in the form \'percentile,percentage\'. (should be performed on a normalized file).")\n+argp.add_argument("-O","--doFilterOccurrence", dest="strFilterOccurence", action="store", default=None, help="Turns on filtering by occurrence. 
Should be two integers in the form \'minSequence,minSample\' (should NOT be performed on a normalized file).")\n+#argp.add_argument("-D","--doFilterDeviation", dest="dCuttOff", action="store", type=float, default=None, help="Flag to turn on filtering by standard deviation (should NOT be performed on a normalized file).")\n+\n+#Change bug membership\n+argp.add_argument("-t","--makeTerminal", dest="fMakeTerminal", action="store_true", default=False, help="Works reduces the file to teminal features in the original file.")\n+argp.add_argument("-u","--reduceOTUs", dest="fRemoveOTUs", action="store_true", default=False, help="Remove otu entries from file.")\n+argp.add_argument("-c","--reduceToClade", dest="iClade", action="store", type=int, default=None, help="Specify a level of clade to reduce to [].")\n+argp.add_argument("-b","--reduceToFeatures", dest="strFeatures", action="store", default=None, help="Reduce measurements to certain features (bugs or functions). This can be a comma delimited string (of atleast 2 bugs) or a file.")\n+\n+#Manipulate based on metadata\n+#Checked\n+argp.add_argument("-y","--stratifyBy", dest="strStratifyBy", action="store", default=None, help="Metadata to stratify tables by.")\n+argp.add_argument("-r","--removeMetadata", dest="strRemoveMetadata", action="store", default=None, help="Remove samples of this metadata and value (format comma delimited string with metadata id first and the values to remove after \'id,lvalue1,value2\').")\n+\n+#Manipulate lineage\n+#Checked\n+argp.add_argument("-x","--doPrefixClades", dest="fPrefixClades", action="store_true", default=False, help="Flag to turn on adding prefixes to clade'..b'+ iMinSamples = int(iMinSamples)\n+ for abndTable in lsTables:\n+ if abndTable.funcIsNormalized():\n+ fResult = abndTable.funcFilterAbundanceByMinValue(dMinAbundance=dAbundance,iMinSamples=iMinSamples)\n+ if fResult:\n+ print "ManipulateTable::"+abndTable.funcGetName()+" has been reduced by minimum relative abundance value 
and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."\n+ else:\n+ print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by percentile."\n+ else:\n+ print "ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized and so the abundance filter is invalid, please indicate to normalize the table."\n+\n+#if args.dCuttOff:\n+# print "Standard deviation filtering not completed"\n+# for abndTable in lsTables:\n+# abndTable.funcFilterFeatureBySD(dMinSDCuttOff=args.dCuttOff)\n+# if fResult:\n+# print "ManipulateTable::"+abndTable.funcGetName()+" has been reduced by standard deviation and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."\n+# else:\n+# print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by standard devation."\n+\n+# Need to normalize again after abundance data filtering given removing features breaks the normalization\n+# This happends twice because normalization is required to make the abundance data to filter on ;-)\n+# Normalize if needed\n+if args.fNormalize:\n+ for abndTable in lsTables:\n+ fResult = abndTable.funcNormalize()\n+ if fResult:\n+ print "ManipulateTable::"+abndTable.funcGetName()+" was normalized after filtering on abundance data."\n+\n+#Manipulate lineage\n+if args.fPrefixClades:\n+ for abndTable in lsTables:\n+ fResult = abndTable.funcAddCladePrefixToFeatures()\n+ if fResult:\n+ print "ManipulateTable::Clade Prefix was added to "+abndTable.funcGetName()\n+ else:\n+ print "ManipulateTable::ERROR. 
Clade Prefix was NOT added to "+abndTable.funcGetName()\n+\n+# Under development\n+# Reduce dimensionality\n+#if args.fDoPCA:\n+# pcaCur = PCA()\n+# for abndTable in lsTables:\n+#\n+# # Add data features\n+# # Make data components and add to abundance table\n+# pcaCur.loadData(abndTable,True)\n+# pcaCur.run(fASTransform=True)\n+# ldVariance = pcaCur.getVariance()\n+# lldComponents = pcaCur.getComponents()\n+# # Make Names\n+# lsNamesData = ["Data_PC"+str((tpleVariance[0]+1))+"_"+re.sub("[\\.|-]","_",str(tpleVariance[1])) for tpleVariance in enumerate(ldVariance)]\n+# abndTable.funcAddDataFeature(lsNamesData,lldComponents)\n+#\n+# # Add metadata features\n+# # Convert metadata to an input for PCA\n+# pcaCur.loadData(pcaCur.convertMetadataForPCA(abndTable),False)\n+# fSuccessful = pcaCur.run(fASTransform=False)\n+# if(fSuccessful):\n+# ldVariance = pcaCur.getVariance()\n+# lldComponents = pcaCur.getComponents()\n+# # Make Names\n+# lsNamesMetadata = ["Metadata_PC"+str((tpleVariance[0]+1))+"_"+re.sub("[\\.|-]","_",str(tpleVariance[1])) for tpleVariance in enumerate(ldVariance)]\n+# # Make metadata components and add to abundance\n+# llsMetadata = [list(npdRow) for npdRow in lldComponents]\n+# abndTable.funcAddMetadataFeature(lsNamesMetadata, llsMetadata)\n+# else:\n+# print "ManipulateTable::No metadata to PCA, no PCA components added to file based on metadata"\n+\n+#Manipulate based on metadata\n+if args.strStratifyBy:\n+ labndStratifiedTables = []\n+ for abndTable in lsTables:\n+ labndResult = abndTable.funcStratifyByMetadata(strMetadata=args.strStratifyBy)\n+ print "ManipulateTable::"+abndTable.funcGetName()+" was stratified by "+args.strStratifyBy+" in to "+str(len(labndResult))+" tables."\n+ labndStratifiedTables.extend(labndResult)\n+ lsTables = labndStratifiedTables\n+\n+if len(lsTables) == 1:\n+ lsTables[0].funcWriteToFile(args.strOutFile)\n+else:\n+ iIndex = 1\n+ for abndManTable in lsTables:\n+ 
abndManTable.funcWriteToFile(lsPieces[0]+str(iIndex)+lsPieces[1])\n+ iIndex = iIndex + 1\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/scripts/scriptPcoa.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptPcoa.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+"""
+Author: Timothy Tickle
+Description: Make PCoA of an abundance file
+"""
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = ""
+__version__ = ""
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import sys
+import argparse
+from src.AbundanceTable import AbundanceTable
+from src.Metric import Metric
+import csv
+import os
+from src.PCoA import PCoA
+
+#Set up arguments reader
+argp = argparse.ArgumentParser( prog = "scriptPcoa.py",
+    description = """PCoAs an abundance file given a metadata.\nExample:python scriptPcoa.py -i TID -l STSite""" )
+
+#Arguments
+#For table
+argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID")
+argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name")
+argp.add_argument("-d","--fDelim", dest= "cFileDelimiter", action= "store", default="\t", help="File delimiter, default tab")
+argp.add_argument("-f","--featureDelim", dest="cFeatureNameDelimiter", action= "store", metavar="Feature Name Delimiter", default="|", help="Feature delimiter")
+
+argp.add_argument("-n","--doNorm", dest="fDoNormData", action="store_true", default=False, help="Flag to turn on normalization")
+argp.add_argument("-s","--doSum", dest="fDoSumData", action="store_true", default=False, help="Flag to turn on summation")
+
+argp.add_argument("-p","--paint", dest="sLabel", metavar= "Label", default=None, help="Label to paint in the PCoA")
+argp.add_argument("-m","--metric", dest="strMetric", metavar = "distance", default = PCoA.c_BRAY_CURTIS, help ="Distance metric to use. Pick from braycurtis, canberra, chebyshev, cityblock, correlation, cosine, euclidean, hamming, spearman, sqeuclidean, unifrac_unweighted, unifrac_weighted")
+argp.add_argument("-o","--outputFile", dest="strOutFile", metavar= "outputFile", default=None, help="Specify the path for the output figure.")
+argp.add_argument("-D","--DistanceMatrix", dest="strFileDistanceMatrix", metavar= "strFileDistanceMatrix", default=None, help="Specify the path for outputing the distance matrix (if interested). Default this will not output.")
+argp.add_argument("-C","--CoordinatesMatrix", dest="strFileCoordinatesMatrix", metavar= "strFileCoordinatesMatrix", default=None, help="Specify the path for outputing the x,y coordinates matrix (Dim 1 and 2). Default this will not output.")
+
+# Unifrac arguments
+argp.add_argument("-t","--unifracTree", dest="istrmTree", metavar="UnifracTreeFile", default=None, help="Optional file only needed for UniFrac calculations.")
+argp.add_argument("-e","--unifracEnv", dest="istrmEnvr", metavar="UnifracEnvFile", default=None, help="Optional file only needed for UniFrac calculations.")
+argp.add_argument("-c","--unifracColor", dest="fileUnifracColor", metavar="UnifracColorFile", default = None, help="A text file indicating the groupings of metadata to color. Each line in the file is a group to color. An example file line would be  'GroupName:ID,ID,ID,ID'")
+
+argp.add_argument("strFileAbund", metavar = "Abundance file", nargs="?", help ="Input data file")
+
+args = argp.parse_args( )
+
+#Read in abundance table
+abndTable = None
+if args.strFileAbund:
+  abndTable = AbundanceTable.funcMakeFromFile(args.strFileAbund,
+                             cDelimiter = args.cFileDelimiter,
+                             sMetadataID = args.sIDName,
+                             sLastMetadata = args.sLastMetadataName,
+                             cFeatureNameDelimiter= args.cFeatureNameDelimiter)
+
+  #Normalize if need
+  if args.fDoSumData:
+    abndTable.funcSumClades()
+
+  #Sum if needed
+  if args.fDoNormData:
+    abndTable.funcNormalize()
+
+#Get the metadata to paint
+lsKeys = None
+if abndTable:
+  lsKeys = abndTable.funcGetMetadataCopy().keys() if not args.sLabel else [args.sLabel]
+
+#Get pieces of output file
+if not args.strOutFile:
+  if not args.strFileAbund:
+    args.strOutFile = os.path.splitext(os.path.basename(args.istrmEnvr))[0]+"-pcoa.pdf"
+  else:
+    args.strOutFile = os.path.splitext(os.path.basename(args.strFileAbund))[0]+"-pcoa.pdf"
+lsFilePieces = os.path.splitext(args.strOutFile)
+
+# Make PCoA object
+# Get PCoA object and plot
+pcoa = PCoA()
+if(not args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]) and abndTable:
+  pcoa.loadData(abndTable,True)
+# Optional args.strFileDistanceMatrix if not none will force a printing of the distance measures to the path in args.strFileDistanceMatrix
+pcoa.run(tempDistanceMetric=args.strMetric, iDims=2, strDistanceMatrixFile=args.strFileDistanceMatrix, istrmTree=args.istrmTree, istrmEnvr=args.istrmEnvr)
+
+# Write dim 1 and 2 coordinates to file
+if args.strFileCoordinatesMatrix:
+  lsIds = pcoa.funcGetIDs()
+  mtrxCoordinates = pcoa.funcGetCoordinates()
+  csvrCoordinates = csv.writer(open(args.strFileCoordinatesMatrix, 'w'))
+  csvrCoordinates.writerow(["ID","Dimension_1","Dimension_2"])
+  for x in xrange(mtrxCoordinates.shape[0]):
+    strId = lsIds[x] if lsIds else ""
+    csvrCoordinates.writerow([strId]+mtrxCoordinates[x].tolist())
+
+# Paint metadata
+if lsKeys:
+  for iIndex in xrange(len(lsKeys)):
+    lsMetadata = abndTable.funcGetMetadata(lsKeys[iIndex])
+
+    pcoa.plotList(lsLabelList = lsMetadata,
+      strOutputFileName = lsFilePieces[0]+"-"+lsKeys[iIndex]+lsFilePieces[1],
+      iSize=20,
+      dAlpha=1.0,
+      charForceColor=None,
+      charForceShape=None,
+      fInvert=False,
+      iDim1=1,
+      iDim2=2)
+
+if args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
+
+  c_sNotGiven = "Not_specified"
+
+  lsIds = pcoa.funcGetIDs()
+  lsGroupLabels = [c_sNotGiven for s in lsIds]
+
+  if args.fileUnifracColor:
+
+    # Read color file and make a dictionary to convert ids
+    lsColorLines = csv.reader(open(args.fileUnifracColor))
+    dictConvertIDToGroup = {}
+    for lsLine in lsColorLines:
+      if lsLine:
+        sGroupID, sFirstID = lsLine[0].split(":")
+        dictConvertIDToGroup.update(dict([(sID,sGroupID) for sID in [sFirstID]+lsLine[1:]]))
+
+    lsGroupLabels = [dictConvertIDToGroup.get(sID,c_sNotGiven) for sID in lsIds]
+
+  pcoa.plotList(lsLabelList = lsGroupLabels,
+      strOutputFileName = lsFilePieces[0]+"-"+args.strMetric+lsFilePieces[1],
+      iSize=20,
+      dAlpha=1.0,
+      charForceColor=None,
+      charForceShape=None,
+      fInvert=False,
+      iDim1=1,
+      iDim2=2)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/scripts/scriptPlotFeature.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/scripts/scriptPlotFeature.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+"""
+Author: Timothy Tickle
+Description: Plots features
+"""
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = ""
+__version__ = ""
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import sys
+import argparse
+import csv
+import os
+from src.BoxPlot import BoxPlot
+from src.Histogram import Histogram
+from src.ScatterPlot import ScatterPlot
+
+def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
+  """
+  Group the values of one variable by the values of the other and draw one box per group.
+
+  The output path, titles, color and axis options are read from the module-level args.
+
+  :param lxVariable1: Values of the first feature
+  :type: List
+  :param lxVariable2: Values of the second feature, indexed in parallel with lxVariable1
+  :type: List
+  :param fOneIsNumeric: True if lxVariable1 holds the values to plot and lxVariable2 the group labels, False for the reverse
+  :type: Boolean
+  """
+
+  if fOneIsNumeric:
+    lxValues, lxGroups = lxVariable1, lxVariable2
+  else:
+    lxValues, lxGroups = lxVariable2, lxVariable1
+
+  # Collect the values of each group under its label
+  dictGroups = {}
+  for iPosition in xrange(len(lxValues)):
+    dictGroups.setdefault(lxGroups[iPosition],[]).append(lxValues[iPosition])
+
+  # Labels and value lists are both taken in the dictionary's key order so they stay paired
+  lsLabels = dictGroups.keys()
+  ly = [dictGroups[sLabel] for sLabel in lsLabels]
+
+  BoxPlot.funcPlot(ly=ly, lsLabels=lsLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
+
+
+#Set up arguments reader
+# The prog text doubles as a usage example, so it names this script and its positional arguments.
+argp = argparse.ArgumentParser( prog = "scriptPlotFeature.py\nExample: python scriptPlotFeature.py Input.pcl valuesID groupID",
+    description = "Plot one or two features from an abundance table as a histogram, box plot or scatter plot.")
+
+#Specify output if needed
+argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")
+
+# Text annotation
+argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Text for the title.")
+argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
+argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")
+
+# Color options
+argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
+argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")
+
+# Axis adjustments
+argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")
+
+# Required
+argp.add_argument("strFileAbund", help ="Input data file")
+# nargs="+" accepts any number of features; only the first two are used by the script.
+argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")
+
+args = argp.parse_args( )
+
+#Holds the data
+# Values of each requested feature (None until its row is found in the input file)
+lxVariable1 = None
+lxVariable2 = None
+# Set to True once every remaining value of the feature has been converted to float
+fOneIsNumeric = False
+fTwoIsNumeric = False
+
+# The first feature is required and the second is optional; any further features are ignored
+strFeatureOneID = args.strFeatures[0]
+strFeatureTwoID = None if len(args.strFeatures)<2 else args.strFeatures[1]
+
+# If the output file is not specified, make it up as
+# <input path without extension>-<feature one>[-<feature two>]-plotfeature.pdf
+if not args.strOutputFile:
+  lsPieces = os.path.splitext(args.strFileAbund)
+  args.strOutputFile = [lsPieces[0],strFeatureOneID]
+  if strFeatureTwoID:
+    args.strOutputFile = args.strOutputFile+[strFeatureTwoID]
+  args.strOutputFile = "-".join(args.strOutputFile+["plotfeature.pdf"])
+
+# If the title is not specified, use the feature name(s) joined by " vs "
+if not args.strTitle:
+  args.strTitle = [strFeatureOneID]
+  if strFeatureTwoID:
+    args.strTitle = args.strTitle+[strFeatureTwoID]
+  args.strTitle = " vs ".join(args.strTitle)
+
+# Axis labels default to the feature names (the y label stays None when only one feature is given)
+if args.strX is None:
+  args.strX = strFeatureOneID
+
+if args.strY is None:
+  args.strY = strFeatureTwoID
+
+# Get values and groupings
+# A string is opened as a path and closed again when done; anything else is read as an already open file.
+fOpenedHere = isinstance(args.strFileAbund,str)
+hndlAbund = open(args.strFileAbund, 'rU') if fOpenedHere else args.strFileAbund
+try:
+  for lsLine in csv.reader(hndlAbund, delimiter="\t"):
+    # Blank lines are parsed as empty lists and carry no feature
+    if not lsLine:
+      continue
+    if lsLine[0] == strFeatureOneID:
+      lxVariable1 = lsLine[1:]
+    if (strFeatureTwoID is not None) and (lsLine[0] == strFeatureTwoID):
+      lxVariable2 = lsLine[1:]
+finally:
+  if fOpenedHere:
+    hndlAbund.close()
+
+# Remove NAs
+# A position is dropped from both features when either one is "NA" there, so the lists stay aligned.
+# Features that were not found (None) are skipped here and reported later in the script.
+liNAs = set()
+for lxVariable in [lxVariable1, lxVariable2]:
+  if lxVariable is not None:
+    liNAs.update([i for i,x in enumerate(lxVariable) if x.lower() == "na"])
+
+if lxVariable1 is not None:
+  lxVariable1 = [x for i,x in enumerate(lxVariable1) if i not in liNAs]
+
+if lxVariable2 is not None:
+  lxVariable2 = [x for i,x in enumerate(lxVariable2) if i not in liNAs]
+
+# Type variables
+# A feature is numeric only if it still has values and all of them convert to float;
+# otherwise its values are left as strings.
+if lxVariable1:
+  try:
+    lxVariable1 = [float(xItem) for xItem in lxVariable1]
+    fOneIsNumeric = True
+  except ValueError:
+    pass
+
+if lxVariable2:
+  try:
+    lxVariable2 = [float(xItem) for xItem in lxVariable2]
+    fTwoIsNumeric = True
+  except ValueError:
+    pass
+
+# Report a requested feature that was not found, otherwise pick the plot from the feature types
+if lxVariable1 is None:
+  print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
+elif (strFeatureTwoID is not None) and (lxVariable2 is None):
+  print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
+elif lxVariable2 is not None:
+  # Two features: scatter plot if both are numeric, box plot if exactly one is
+  if fOneIsNumeric and fTwoIsNumeric:
+    ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
+  elif fOneIsNumeric or fTwoIsNumeric:
+    funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
+  else:
+    print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")
+elif fOneIsNumeric:
+  # One numeric feature: histogram
+  Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
+else:
+  print("Sorry currently histograms are support for only numeric data.")

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/AbundanceTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/AbundanceTable.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,2435 @@\n+"""\n+Author: Timothy Tickle\n+Description: Class to abstract an abundance table and methods to run on such a table.\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+import csv\n+import sys\n+import blist\n+from CClade import CClade\n+from ConstantsBreadCrumbs import ConstantsBreadCrumbs\n+import copy\n+from datetime import date\n+import numpy as np\n+import os\n+import re\n+import scipy.stats\n+import string\n+from ValidateData import ValidateData\n+from biom.parse import *\n+from biom.table import *\n+\n+c_dTarget\t= 1.0\n+c_fRound\t= False\n+c_iSumAllCladeLevels = -1\n+c_fOutputLeavesOnly = False\n+\n+class RowMetadata:\n+\t"""\n+\tHolds the row (feature) metadata and associated functions.\n+\t"""\n+\n+\tdef __init__(self, dictRowMetadata, iLongestMetadataEntry=None, lsRowMetadataIDs=None):\n+\t\t""" Constructor requires a dictionary or row metadata.\n+\t\t:param dictRowMetadata:\tThe row metadata values with the ids as the keys, must be stable (keep order)\n+\t\t:type:\t\t\t{string feature id: {\'metadata\': {\'taxonomy\': [list of metadata values]}}}\n+\t\t"""\n+\n+\t\tself.dictRowMetadata = dictRowMetadata\n+\t\tself.iLongestMetadataEntry = iLongestMetadataEntry\n+\t\tself.lsRowMetadataIDs = lsRowMetadataIDs\n+\n+\t\tself.dictMetadataIDs = {}\n+\t\t# Get the ids for the metadata\n+\t\tif self.dictRowMetadata:\n+\t\t\tfor dictMetadata in self.dictRowMetadata.values():\n+\t\t\t\tdictMetadata = dictMetadata.get(ConstantsBreadCrumbs.c_metadata_lowercase, None)\n+\n+\t\t\t\tif dictMetadata:\n+\t\t\t\t\tfor key,value in 
dictMetadata.items():\n+\t\t\t\t\t\tif self.dictMetadataIDs.get(key, None):\n+\t\t\t\t\t\t\tself.dictMetadataIDs[key] = max(self.dictMetadataIDs[key],len(dictMetadata[key]))\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tself.dictMetadataIDs[key] = len(dictMetadata[key])\n+\n+\tdef funcMakeIDs(self):\n+\t\t""" There should be a one to one mapping of row metadata ids and the values associated here with a feature ID.\n+\t\t If not make ids from the key by appending numbers.\n+\t\t"""\n+\n+\t\t# If there exists a ids list already return (this allows ID order to be given and preserved)\n+\t\t# Else make a list of IDs\n+\t\tif self.lsRowMetadataIDs:\n+\t\t\treturn self.lsRowMetadataIDs\n+\n+\t\tlsIDs = []\n+\t\tlsKeys = []\n+\n+\t\tfor key, value in self.dictMetadataIDs.items():\n+\t\t\tlsKeys.append( key )\n+\t\t\tif value > 1:\n+\t\t\t\tlsIDs.extend( [ "_".join( [ key, str( iIndex ) ] ) for iIndex in xrange( value ) ] )\n+\t\t\telse:\n+\t\t\t\tlsIDs.append( key )\n+\t\treturn [ lsIDs, lsKeys ]\n+\n+\tdef funGetFeatureMetadata(self, sFeature, sMetadata):\n+\t\t"""\n+\t\tReturns a list of values in the order of row metadta ids for a microbial feature given an id.\n+\n+\t\t:param sFeature'..b'MDkeyAscii) > 0:\t\t#Search for the last metadata\n+\t\t\t\t\t\t\t\t\tif not strIDMetadata:\n+\t\t\t\t\t\t\t\t\t\tstrIDMetadata = MDkeyAscii\n+\t\t\t\t\t\t\t\t\tBiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata] = MDkeyAscii #Set the last Metadata\n+\t\t\t\t\t\t\tif MDkeyAscii not in BiomMetadata:\n+\t\t\t\t\t\t\t\tBiomMetadata[MDkeyAscii] = list()\n+\t\t\t\t\t\t\t\tfor indx in range(0, lenBiomValue):\n+\t\t\t\t\t\t\t\t\tBiomMetadata[MDkeyAscii].append(None)\n+\t\t\t\t\t\t\tBiomMetadata[MDkeyAscii][cntMetadata] = MDvalueAscii \n+ \n+\n+\t\tBiomCommonArea[ConstantsBreadCrumbs.c_Metadata] = BiomMetadata\n+\t\tBiomCommonArea[ConstantsBreadCrumbs.c_MetadataID] = strIDMetadata\n+\t\t\n+\t\t#**********************************************\n+\t\t#* Build dtype 
*\n+\t\t#**********************************************\n+\n+\t\tBiomDtype = list()\n+\t\tiMaxIdLen+=10 #Increase it by 10\n+\t\tBiomDtypeEntry = list()\n+\t\tFirstValue = ConstantsBreadCrumbs.c_ID\n+\t\tSecondValue = "a" + str(iMaxIdLen)\n+\t\tBiomDtypeEntry.append(FirstValue)\n+\t\tBiomDtypeEntry.append(SecondValue)\n+\t\tBiomDtype.append(tuple(BiomDtypeEntry))\n+\n+\t\tfor a in BiomMetadata[ConstantsBreadCrumbs.c_ID]:\n+\t\t\t\tBiomDtypeEntry = list()\n+\t\t\t\tFirstValue = a.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)\n+\t\t\t\tSecondValue = ConstantsBreadCrumbs.c_f4 \n+\t\t\t\tBiomDtypeEntry.append(FirstValue)\n+\t\t\t\tBiomDtypeEntry.append(SecondValue)\n+\t\t\t\tBiomDtype.append(tuple(BiomDtypeEntry))\n+\t\t\t\t\n+\t\tBiomCommonArea[ConstantsBreadCrumbs.c_Dtype] = BiomDtype\n+\t\treturn BiomCommonArea\n+\n+\t@staticmethod\n+\tdef _funcBiomBuildRowMetadata( BiomValue, iMaxIdLen ):\t\n+\t\t"""\n+\t\tBuilds the row metadata from a BIOM value\n+\n+ \t\t:param\tBiomValue:\tBIOM Value from the BIOM JSON parsing\n+\t\t:type:\t\t\tComplex dict of string pairs and dicts\n+\t\t:param\tiMaxIdLen:\tMaximum length of all the IDs\n+\t\t:type:\t\t\tint\n+\t\t:return:\t\tdictRowsMetadata - np Array containing the rows metadata\n+\t\t:type:\t\t\t{string feature id: {\'metadata\': {\'taxonomy\': [list of metadata values]}}}\t\n+\t\t"""\t\n+\t\t# Build the input dict for RowMetadata from a dict of dicts from a BIOM file \n+\t\tdictRowsMetadata = dict()\n+\t\tfor iIndexRowMetaData in range(0, len(BiomValue)):\n+\t\t\tdictRowsMetadata[str(BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase])] = dict()\n+\t\t\tRowMetadataEntryFromTable = BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_metadata_lowercase]\n+\t\t\tdMetadataTempDict = dict()\n+\t\t\tfor key, value in RowMetadataEntryFromTable.iteritems():\n+\t\t\t\tdMetadataTempDict[key] = 
value\n+\t\t\tdictRowsMetadata[str(BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase])][ConstantsBreadCrumbs.c_metadata_lowercase] = dMetadataTempDict\n+\t\treturn dictRowsMetadata\n+\n+\t@staticmethod\n+\tdef _funcInsertKeyToCommonArea(BiomCommonArea, BiomKey, BiomValue):\n+\t\t"""\n+\t\tInserts the keys into the BiomCommonArea["BiomFileInfo"]\n+ \t\t:param\tBiomCommonArea - The common area that has been built before\n+\t\t:type:\tdict()\n+\t\t:param\tBiomKey - The current key (eg. format, date, generated by)\n+\t\t:type:\tstr\n+\t\t:param\tBiomValue - The current value of the key (eg. for format: "Biological Observation Matrix 0.9.1")\n+\t\t:type:\tstr\n+\t\t:return: BiomCommonArea - The updated common area\n+\t\t:type:\tdict()\t\t\n+\t\t"""\t\n+\t\n+\t\tif ConstantsBreadCrumbs.c_BiomFileInfo not in BiomCommonArea:\n+\t\t\t\tBiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo] = dict()\n+\t\t\t\n+\t\tstrInsertKey = BiomKey\t\t\t#Set Default - But it is now always the same... (eg. URL is not: format_url -->url and others)\n+\t\tPostBiomValue = BiomValue\t\t#The default value to be posted \n+\t\tif BiomKey == ConstantsBreadCrumbs.c_strFormatUrl:\n+\t\t\tstrInsertKey = ConstantsBreadCrumbs.c_strURLKey\n+\t\t\t\n+\t\tif BiomKey == ConstantsBreadCrumbs.c_MatrixTtype:\n+\t\t\tstrInsertKey = ConstantsBreadCrumbs.c_strSparsityKey\n+\t\t\t\n+\t\tif BiomKey == ConstantsBreadCrumbs.c_GeneratedBy:\n+\t\t\tPostBiomValue = None\n+\n+\t\tif BiomKey == ConstantsBreadCrumbs.c_strDateKey:\n+\t\t\tPostBiomValue = None\t\t\t\n+\t\t\t\n+\t\tBiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo][strInsertKey] = PostBiomValue\n+\t\treturn BiomCommonArea\n+\t\t\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/AbundanceTable.pyc

Binary file src/breadcrumbs/src/AbundanceTable.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/BoxPlot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/BoxPlot.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,130 @@
+"""
+Author: Timothy Tickle
+Description: Class to create box plots.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
+import matplotlib.pyplot as plt
+from pylab import *
+
+#Plots box plots
+class BoxPlot:
+  """
+  Draws box plots, overlaid with the individual data points, and saves them to a file.
+  """
+
+  @staticmethod
+  def funcPlot(ly, lsLabels, strOutputFigurePath, strTitle = "Title", strXTitle="X Axis", strYTitle="Y Axis", strColor = "#83C8F9", fJitter=False, fInvert=False, fInvertY=False):
+    """
+    Plot one box per group of values, with the values drawn as points on top (optionally jittered).
+
+    :params ly: Values to plot, one list per box
+    :type: List of lists of doubles
+    :params lsLabels: Labels of the boxes (x tick labels), in the same order as ly
+    :type: List string
+    :params strOutputFigurePath: File path the figure is saved to
+    :type: String file path
+    :params strTitle: Title of the figure
+    :type: String
+    :params strXTitle: Label of the x axis
+    :type: String
+    :params strYTitle: Label of the y axis
+    :type: String
+    :params strColor: Hex color for the face of the boxes and for the points
+    :type: String
+    :params fJitter: True spreads the points of a box randomly by up to 0.05 along x, False stacks them on the box center
+    :type: Boolean
+    :params fInvert: Passed to the figure constants to invert the colors (true)
+    :type: Boolean
+    :params fInvertY: True reverses the direction of the y axis
+    :type: Boolean
+    """
+
+    #Create the figure
+    figBox = plt.figure()
+
+    #Color settings
+    #The alpha for the boxes is read before the inversion call; per the original
+    #author's note the inversion sets the alpha to 1, and the boxes must stay see-through
+    objColors = ConstantsFiguresBreadCrumbs()
+    dBoxAlpha = objColors.c_dAlpha
+    objColors.invertColors(fInvert=fInvert)
+    strDetailColor = objColors.c_strDetailsColorLetter
+
+    #Background, axis labels and frame
+    figBox.set_facecolor(objColors.c_strBackgroundColorWord)
+    axBox = figBox.add_subplot(111,axisbg=objColors.c_strBackgroundColorLetter)
+    axBox.set_xlabel(strXTitle)
+    axBox.set_ylabel(strYTitle)
+    for sSide in ['top','bottom','left','right']:
+      axBox.spines[sSide].set_color(strDetailColor)
+    axBox.xaxis.label.set_color(strDetailColor)
+
+    #Light horizontal grid drawn behind the data
+    axBox.yaxis.grid(True, linestyle='-', which='major', color=objColors.c_strGridLineColor, alpha=objColors.c_dAlpha)
+    axBox.set_axisbelow(True)
+    axBox.yaxis.label.set_color(strDetailColor)
+    for sAxis in ['x','y']:
+      axBox.tick_params(axis=sAxis, colors=strDetailColor)
+
+    #Boxes first, then the points of each group at the x position of its box (1, 2, 3...)
+    dictBoxParts = plt.boxplot(x=ly, notch=1, patch_artist=True)
+    for iGroup, ldGroup in enumerate(ly):
+      dCenter = float(iGroup+1)
+      if fJitter:
+        ldX = [dCenter+ uniform(-.05,.05) for iPoint in xrange(len(ldGroup))]
+      else:
+        ldX = [dCenter] * len(ldGroup)
+      plt.scatter(x=ldX,y=ldGroup,c=strColor,marker="o",alpha=objColors.c_dAlpha)
+
+    #Color the boxes and whiskers
+    plt.setp(dictBoxParts['boxes'], color=strDetailColor, facecolor=strColor, alpha=dBoxAlpha)
+    plt.setp(dictBoxParts['whiskers'], color=strDetailColor)
+
+    #Tick labels carry the number of values in each box
+    lsTickLabels = [sLabel+" ( "+str(len(ly[iLabel]))+" )" for iLabel,sLabel in enumerate(lsLabels)]
+    plt.setp(axBox, xticklabels=lsTickLabels)
+    axBox.set_title(strTitle)
+    axBox.title.set_color(strDetailColor)
+
+    #Reverse the y limits to flip the axis
+    if fInvertY:
+      axCurrent = plt.gca()
+      axCurrent.set_ylim(axCurrent.get_ylim()[::-1])
+
+    #Save to a file, keeping the figure's background color
+    figBox.savefig(strOutputFigurePath, facecolor=figBox.get_facecolor())

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/CClade.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/CClade.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,181 @@
+"""
+Author: Curtis Huttenhower
+Description: Used to create tree structures to hierarchically normalize abundance tables.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Curtis Huttenhower"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Curtis Huttenhower"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import blist
+import sys
+
+class CClade:
+    """
+    Node of a clade tree. Each node holds its child clades by name and, optionally,
+    a list of abundance values.
+    """
+
+    def __init__( self ):
+        """
+        Start with no children and no abundance values.
+        """
+
+        #Child clade name -> CClade
+        self.m_hashChildren = {}
+        #Abundance values of this clade, None until set or imputed
+        self.m_adValues = None
+
+    def get( self, astrClade ):
+        """
+        Walk down the tree following the clade names in astrClade and return the node reached.
+        Nodes that do not exist yet are created on the way.
+        An empty astrClade returns this node.
+        """
+
+        if not astrClade:
+            return self
+        pChild = self.m_hashChildren.setdefault( astrClade[0], CClade( ) )
+        return pChild.get( astrClade[1:] )
+
+    def set( self, adValues ):
+        """
+        Store the given values in the order given; entries that are false (0, None...) are stored as 0.
+        """
+
+        self.m_adValues = blist.blist( [0] ) * len( adValues )
+        for iIndex, dValue in enumerate( adValues ):
+            if dValue:
+                self.m_adValues[iIndex] = dValue
+
+    def impute( self ):
+        """
+        Return the values of this clade, first computing them from the children if the clade has none.
+        Imputed values are the element-wise sum of the (recursively imputed) values of all children.
+        Clades that already have values are returned untouched.
+        """
+
+        #Values already exist, nothing to impute
+        if self.m_adValues:
+            return self.m_adValues
+
+        for pChild in self.m_hashChildren.values( ):
+            adChild = pChild.impute( )
+            #A child without values contributes nothing
+            if not adChild:
+                continue
+            if self.m_adValues:
+                #Add this child to the running sum
+                for iIndex, dChild in enumerate( adChild ):
+                    if dChild:
+                        self.m_adValues[iIndex] += dChild
+            else:
+                #First child with values: start the sum from a copy of its values
+                self.m_adValues = adChild[:]
+        return self.m_adValues
+
+    def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):
+        """
+        Recursive helper of freeze.
+        Adds the clades at the requested level to hashValues as {"lineage|joined|by|pipes": values}
+        and returns the set of distances from this node down to the leaves below it.
+        iTarget of 0 keeps every clade, a positive iTarget counts levels from the root
+        and a negative iTarget counts levels from the leaves.
+        """
+
+        #Decide from the depth whether this node is wanted (used when iTarget is not negative)
+        if not iTarget:
+            fHit = True
+        elif fLeaves:
+            fHit = ( iDepth == iTarget )
+        else:
+            fHit = ( iDepth <= iTarget )
+
+        #Children are one level deeper
+        iDepth += 1
+
+        #Distances to the leaves: 0 for a leaf, otherwise one more than the children's distances
+        if self.m_hashChildren:
+            setiChildren = set()
+            for strChild, pChild in self.m_hashChildren.items( ):
+                setiChildren |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves )
+            setiRet = set( ( iDistance + 1 ) for iDistance in setiChildren )
+        else:
+            setiRet = set( [0] )
+
+        #A negative target is measured from the leaves and overrides the depth-based decision
+        if iTarget < 0:
+            iWanted = -( iTarget + 1 )
+            if fLeaves:
+                fHit = iWanted in setiRet
+            else:
+                fHit = iWanted <= max( setiRet )
+
+        #Only named clades (not the root) that have values are recorded
+        if astrClade and self.m_adValues and fHit:
+            hashValues["|".join( astrClade )] = self.m_adValues
+        return setiRet
+
+    def freeze( self, hashValues, iTarget, fLeaves ):
+        """
+        Fill hashValues with the clades (full lineage as key) and values at the level given by iTarget,
+        starting from this node as the root.
+        """
+        self._freeze( hashValues, iTarget, [], 0, fLeaves )
+
+    def _repr( self, strClade ):
+        """
+        Recursive helper of __repr__; strClade is the name this node is stored under in its parent.
+        """
+
+        lsParts = ["<"]
+        if strClade:
+            lsParts.append( "%s %s" % (strClade, self.m_adValues) )
+            if self.m_hashChildren:
+                lsParts.append( " " )
+        if self.m_hashChildren:
+            lsParts.append( " ".join( pChild._repr( strChild ) for (strChild, pChild) in self.m_hashChildren.items( ) ) )
+        lsParts.append( ">" )
+        return "".join( lsParts )
+
+    def __repr__( self ):
+        """
+        Represent the tree below this node for debugging.
+        """
+        return self._repr( "" )
+
+"""
+pTree = CClade( )
+pTree.get( ("A", "B") ).set( [1, 2, 3] )
+pTree.get( ("A", "C") ).set( [4, 5, 6] )
+pTree.get( ("D", "E") ).set( [7, 8, 9] )
+iTaxa = 0
+if iTaxa:
+ pTree.impute( )
+hashFeatures = {}
+pTree.freeze( hashFeatures, iTaxa )
+print( pTree )
+print( hashFeatures )
+sys.exit( 0 )
+#"""

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/CClade.pyc

Binary file src/breadcrumbs/src/CClade.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/Cladogram.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/Cladogram.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,950 @@\n+"""\n+Author: Timothy Tickle\n+Description: Class to call circlader and create dendrograms.\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+#External libraries\n+from AbundanceTable import AbundanceTable\n+from CommandLine import CommandLine\n+from ConstantsBreadCrumbs import ConstantsBreadCrumbs\n+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs\n+import math\n+import numpy as np\n+import os\n+import re\n+import scipy.stats\n+from ValidateData import ValidateData\n+#import scipy.stats.stats as stats\n+\n+class Cladogram:\n+ """\n+ This class manages creating files for Circlader and calling circulator.\n+ """\n+\n+ #Script name\n+ circladerScript=None\n+\n+ #Constants\n+ c_sTaxa="Taxa"\n+ c_sCircle="Circle"\n+ c_sBorder="Border"\n+ c_sShape="Shape"\n+ c_sAlpha="Alpha"\n+ c_sForced="Forced"\n+\n+ #Numpy array (structured array) holding data\n+ #Should be SampleID, Sample Abundances/Data (samples = columns).....\n+ npaAbundance = None\n+ #List of sample names\n+ lsSampleNames = None\n+ #Name of output image\n+ strImageName = "Cladogram.png"\n+ #String used to call the sample id column\n+ strSampleID = "ID"\n+ strUnclassified = "unclassified"\n+\n+ #Minimum size of clade (terminal node count for clade)\n+ iMinCladeSize = 1\n+ #Level of ancestry to filter at (starts with 0 and based on the input file)\n+ iCladeLevelToMeasure = 1\n+ iCladeLevelToReduce = 1\n+ cFeatureDelimiter = "|"\n+\n+ #Flags\n+ #Turns on (True) or off (False) abundance-based filtering\n+ fAbundanceFilter = False\n+ #Turns on (True) or 
off (False) clade size-based filtering\n+ fCladeSizeFilter = False\n+ #Indicate if the following files were made\n+ fSizeFileMade=False\n+ fCircleFileMade=False\n+ fColorFileMade=False\n+ fTickFileMade=False\n+ fHighlightFileMade=False\n+\n+ #Circlader files\n+ strTreeFilePath="_Taxa.txt"\n+ strCircleFilePath = "_Circle.txt"\n+ strColorFilePath="_Color.txt"\n+ strTickFilePath="_Tick.txt"\n+ strHighLightFilePath="_HighLight.txt"\n+ strSizeFilePath="_Size.txt"\n+ strStyleFilePath=""\n+\n+ #Thresholds\n+ #Controls the showing of taxa\n+ c_dPercentileCutOff = 90.0\n+ c_dPercentageAbovePercentile = 1.0\n+\n+ #Minimum average abundance score when using log scale\n+ c_dMinLogSize = 0.0000000001\n+ #Constant used to maginfy the size difference in the taxa (log only)\n+ c_dLogScale = 1000000\n+ #When after log10, an addition scaling adjustment (use this)\n+ c_dCircleScale = 3\n+\n+ #Data for circular files\n+ #Used to change IDs to proper labels\n+ dictConvertIDs = None\n+ #Labels to be relabeled\n+ dictRelabels'..b'ing to the files generated that Circlader acts on.\n+ These files include the tree, color, highlight, tick, circle, and size files.\n+ Checks to make sure the file path to the syle file provided is an existing file.\n+ Deletes any existing files with these generated names (except for the style files).\n+\n+ :param sStyleFile: File path indicating the style file to use\n+ :type: String\n+ :param strTaxaFile: File path indicating the taxa file to use\n+ :type: String\n+ :param sColorFile: File path indicating the color file to use\n+ :type: String\n+ :param sTickFile: File path indicating the tick file to use\n+ :type: String\n+ :param sHighlightFile: File path indicating the highlight file to use\n+ :type: String\n+ :param sSizeFile: File path indicating the size file to use\n+ :type: String\n+ :param sCircleFileName: File path for circle files\n+ :type: String\n+ :return boolean: True indicates success, false indicates error\n+ """\n+ #Do not remove the style 
file, it is static\n+ if strStyleFile is None:\n+ print("Error, style file is None")\n+ return(False)\n+ if not os.path.exists(strStyleFile):\n+ print("Error, no style file found.")\n+ return(False)\n+ else:\n+ self.strStyleFilePath = strStyleFile\n+\n+ #Set output files and remove if needed\n+ self.strTreeFilePath = sTaxaFileName\n+ self.strColorFilePath = sColorFileName\n+ self.strTickFilePath = sTickFileName\n+ self.strHighLightFilePath = sHighlightFileName\n+ self.strSizeFilePath = sSizeFileName\n+ self.strCircleFilePath = sCircleFileName\n+ for sFile in [self.strTreeFilePath,self.strColorFilePath,self.strTickFilePath,\n+ self.strHighLightFilePath,self.strSizeFilePath,self.strCircleFilePath]:\n+ if not sFile is None:\n+ if(os.path.exists(sFile)):\n+ os.remove(sFile)\n+ return True\n+\n+ #Not tested\n+ def relabelIDs(self, dictLabels):\n+ """\n+ Allows the relabeling of ids. Can be used to make numeric labeling of ids or renaming\n+\n+ :param dictLabels: Should label (key) (after unclassified is modified) and new label (value)\n+ :type: dictLabels Dictionary of string (key:label to replace) string (value:new label to use in replacing)\n+ """\n+ self.dictRelabels = dictLabels\n+\n+ #Happy path tested\n+ def updateToRoot(self, lsIDs):\n+ """\n+ Updates the clade to the root given. The clade must contain the root and the level of the \n+ root in the clade will be rest to it\'s first level, ignoring the previous levels of the clade.\n+\n+ :param lsIDs: List of Clades that will be reset to the root specified by setRoot\n+ :type: lsIDs List of strings. 
Each string representing a clade.\n+ """\n+\n+ if(self.strRoot is None):\n+ return lsIDs\n+ #Force root tree if indicated to do so\n+ lsRootedIDs = list()\n+ for sID in lsIDs:\n+ sIDElements = filter(None,re.split("\\|",sID))\n+ if(self.strRoot in sIDElements):\n+ iRootIndex = sIDElements.index(self.strRoot)\n+ #If multiple levels of the clade exist after the new root merge them.\n+ if(len(sIDElements)>iRootIndex+2):\n+ lsRootedIDs.append("|".join(sIDElements[iRootIndex+1:]))\n+ #If only one level of the clade exists after the new root, return it.\n+ elif(len(sIDElements)>iRootIndex+1):\n+ lsRootedIDs.append(sIDElements[iRootIndex+1])\n+ return(lsRootedIDs)\n+\n+ #Testing: Used extensively in other tests\n+ def writeToFile(self, strFileName, strDataToWrite, fAppend):\n+ """\n+ Helper function that writes a string to a file\n+\n+ :param strFileName: File to write to\n+ :type: strFileName File path (string)\n+ :param strDataToWrite: Data to write to file\n+ :type: strDataToWrite String\n+ :param fAppend: Indicates if an append should occur (True == Append)\n+ :type: fAppend boolean\n+ """\n+\n+ cMode = \'w\'\n+ if fAppend:\n+ cMode = \'a\'\n+ with open(strFileName,cMode) as f:\n+ f.write(strDataToWrite)\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/CommandLine.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/CommandLine.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,137 @@
+"""
+Author: Timothy Tickle
+Description: Manages calling commandline from within code.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#Import libraries
+from subprocess import call, Popen, PIPE
+from ValidateData import ValidateData
+import traceback
+
class CommandLine():
    """
    Manages calling the command line from within code.

    Each public method first validates its argument with
    ValidateData.funcIsValidStringList and reports failures by printing an
    "Error::" message and returning False instead of raising.
    """

    ##
    #Constructor
    def __init__(self): pass

    @staticmethod
    def _funcPrintCallFailure(tempCommand):
        """
        Prints the standard error report for a command that raised while being launched.
        Must be called from inside the except block so the active traceback is available.

        :param tempCommand: The command that failed (list of elements or joined string)
        :type: List of strings or String
        """

        print("Error:: Error during command call. Script stopped.")
        print("Error:: Command ="+str(tempCommand)+".")
        #format_exc() formats the exception being handled; its only positional argument is a depth limit
        print("Error:: OS error: "+traceback.format_exc()+".")

    def runCommandLine(self,tempCommand = None):
        """
        Sends a command to command line interface (no shell involved) and waits for it to finish.

        :param tempCommand: Must be an list of command key word and string arguments, no whitespaces
        :type: List of strings
        :return: boolean indicator of success (True = the command ran and exited with code 0)
        """

        #Makes sure the the input data is a list of strings
        if(not ValidateData.funcIsValidStringList(tempCommand)):
            print("Error:: tempCommand must be an array of strings. Received="+str(tempCommand)+".")
            return False

        #Run command
        try:
            returnCode = call(tempCommand)
        except (OSError,TypeError):
            self._funcPrintCallFailure(tempCommand)
            return False

        print("Return="+str(returnCode))
        #Any non-zero code is a failure; a negative code means the child was terminated by a signal
        if returnCode != 0:
            print("Error:: Error during command call. Script stopped.")
            print("Error:: Error Code "+str(returnCode)+".")
            print("Error:: Command ="+str(tempCommand)+".")
            return False
        return True

    def runPipedCommandLine(self,tempCommand = None):
        """
        Joins the command elements with single spaces and runs the result through the shell,
        capturing standard output and waiting for the process to complete.
        Put file names in escaped quotation marks.

        NOTE: This uses shell == True, so the joined string is interpreted by the shell.
        Make sure the commandline is not malicious; never build it from untrusted input.

        :param tempCommand: Must be an list of command key word and string arguments, no whitespaces.
        :type: List of strings
        :return: False on failure; otherwise the (stdout, stderr) tuple given by Popen.communicate().
                 Only stdout is piped, so the stderr element is None.
                 The process return code itself is not returned.
        """

        #Makes sure the the input data is a list of strings
        if(not ValidateData.funcIsValidStringList(tempCommand)):
            print("Error:: tempCommand must be an array of strings. Received="+str(tempCommand)+".")
            return False

        #Run command
        tempCommand = " ".join(tempCommand)
        try:
            return Popen(tempCommand, shell = True, stdout = PIPE).communicate()
        except (OSError,TypeError):
            self._funcPrintCallFailure(tempCommand)
            return False

    def runBatchCommandline(self,tempArrayOfCommands = None):
        """
        Sends a an array of commands to the commandline, one after the other.
        Stops at the first command that fails.

        :param tempArrayOfCommands: Must be an list of commands, parsing and removing whitespace is handled internally.
         Do not send mkdir and rm commands, use the appropriate os.* method call
        :type: List of strings
        :return: boolean indicator of success (True = every command succeeded)
        """

        #Makes sure the the input data is list of strings
        if(not ValidateData.funcIsValidStringList(tempArrayOfCommands)):
            print("Error:: tempCommand must be an array of strings. Received="+str(tempArrayOfCommands)+".")
            return False

        #Parse commands into an array and call
        #On an error stop and return False
        for command in tempArrayOfCommands:
            #split() collapses runs of whitespace so no empty arguments are produced
            if(not self.runCommandLine(command.split())):
                return False
        return True
+

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/ConstantsBreadCrumbs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/ConstantsBreadCrumbs.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,155 @@
+"""
+Author: Timothy Tickle
+Description: Project constants.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+##
+#Holds the constants shared across the BreadCrumbs project
class ConstantsBreadCrumbs():
    """
    Class to hold project constants.

    All values are class attributes and can be read without creating an instance.
    """

    #Character Constants
    c_strComma = ','
    c_strColon = ':'
    c_strConfigFileHeaderChar = '['
    c_strConfigFileCommentChar = '#'
    c_strEndline = '\n'
    c_strExtDelim = '.'
    c_cFastaIDLineStart = '>'
    c_strPathDelim = '/'
    c_cPipe = '|'
    c_cQuote = '\"'
    c_cTab = '\t'
    c_strWhiteSpace = ' '
    c_matrixFileDelim = '\t'

    c_strBreadCrumbsSVMSpace = c_strWhiteSpace

    #Default values for missing data in the Abundance Table
    c_strEmptyAbundanceData = "0"
    c_strEmptyDataMetadata = "NA"
    c_strSVMNoSample = "-"

    #Spellings treated as NA; built through a set so duplicates collapse (element order is not defined)
    lNAs = list(set(["NA","na","Na","nA",c_strEmptyDataMetadata]))

    #TODO remove
    #Reference to circlader
    c_strCircladerScript = "circlader/circlader.py"

    #AbundanceTable
    #Suffix given to a file that is check with the checkRawDataFile method
    OUTPUT_SUFFIX = "-checked.pcl"

    #BIOM related
    #PCL File metadata defaults (many of these come from biom file requirements
    #ID
    c_strIDKey = "id"
    c_strDefaultPCLID = None

    #File date
    c_strDateKey = "date"

    #File format type
    c_strFormatKey = "format"
    c_strDefaultPCLFileFormateType = "PCL"

    #File generation source
    c_strSourceKey = "source"
    c_strDefaultPCLGenerationSource = None

    #File type
    c_strTypekey = "type"
    c_strDefaultPCLFileTpe = None

    #Allowable file types for biom files
    c_strOTUType = "OTU"
    c_strOTUBIOMType = "OTU table"
    c_strPathwayType = "Pathway"
    c_strPathwayBIOMType = "Pathway table"
    c_strFunctionType = "Function"
    c_strFunctionBIOMType = "Function table"
    c_strOrthologType = "Ortholog"
    c_strOrthologBIOMType = "Ortholog table"
    c_strGeneType = "Gene"
    c_strGeneBIOMType = "Gene table"
    c_strMetaboliteType = "Metabolite"
    c_strMetaboliteBIOMType = "Metabolite table"
    c_strTaxonType = "Taxon"
    c_strTaxonBIOMType = "Taxon table"
    #Maps each short type name to its BIOM table type name.
    #Taxon maps to its BIOM name like every other entry (it previously mapped to itself).
    c_dictFileType = {
        c_strOTUType:c_strOTUBIOMType,
        c_strPathwayType:c_strPathwayBIOMType,
        c_strFunctionType:c_strFunctionBIOMType,
        c_strOrthologType:c_strOrthologBIOMType,
        c_strGeneType:c_strGeneBIOMType,
        c_strMetaboliteType:c_strMetaboliteBIOMType,
        c_strTaxonType:c_strTaxonBIOMType}

    #File URL
    c_strURLKey = "url"
    c_strDefaultPCLURL = None
    c_strFormatUrl =  "format_url"

    #File sparse matrix
    c_strSparsityKey = "sparsity"
    c_fDefaultPCLSparsity = False

    # BIOM related Data
    # Data shape
    c_strDataShapeKey = "shape"

    ######################################################################
    # Constants related to biom import and export files                  #
    ######################################################################
    # Biom file extension
    c_strBiomFile = "biom"
    c_BiomTaxData = "BiomTaxData"
    c_MetadataID = "column_metadata_id"
    c_Metadata = "Metadata"
    c_metadata_lowercase = "metadata"
    c_sLastMetadata = "sLastMetadata"
    c_columns = "columns"
    c_rows = "rows"
    c_ascii = "ascii"
    c_ignore = "ignore"
    c_Dtype = "Dtype"
    c_ID = "ID"
    c_id_lowercase = "id"
    #NOTE(review): the name says f4 but the value is "f8"; left unchanged, confirm which is intended
    c_f4 = "f8"
    c_biom_file_generated_by = "BreadCrumbs"
    c_strPCLFile = "pcl"
    c_taxonomy = "taxonomy"
    c_dRowsMetadata = "dRowsMetadata"
    c_BiomFileInfo = "BiomFileInfo"
    c_MatrixTtype = "matrix_type"
    c_GeneratedBy = "generated_by"
    c_MetadataEntriesTotal = "MetadataEntriesTotal"
    c_MaximumLength = "MaximumLength"


    def __init__(self):
      pass

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/ConstantsBreadCrumbs.pyc

Binary file src/breadcrumbs/src/ConstantsBreadCrumbs.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/ConstantsFiguresBreadCrumbs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/ConstantsFiguresBreadCrumbs.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,99 @@
+"""
+Author: Timothy Tickle
+Description: Constants for figures.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+##
+#Holds global configuration constants
class ConstantsFiguresBreadCrumbs():
    """
    Holds the colors, markers and defaults shared by the figure classes.

    The class attributes are the non-inverted (white background) settings.
    invertColors() overrides the color related ones on the instance it is called on.
    """

    #Figure oriented
    c_strBackgroundColorName = "Invisible"
    c_strBackgroundColor = "255,255,255"
    #Defined here as well so it can be read before invertColors() is ever called
    c_strBackgroundColorTuple = (255,255,255)
    c_strBackgroundColorWord = "white"
    c_strBackgroundColorLetter = "w"
    c_strDetailsColorWord = "black"
    c_strDetailsColorLetter = "k"

    #No select color (white); defined here so it can be read before invertColors() is ever called
    c_charNoSelect = "#FFFFFF"

    #PCOA Markers
    c_charPCOAPieChart = "o"
    c_charPCOASquarePieChart = "s"
    iMarkerSize = 100

    #PCoA defaults
    c_strPCoALabelDefault = "Label"
    c_cPCoAColorDefault = 'g'
    c_cPCoAShapeDefault = 'o'
    c_cPCoASizeDefault = 20

    #General plotting
    c_strGridLineColor = "#CCCCCC"

    c_fInverted = False
    c_dAlpha = 0.5

    def invertColors(self,fInvert):
        """
        Sets the color scheme of this instance to inverted (black background)
        or normal (white background).

        :param fInvert: True selects the inverted scheme, anything else the normal scheme
        :type: Boolean
        """

        #Equality (not truthiness) is kept so only True selects the inverted scheme, as before
        if fInvert==True:
            #General colors
            self.c_strBackgroundColor = "0,0,0"
            self.c_strBackgroundColorTuple = (0,0,0)
            self.c_strBackgroundColorWord = "black"
            self.c_strBackgroundColorLetter = "k"
            self.c_strDetailsColorWord = "white"
            self.c_strDetailsColorLetter = "w"

            #Invert no select color
            self.c_charNoSelect = "#000000" # black

            #Record that it is inverted
            self.c_fInverted = True

            #Alpha looks best at full in inversion
            self.c_dAlpha = 1.0

        else:
            #General colors
            self.c_strBackgroundColor = "255,255,255"
            self.c_strBackgroundColorTuple = (255,255,255)
            self.c_strBackgroundColorWord = "white"
            self.c_strBackgroundColorLetter = "w"
            self.c_strDetailsColorWord = "black"
            self.c_strDetailsColorLetter = "k"

            #No select color
            self.c_charNoSelect = "#FFFFFF" # White

            #Record that it is not inverted
            self.c_fInverted = False

            #Back to the half transparent default used when not inverted
            self.c_dAlpha = 0.5

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/Histogram.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/Histogram.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,103 @@
+"""
+Author: Timothy Tickle
+Description: Class to create histograms.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
+import matplotlib.pyplot as plt
+from pylab import *
+
+#Plots a histogram
class Histogram:
  """
  Creates histogram figures.
  """

  @staticmethod
  def funcPlot(lx, strOutputFigurePath, strTitle = "Title", strXTitle="X Axis", strYTitle="Y Axis", strColor = "#83C8F9", fInvert=False):
    """
    Plot a histogram of the given values and save it to a file.

    :params lx: List of x values
    :type: List of doubles
    :params strOutputFigurePath: File path to make figure
    :type: String file path
    :params strTitle: Title of figure
    :type: String
    :params strXTitle: Label of x axis
    :type: String
    :params strYTitle: Label of y axis
    :type: String
    :params strColor: Hex color for the face of the histogram bars
    :type: String
    :params fInvert: Invert colors (true)
    :type: Boolean
    """

    #Start plot
    #Get plot object
    imgFigure = plt.figure()

    try:
      #Get plot colors
      objFigureControl = ConstantsFiguresBreadCrumbs()
      objFigureControl.invertColors(fInvert=fInvert)

      #Color/Invert figure
      imgFigure.set_facecolor(objFigureControl.c_strBackgroundColorWord)
      imgSubplot = imgFigure.add_subplot(111)
      #Newer matplotlib sets the axes background with set_facecolor, older releases with set_axis_bgcolor
      if hasattr(imgSubplot, "set_facecolor"):
        imgSubplot.set_facecolor(objFigureControl.c_strBackgroundColorLetter)
      else:
        imgSubplot.set_axis_bgcolor(objFigureControl.c_strBackgroundColorLetter)
      imgSubplot.set_xlabel(strXTitle)
      imgSubplot.set_ylabel(strYTitle)
      for strSpine in ['top','bottom','left','right']:
        imgSubplot.spines[strSpine].set_color(objFigureControl.c_strDetailsColorLetter)
      imgSubplot.xaxis.label.set_color(objFigureControl.c_strDetailsColorLetter)

      #Adds light grid for numbers and puts them in the background
      imgSubplot.yaxis.grid(True, linestyle='-', which='major', color=objFigureControl.c_strGridLineColor, alpha=objFigureControl.c_dAlpha)
      imgSubplot.set_axisbelow(True)
      imgSubplot.yaxis.label.set_color(objFigureControl.c_strDetailsColorLetter)
      imgSubplot.tick_params(axis='x', colors=objFigureControl.c_strDetailsColorLetter)
      imgSubplot.tick_params(axis='y', colors=objFigureControl.c_strDetailsColorLetter)

      #Make histogram on the axes created above instead of relying on pyplot's current axes
      imgSubplot.hist(x=lx,histtype='bar',color=strColor)
      #Set title
      imgSubplot.set_title(strTitle)
      imgSubplot.title.set_color(objFigureControl.c_strDetailsColorLetter)

      #End plot
      #Save to a file
      imgFigure.savefig(strOutputFigurePath, facecolor=imgFigure.get_facecolor())
    finally:
      #pyplot keeps every figure it created alive until it is closed
      plt.close(imgFigure)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/KMedoids.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/KMedoids.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,208 @@
+## Included from MLPY build 2.2.0
+## Attempts were made to contact Davide Albanese on 08-10-2012 and 09-19-2012 at albanese@fbk.eu
+
+## This code is written by Davide Albanese, <albanese@fbk.eu>
+## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+__all__= ['Kmedoids', 'Minkowski']
+
+
+import numpy as np
+import matplotlib
+matplotlib.use( "Agg" )
+import mlpy
+
+
def kmedoids_core(x, med, oth, clust, cost, dist):
    """
    One sweep of medoid swapping.

    For every medoid and every non-medoid currently assigned to it, the two are
    swapped, all non-medoids are re-assigned to their closest medoid and the total
    cost is computed. Every trial starts from the configuration passed in. The
    cheapest trial is returned; a trial whose cost ties the best so far replaces it.
    If no trial costs at most `cost`, copies of the input configuration are returned.

    :Parameters:
      x : indexable data; x[i] is handed to dist.compute
      med : ndarray of medoid indexes
      oth : ndarray of non-medoid indexes
      clust : ndarray giving, for each entry of oth, the position in med of its medoid
      cost : total cost of the configuration passed in
      dist : object with a .compute(a, b) method returning a distance

    :Returns:
      (medoids, non-medoids, cluster membership, cost) of the best configuration
    """

    n_others = oth.shape[0]
    dmat = np.empty((n_others, med.shape[0]), dtype=float)
    rows = np.arange(n_others)

    # Best configuration seen so far; starts as the one passed in
    best_med, best_oth, best_clust, best_cost = med.copy(), oth.copy(), clust.copy(), cost

    for k, medoid in enumerate(med):
        members = clust == k
        for pos, candidate in enumerate(oth[members]):

            # Swap: the candidate becomes medoid k, the old medoid takes its slot
            trial_med = med.copy()
            trial_oth = oth.copy()
            trial_med[k] = candidate
            swapped = trial_oth[members]
            swapped[pos] = medoid
            trial_oth[members] = swapped

            # Distance of every non-medoid to every medoid of the trial
            for r, point in enumerate(trial_oth):
                for c, center in enumerate(trial_med):
                    dmat[r, c] = dist.compute(x[center], x[point])

            trial_clust = np.argmin(dmat, axis=1) # closest medoid per point
            trial_cost = np.sum(dmat[rows, trial_clust]) # total cost of the trial

            if trial_cost <= best_cost:
                best_med, best_oth = trial_med, trial_oth
                best_clust, best_cost = trial_clust, trial_cost

    return best_med, best_oth, best_clust, best_cost
+
+
class Kmedoids:
    """k-medoids clustering on top of a user supplied distance object.
    """

    def __init__(self, k, dist, maxloops=100, rs=0):
        """Store the settings and seed numpy's global random generator.

        :Parameters:

          k : int
              Number of clusters/medoids
          dist : class
                 class with a .compute(x, y) method which
                 returns a distance
          maxloops : int
                     maximum number of loops
          rs : int
               random seed; passed to np.random.seed when the
               object is created
        """

        self.__k = k
        self.__maxloops = maxloops
        self.__rs = rs
        self.__dist = dist

        np.random.seed(self.__rs)


    def compute(self, x):
        """Run k-medoids on x.

        The first k entries of a random shuffle of the sample indexes are the
        starting medoids. kmedoids_core is then applied until it no longer
        lowers the total cost or maxloops sweeps have been made.

        :Parameters:
           x : ndarray
               An 2-dimensional vector (sample x features).

        :Returns:
           m : ndarray (1-dimensional vector)
               medoids indexes
           n : ndarray (1-dimensional vector)
               non-medoids indexes
           cl : ndarray 1-dimensional vector)
                cluster membership for non-medoids.
                Groups are in 0, ..., k-1
           co : double
                total cost of configuration
        """

        # random start: first k shuffled indexes are medoids, the rest are not
        order = np.arange(x.shape[0])
        np.random.shuffle(order)
        med, oth = order[:self.__k], order[self.__k:]

        # distance of every non-medoid (rows) to every medoid (columns)
        d = np.empty((oth.shape[0], med.shape[0]), dtype=float)
        for row, point in enumerate(oth):
            for col, center in enumerate(med):
                d[row, col] = self.__dist.compute(x[center], x[point])

        # closest medoid per data point and the resulting total cost
        clust = np.argmin(d, axis=1)
        cost = np.sum(d[np.arange(d.shape[0]), clust])

        # keep sweeping while a sweep strictly lowers the cost
        for _ in range(self.__maxloops):
            trial = kmedoids_core(x, med, oth, clust, cost, self.__dist)
            if not (trial[3] < cost):
                break
            med, oth, clust, cost = trial

        return med, oth, clust, cost
+
+
class Minkowski:
    r"""
    Minkowski distance of order ``p`` between two vectors ``x`` and ``y``.

    .. math::

      {||x-y||}_p = (\sum{|x_i - y_i|^p})^{1/p}.
    """

    def __init__(self, p):
        """
        Remember the order of the norm.

        :Parameters:
          p : float
              The norm of the difference :math:`{||x-y||}_p`
        """

        self.__p = p


    def compute(self, x, y):
        """
        Return the Minkowski distance between ``x`` and ``y``.

        :Parameters:
           x : ndarray
               An 1-dimensional vector.
           y : ndarray
               An 1-dimensional vector.

        :Returns:
           d : float
               The Minkowski distance between vectors ``x`` and ``y``
        """

        order = self.__p
        # element-wise |x_i - y_i|^p, summed, then the p-th root
        powered = abs(x - y) ** order
        return powered.sum() ** (1.0 / order)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/KMedoids.pyc

Binary file src/breadcrumbs/src/KMedoids.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/MLPYDistanceAdaptor.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/MLPYDistanceAdaptor.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,76 @@
+"""
+Author: Timothy Tickle
+Description: Allows KMedoids on a custom metric space.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+from scipy.spatial.distance import squareform
+
class MLPYDistanceAdaptor:
    """
    Allows one to use custom distance metrics with KMedoids in the MLPY package.

    The adaptor wraps a precomputed distance matrix; compute() looks distances up
    in that matrix instead of calculating them.
    """

    #Distance matrix to reference (held in square form); None until the constructor sets it
    npaMatrix = None

    def __init__(self, npaDistanceMatrix, fIsCondensedMatrix):
        """
        Constructor requires a matrix of distances, could be condensed or square matrices

        :param npaDistanceMatrix: The distance matrix to be used
        :type: Numpy array
        :param fIsCondensedMatrix: Indicator of the matrix form (true = condensed; false = square).
                                   A condensed matrix is expanded with scipy's squareform.
        :type: Boolean
        """

        if fIsCondensedMatrix:
            self.npaMatrix = squareform(npaDistanceMatrix)
        else:
            self.npaMatrix = npaDistanceMatrix

    def compute(self,x,y):
        """
        This is the only method required in the interface to MLPY to be a distance metric.
        Does NOT want values but positions, the positions will be used for accessing the distance matrix already provided.

        :param x: X position as a array of 1 number
        :type: Numpy array
        :param y: Y position as a array of 1 number
        :type: Numpy array
        :return: The entry of the distance matrix at row x[0], column y[0]
        :raises Exception: If no distance matrix was given during construction
        """

        #Identity test: "== None" on a numpy array compares element by element and cannot be used in an if
        if self.npaMatrix is None:
            raise Exception("MLPYDistanceAdaptor. Attempted to compute distance with out a distance matrix passed in during construction.")
        return self.npaMatrix[x[0],y[0]]

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/MLPYDistanceAdaptor.pyc

Binary file src/breadcrumbs/src/MLPYDistanceAdaptor.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/Metric.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/Metric.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,504 @@\n+"""\n+Author: Timothy Tickle\n+Description: Calculates Metrics.\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+#Update path\n+from ConstantsBreadCrumbs import ConstantsBreadCrumbs\n+import csv\n+import numpy as np\n+from types import *\n+from ValidateData import ValidateData\n+\n+#External libraries\n+from cogent.maths.unifrac.fast_unifrac import fast_unifrac_file\n+import cogent.maths.stats.alpha_diversity\n+import scipy.spatial.distance\n+\n+class Metric:\n+ """\n+ Performs ecological measurements.\n+ """\n+\n+ #Diversity metrics Alpha\n+ c_strSimpsonDiversity = "SimpsonD"\n+ c_strInvSimpsonDiversity = "InSimpsonD"\n+ c_strChao1Diversity = "Chao1"\n+\n+ #Diversity metrics Beta\n+ c_strBrayCurtisDissimilarity = "B_Curtis"\n+ c_strUnifracUnweighted = "unifrac_unweighted"\n+ c_strUnifracWeighted = "unifrac_weighted"\n+\n+ #Additive inverses of beta metrics\n+ c_strInvBrayCurtisDissimilarity = "InB_Curtis"\n+\n+ #Richness\n+ c_strShannonRichness = "ShannonR"\n+ c_strObservedCount = "Observed_Count"\n+\n+ #Different alpha diversity metrics\n+ setAlphaDiversities = set(["observed_species","margalef","menhinick",\n+\t"dominance","reciprocal_simpson","shannon","equitability","berger_parker_d",\n+\t"mcintosh_d","brillouin_d","strong","fisher_alpha","simpson",\n+\t"mcintosh_e","heip_e","simpson_e","robbins","michaelis_menten_fit","chao1","ACE"])\n+\n+ #Different beta diversity metrics\n+ setBetaDiversities = 
set(["braycurtis","canberra","chebyshev","cityblock",\n+\t"correlation","cosine","euclidean","hamming","sqeuclidean"])\n+\n+ #Tested 4\n+ @staticmethod\n+ def funcGetSimpsonsDiversityIndex(ldSampleTaxaAbundancies=None):\n+ """\n+ Calculates the Simpsons diversity index as defined as sum(Pi*Pi).\n+ Note***: Assumes that the abundance measurements are already normalized by the total population N.\n+\n+ :param\tldSampleTaxaAbundancies:\tList of measurements to calculate metric on (a sample).\n+ :type:\tList of doubles\n+ :return\tDouble:\tDiversity metric\n+ """\n+\n+ #Calculate metric\n+ return sum((ldSampleTaxaAbundancies)*(ldSampleTaxaAbundancies))\n+\n+ #Tested 4\n+ @staticmethod\n+ def funcGetInverseSimpsonsDiversityIndex(ldSampleTaxaAbundancies=None):\n+ """\n+ Calculates Inverse Simpsons diversity index 1/sum(Pi*Pi).\n+ This is multiplicative inverse which reverses the order of the simpsons diversity index.\n+ Note***: Assumes that the abundance measurements are already normalized by the total populati'..b'ed:\n+ mtrxDistance = Metric.funcGetUnifracDistance(istrmTree=istrmTree,istrmEnvr=istrmEnvr,lsSampleOrder=lsSampleOrder,fWeighted=True)\n+# mtrxDistance = xReturn[0] if not type(xReturn) is BooleanType else xReturn\n+ else:\n+ mtrxDistance = False\n+ if fAdditiveInverse and not type(mtrxDistance) is BooleanType:\n+\t if sMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:\n+\t\tmtrxDistance = (1.0 - mtrxDistance[0],mtrxDistance[1])\n+\t else:\n+ mtrxDistance = 1.0 - mtrxDistance\n+\treturn mtrxDistance\n+\n+ #Test Cases 11\n+ @staticmethod\n+ def funcReadMatrixFile(istmMatrixFile, lsSampleOrder=None):\n+\t"""\n+\tReads in a file with a precalculated beta-diversty matrix.\n+\n+\t:param istmMatrixFile:\tFile with beta-diversity matrix\n+\t:type:\tFileStream of String file path\n+\t"""\n+\n+ #Read in data\n+ f = csv.reader(open(istmMatrixFile,"r") if isinstance(istmMatrixFile, str) else istmMatrixFile, 
delimiter=ConstantsBreadCrumbs.c_matrixFileDelim )\n+\n+ #Get header\n+ try:\n+ lsHeader = f.next()\n+ except StopIteration:\n+ return (False,False)\n+ lsHeaderReducedToSamples = [sHeader for sHeader in lsHeader if sHeader in lsSampleOrder] if lsSampleOrder else lsHeader[1:]\n+\n+ #If no sample ordering is given, set the ordering to what is in the file\n+ if not lsSampleOrder:\n+\t lsSampleOrder = lsHeaderReducedToSamples\n+\n+ #Preallocate matrix\n+ mtrxData = np.zeros(shape=(len(lsSampleOrder),len(lsSampleOrder)))\n+\n+ #Make sure all samples requested are in the file\n+ if(not len(lsSampleOrder) == len(lsHeaderReducedToSamples)): return False\n+\n+\tfor lsLine in f:\n+ if lsLine[0] in lsSampleOrder:\n+ iRowIndex = lsSampleOrder.index(lsLine[0])\n+\n+ for i in xrange(1,len(lsSampleOrder)):\n+ iColumnIndexComing = lsHeader.index(lsSampleOrder[i])\n+ iColumnIndexGoing = lsSampleOrder.index(lsSampleOrder[i])\n+ mtrxData[iRowIndex,iColumnIndexGoing] = lsLine[iColumnIndexComing]\n+ mtrxData[iColumnIndexGoing,iRowIndex] = lsLine[iColumnIndexComing]\n+ tpleMData = mtrxData.shape\n+ mtrxData = mtrxData if any(sum(ld)>0 for ld in mtrxData) or ((tpleMData[0]==1) and (tpleMData[1]==1)) else []\n+ return (mtrxData,lsSampleOrder)\n+\n+ #Test cases 2\n+ @staticmethod\n+ def funcWriteMatrixFile(mtrxMatrix, ostmMatrixFile, lsSampleNames=None):\n+ """\n+ Writes a square matrix to file.\n+ \n+ :param mtrxMatrix:\tMatrix to write to file\n+ :type:\tNumpy array\n+ :lsSampleNames:\tThe names of the samples in the order of the matrix\n+ :type:\tList of strings\n+ :ostmBetaMatrixFile:\tFile to write to\n+ :type:\tString or file stream\n+ """\n+\n+ if not sum(mtrxMatrix.shape)>0 or not ostmMatrixFile:\n+ return False\n+\n+ #Check to make sure the sample names are the correct length\n+ tpleiShape = mtrxMatrix.shape\n+ if not lsSampleNames:\n+ lsSampleNames = range(tpleiShape[0])\n+ if not(len(lsSampleNames) == tpleiShape[0]):\n+ print "".join(["Metric.funcWriteMatrixFile. 
Error= Length of sample names ("+str(len(lsSampleNames))+") and matrix ("+str(mtrxMatrix.shape)+") not equal."])\n+ return False\n+\n+ #Write to file\n+ ostmOut = csv.writer(open(ostmMatrixFile,"w") if isinstance(ostmMatrixFile,str) else ostmMatrixFile, delimiter=ConstantsBreadCrumbs.c_matrixFileDelim )\n+\n+ #Add the additional space at the beginning of the sample names to represent the id row/column\n+ lsSampleNames = [""]+list(lsSampleNames)\n+\n+ #Write header and each row to file\n+ ostmOut.writerow(lsSampleNames)\n+ [ostmOut.writerow([lsSampleNames[iIndex+1]]+mtrxMatrix[iIndex,].tolist()) for iIndex in xrange(tpleiShape[0])]\n+ return True\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/Metric.pyc

Binary file src/breadcrumbs/src/Metric.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/Ordination.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/Ordination.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,94 @@
+"""
+Author: Timothy Tickle
+Description: Base class for ordination plots.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2013"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+import AbundanceTable
+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
+import matplotlib.cm as cm
+from matplotlib import pyplot as plt
+from UtilityMath import UtilityMath
+from ValidateData import ValidateData
+
+class Ordination:
+    """
+    Base class for ordination methods and plots.
+
+    Holds the loaded data matrix, a flag recording whether that matrix is raw
+    abundance data or an already generated distance matrix, and the sample ids.
+    """
+
+    def __init__(self):
+      # Rows = Samples
+      self.dataMatrix = None
+      # True when dataMatrix was loaded from an AbundanceTable, False for a distance matrix
+      self.isRawData = None
+      # Ids read from the AbundanceTable; only set by loadData when raw data is loaded
+      self.lsIDs = []
+      # Never written in this class.
+      # NOTE(review): presumably filled in by subclasses - confirm.
+      self.dataProcessed = None
+
+    #Happy path tested
+    def loadData(self, xData, fIsRawData):
+        """
+        Loads data into the object (given a matrix or an abundance table)
+        Data can be the Abundance Table to be converted to a distance matrix or a distance matrix
+        If it is the AbundanceTable, indicate that it is rawData (fIsRawData=True)
+        If it is the distance matrix already generated indicate (fIsRawData=False)
+        and the matrix is stored exactly as passed.
+
+        :params xData: AbundanceTable or Distance matrix . Taxa (columns) by samples (rows)(lists)
+        :type: AbundanceTable or DistanceMatrix
+        :param fIsRawData: Indicates if the xData is an AbundanceTable (True) or distance matrix (False; numpy array)
+        :type: boolean
+        :return boolean: indicator of success (True=Was able to load data)
+        """
+
+        if fIsRawData:
+            #Read in the file data to a numpy array.
+            #Samples (column) by Taxa (rows)(lists) without the column
+            data = xData.funcToArray()
+            #Identity test on purpose: "data==None" on a numpy array is an element-wise
+            #comparison and its result can not be used as the condition of an if.
+            if data is None:
+                print("Ordination:loadData::Error when converting AbundanceTable to Array, did not perform ordination.")
+                return False
+
+            #Transpose data to be Taxa (columns) by samples (rows)(lists)
+            data = UtilityMath.funcTransposeDataMatrix(data,fRemoveAdornments=False)
+            if(ValidateData.funcIsFalse(data)):
+                print("Ordination:loadData::Error when transposing data file, did not perform ordination.")
+                return False
+            else:
+                self.dataMatrix=data
+                self.isRawData=fIsRawData
+                #Ids come from the metadata row the table names as its id row
+                self.lsIDs=xData.funcGetMetadata(xData.funcGetIDMetadataName())
+
+        #Otherwise load the data directly as passed.
+        #Note lsIDs is left untouched on this path.
+        else:
+            self.dataMatrix=xData
+            self.isRawData=fIsRawData
+        return True
+

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/PCoA.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/PCoA.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,809 @@\n+"""\n+Author: Timothy Tickle\n+Description: Perfroms and plots Principle Coordinates Analysis.\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+#External libraries\n+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs\n+from cogent.cluster.nmds import NMDS\n+import csv\n+import math\n+import matplotlib.cm as cm\n+from Metric import Metric\n+import numpy as np\n+from scipy.spatial.distance import squareform\n+from scipy.stats.stats import spearmanr\n+from Utility import Utility\n+from UtilityMath import UtilityMath\n+from ValidateData import ValidateData\n+from matplotlib import pyplot as plt\n+\n+class PCoA:\n+ """\n+ Class to Run Principle Coordinates Analysis.\n+\n+ To run PCoA first load the AbundanceTable or distance matrix using the "load" method, \n+ then use the "run" method to derive points, and then use "plot" to plot the graph.\n+ The process is structured in this way so that data is read once but can be transformed to different\n+ distance matricies and after analysis can be plotted with multiple sample highlighting.\n+ One can always reload or rerun data by calling the appropriate function.\n+\n+ Supported beta diversity metrics include "braycurtis","canberra","chebyshev","cityblock","correlation",\n+\t"cosine","euclidean","hamming","sqeuclidean",unifrac_unweighted","unifrac_weighted"\n+ """\n+\n+ #Supported distance metrics\n+ c_BRAY_CURTIS="B_Curtis"\n+ c_SPEARMAN="spearman"\n+\n+ #Holds the data Matrix\n+ dataMatrix=None\n+ #Indicates if the data matrix is raw data (True) or a distance matrix 
(False)\n+ isRawData=None\n+ # Holds current matrix ids\n+ lsIDs = None\n+\n+ #Current pcoa object\n+ pcoa = None\n+\n+ #Holds the most recently successful distance metric\n+ strRecentMetric = None\n+\n+ #Current dimensions\n+ _iDimensions = 2\n+\n+ #Get plot colors\n+ objFigureControl = ConstantsFiguresBreadCrumbs()\n+\n+ #Forced X Axis\n+ ldForcedXAxis = None\n+\n+ #Indices for the plot group dictionary\n+ c_iXPointIndex = 0\n+ c_iYPointIndex = 1\n+ c_iColorIndex = 2\n+ c_iMarkerIndex = 3\n+ c_iAlphaIndex = 4\n+ c_iLabelIndex = 5\n+ c_iShapeIndex = 6\n+ c_iEdgeColorIndex = 7\n+ c_strTiesKey = "Ties"\n+\n+ #Happy path tested\n+ def loadData(self, xData, fIsRawData):\n+ """\n+ Loads data into PCoA (given the matrix or an abundance table)\n+ Data can be the Abundance Table to be converted to a distance matrix or a distance matrix\n+ If it is the AbundanceTable, indicate that it is rawData (tempIsRawData=T'..b'e\n+\n+ #Set shapes\n+ alLabelShapes = None\n+ if charForceShape == None:\n+ #Get shapes\n+ acharShapes = PCoA.getShapes(iCountUniqueValues)\n+ if len(acharShapes) == 0:\n+ return False\n+ #Make label shapes\n+ alLabelShapes = [ acharShapes[acharUniqueValues.index(sMetadata)] for sMetadata in lsLabelList ]\n+ else:\n+ alLabelShapes = charForceShape\n+\n+ #If the coloring is not forced, color so it is based on the labels\n+ if charForceColor == None:\n+ #Get colors based on labels\n+ atupldColors = [Utility.RGBToHex(cm.jet(float(iUniqueValueIndex)/float(iCountUniqueValues))) for iUniqueValueIndex in xrange(0,iCountUniqueValues)]\n+ #Make sure generated colors are unique\n+ if not iCountUniqueValues == len(set(atupldColors)):\n+ print "PCoA::plotList:Error, generated colors were not unique for each unique label value."\n+ print "Labels"\n+ print lsLabelList\n+ print len(lsLabelList)\n+ print "Unique Labels"\n+ print set(lsLabelList)\n+ print len(set(lsLabelList))\n+ print "Colors"\n+ print atupldColors\n+ print len(atupldColors)\n+ print "Unique Colors"\n+ print 
set(atupldColors)\n+ print len(set(atupldColors))\n+ return False\n+ #Make label coloring\n+ atupldLabelColors = [ atupldColors[acharUniqueValues.index(sMetadata)] for sMetadata in lsLabelList ]\n+ #If the coloring is forced, color so it is based on the charForcedColor list\n+ elif(ValidateData.funcIsValidList(charForceColor)):\n+ atupldLabelColors = charForceColor[0]\n+ if not len(lsLabelList) == len(atupldLabelColors):\n+ print "PCoA::plotList:Error, label and forced color lengths were not the same."\n+ print "Labels"\n+ print lsLabelList\n+ print len(lsLabelList)\n+ print "Forced Colors"\n+ print charForceColor[0]\n+ print len(charForceColor[0])\n+ return False\n+ lsLabelList = [ "".join([charForceColor[1][iLabelIndex], "_", lsLabelList[iLabelIndex]]) for iLabelIndex in xrange(0,len(charForceColor[1]))]\n+ #If the color is forced but the color does not vary, color all markers are the same.\n+ else:\n+ atupldLabelColors = charForceColor\n+\n+ #Call plot\n+ self.plot(tempPlotName=strOutputFileName, tempColorGrouping=atupldLabelColors, tempShape=alLabelShapes, tempLabels=lsLabelList, tempShapeSize = iSize, tempAlpha=dAlpha, tempInvert = fInvert, iDim1=iDim1, iDim2=iDim2)\n+\n+ def funcForceXAxis(self, dList):\n+ """\n+ Force the X axis to the given list.\n+\n+ :param dList: List of values to force the x axis of the plot (floats).\n+ :type: List of floats\n+ """\n+\n+ self.ldForcedXAxis = dList\n+\n+ def funcUnforceXAxis(self):\n+ """\n+ Return the X axis to the values derived from the loaded data.\n+ """\n+\n+ self.ldForcedXAxis = None\n+\n+ #Happy Path Tested\n+ @staticmethod\n+ def getShapes(intShapeCount):\n+ """\n+ Returns a list of characters which are valid shapes for markers.\n+\n+ :param intShapeCount: The number of shapes to return.\n+ :type: Integer (min 1, max 9)\n+ :return: A list of characters to use as markers. 
[] is returned on error\n+ """\n+\n+ lsPointShapes = [\'o\',\'s\',\'^\',\'v\',\'<\',\'>\',\'8\',\'p\',\'h\']\n+ if intShapeCount > len(lsPointShapes):\n+ print("".join(["Error, PCoA.getShapes. Do not have enough shapes to give. Received request for ",str(intShapeCount)," shapes. Max available shape count is ",str(len(lsPointShapes)),"."]))\n+ return []\n+ return lsPointShapes[0:intShapeCount]\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/PlotMatrix.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/PlotMatrix.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,96 @@
+"""
+Author: Timothy Tickle
+Description: Plots matrices.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+import matplotlib.pyplot as plt
+import numpy as np
+from pylab import *
+
+#Plots a matrix
+class PlotMatrix:
+  """
+  Plots a matrix as a colored image annotated with the value of each cell.
+  """
+
+  #Given a matrix and labels consistent to the matrix, plot a matrix
+  @staticmethod
+  def funcPlotMatrix(npMatrix, lsXLabels, strOutputFigurePath, strXTitle="X Axis", strYTitle="Y Axis", fFlipYLabels=False):
+    """
+    Given a matrix and labels consistent to the matrix, plot a matrix.
+    The same labels are used on both axes and the figure is written to file.
+
+    :param npMatrix: Numpy Array (matrix) to plot.
+    :type: Numpy Array
+    :param lsXLabels: Labels for the X axis (also used, optionally reversed, for the Y axis).
+    :type: List of strings
+    :param strOutputFigurePath: File to create the figure file.
+    :type: String
+    :param strXTitle: X Axis title (drawn as the title at the top of the figure).
+    :type: String
+    :param strYTitle: Y axis label.
+    :type: String
+    :param fFlipYLabels: Flip the Y labels so they are opposite order of x axis.
+    :type: Boolean
+    """
+
+    #Get canvas/figure
+    #Everything is drawn on this new figure so the previously current figure is not cleared.
+    figConfusionMatrix = plt.figure()
+    try:
+        objAxis = figConfusionMatrix.add_subplot(111)
+
+        #Copy the labels so any sequence type is accepted and the caller's list is never modified
+        lNewXLabels = list(lsXLabels)
+        lNewYLabels = list(lsXLabels)
+        if fFlipYLabels:
+            lNewYLabels.reverse()
+
+        #Set x axis and position
+        #The leading empty label is kept from the original implementation.
+        #NOTE(review): presumably it lines the names up with the default tick positions - confirm.
+        objAxis.xaxis.set_ticklabels([""]+lNewXLabels)
+        objAxis.xaxis.set_ticks_position('top')
+
+        #Set y axis
+        objAxis.yaxis.set_ticklabels([""]+lNewYLabels)
+
+        #Set axis titles
+        objAxis.set_ylabel(strYTitle)
+        figConfusionMatrix.suptitle(strXTitle)
+
+        #Plot matrix values
+        npValues = np.asarray(npMatrix)
+        objPlot = objAxis.imshow(npValues, cmap=plt.get_cmap("Blues"), interpolation='nearest')
+
+        #Plot text values
+        for yIndex, ldRow in enumerate(npMatrix):
+            for xIndex, dValue in enumerate(ldRow):
+                objAxis.text(xIndex, yIndex, dValue, fontdict = {'size':18,'weight':'bold'} )
+
+        #Add color bar
+        #The upper bound is inclusive so the largest value gets a tick and
+        #a matrix holding a single integer value still gets one tick.
+        iMinValue = int(npValues.min())
+        iMaxValue = int(npValues.max())
+        figConfusionMatrix.colorbar(objPlot, ticks=range(iMinValue, iMaxValue+1))
+
+        #Save to a file
+        figConfusionMatrix.savefig(strOutputFigurePath)
+    finally:
+        #Release the figure, pyplot keeps every figure alive until it is closed
+        plt.close(figConfusionMatrix)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/SVM.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/SVM.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,306 @@\n+"""\n+Author: Timothy Tickle\n+Description: Class to Allow Support Vector Machine analysis and to contain associated scripts\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+#Libraries\n+from AbundanceTable import AbundanceTable\n+from ConstantsBreadCrumbs import ConstantsBreadCrumbs\n+import csv\n+import os\n+from random import shuffle\n+from ValidateData import ValidateData\n+\n+class SVM:\n+ """\n+ Class which holds generic methods for SVM use.\n+ """\n+\n+ #1 Happy Path tested\n+ @staticmethod\n+ def funcConvertAbundanceTableToSVMFile(abndAbundanceTable, xOutputSVMFile, sMetadataLabel, lsOriginalLabels = None, lsSampleOrdering = None):\n+ """\n+ Converts abundance files to input SVM files.\n+\n+ :param abndAbundanceTable: AbudanceTable object to turn to input SVM file.\n+ :type:\tAbundanceTable\n+ :param xOutputSVMFile: File to save SVM data to when converted from the abundance table.\n+ :type:\tFileStream or string file path\n+ :param\tsMetadataLabel: The name of the last row in the abundance table representing metadata.\n+ :type:\tString\n+\t:param:\tlsOriginalLabels The original labels.\n+\t:type:\tList of strings\n+ :param\tlsSampleOrdering: Order of samples to output to output file. 
If none, the order in the abundance table is used.\n+ :type:\tList of strings\n+ :return\tlsUniqueLabels:\tList of unique labels.\n+ """\n+\n+ #Create data matrix\n+ dataMatrix = zip(*abndAbundanceTable.funcGetAbundanceCopy())\n+\n+ #Add labels\n+ llData = []\n+ lsLabels = lsOriginalLabels if lsOriginalLabels else SVM.funcMakeLabels(abndAbundanceTable.funcGetMetadata(sMetadataLabel))\n+ if not isinstance(xOutputSVMFile,str):\n+ if xOutputSVMFile.closed:\n+ xOutputSVMFile = open(xOutputSVMFile.name,"w")\n+\tostm = open(xOutputSVMFile,"w") if isinstance(xOutputSVMFile, str) else xOutputSVMFile\n+ f = csv.writer(ostm, csv.excel_tab, delimiter = ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)\n+\n+\t#This allows the creation of partially known files for stratification purposes\n+\tlsCurrentSamples = abndAbundanceTable.funcGetSampleNames()\n+ lsOrderingSamples = lsSampleOrdering if lsSampleOrdering else lsCurrentSamples[:]\n+\n+\tiLabelIndex = 0\n+\tiSize = len(dataMatrix[0])\n+\tiIndexSample = 1\n+\tfor sSample in lsOrderingSamples:\n+\t\tif sSample in lsCurrentSamples:\n+ \t\tf.writerow([lsLabels[iLabelIndex]]+\n+\t\t\t\t[ConstantsBreadCrumbs.c_strColon.join([str(tpleFeature[0]+1),str(tpleFeature[1])]) '..b'el_tab, delimiter = ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace )\n+ lsOriginalLabels = [lsLineElements[0] for lsLineElements in g if not lsLineElements[0] == ConstantsBreadCrumbs.c_strSVMNoSample]\n+\n+ if isPredictFile:\n+ lsOriginalLabels = lsOriginalLabels[1:]\n+\n+ #Check sample name length\n+ if not len(lsAllSampleNames) == len(lsOriginalLabels):\n+ print "SVM::funcReadLabelsFromFile. Error, the length of sample names did not match the original labels length. 
Samples ("+str(len(lsAllSampleNames))+"):"+str(lsAllSampleNames)+" Labels ("+str(len(lsOriginalLabels))+"):"+str(lsOriginalLabels)\n+ return False\n+\n+ #Change to {label:["sampleName1", "sampleName2"...],...}\n+ dictSampleLabelsRet = dict()\n+ for sValue in set(lsOriginalLabels): \n+ dictSampleLabelsRet[sValue] = set([lsAllSampleNames[iindex] for iindex, sLabel in enumerate(lsOriginalLabels) if sLabel == sValue])\n+ return dictSampleLabelsRet\n+\n+ #Tested\n+ @staticmethod\n+ def funcScaleFeature(npdData):\n+ """\n+ Scale a feature between 0 and 1. Using 01 and not 01,1 because it keeps the sparsity of the data and may save time.\n+\n+ :param\tnpdData:\tFeature data to scale.\n+ :type\tNumpy Array\tScaled feature data.\n+ :return npaFloat: A numpy array of floats.\n+ """\n+ if sum(npdData) == 0 or len(set(npdData))==1:\n+ return npdData\n+ dMin = min(npdData)\n+ return (npdData-dMin)/float(max(npdData-dMin))\n+\n+ #Tested\n+ @staticmethod\n+ def funcWeightLabels(lLabels):\n+ """\n+ Returns weights for labels based on how balanced the labels are. Weights try to balance unbalanced results.\n+\n+ :params\tlLabels:\tList of labels to use for measure how balanced the comparison is.\n+ :type\tList\n+ :return\tList:\t\t[dictWeights ({"label":weight}),lUniqueLabels (unique occurences of original labels)]\n+ """\n+ #Convert to dict\n+ #Do not use set to make elements unique. 
Need to preserve order.\n+ #First label should be 0\n+ lUniqueLabels = []\n+ for sElement in lLabels:\n+ if sElement not in lUniqueLabels:\n+ lUniqueLabels.append(sElement)\n+ dictLabels = dict(zip(lUniqueLabels, range(len(lUniqueLabels))))\n+\n+ #Build a dict of weights per label {label:weight, label:weight}\n+ #Get the occurrence of each label\n+ dictWeights = dict()\n+ for sLabelKey in dictLabels:\n+ sCurLabel = dictLabels[sLabelKey]\n+ dictWeights[sCurLabel] = lLabels.count(sLabelKey)\n+\n+ #Divide the highest occurrence each occurrence\n+ iMaxOccurence = max(dictWeights.values())\n+ for sWeightKey in dictWeights:\n+ dictWeights[sWeightKey]=iMaxOccurence/float(dictWeights[sWeightKey])\n+\n+ return [dictWeights,lUniqueLabels]\n+\n+ #Tested 3/4 cases could add in test 12 with randomize True\n+ def func10FoldCrossvalidation(self, iTotalSampleCount, fRandomise = False):\n+ """\n+ Generator.\n+ Generates the indexes for a 10 fold cross validation given a sample count.\n+ If there are less than 10 samples, it uses the sample count as the K-fold cross validation\n+ as a leave one out method.\n+\n+ :param\tiTotalSampleCount:\tTotal Sample Count\n+\t:type\tInteger\tSample Count\n+\t:param\tfRandomise:\tRandom sample indices\n+\t:type\tBoolean\tTrue indicates randomise (Default False)\n+ """\n+ #Make indices and shuffle if needed\n+ liindices = range(iTotalSampleCount)\n+ if fRandomise:\n+ shuffle(liindices)\n+\n+ #For 10 times\n+ iKFold = 10\n+ if iTotalSampleCount < iKFold:\n+ iKFold = iTotalSampleCount\n+ for iiteration in xrange(iKFold):\n+ lfTraining = [iindex % iKFold != iiteration for iindex in liindices]\n+ lfValidation = [not iindex for iindex in lfTraining]\n+ yield lfTraining, lfValidation\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/SVM.pyc

Binary file src/breadcrumbs/src/SVM.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/ScatterPlot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/ScatterPlot.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,106 @@
+"""
+Author: Timothy Tickle
+Description: Class to create scatter plots.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
+import matplotlib.pyplot as plt
+from pylab import *
+
+#Plots a matrix
+class ScatterPlot:
+  """
+  Creates a scatter plot and writes it to file.
+  """
+
+  @staticmethod
+  def funcPlot(lx, ly, strOutputFigurePath, strTitle = "Title", strXTitle="X Axis", strYTitle="Y Axis", strColor = "#83C8F9", fInvert=False):
+    """
+    Plot a scatter plot.
+
+    :params lx: List of x values
+    :type: List of doubles
+    :params ly: List of y values
+    :type: List of doubles
+    :params strOutputFigurePath: File path to make figure
+    :type: String file path
+    :params strTitle: Title of figure
+    :type: String
+    :params strXTitle: Label of x axis
+    :type: String
+    :params strYTitle: Label of y axis
+    :type: String
+    :params strColor: Hex color for the markers
+    :type: String
+    :params fInvert: Invert colors (true)
+    :type: Boolean
+    """
+
+    #Start plot
+    #Get plot object
+    imgFigure = plt.figure()
+    try:
+        #Get plot colors
+        objFigureControl = ConstantsFiguresBreadCrumbs()
+        objFigureControl.invertColors(fInvert=fInvert)
+        #NOTE(review): the alpha used below is read after invertColors. The original also
+        #captured the alpha before the inversion in a local that was never used - confirm
+        #which of the two values was intended for the markers.
+        strDetailsColor = objFigureControl.c_strDetailsColorLetter
+
+        #Color/Invert figure
+        imgFigure.set_facecolor(objFigureControl.c_strBackgroundColorWord)
+        imgSubplot = imgFigure.add_subplot(111)
+        #Background is set on the axes patch instead of through the "axisbg" keyword,
+        #the keyword is not accepted by current matplotlib releases.
+        imgSubplot.patch.set_facecolor(objFigureControl.c_strBackgroundColorLetter)
+        imgSubplot.set_xlabel(strXTitle)
+        imgSubplot.set_ylabel(strYTitle)
+        for strSpine in ('top','bottom','left','right'):
+            imgSubplot.spines[strSpine].set_color(strDetailsColor)
+        imgSubplot.xaxis.label.set_color(strDetailsColor)
+
+        #Adds light grid for numbers and puts them in the background
+        imgSubplot.yaxis.grid(True, linestyle='-', which='major', color=objFigureControl.c_strGridLineColor, alpha=objFigureControl.c_dAlpha)
+        imgSubplot.set_axisbelow(True)
+        imgSubplot.yaxis.label.set_color(strDetailsColor)
+        imgSubplot.tick_params(axis='x', colors=strDetailsColor)
+        imgSubplot.tick_params(axis='y', colors=strDetailsColor)
+
+        #Make scatter plot (on the subplot created above, not on whatever axes is current)
+        imgSubplot.scatter(x=lx,y=ly,c=strColor,marker="o",alpha=objFigureControl.c_dAlpha)
+
+        #Set ticks and title
+        imgSubplot.set_title(strTitle)
+        imgSubplot.title.set_color(strDetailsColor)
+
+        #End plot
+        #Save to a file
+        imgFigure.savefig(strOutputFigurePath, facecolor=imgFigure.get_facecolor())
+    finally:
+        #Release the figure, pyplot keeps every figure alive until it is closed
+        plt.close(imgFigure)

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/Utility.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/Utility.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,93 @@
+"""
+Author: Timothy Tickle
+Description: Utility class for generic functions.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+class Utility():
+    """
+    Class to perform misc methods.
+
+    All methods are static helpers; the class holds no state.
+    """
+
+    #Tested 6
+    @staticmethod
+    def getIndices(aList, dataElement):
+        """
+        Returns the index or indices of the element in the list.
+
+        :param aList: List to search for element.
+        :type List.
+        :param dataElement: Element for which to search (compared with ==).
+        :type Object of the same type as is found in the list.
+        :return: List of indices, in ascending order, indicating where the element occurs in the list. Returns [] when the element is not in the list.
+        """
+
+        return [iIndex for iIndex, dataCurrent in enumerate(aList) if dataCurrent == dataElement]
+
+    #Tested 6
+    @staticmethod
+    def reduceList(aList, dataIndicies):
+        """
+        Reduces a list to just the data indices given.
+
+        :param aList: List to reduce.
+        :type List
+        :param dataIndicies: List of indices to keep. The output follows the order of this list.
+        :type List of integers
+        :return: Reduced list.  Returns [] when an empty index list is given.
+        """
+
+        return [aList[iIndex] for iIndex in dataIndicies]
+
+    #Tested 8
+    @staticmethod
+    def RGBToHex(adColor):
+        """
+        Change a RGB float to hex.
+
+        :param adColor: A list of 3 elements which are floats between 0.0 and 1.0
+        :type A list of floats
+        :return: A string (HEX formatted, "#rrggbb") representation of the RGB color
+        """
+
+        #Each channel is scaled to 0-255, truncated to an integer and always
+        #written as two hex digits so the result is a well formed #rrggbb
+        #string (a channel value of 5 becomes "05", not "5").
+        #The channels are indexed explicitly so a list with fewer than three
+        #elements still raises an IndexError.
+        return "#" + "".join(["%02x" % int(adColor[iChannel]*255) for iChannel in (0,1,2)])

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/UtilityMath.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/UtilityMath.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,156 @@
+"""
+Author: Timothy Tickle
+Description: Utility class for generic math functions.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#Import libraries
+import itertools
+import numpy as np
+import operator
+import random
+from ValidateData import ValidateData
+
+class UtilityMath():
+    """
+    Class to perform misc math methods.
+
+    All methods are static helpers; the class holds no state.
+    """
+
+    ##
+    #Happy path test 2
+    @staticmethod
+    def funcConvertToBHQValue(ldPValues, iNumberOfTests=None):
+        """
+        Convert a list of p-values to a list of q-values.
+
+        Each q-value is computed as p-value * number of tests / rank, where rank
+        is the 1-based position of the p-value when sorted ascending. No
+        monotonicity correction or capping at 1.0 is applied.
+
+        :param ldPValues: List of doubles (p-values) to convert.
+        :type List
+        :param iNumberOfTests: Number of (multiple) tests if different than the ldPValues length. If not set the length of ldPValues is used.
+        :type Integer
+        :return List: List of Q-values made with a BH modification, in the same order as ldPValues.
+        """
+
+        #Materialize so the pvalues can be indexed and counted
+        ldPValues = list(ldPValues)
+
+        #If the number of tests is not specified, use the number of pvalues
+        if iNumberOfTests is None:
+            iNumberOfTests = len(ldPValues)
+
+        #Indices of the pvalues ordered from smallest to largest pvalue
+        #(the sort is stable so ties keep their original relative order)
+        liRankedIndices = sorted(range(len(ldPValues)), key=lambda iIndex: ldPValues[iIndex])
+
+        #Scale each pvalue by (number of tests / rank) and write the result
+        #back at the position of the original pvalue
+        ldQValues = [None] * len(ldPValues)
+        for iRank, iIndex in enumerate(liRankedIndices, 1):
+            ldQValues[iIndex] = ldPValues[iIndex] * iNumberOfTests / iRank
+        return ldQValues
+
+    #Happy path tested 5
+    @staticmethod
+    def funcSampleWithReplacement(aData, iSelect):
+        """
+        Sample from a vector of data (aData) with replacement iSelect many objects.
+
+        :param aData: Data to sample from with replacement.
+        :type List
+        :param iSelect: Amount of data to select from the original data population.
+        :type Integer.
+        :return List: List of sampled data (always a list, also when a single item is selected).
+                        Returns an empty list on error.
+        """
+
+        #Nothing to select or nothing to select from
+        if not (iSelect and aData):
+            return []
+
+        #One uniform draw per selected element; int() truncation maps
+        #[0,1) * size onto the valid indices 0..size-1
+        iDataSize = len(aData)
+        return [aData[int(random.random() * iDataSize)] for iDraw in range(iSelect)]
+
+    #Happy Path Tested 2
+    @staticmethod
+    def funcSumRowsOfColumns(npaAbundance, lsSampleNames):
+        """
+        Takes the column names of a npArray and sums the rows into one column.
+
+        :param npaAbundance: Array of data to sum, indexable by sample name.
+        :type Numpy Array
+        :param lsSampleNames: List of sample names. Must contain at least one name (an empty list raises an IndexError).
+        :type List List of strings.
+        :return List List of data summed at each row.
+        """
+
+        #Compress by data name
+        #Start from the first sample and add the remaining ones element-wise
+        npPooledSample = npaAbundance[lsSampleNames[0]]
+        for strSampleName in lsSampleNames[1:]:
+            #When combining, combine counts by summing
+            npPooledSample = npPooledSample + npaAbundance[strSampleName]
+        return list(npPooledSample)
+
+    #Testing Status: Light happy path testing 2
+    @staticmethod
+    def funcTransposeDataMatrix(npaMatrix, fRemoveAdornments=False):
+        """
+        Transposes a numpy array.
+
+        :param npaMatrix: Data matrix to transpose.
+        :type Numpy Array
+        :param fRemoveAdornments: Remove the first column before transposing.
+        :type Boolean True indicates removing the column.
+        :return Boolean or Numpy Array: Transposed array or a boolean indicating error.
+                                   Boolean False is returned on error.
+        """
+
+        #Validate parameters
+        if(not ValidateData.funcIsValidNPArray(npaMatrix)):
+            print("".join(["Utility_Math:transposeDataMatrix::Error, transposeDataMatrix was an invalid structured array. Value =",str(npaMatrix)]))
+            return False
+        if(not ValidateData.funcIsValidBoolean(fRemoveAdornments)):
+            print("".join(["Utility_Math:transposeDataMatrix::Error, fRemoveAdornments was an invalid boolean. Value =",str(fRemoveAdornments)]))
+            return False
+
+        #Change to samples x taxa as is needed for the compute method below
+        #Also remove the first element of every row (the first column, which
+        #is taxa identification) when requested
+        iFirstColumn = 1 if fRemoveAdornments else 0
+        conversionMatrix = [list(row)[iFirstColumn:] for row in npaMatrix]
+        return np.array(conversionMatrix).transpose()
+
+

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/UtilityMath.pyc

Binary file src/breadcrumbs/src/UtilityMath.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/ValidateData.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/ValidateData.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,624 @@\n+"""\n+Author: Timothy Tickle\n+Description: Validate Data containing methods for testing variables.\n+"""\n+\n+#####################################################################################\n+#Copyright (C) <2012>\n+#\n+#Permission is hereby granted, free of charge, to any person obtaining a copy of\n+#this software and associated documentation files (the "Software"), to deal in the\n+#Software without restriction, including without limitation the rights to use, copy,\n+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,\n+#and to permit persons to whom the Software is furnished to do so, subject to\n+#the following conditions:\n+#\n+#The above copyright notice and this permission notice shall be included in all copies\n+#or substantial portions of the Software.\n+#\n+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,\n+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A\n+#PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n+#####################################################################################\n+\n+__author__ = "Timothy Tickle"\n+__copyright__ = "Copyright 2012"\n+__credits__ = ["Timothy Tickle"]\n+__license__ = "MIT"\n+__maintainer__ = "Timothy Tickle"\n+__email__ = "ttickle@sph.harvard.edu"\n+__status__ = "Development"\n+\n+#Import local code\n+from types import *\n+import decimal\n+import os\n+import re\n+import string\n+\n+class ValidateData:\n+\n+ #Tested 5\n+ @staticmethod\n+ def funcIsValidBoolean(parameterValue):\n+ """\n+ Validates a parameter as a valid boolean.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is a valid boolean.\n+ :type\tBoolean\n+ """\n+\n+ #Check to make sure it is not null\n+ if parameterValue == None:\n+ return False\n+\n+ #Check to make sure it is a string\n+ if not type(parameterValue) is BooleanType:\n+ return False\n+ return True\n+\n+ #Tested 6\n+ @staticmethod\n+ def funcIsTrue(parameterValue):\n+ """\n+ Validates a parameter as true.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is True.\n+ :type\tBoolean\n+ """\n+\n+ if(ValidateData.funcIsValidBoolean(parameterValue)):\n+ if(parameterValue == True):\n+ return True\n+ return False\n+\n+ #Tested 6\n+ @staticmethod\n+ def funcIsFalse(parameterValue):\n+ """\n+ Validates a parameter as false.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is False.\n+ :type\tBoolean\n+ """\n+\n+ if(ValidateData.funcIsValidBoolean(parameterValue)):\n+ if(parameterValue == False):\n+ return True\n+ return 
False\n+\n+ #Tested 5\n+ @staticmethod\n+ def funcIsValidInteger(parameterValue):\n+ """\n+ Validates a parameter as an integer.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is an integer.\n+ :type\tBoolean\n+ """\n+\n+ #Check to make sure it is not null\n+ if (parameterValue == None):\n+ return False\n+\n+ #Check to make sure it is an integer\n+ if not type(parameterValue) is IntType:\n+ return False\n+\n+ return True\n+\n+ #Tested 5\n+ @staticmethod\n+ def funcIsValidPositiveInteger(parameterValue, tempZero = False):\n+ """\n+ Validates a parameter as false.\n+\n+ :para'..b' @staticmethod\n+ def funcIsValidDictionary(parameterValue):\n+ """\n+ Validates a parameter as a dictionary.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is a dictionary.\n+ :type\tBoolean\n+ """\n+\n+ #Check to make sure it is not null\n+ if parameterValue == None:\n+ return False\n+\n+ #Check to make sure it is a string\n+ if not type(parameterValue) is DictType:\n+ return False\n+\n+ #Check key elements\n+ keyList = parameterValue.keys()\n+ keyListSize = len(keyList)\n+ for i in range(0,keyListSize):\n+ if keyList[i] == None:\n+ return False\n+ if type(keyList[i]) is ListType:\n+ if validateData.funcIsValidList(keyList[i]) == False:\n+ return False\n+\n+ #Check key elements\n+ itemList = parameterValue.values()\n+ itemListSize = len(itemList)\n+\n+ for i in range(0,itemListSize):\n+ if itemList[i] == None:\n+ return False\n+ if type(itemList[i]) is ListType:\n+ if ValidateData.funcIsValidList(itemList[i]) == False:\n+ return False\n+ return True\n+\n+ #Tested 18\n+ @staticmethod\n+ def funcIsValidDNASequence(parameterValue):\n+ """\n+ Validates a parameter as a valid DNA sequence.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is a valid value.\n+ 
:type\tBoolean\n+ """\n+\n+ if ValidateData.funcIsValidString(parameterValue):\n+ expression = re.compile(r\'[^atcgATCG]\')\n+ if not None == expression.search(parameterValue):\n+ return False\n+ return True\n+ return False\n+\n+ #Tested 15\n+ @staticmethod\n+ def funcIsValidNucleotideBase(parameterValue):\n+ """\n+ Validates a parameter as a character which is a valid nucleotide representation.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is a valid value.\n+ :type\tBoolean\n+ """\n+\n+ if (ValidateData.funcIsValidDNASequence(parameterValue) or (parameterValue == \'u\') or (parameterValue == "U")):\n+ if (len(parameterValue) == 1):\n+ return True\n+ return False\n+\n+ #Testing 4\n+ @staticmethod\n+ def funcIsValidFileName(parameterValue):\n+ """\n+ Validates a parameter as a valid file name.\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is a valid file path.\n+ :type\tBoolean\n+ """\n+\n+ if parameterValue is None:\n+ return False\n+ elif(ValidateData.funcIsValidString(parameterValue)):\n+ return os.path.exists(parameterValue)\n+ return False\n+\n+ #Tested 5\n+ @staticmethod\n+ def funcIsValidClass(parameterValue, strCorrectName):\n+ """\n+ Validates a parameter as a valid class (of a specifc type given by name).\n+\n+ :param\tparameterValue:\tValue to be evaluated.\n+ :type\tUnknown\n+ :param\tstrCorrectName:\tName of te class the parameter should be.\n+ :type\tUnknown\n+ :return\tBoolean:\tTrue indicates the parameter is a valid value.\n+ :type\tBoolean\n+ """\n+\n+ if(parameterValue==None):\n+ return False\n+ if not ValidateData.funcIsValidString(strCorrectName):\n+ return False\n+ classType = type(parameterValue).__name__\n+ if(classType == strCorrectName):\n+ return True\n+ if(classType == \'instance\'):\n+ if(parameterValue.__class__.__name__==strCorrectName):\n+ return True\n+ else:\n+ return False\n+ 
return False\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/ValidateData.pyc

Binary file src/breadcrumbs/src/ValidateData.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/__init__.pyc

Binary file src/breadcrumbs/src/__init__.pyc has changed

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/circlader/circlader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/circlader/circlader.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+#-----------------------------------------------------------------------------
+# NAME: circlader.py
+# DESCRIPTION:  Circlader (circular cladogram builder) is a python script for
+#               creating images of circular cladogram starting from any guide
+#               tree in tabular or Newick format
+#
+# Author: Nicola Segata
+# email: nsegata@hsph.harvard.edu
+#
+# Copyright: (c) 2011
+# Licence: <your licence>
+#
+#-----------------------------------------------------------------------------
+
+
+import os
+import sys,argparse
+import circlader_lib as cir
+
+def read_params(args):
+    """
+    Build the Circlader command line parser and parse an argument vector.
+
+    args is a full argument vector in sys.argv form: the program name first,
+    followed by the options. The program name is skipped before parsing.
+    When args is None, argparse falls back to the process arguments.
+
+    Returns a dictionary mapping each option name to its parsed value.
+    """
+    parser = argparse.ArgumentParser(description='Circlader')
+
+    # Positional arguments: both are optional and default to None
+    parser.add_argument('tree_file', nargs='?', default=None, type=str,
+            help=   "the input tree in Newick format (unless --tf is specified)"
+                    "[stdin if not present]")
+    parser.add_argument('out_image', nargs='?', default=None, type=str,
+            help=   "the output image (the format is guessed from the extension) "
+                    "[windows visualization if not present]")
+    parser.add_argument('--tree_format', choices=['newick','tabular'],
+                        default='newick', type=str,
+            help=       "specifies the input tree format (default \"newick\", "
+                        "other choice is \"tabular\")")
+    # The default style file is resolved against the current working
+    # directory at the time the parser is built
+    parser.add_argument('--style_file', nargs='?', default=os.getcwd()+"/default_styles/style.txt", type=str,
+            help=       "set the style file (default_styles/style.txt if not specified)")
+    parser.add_argument('--color_file', nargs='?', default=None, type=str,
+            help=       "set the color file (default_styles/colors.txt if not specified)")
+    parser.add_argument('--highlight_file', nargs='?', default=None, type=str,
+            help=       "set the highlight file (default none)")
+    parser.add_argument('--tick_file', nargs='?', default=None, type=str,
+            help=       "set the label file for level's names (default none)")
+    parser.add_argument('--size_file', nargs='?', default=None, type=str,
+            help=       "set the file containing the dimentison of the circles (default none)")
+    parser.add_argument('--circle_file', nargs='?', default=None, type=str,
+            help=       "set the external circles file (default none) [BETA FEATURE]")
+    parser.add_argument('--format', choices=['png','pdf','ps','eps','svg'], default=None, type=str,
+            help=       "set the format of the output image (default none "
+                        "meaning that the format is guessed from the output "
+                        "file extension)")
+    parser.add_argument('--dpi', default=300, type=int )
+
+    # Parse the vector that was handed in instead of silently re-reading the
+    # global sys.argv; element 0 is the program name and is not an option
+    lsArguments = None if args is None else list(args)[1:]
+    return vars(parser.parse_args(lsArguments))
+
+# Script entry: runs at import time, there is no __main__ guard.
+# Parse the command line into a plain dict of option values.
+params = read_params(sys.argv)
+
+# Build the tree object and feed it the annotation files; each of the
+# colour/size/circle/highlight/tick options may be None.
+# NOTE(review): the call order looks significant - the highlight and circle
+# readers appear to look up named colours loaded by read_colors - confirm in
+# circlader_lib before reordering.
+cladogram = cir.Tree()
+cladogram.read_colors(params['color_file'])
+cladogram.read_style(params['style_file'])
+cladogram.read_sizes(params['size_file'])
+cladogram.read_circles(params['circle_file'])
+cladogram.read_highlights(params['highlight_file'])
+cladogram.read_tick_labels(params['tick_file'])
+# Any --tree_format other than 'newick' (i.e. 'tabular') goes through the
+# lefse loader.
+if params['tree_format'] == 'newick':
+    cladogram.load_newick(params['tree_file'])
+else:
+    cladogram.load_lefse(params['tree_file'])
+# Presumably these two calls compute the layout before drawing - TODO confirm
+# in circlader_lib.
+cladogram.pos_rad_leaves()
+cladogram.set_pos()
+# Render using the requested output path, format and resolution.
+cladogram.draw(params['out_image'],outformat=params['format'],dpi=params['dpi'])

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/circlader/circlader_lib.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/circlader/circlader_lib.py Tue May 13 21:58:57 2014 -0400

[

b'@@ -0,0 +1,726 @@\n+#-----------------------------------------------------------------------------\n+# NAME: circlader_lib.py\n+# DESCRIPTION: Circlader (CIRcular CLADogram buidER) is a python script for\n+# creating images of circular cladogram starting from any guide\n+# tree in tabular or Newick format\n+#\n+# Author: Nicola Segata\n+# email: nsegata@hsph.harvard.edu\n+#\n+# Copyright: (c) 2011\n+# Licence: <your licence>\n+#\n+#-----------------------------------------------------------------------------\n+\n+import sys,os,math,matplotlib\n+#matplotlib.use(\'TkAgg\') \n+matplotlib.use(\'Agg\') \n+#matplotlib.use(\'PDF\')\n+from matplotlib import collections\n+from Bio import Phylo\n+import numpy as np\n+from pylab import *\n+import operator\n+import matplotlib.patches as mpatches\n+\n+class Tree:\n+ max_rad_dist = math.pi*0.15\n+\n+# Class specifying clade with strutural characteristics and \n+# associated information\n+ class Clade:\n+ def __init__( self,taxa_id=0, name=\'\',\n+ br_len=-1.0, root_br_dist=-1.0,\n+ highlighted=False,\n+ ext_seg = False,\n+ ext_seg_vec = None):\n+ self.id = taxa_id\n+ self.name = name\n+ self.label = name\n+ self.label_cat = \'\'\n+ self.col = \'w\'\n+ self.br_len = br_len\n+ self.tot_br_len = 0.0 \n+ self.root_br_dist = root_br_dist\n+ self.is_leaf = True\n+ self.is_highlighted = highlighted\n+ self.__children = {}\n+ self.pos = Tree.VisPos()\n+ self.nleaves = 0\n+ self.size = 0\n+ self.ext_seg = ext_seg\n+ self.ext_seg_vec = ext_seg_vec\n+\n+ def add_child(self,cl):\n+ self.__children[cl.id] = cl\n+ self.tot_br_len += (cl.br_len + \n+ (0 if cl.is_leaf else cl.tot_br_len))\n+ self.is_leaf = False\n+ \n+ def get_children(self):\n+ return self.__children.values()\n+\n+# Class decribing graphical information associated with clades\n+ class VisPos:\n+ def __init__( self, r=0.0, rad=0.0, rmin=0.0, \n+ rmax=0.0, lab_lev = 0.0):\n+ self.rad = rad\n+ self.r = r\n+ self.rad_min = rmin\n+ self.rad_max = rmax\n+ self.lab_lev = lab_lev\n+ 
def __init__(self):\n+ self.__all_taxa = set([])\n+ self.__noname_gen = self.__unique_none_id()\n+ self.__max_br_depth = 0.0\n+ self.__min_br_depth = float(sys.maxint) \n+ self.__leaves = []\n+ self.min_high = float(sys.maxint)\n+ self.max_high = -1.0\n+ self.added_label_cats = []\n+ self.vis_cats = []\n+ self.wing_ext_max = 1.0\n+ self.cseg = None \n+\n+ def __unique_none_id(self):\n+ for i in xrange(2**31-1):\n+ yield "_"+str(i)\n+\n+\n+ def add_clade(self,cl,fn,br_depth):\n+ cl.size = self.sizes[cl.id] if hasattr(self, \'sizes\') and cl.id in self.sizes else self.opt[\'default_taxa_size\']\n+ if cl.is_highlighted:\n+ cl.pos.lab_lev = br_depth+1.0\n+ if self.min_high > cl.pos.lab_lev:\n+ self.min_high = cl.pos.lab_lev\n+ if self.max_high < min( cl.pos.lab_lev,\n+ float(self.opt[\'highlighting_bar_stop_level\'])*1.001):\n+ self.max_high = cl.pos.lab_lev\n+ cl.label = self.labels[fn]\n+ cl.col = self.label_color[fn]\n+ cl.label_cat = self.label_cat[fn]\n+ cl.size *= self.opt[\'highlight_taxa_size_magnifier\']\n+\n+\n+ def load_lefse(self,inp_f):\n+ with open(inp_f, \'r\') as inp:\n+ rows = ["root."+l.rstrip().split("\\t")[0] for l in inp.readlines()]\n+ rows.append("root")\n+\n+ self.opt[\'ignore_branch_len\'] = 1\n+\n+ def rec_add(clade_name,rows,br_depth=0.0'..b'5\n+ format = outformat,\n+ edgecolor=fc) #,format=self.opt[\'img_format\'])\n+\n+ plt.close()\n+ else:\n+ plt.show()\n+ \n+ def read_highlights(self,highlights_file):\n+ self.labels = {}\n+ self.label_color = {}\n+ self.label_cat = {}\n+ if not highlights_file: \n+ return\n+ with open(highlights_file) as inp_f:\n+ labels = [l.rstrip().split(\'\\t\') \n+ for l in inp_f.readlines() if not l.startswith("#")] \n+ for l in labels:\n+ self.labels[l[0]] = l[1] \n+ self.label_cat[l[0]] = l[2]\n+ if l[3].startswith("_c_"):\n+ self.label_color[l[0]] = [float(v) for v in l[3].split("_c_")[-1].split("[")[-1].split("]")[0].split(",")]\n+ else:\n+ self.label_color[l[0]] = self.colors[l[3]]\n+\n+ def 
read_circles(self,circles_file):\n+ self.cseg = {}\n+ if not circles_file: \n+ return\n+ with open(circles_file) as inp_f:\n+ mat = [l.rstrip().split(\'\\t\') \n+ for l in inp_f.readlines() if not l.startswith("#")] \n+ for m in mat:\n+ cv = []\n+ cs = []\n+ for v in m[1:]:\n+ v00,bor = v.split("#") if "#" in v else (v,None)\n+ v0,dep = v00.split("$") if "$" in v00 else (v00,None)\n+ v1,shape = v0.split("!") if "!" in v0 else (v0,None)\n+ col,alpha = v1.split(":") if ":" in v1 else [v1,"1.0"]\n+ \n+ if col.startswith("_c_"):\n+ c = [float(v) for v in col.split("_c_")[-1].split("[")[-1].split("]")[0].split(",")]\n+ else:\n+ c = self.colors[col]\n+ a = float(alpha)\n+ cv.append((c,a,shape,float(dep) if dep else None,float(bor) if bor else None))\n+ self.cseg[m[0]] = cv\n+\n+ def read_sizes(self,size_file):\n+ self.sizes = {}\n+ if not size_file: \n+ return\n+ with open(size_file) as inp_f:\n+ rows = [l.rstrip().split(\'\\t\') \n+ for l in inp_f.readlines() if not l.startswith("#")] \n+ for l in rows:\n+ self.sizes["root."+l[0]] = float(l[1])\n+\n+ def read_tick_labels(self,ticks_file):\n+ self.tick_labels = {}\n+ if not ticks_file:\n+ return\n+ with open(ticks_file) as inp_f:\n+ labels = [l.rstrip().split(\'\\t\') \n+ for l in inp_f.readlines() if not l.startswith("#")] \n+ for l in labels:\n+ self.tick_labels[int(l[0])-1] = l[1] \n+\n+\n+ default_colors = \'bgrcmy\'\n+\n+ def read_colors(self,colors_file):\n+ if not colors_file:\n+ self.opt = {}\n+ for c in self.default_colors:\n+ self.opt[c] = c\n+ return\n+\n+ self.color_list = []\n+ self.colors = {}\n+ with open(colors_file) as inp_f:\n+ col = [l.rstrip().split(\'\\t\') \n+ for l in inp_f.readlines() if not l.startswith("#")]\n+ for c in col:\n+ self.color_list.append(c[0])\n+ self.colors[c[0]] = [float(cc)/255.0 for cc in c[1].split(\',\')]\n+\n+ def read_style(self,style_file):\n+ with open(style_file) as inp_f:\n+ self.opt = dict([(l.rstrip().split()[0],l.split("#")[0].split()[1:]) \n+ for l in 
inp_f.readlines() \n+ if l.strip() and not l.startswith("#")])\n+ for o in self.opt:\n+ try:\n+ v= int(self.opt[o][0])\n+ except ValueError:\n+ try:\n+ v= float(self.opt[o][0])\n+ except ValueError:\n+ try:\n+ v = str(self.opt[o][0])[0]\n+ except ValueError:\n+ print "not a valid input",self.opt[o][0]\n+ self.opt[o] = v\n+\n'

diff -r 000000000000 -r 2f4f6f08c8c4 src/breadcrumbs/src/under_development/PCA.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/breadcrumbs/src/under_development/PCA.py Tue May 13 21:58:57 2014 -0400

[

@@ -0,0 +1,133 @@
+"""
+Author: Timothy Tickle
+Description: Performs and plots Principle Components Analysis.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2013"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+#External libraries
+from AbundanceTable import AbundanceTable
+from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
+from Ordination import Ordination
+import matplotlib.cm as cm
+from math import sqrt,asin
+from matplotlib.mlab import PCA as mplPCA
+from matplotlib import pyplot as plt
+from numpy import *
+from UtilityMath import UtilityMath
+from ValidateData import ValidateData
+
+class PCA(Ordination):
+  """
+  Class to Run Principle Components Analysis on an abundance table object
+
+  The input is read from self.dataMatrix and the result is stored in
+  self.dataProcessed. Both attributes are expected to be provided by the
+  Ordination base class (not shown here - confirm there).
+  """
+
+  def __init__(self):
+    Ordination.__init__(self)
+    # Keys of the self.dataProcessed result dictionary
+    self.c_strComponents = "components"
+    self.c_strVariance = "percent_variance"
+
+  def run(self,fScale=True,fCenter=True,fASTransform=False):
+    """
+    Run the PCA (through a singular value decomposition) on the loaded data.
+
+    :param fScale: True scales every column of the transposed data matrix.
+    :type Boolean
+    :param fCenter: True subtracts the column mean from every column.
+    :type Boolean
+    :param fASTransform: True applies asin(sqrt(x)) to every value first.
+    :type Boolean
+    :return Boolean: True on success, False when no data is loaded.
+    """
+    if self.dataMatrix is None:
+      print("PCA:run::Error Tried to run analysis on no data load data first.")
+      return False
+
+    # Work on the transpose of the stored matrix
+    mtrxPrepped = self.dataMatrix.T
+    if fASTransform:
+      mtrxPrepped = array([self.doAsinOnList(row) for row in sqrt(mtrxPrepped)])
+    if fCenter:
+      mtrxPrepped = mtrxPrepped-mean(mtrxPrepped,0)
+    if fScale:
+      # This is consistent to R's prcomp method.
+      # Centered data is scaled by the standard deviation, uncentered data by
+      # the root mean square (float() keeps the mean a true division).
+      vStd = std(a=mtrxPrepped,axis=0) if fCenter else [sqrt(sum(square(ldRow))/float(len(ldRow))) for ldRow in mtrxPrepped.T]
+      # Divide into a new array. An in-place division would write through
+      # the transposed view into self.dataMatrix when no transform or
+      # centering created a copy, and cannot store floats in an integer matrix.
+      mtrxPrepped = mtrxPrepped / vStd
+    iCols = mtrxPrepped.shape[1]
+    U,S,V = linalg.svd(a=mtrxPrepped,full_matrices=False)
+    # The constant factor cancels in the normalization on the next line
+    ldVariance = square(S*(iCols-1))
+    ldVariance = ldVariance/sum(ldVariance)
+    # Here components are row-wise so each component is a row.
+    # Here percent variance is given and it is in the order of the components.
+    self.dataProcessed = {self.c_strComponents:V, self.c_strVariance:ldVariance}
+    return True
+
+  def getVariance(self,iIndex=None):
+    """
+    Get the fraction of variance per component.
+
+    :param iIndex: Index of a single component, None returns all of them.
+    :type Integer
+    :return: The stored variance value(s), or False when run() has not stored a result.
+    """
+    if self.dataProcessed is None:
+      print("PCA:getVariance::Error Tried to run analysis on no data load data first.")
+      return False
+
+    ldVariance = self.dataProcessed[self.c_strVariance]
+    if iIndex is None:
+      return ldVariance
+    return ldVariance[iIndex]
+
+  def getComponents(self,iIndex=None):
+    """
+    Get the components, transposed relative to how run() stores them.
+
+    :param iIndex: Index into the transposed component matrix, None returns the whole matrix.
+    :type Integer
+    :return: The transposed component matrix (or one row of it), or False when run() has not stored a result.
+    """
+    if self.dataProcessed is None:
+      print("PCA:getComponents::Error Tried to run analysis on no data load data first.")
+      return False
+
+    mtrxComponents = self.dataProcessed[self.c_strComponents].T
+    if iIndex is None:
+      return mtrxComponents
+    return mtrxComponents[iIndex]
+
+  def doAsinOnList(self, lsValues):
+    """ Return a list holding asin() of every element of lsValues. """
+    return([asin(element) for element in lsValues])
+
+  def convertMetadataForPCA(self,abndTable):
+    """ This takes a metadata dictionary from an abundance table and formats the metadata for use in the PCA.
+        This formatting includes reducing discontinuous data to levels and replacing NA values with the mean of the values (continuous data only)
+        This returns a numpy array of the format needed for this PCA object.
+        None is returned when the table has fewer than two metadata entries.
+    """
+
+    # Replace missing values with the mean
+    # dummy the discrete data
+    dictMetadata = abndTable.funcGetMetadataCopy()
+    if(len(dictMetadata) < 2):
+      return None
+
+    ## Remove the metadata id
+    dictMetadata.pop(abndTable.funcGetIDMetadataName(),None)
+    lsMissing = ["na",""]
+    lMetadata = []
+    for lxItem in dictMetadata.values():
+      ## If this is not numeric data then dummy
+      ## Treat NA as a seperate category
+      if not (sum([ ValidateData.funcIsValidStringFloat(xItem) for xItem in lxItem]) == len(lxItem)):
+        # Go through each level and dummy the metadata
+        # (one 0.0/1.0 indicator row per distinct level)
+        for sLevel in set(lxItem):
+          lMetadata.append([1.0 if xItem==sLevel else 0.0 for xItem in lxItem])
+      else:
+        # Change NA to Mean and store numeric data as float
+        # Also add to the metadata so that there are no negative numbers
+        ldNONA = [float(xItem) for xItem in lxItem if not xItem.strip().lower() in lsMissing]
+        dMean = sum(ldNONA)/float(len(ldNONA))
+        lsMetadataValues = [dMean if xItem.strip().lower() in lsMissing else float(xItem) for xItem in lxItem]
+        # Note the absolute value of the minimum is added to every value,
+        # also when the minimum is already positive
+        dMinValueAdj = abs(min(lsMetadataValues))
+        lMetadata.append([sValue + dMinValueAdj for sValue in lsMetadataValues])
+    return(array(lMetadata).T)

diff -r 000000000000 -r 2f4f6f08c8c4 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue May 13 21:58:57 2014 -0400

@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <set_environment version="1.0">
+        <environment_variable name="micropita_SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+    </set_environment>
+</tool_dependency>