changeset 26:5c77423823cb

Updates for Mothur version 1.25.0 (includes changes to datatypes metagenomics.py and uses more efficient means for labels and groups options)
author Jim Johnson <jj@umn.edu>
date Wed, 16 May 2012 13:12:05 -0500
parents bfbaf823be4c
children 49058b1f8d3f
files mothur/README mothur/lib/galaxy/datatypes/metagenomics.py mothur/tool-data/datatypes.conf.xml mothur/tools/mothur/cooccurrence.xml mothur/tools/mothur/create.database.xml mothur/tools/mothur/dist.shared.xml mothur/tools/mothur/make.biom.xml mothur/tools/mothur/make.shared_from_biom.xml mothur/tools/mothur/mothur_wrapper.py mothur/tools/mothur/pcr.seqs.xml mothur/tools/mothur/tree.shared.xml mothur/tools/mothur/trim.flows.xml mothur/tools/mothur/unifrac.unweighted.xml mothur/tools/mothur/unifrac.weighted.xml
diffstat 14 files changed, 724 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/mothur/README	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/README	Wed May 16 13:12:05 2012 -0500
@@ -2,7 +2,7 @@
 
 (The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of cpu processors used be mothur commands)
 
-Install mothur v.1.24.1 on your galaxy system so galaxy can execute the mothur command
+Install mothur v.1.25.0 on your galaxy system so galaxy can execute the mothur command
   ( This version of wrappers is designed for Mothur version 1.24 - it may work on later versions )
   http://www.mothur.org/wiki/Download_mothur
   http://www.mothur.org/wiki/Installation
@@ -105,6 +105,10 @@
   tool-data/shared/jars/TreeVector.jar
 
 
+################################################################
+#### If you are manually adding this to your local galaxy:  ####
+################################################################
+
 add config files (*.xml) and wrapper code (*.py) from tools/mothur/*  to your galaxy installation 
 
 
@@ -114,6 +118,7 @@
 import metagenomics # added for metagenomics mothur
 
 
+
 add datatypes to:  datatypes_conf.xml
         <!-- Start Mothur Datatypes -->
         <datatype extension="otu" type="galaxy.datatypes.metagenomics:Otu" display_in_upload="true"/>
@@ -165,19 +170,24 @@
       <tool file="mothur/count.groups.xml"/>
       <tool file="mothur/make.design.xml"/>
       <tool file="mothur/sub.sample.xml"/>
+      <tool file="mothur/sort.seqs.xml"/>
+      <tool file="mothur/create.database.xml"/>
     <label text="Mothur Sequence Analysis" id="mothur_sequence_analysis"/>
       <tool file="mothur/sffinfo.xml"/>
       <tool file="mothur/trim.flows.xml"/>
       <tool file="mothur/shhh.flows.xml"/>
+      <tool file="mothur/shhh.seqs.xml"/>
       <tool file="mothur/make.fastq.xml"/>
       <tool file="mothur/fastq.info.xml"/>
       <tool file="mothur/summary.seqs.xml"/>
+      <tool file="mothur/summary.qual.xml"/>
       <tool file="mothur/count.seqs.xml"/>
       <tool file="mothur/reverse.seqs.xml"/>
       <tool file="mothur/list.seqs.xml"/>
       <tool file="mothur/get.seqs.xml"/>
       <tool file="mothur/remove.seqs.xml"/>
       <tool file="mothur/trim.seqs.xml"/>
+      <tool file="mothur/pcr.seqs.xml"/>
       <tool file="mothur/unique.seqs.xml"/>
       <tool file="mothur/deunique.seqs.xml"/>
       <tool file="mothur/chop.seqs.xml"/>
@@ -201,6 +211,7 @@
       <tool file="mothur/chimera.bellerophon.xml"/>
       <tool file="mothur/chimera.ccode.xml"/>
       <tool file="mothur/chimera.check.xml"/>
+      <tool file="mothur/chimera.perseus.xml"/>
       <tool file="mothur/chimera.pintail.xml"/>
       <tool file="mothur/chimera.slayer.xml"/>
       <tool file="mothur/chimera.uchime.xml"/>
@@ -225,6 +236,8 @@
       <tool file="mothur/get.sabund.xml"/>
       <tool file="mothur/get.relabund.xml"/>
       <tool file="mothur/make.shared.xml"/>
+      <tool file="mothur/make.shared_from_biom.xml"/>
+      <tool file="mothur/make.biom.xml"/>
       <tool file="mothur/get.group.xml"/>
       <tool file="mothur/bin.seqs.xml"/>
       <tool file="mothur/get.sharedseqs.xml"/>
@@ -239,8 +252,8 @@
       <tool file="mothur/rarefaction.shared.xml"/>
       <tool file="mothur/normalize.shared.xml"/>
       <tool file="mothur/summary.shared.xml"/>
+      <tool file="mothur/otu.association.xml"/>
       <tool file="mothur/dist.shared.xml"/>
-      <tool file="mothur/heatmap.bin.xml"/>
       <tool file="mothur/heatmap.sim.xml"/>
       <tool file="mothur/venn.xml"/>
       <tool file="mothur/tree.shared.xml"/>
@@ -253,6 +266,7 @@
       <tool file="mothur/homova.xml"/>
       <tool file="mothur/mantel.xml"/>
       <tool file="mothur/anosim.xml"/>
+      <tool file="mothur/cooccurrence.xml"/>
     <label text="Mothur Phylotype Analysis" id="mothur_phylotype_analysis"/>
       <tool file="mothur/get.lineage.xml"/>
       <tool file="mothur/remove.lineage.xml"/>
@@ -261,6 +275,7 @@
       <tool file="mothur/clearcut.xml"/>
       <tool file="mothur/indicator.xml"/>
       <tool file="mothur/deunique.tree.xml"/>
+      <tool file="mothur/classify.tree.xml"/>
       <tool file="mothur/TreeVector.xml"/>
   </section> <!-- metagenomics_mothur -->
 
--- a/mothur/lib/galaxy/datatypes/metagenomics.py	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/lib/galaxy/datatypes/metagenomics.py	Wed May 16 13:12:05 2012 -0500
@@ -17,6 +17,9 @@
 from galaxy.datatypes.sequence import Fasta
 from galaxy import util
 from galaxy.datatypes.images import Html
+import pkg_resources
+pkg_resources.require("simplejson")
+import simplejson
 
 
 log = logging.getLogger(__name__)
@@ -1260,6 +1263,56 @@
         return False
 
 
+## Biom 
+
+class BiologicalObservationMatrix( Text ):
+    """
+    Biological Observation Matrix (biom): http://biom-format.org/documentation/biom_format.html
+    The format of the file is JSON:
+    {
+    "id":null,
+    "format": "Biological Observation Matrix 0.9.1-dev",
+    "format_url": "http://biom-format.org",
+    "type": "OTU table",
+    "generated_by": "QIIME revision 1.4.0-dev",
+    "date": "2011-12-19T19:00:00",
+    "rows":[
+            {"id":"GG_OTU_1", "metadata":null},
+            {"id":"GG_OTU_2", "metadata":null},
+            {"id":"GG_OTU_3", "metadata":null}
+        ],
+    "columns": [
+            {"id":"Sample1", "metadata":null},
+            {"id":"Sample2", "metadata":null}
+        ],
+    "matrix_type": "sparse",
+    "matrix_element_type": "int",
+    "shape": [3, 2],
+    "data":[[0,1,1],
+            [1,0,5],
+            [2,1,4]
+           ]
+    }
+    """
+    file_ext = 'biom'
+
+    def __init__(self, **kwd):
+        Text.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """Return True for a JSON file under 50000 bytes whose 'format' value names the biom format."""
+        if os.path.getsize(filename) >= 50000:
+            return False
+        try:
+            data = simplejson.load(open(filename))
+            # find() gives -1 when absent and 0 for a match at the start, so test >= 0, not truthiness
+            return data['format'].find('Biological Observation Matrix') >= 0
+        except Exception:
+            return False
+
+
+
+
 ## Qiime Classes
 
 class QiimeMetadataMapping(Tabular):
--- a/mothur/tool-data/datatypes.conf.xml	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tool-data/datatypes.conf.xml	Wed May 16 13:12:05 2012 -0500
@@ -40,6 +40,7 @@
         <datatype extension="axes" type="galaxy.datatypes.metagenomics:Axes" display_in_upload="true"/>
         <datatype extension="sff.flow" type="galaxy.datatypes.metagenomics:SffFlow" display_in_upload="true"/>
         <datatype extension="tre" type="galaxy.datatypes.metagenomics:Newick" display_in_upload="true"/>
+        <datatype extension="biom" type="galaxy.datatypes.metagenomics:BiologicalObservationMatrix" display_in_upload="true"/>
         <datatype extension="nhx" type="galaxy.datatypes.metagenomics:Nhx" display_in_upload="true"/>
         <datatype extension="nex" type="galaxy.datatypes.metagenomics:Nexus" display_in_upload="true"/>
     </registration>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/cooccurrence.xml	Wed May 16 13:12:05 2012 -0500
@@ -0,0 +1,144 @@
+<tool id="mothur_cooccurrence" name="Cooccurrence" version="1.25.0" >
+ <description>tests whether presence-absence patterns differ from chance</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  #import re, os.path
+  --cmd='cooccurrence'
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.cooccurence\.summary$:'$out_summary
+  --outputdir='$logfile.extra_files_path'
+  --shared=$shared
+  --metric=$metric
+  --matrixmodel=$matrixmodel
+  #if len($iters.__str__) > 0 and int($iters.__str__) > 0:
+   --iters=$iters
+  #end if
+  #if $label.__str__ != "None" and len($label.__str__) > 0:
+   --label='$label'
+  #end if
+  #if $groups.__str__ != "None" and len($groups.__str__) > 0:
+    --groups=$groups
+  #end if
+ </command>
+ <inputs>
+  <param name="shared" type="data" format="shared" label="shared - OTU Shared file"/>
+  <param name="iters" type="integer" value="1000" optional="true" label="iters - Number of iterations to try (default 1000)">
+    <validator type="in_range" message="Number of iterations must be positive" min="1"/>
+  </param>
+  <param name="metric" type="select" label="metric - test metric for scoring">
+    <option value="cscore" selected="true">cscore - species segregation</option>
+    <option value="checker">checker - species segregation</option>
+    <option value="combo">combo - unique species pairs</option>
+    <option value="vratio">vratio - variance</option>
+  </param>
+  <param name="matrixmodel" type="select" label="matrixmodel - the scoring matrix" 
+         help="See the notes below on choosing a metric/matrixmodel combination">
+    <option value="sim1">sim1</option>
+    <option value="sim2" selected="true">sim2</option>
+    <option value="sim3">sim3</option>
+    <option value="sim4">sim4</option>
+    <option value="sim5">sim5</option>
+    <option value="sim6">sim6</option>
+    <option value="sim7">sim7</option>
+    <option value="sim8">sim8</option>
+    <option value="sim9">sim9</option>
+  </param>
+
+  <param name="groups" type="select" optional="true" label="groups - Groups to include" multiple="true"
+     help="By default all are included if no selection is made.">
+   <options>
+    <filter type="data_meta" ref="shared" key="groups" />
+   </options>
+  </param>
+  <param name="label" type="select" optional="true" label="label - Select OTU Labels to include" multiple="true" 
+     help="By default all are included if no selection is made.">
+   <options>
+    <filter type="data_meta" ref="shared" key="labels" />
+   </options>
+  </param>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="tabular" name="out_summary" label="${tool.name} on ${on_string}: cooccurence.summary" />
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The cooccurrence_ command calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance.   The input is a shared_ file.  The output can be filtered by groups and labels.
+
+
+
+**metric**
+
+The metric parameter options are **cscore**, **checker**, **combo** and **vratio**. Default=cscore. The cscore or checkerboard score [1] is a metric that measures species segregation. It is the mean number of checkerboard units per species pair. The checker metric [2] counts the number of species pairs forming a perfect checkerboard. The combo metric [3] is the number of unique species pairs. The vratio or variance ratio [4] is a measure of the species association calculated by the ratio of the variance in total species number to the sum of the variances of the species.  ::
+
+	[1] Stone, L., and A. Roberts. 1990. The checkerboard score and species distributions. Oecologia. 85:74-79.
+	[2] Diamond, J. M. 1975. Assembly of species communities. Pages 342-444 in M. L. Cody and J. M. Diamond, editors. Ecology and evolution of communities. Harvard University Press, Cambridge, Massachusetts, USA.
+	[3] Pielou, D. P., and E. C. Pielou. 1968. Association among species of infrequent occurrence: the insect and spider fauna of Polyporus betulinus (Bulliard) Fries. Journal of Theoretical Biology 21:202-216.
+	[4] Schluter, D. 1984. A variance test for detecting species associations, with some example applications. Ecology 65:998-1005.
+	[5] Gotelli, Nicholas J. 2000. NULL MODEL ANALYSIS OF SPECIES CO-OCCURRENCE PATTERNS. Ecology 81:2606-2621.
+
+
+**matrixmodel**
+
+The matrixmodel parameter allows you to select the model you would like to use. Options are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8 and sim9. Default=sim2.
+
+Each sim implements a different algorithm for generating null matrices with constraints on the rows (species) and columns (sites)::
+
+ ===================== ====================== ======================= ====================== 
+  Rows                  Columns equiprobable   Columns proportional    Column totals fixed   
+ ===================== ====================== ======================= ====================== 
+  Rows equiprobable     sim1                   sim6                    sim3                  
+  Rows proportional     sim7                   sim8                    sim5                  
+  Row totals fixed      sim2                   sim4                    sim9                  
+ ===================== ====================== ======================= ====================== 
+
+Equiprobable rows or columns means that each row, column or both is not dependent on the original co-occurrence matrix. Each species or site has an equal chance of occurring in the null matrix.
+Proportional rows or columns means that the proportion of occurrences in rows, columns or both in the original co-occurrence matrix are preserved but the totals may differ. Each species or site's chances of occurring are proportional to their occurrence in the original co-occurrence matrix.
+Fixed row or column totals preserves the total number of occurrences in rows, columns or both in the original co-occurrence matrix. Sim9 is a special case that is not probabilistic. Since both the row and column totals are preserved the only way to randomize the matrix is with a checkerboard swap. When a checkerboard appears in the matrix the 1s and 0s are swapped to their mirror image to preserve the species and site totals.
+
+Checkerboard::
+
+  10
+  01
+
+Swap::
+
+  01
+  10
+
+suggested metric/matrixmodel combinations::
+
+    ========  ========  ======== ========
+     cscore    checker   combo    vratio
+    ========  ========  ======== ========
+     sim9      sim9      sim9     sim2
+     sim2      sim2      sim2     sim4
+     -         -         sim4     sim8
+     -         -         sim8     -
+    ========  ========  ======== ========
+
+
+Careful readers will note that none of the suggested matrixmodels have equiprobable rows (species). This is because tests of co-occurrence are quite sensitive to the frequency of species occurrence. As such, rowtotals should be maintained or at least kept proportional in the null models. Sim9 is well suited to co-occurrence matrices that have an "island list" structure. Island lists are often found in classical ecology datasets that contain species with well defined habitat patches and are rarely degenerate (matrices that contain empty rows or columns). Sim2 is well suited for co-occurrence matrices that have a "sample list" structure. Sample list structured data are found where species have relatively homogeneous habitats and degenerate matrices are not uncommon. In these matrices species will often occur in only one site.
+The default values of cscore and sim2 have been selected because the c-score is not very sensitive to noise in the data and when used with sim9 or sim2 is not particularly prone to false positives. Sim2 has been chosen because of the prevalence of degenerate matrices. These are just guidelines, however, be sure to select a metric and matrix model that is best suited to the type of data you are analyzing.
+It should be noted that sim9 cannot be used with vratio because in sim9 both the column and row totals are maintained, hence there will be no variance.
+Please see [5] for more details on metric/null model selection.
+
+
+.. _shared: http://www.mothur.org/wiki/Shared_file
+.. _cooccurrence: http://www.mothur.org/wiki/Cooccurrence
+
+ </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/create.database.xml	Wed May 16 13:12:05 2012 -0500
@@ -0,0 +1,63 @@
+<tool id="mothur_create_database" name="Create.database" version="1.25.0" >
+ <description>creates a database file from a list, repnames, repfasta and contaxonomy file</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  --cmd='create.database'
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+?\.database$:'$database
+  --outputdir='$logfile.extra_files_path'
+  ## --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
+  ## --new_datasets='^\S+?\.(unique|[0-9.]*\.cons\.taxonomy)$:cons.taxonomy','^\S+?\.(unique|[0-9.]*\.cons\.tax\.summary)$:tax.summary'
+  --list=$otu
+  --repfasta=$repfasta
+  --repname=$repname
+  --contaxonomy=$contaxonomy
+  #if $label.__str__ != "None" and len($label.__str__) > 0:
+   --label='$label'
+  #end if
+  #if $group.__str__ != "None" and len($group.__str__) > 0:
+   --group='$group'
+  #end if
+ </command>
+ <inputs>
+  <param name="otu" type="data" format="list" label="list - OTU List "/>
+  <param name="repfasta" type="data" format="fasta" label="repfasta - rep.fasta"
+         help="fasta file output by get.oturep"/>
+  <param name="repname" type="data" format="names" label="repname - rep.names"
+         help="names file output by get.oturep"/>
+  <param name="contaxonomy" type="data" format="cons.taxonomy" label="contaxonomy - Consensus Taxonomy"
+         help="consensus taxonomy file output by classify.otu"/>
+  <param name="group" type="data" format="groups" optional="true" label="group - Groups for summary file"/>
+  <param name="label" type="select" label="label - OTU Labels" multiple="true">
+   <options>
+    <filter type="data_meta" ref="otu" key="labels" />
+   </options>
+  </param>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="tabular" name="database" label="${tool.name} on ${on_string}: database" />
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The create.database_ command reads a list_ file, .cons.taxonomy, .rep.fasta, .rep.names and optional group file, and creates a database file. 
+
+.. _list: http://www.mothur.org/wiki/List_file
+.. _create.database: http://www.mothur.org/wiki/Create.database
+
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/dist.shared.xml	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tools/mothur/dist.shared.xml	Wed May 16 13:12:05 2012 -0500
@@ -1,4 +1,4 @@
-<tool id="mothur_dist_shared" name="Dist.shared" version="1.23.0" force_history_refresh="True">
+<tool id="mothur_dist_shared" name="Dist.shared" version="1.25.0" force_history_refresh="True">
  <description>Generate a phylip-formatted dissimilarity distance matrix among multiple groups</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -9,12 +9,12 @@
    --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
    #if len($output.__str__) > 0:
     #if $output.__str__ == 'square':
-     --new_datasets='^\S+?\.([a-z]+\.(unique|[0-9.]*)\.(square|lt))\.dist$:square.dist'
+     --new_datasets='^\S+?\.([a-z]+\.(unique|[0-9.]*)\.(square|lt)(\.(ave|std))?)\.dist$:square.dist'
     #elif $output.__str__ == 'lt':
-     --new_datasets='^\S+?\.([a-z]+\.(unique|[0-9.]*)\.(square|lt))\.dist$:lower.dist'
+     --new_datasets='^\S+?\.([a-z]+\.(unique|[0-9.]*)\.(square|lt)(\.(ave|std))?)\.dist$:lower.dist'
     #end if
    #else:
-    --new_datasets='^\S+?\.([a-z]+\.(unique|[0-9.]*)\.(square|lt))\.dist$:lower.dist'
+    --new_datasets='^\S+?\.([a-z]+\.(unique|[0-9.]*)\.(square|lt)(\.(ave|std))?)\.dist$:lower.dist'
    #end if
   #end if
   --shared=$otu
@@ -27,6 +27,16 @@
   #if $calc.__str__ != "None" and len($calc.__str__) > 0:
    --calc=$calc
   #end if
+  #if $subsampling.use.__str__ == "yes":
+   #if len($subsampling.subsample.__str__) > 0 and int($subsampling.subsample.__str__) > 0:
+    --subsample=$subsampling.subsample
+   #else
+    --subsample=T
+   #end if
+   #if len($subsampling.iters.__str__) > 0 and int($subsampling.iters.__str__) > 0:
+    --iters=$subsampling.iters
+   #end if
+  #end if
   #if $output.__str__ != "None" and len($output.__str__) > 0:
    --output=$output
   #end if
@@ -87,6 +97,17 @@
    <option value="sharednseqs">sharednseqs - Utility the number of sequences in two samples</option>
    <option value="sharedobserved">sharedobserved - Utility the number of sequences in two samples</option>
   </param>
+  <conditional name="subsampling">
+   <param name="use" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="subsample"/>
+   <when value="yes">
+    <param name="subsample" type="integer" value="" optional="true" label="subsample (defaults to the size of the smallest group)"
+           help="Should not exceed the number of sequences in any group"/>
+    <param name="iters" type="integer" value="" optional="true" label="iters - Number of iterations to try (default 1000)">
+      <validator type="in_range" message="Number of iterations must be positive" min="1"/>
+    </param>
+   </when>
+   <when value="no"/>
+  </conditional> <!-- subsampling -->
   <param name="output" type="select" label="output - Distance Matrix Output Format" help="A Distance Matrix will be generated for each calculator label pair">
    <option value="lt">Phylip formatted Lower Triangle Matrix</option>
    <option value="square">Phylip formatted Square Matrix</option>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/make.biom.xml	Wed May 16 13:12:05 2012 -0500
@@ -0,0 +1,72 @@
+<tool id="mothur_make_biom" name="Make.biom" version="1.25.0" force_history_refresh="True">
+ <description>Make biom files from a shared file</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  #import re, os.path
+  --cmd='make.biom'
+  --result='^mothur.\S+\.logfile$:'$logfile
+  --outputdir='$logfile.extra_files_path'
+  --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
+  --new_datasets='^\S+?\.((\S+)\.biom)$:biom'
+  --shared=$shared
+  #if $contaxonomy.__str__ != "None" and len($contaxonomy.__str__) > 0:
+   --contaxonomy=$contaxonomy
+  #end if
+  #if $label.__str__ != "None" and len($label.__str__) > 0:
+   --label='$label'
+  #end if
+  #if $groups.__str__ != "None" and len($groups.__str__) > 0:
+    --groups=$groups
+  #end if
+  --matrixtype=$matrixtype
+ </command>
+ <inputs>
+  <param name="shared" type="data" format="shared" label="shared - OTU Shared file"/>
+  <param name="contaxonomy" type="data" format="cons.taxonomy" optional="true" label="contaxonomy - consensus taxonomy"
+         help="The contaxonomy file is the taxonomy file outputted by classify.otu"/>
+  <param name="matrixtype" type="select" label="matrixtype - sparse or dense">
+    <option value="sparse">sparse</option>
+    <option value="dense">dense</option>
+  </param>
+  <param name="groups" type="select" label="groups - Groups to include" multiple="true"
+     help="By default all are included if no selection is made.">
+   <options>
+    <filter type="data_meta" ref="shared" key="groups" />
+   </options>
+  </param>
+  <param name="label" type="select" optional="true" label="label - Select OTU Labels to include" multiple="true" 
+     help="By default all are included if no selection is made.">
+   <options>
+    <filter type="data_meta" ref="shared" key="labels" />
+   </options>
+  </param>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The make.biom_ command converts a shared_ file to biom_ files. 
+The output can be filtered by groups and labels.
+
+
+.. _shared: http://www.mothur.org/wiki/Shared_file
+.. _biom:  http://biom-format.org/documentation/biom_format.html
+.. _make.biom: http://www.mothur.org/wiki/Make.biom
+
+ </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/make.shared_from_biom.xml	Wed May 16 13:12:05 2012 -0500
@@ -0,0 +1,59 @@
+<tool id="mothur_make_shared_from_biom" name="Make.shared " version="1.25.0" force_history_refresh="True">
+ <description>Make a shared file from a biom file</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  #import re, os.path
+  --cmd='make.shared'
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.shared$:'$shared
+  --outputdir='$logfile.extra_files_path'
+  #if $as_datasets.__str__ == "yes":
+   --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
+   --new_datasets='^\S+?\.((\S+)\.rabund)$:rabund'
+  #end if
+  --biom=$biom
+  #if $label.__str__ != "None" and len($label.__str__) > 0:
+   --label='$label'
+  #end if
+  #if $groups.__str__ != "None" and len($groups.__str__) > 0:
+    --groups=$groups
+  #end if
+ </command>
+ <inputs>
+  <param name="biom" type="data" format="biom" label="biom -  The Biological Observation Matrix"/>
+  <param name="label" type="text" size="40" label="label - to restrict to these OTU Labels" 
+     help="multiple labels separated by hyphens, example: unique-0.02-0.04"/>
+  <param name="groups" type="text" size="40" label="groups - restrict to these groups"
+     help="multiple groups separated by hyphens, example: control-group1-group2"/>
+  <param name="as_datasets" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Create a new history dataset for each group rabund"/>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="shared" name="shared" label="${tool.name} on ${on_string}: shared" />
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
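+The make.shared_ command takes a biom_ file and outputs a shared_ file, as well as a rabund_ file for each group.  (It can alternatively take a list_ and a group_ file.)
+
+.. _biom: http://biom-format.org/documentation/biom_format.html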
+.. _list: http://www.mothur.org/wiki/List_file
+.. _group: http://www.mothur.org/wiki/Group_file
+.. _shared: http://www.mothur.org/wiki/Shared_file
+.. _rabund: http://www.mothur.org/wiki/Rabund_file
+.. _make.shared: http://www.mothur.org/wiki/Make.shared
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/mothur_wrapper.py	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tools/mothur/mothur_wrapper.py	Wed May 16 13:12:05 2012 -0500
@@ -202,14 +202,20 @@
     cmd_dict['collect.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','freq','groups','all']})
     cmd_dict['collect.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','size','label','freq']})
     cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label','cutoff']})
+
+    cmd_dict['cooccurrence'] = dict({'required' : ['shared'], 'optional' : ['iters','metric','matrixmodel','groups','label']})
+
     cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']})
     cmd_dict['count.groups'] = dict({'required' : ['group','shared'], 'optional' : ['accnos','groups']})
     cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups']})
+
+    cmd_dict['create.database'] = dict({'required' : ['list','repfasta','repname','contaxonomy'], 'optional' : ['group','label']})
+
     cmd_dict['degap.seqs'] = dict({'required' : ['fasta']})
     cmd_dict['deunique.seqs'] = dict({'required' : ['fasta','name'],  'optional' : []})
     cmd_dict['deunique.tree'] = dict({'required' : ['tree','name'],  'optional' : []})
     cmd_dict['dist.seqs'] = dict({'required' : ['fasta'],  'optional' : ['calc','countends','output','cutoff','oldfasta','column','processors']})
-    cmd_dict['dist.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','output','processors']})
+    cmd_dict['dist.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','output','subsample','iters','processors']})
     cmd_dict['fastq.info'] = dict({'required' : ['fastq'], 'optional' : ['fasta','qfile']})
     cmd_dict['filter.seqs'] = dict({'required' : ['fasta'],  'optional' : ['vertical','trump','soft','hard','processors']})
     cmd_dict['get.group'] = dict({'required' : ['shared'], 'optional' : []})
@@ -230,9 +236,12 @@
     cmd_dict['indicator'] = dict({'required' : [['tree','design'],['shared','relabund']], 'optional' : ['groups','label','processors']})
     cmd_dict['libshuff'] = dict({'required' : ['phylip','group'],'optional' : ['groups','iters','form','sim','step','cutoff']})
     cmd_dict['list.seqs'] = dict({'required' : [['fasta','name','group','list','alignreport','taxonomy']]})
+
+    cmd_dict['make.biom'] = dict({'required' : ['shared'] ,  'optional' : ['contaxonomy','matrixtype','groups','label']})
+
     cmd_dict['make.fastq'] = dict({'required' : ['fasta','qfile'] ,  'optional' : []})
     cmd_dict['make.group'] = dict({'required' : ['fasta','groups'],  'optional' : []})
-    cmd_dict['make.shared'] = dict({'required' : ['list','group'],  'optional' : ['label','groups']})
+    cmd_dict['make.shared'] = dict({'required' : [['list','group','biom']],  'optional' : ['label','groups']})
     cmd_dict['mantel'] = dict({'required' : ['phylip','phylip2'] ,  'optional' : ['method','iters']})
     cmd_dict['merge.files'] = dict({'required' : ['input','output']})
     cmd_dict['merge.groups'] = dict({'required' : [['shared','group'],'design'],  'optional' : ['groups', 'label']})
@@ -246,6 +255,9 @@
     cmd_dict['parsimony'] = dict({'required' : ['tree'], 'optional' : ['group','groups','name','iters','random','processors']})
     cmd_dict['pca'] = dict({'required' : [['shared','relabund']], 'optional' : ['label','groups','metric']})
     cmd_dict['pcoa'] = dict({'required' : ['phylip'], 'optional' : ['metric']})
+
+    cmd_dict['pcr.seqs'] = dict({'required' : ['fasta'], 'optional' : ['oligos','name','group','taxonomy','ecoli','start','end','nomatch','keepprimer','keepdots','processors']})
+
     cmd_dict['phylo.diversity'] = dict({'required' : ['tree'],'optional' : ['group','name','groups','iters','freq','scale','rarefy','collect','summary','processors']})
     cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']})
     cmd_dict['pre.cluster'] = dict({'required' : ['fasta'],  'optional' : ['name','diffs','group','processors']})
@@ -272,11 +284,11 @@
     cmd_dict['summary.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','all','distance','processors']})
     cmd_dict['summary.single'] = dict({'required' : [['list','sabund','rabund','shared']], 'optional' : ['calc','abund','size','label','groupmode']})
     cmd_dict['summary.tax'] = dict({'required' : ['taxonomy'], 'optional' : ['name','group','reftaxonomy']})
-    cmd_dict['tree.shared'] = dict({'required' : [['shared','phylip','column']], 'optional' : ['name','groups','calc','cutoff','precision','label']})
+    cmd_dict['tree.shared'] = dict({'required' : [['shared','phylip','column']], 'optional' : ['name','groups','calc','cutoff','precision','label','subsample','iters','processors']})
     cmd_dict['trim.flows'] = dict({'required' : ['flow'],  'optional' : ['oligos','bdiffs','pdiffs','tdiffs','ldiffs','sdiffs','minflows','maxflows','fasta','signal','noise','maxhomop','order','processors']})
     cmd_dict['trim.seqs'] = dict({'required' : ['fasta'],  'optional' : ['name','group','oligos','qfile','qaverage','qthreshold','qwindowaverage','qwindowsize','rollaverage','qstepsize','qtrim','flip','maxambig','maxhomop','minlength','maxlength','bdiffs','pdiffs','tdiffs','ldiffs','sdiffs','keepforward','allfiles','keepfirst','removelast','processors']})
-    cmd_dict['unifrac.unweighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']})
-    cmd_dict['unifrac.weighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']})
+    cmd_dict['unifrac.unweighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','subsample','consensus','processors']})
+    cmd_dict['unifrac.weighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','subsample','consensus','processors']})
     cmd_dict['unique.seqs'] = dict({'required' : ['fasta'],  'optional' : ['name']})
     cmd_dict['venn'] = dict({'required' : [['list','shared']], 'optional' : ['calc','label','groups','abund','nseqs','permute','fontsize']})
 
@@ -320,6 +332,7 @@
     parser.add_option( '--fasta', dest='fasta', help='fasta file paths' )
     parser.add_option( '--fastq', dest='fastq', help='fastq file paths' )
     parser.add_option( '--qfile', dest='qfile', help='Sequence read quality file (454 platform)' )
+    parser.add_option( '--repfasta', dest='repfasta', help='fasta file paths' )
     parser.add_option( '--qaverage', dest='qaverage', type="int", help='Remove sequences that have an average quality below the value' )
     parser.add_option( '--qthreshold', dest='qthreshold', type="int", help='If at any point a base call in a sequence has a quality score below the value provided to the option, the sequence is terminated' )
     parser.add_option( '--qwindowaverage', dest='qwindowaverage', type="int", help='Remove sequences that have a window average quality below the value' )
@@ -343,6 +356,7 @@
     parser.add_option( '--allfiles', dest='allfiles', help='T - generate fasta and group for each barcode' )
     parser.add_option( '--keepforward', dest='keepforward', help='T - keep primer' )
     parser.add_option( '--name', dest='name', help='A file containing a 2 column table: name, and comma separated list of represetatives' )
+    parser.add_option( '--repname', dest='repname', help='A file containing a 2 column table: name, and comma separated list of represetatives' )
     parser.add_option( '--accnos', dest='accnos', help='A file containing a list of names' )
     parser.add_option( '--groups', dest='groups', help='pairwise group labels' )
     parser.add_option( '--group', dest='group', help='A file containing a list of names' )
@@ -351,6 +365,7 @@
     parser.add_option( '--report', dest='report', help='' )
     parser.add_option( '--taxonomy', dest='taxonomy', help='A Taxonomy file' )
     parser.add_option( '--reftaxonomy', dest='reftaxonomy', help='A Taxonomy file' )
+    parser.add_option( '--contaxonomy', dest='contaxonomy', help='The Taxonomy file output by classify.otu' )
     parser.add_option( '--taxon', dest='taxon',  help='A Taxon' )
     parser.add_option( '--taxlevel', dest='taxlevel', type="int", help='A Taxonomy level' )
     # parser.add_option( '--taxon', dest='taxon', action="callback", callback=remove_confidence_callback, help='A Taxon' )
@@ -401,6 +416,7 @@
     parser.add_option( '--iters', dest='iters', type='int', help='Iterations of randomizations' )
     parser.add_option( '--maxiter', dest='maxiter', type='int', help='Iterations' )
     parser.add_option( '--maxiters', dest='maxiters', type='int', help='Iterations of randomizations' )
+    parser.add_option( '--subsample', dest='subsample', help='Number of subsample, or T to default to smallest group size' )
     parser.add_option( '--jumble', dest='jumble',  help='If false, just a collector curve across the samples' )
     parser.add_option( '--conservation', dest='conservation',  help='Template frequency information' )
     parser.add_option( '--quantile', dest='quantile',  help='Template quantile information' )
@@ -461,6 +477,7 @@
     parser.add_option( '--design', dest='design', help='' )
     parser.add_option( '--sets', dest='sets', help='' )
     parser.add_option( '--metric', dest='metric', help='' )
+    parser.add_option( '--matrixmodel', dest='matrixmodel', help='' )
     parser.add_option( '--epsilon', dest='epsilon', help='' )
     parser.add_option( '--alpha', dest='alpha', help='' )
     parser.add_option( '--root', dest='root', help='' )
@@ -491,6 +508,13 @@
     parser.add_option( '--queryfract', dest='queryfract', type="float", help='')
     parser.add_option( '--minlen', dest='minlen', type="int", help='Minimun sequence length' )
     parser.add_option( '--maxlen', dest='maxlen', type="int", help='Maximun sequence length' )
+    parser.add_option( '--ecoli', dest='ecoli',  help='ecoli reference fasta' )
+    parser.add_option( '--nomatch', dest='nomatch',  help='What to do with non matching items' )
+    parser.add_option( '--keepprimer', dest='keepprimer',  help='Whether to retain the primer' )
+    parser.add_option( '--keepdots', dest='keepdots',  help='Whether to retain dots in the sequence' )
+    parser.add_option( '--matrixtype', dest='matrixtype',  help='biom matrix type: sparse or dense' )
+    parser.add_option( '--consensus', dest='consensus',  help='boolean' )
+    parser.add_option( '--biom', dest='biom',  help='biom file' )
     # include read.otu options
     parser.add_option( '--rabund', dest='rabund', help='' )
     parser.add_option( '--sabund', dest='sabund', help='' )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/pcr.seqs.xml	Wed May 16 13:12:05 2012 -0500
@@ -0,0 +1,123 @@
+<tool id="mothur_pcr_seqs" name="Pcr.seqs" version="1.25.0">
+ <description>Trim sequences</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  --cmd='pcr.seqs'
+  #import re, os.path
+  #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__]
+  ## adds .pcr before the last extension to the input file
+  #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1.pcr.\2',$os.path.basename($fasta.__str__)) + ":'" + $pcr_fasta.__str__]
+  #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1.pcr.scrap.\2',$os.path.basename($fasta.__str__)) + ":'" + $scrap_fasta.__str__]
+  --outputdir='$logfile.extra_files_path'
+  --fasta=$fasta
+  #if $name_in.__str__ != "None" and len($name_in.__str__) > 0:
+   --name=$name_in
+   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1.pcr.\2',$os.path.basename($name_in.__str__)) + ":'" + $name_out.__str__]
+  #end if
+  #if $group_in.__str__ != "None" and len($group_in.__str__) > 0:
+   --group=$group_in
+   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1.pcr.\2',$os.path.basename($group_in.__str__)) + ":'" + $group_out.__str__]
+  #end if
+  #if $taxonomy_in.__str__ != "None" and len($taxonomy_in.__str__) > 0:
+   --taxonomy=$taxonomy_in
+   #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1.pcr.\2',$os.path.basename($taxonomy_in.__str__)) + ":'" + $taxonomy_out.__str__]
+  #end if
+  #if $trim.method == 'oligos': 
+   --oligos=$trim.oligos
+   --nomatch=$trim.nomatch
+   $trim.keepprimer
+  #elif $trim.method == 'reference': 
+   --ecoli=$trim.ecoli
+  #elif $trim.method == 'position': 
+   #if $trim.start.__str__ != '' and int($trim.start.__str__) > 0:
+    --start=$trim.start
+   #end if
+   #if $trim.end.__str__ != '' and int($trim.end.__str__) > 0:
+    --end=$trim.end
+   #end if
+  #end if
+  $keepdots
+  --result=#echo ','.join($results)
+  --processors=8
+ </command>
+ <inputs>
+  <param name="fasta" type="data" format="fasta" label="fasta - Candidate Sequences"/>
+
+  <conditional name="trim">
+   <param name="method" type="select" label="Trim with an oligos file?" help="">
+    <option value="oligos">oligos</option>
+    <option value="reference">reference sequence</option>
+    <option value="position">start and end positions</option>
+   </param>
+   <when value="oligos">
+    <param name="oligos" type="data" format="oligos" label="oligos - barcodes and primers"
+           help="a file that can contain the sequences of the forward and reverse primers and barcodes and their sample identifier. 
+                Each line of the oligos file can start with the key words &quot;forward&quot;, &quot;reverse&quot;, 
+                and &quot;barcode&quot; or it can start with a &quot;#&quot; to tell mothur to ignore that line of the oligos file.  "/>
+    <param name="nomatch" type="select" label="nomatch - action when no primer is found" 
+           help="">
+     <option value="reject" selected="true">reject (default)</option>
+     <option value="keep">keep</option>
+    </param>
+    <param name="keepprimer" type="boolean" falsevalue="" truevalue="--keepprimer=true" checked="false" 
+           label="keepprimer - keep the primer in the output sequence"/>
+   </when>
+   <when value="reference">
+    <param name="ecoli" type="data" format="fasta" label="ecoli - An aligned reference sequence for trimming"
+         help="The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence."/>
+   </when>
+   <when value="position">
+    <param name="start" type="integer" value="" optional="true" label="start - a starting position to trim to">
+     <validator type="in_range" message="Starting position can't be negative" min="0"/>
+    </param>
+    <param name="end" type="integer" value="" optional="true" label="end - an ending position to trim from">
+     <validator type="in_range" message="Ending position can't be negative" min="0"/>
+    </param>
+   </when>
+  </conditional> <!-- trimtype -->
+
+  <param name="keepdots" type="boolean" falsevalue="--keepdots=false" truevalue="" checked="true" 
+         label="keepdots - keep the leading and trailing alignment dots in the output sequences"/>
+  <param name="taxonomy_in" type="data" format="seq.taxonomy" optional="true" label="taxonomy - Sequence Taxonomy"/>
+  <param name="name_in" type="data" format="names" optional="true" label="name - Sequence representative name list"/>
+  <param name="group_in" type="data" format="groups" optional="true" label="group - Group file"/>
+
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format_source="fasta" name="pcr_fasta" label="${tool.name} on ${on_string}: pcr.fasta" />
+  <data format_source="fasta" name="scrap_fasta" label="${tool.name} on ${on_string}: pcr.scrap.fasta" />
+  <data format="seq.taxonomy" name="taxonomy_out" label="${tool.name} on ${on_string}: pcr.taxonomy" >
+    <filter>taxonomy_in != None</filter>
+  </data>
+  <data format="groups" name="group_out" label="${tool.name} on ${on_string}: pcr.groups" >
+    <filter>group_in != None</filter>
+  </data>
+  <data format="names" name="name_out" label="${tool.name} on ${on_string}: pcr.names" >
+    <filter>name_in != None</filter>
+  </data>
+
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The pcr.seqs_ command trims the input sequences to the region selected by an oligos file, an aligned reference sequence, or start and end positions.
+
+.. _pcr.seqs: http://www.mothur.org/wiki/Pcr.seqs
+
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/tree.shared.xml	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tools/mothur/tree.shared.xml	Wed May 16 13:12:05 2012 -0500
@@ -1,4 +1,4 @@
-<tool id="mothur_tree_shared" name="Tree.shared" version="1.23.0" force_history_refresh="True">
+<tool id="mothur_tree_shared" name="Tree.shared" version="1.25.0" force_history_refresh="True">
  <description>Generate a newick tree for dissimilarity among groups</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -7,7 +7,7 @@
    --result='^mothur.\S+\.logfile$:'$logfile
    #if $input.as_datasets.__str__ == "yes":
     --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
-    --new_datasets='^\S+?([a-z]+\.(unique|[0-9.]*)\.tre)$:tre'
+    --new_datasets='^\S+?([a-z]+\.(unique|[0-9.]*)(\.(all|ave|cons))?\.tre)$:tre'
    #end if
    --shared=$input.dist
    #if $input.groups.__str__ != "None" and len($input.groups.__str__) > 0:
@@ -16,6 +16,16 @@
    #if $input.label.__str__ != "None" and len($input.label.__str__) > 0:
     --label='$input.label'
    #end if
+   #if $input.subsampling.use:
+    #if len($input.subsampling.subsample.__str__) > 0 and int($input.subsampling.subsample.__str__) > 0:
+     --subsample=$input.subsampling.subsample
+    #else
+     --subsample=T
+    #end if
+    #if len($input.subsampling.iters.__str__) > 0 and int($input.subsampling.iters.__str__) > 0:
+     --iters=$input.subsampling.iters
+    #end if
+   #end if
   #else: 
    --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.tre$:'$tre
    --outputdir='$logfile.extra_files_path'
@@ -32,6 +42,7 @@
   #if $calc.__str__ != "None" and len($calc.__str__) > 0:
    --calc=$calc
   #end if
+  --processors=8
  </command>
  <inputs>
   <!-- column,name  phylip  or shared -->
@@ -62,6 +73,17 @@
        <filter type="data_meta" ref="dist" key="groups" />
       </options>
      </param>
+     <conditional name="subsampling">
+      <param name="use" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="subsample"/>
+      <when value="yes">
+       <param name="subsample" type="integer" value="" optional="true" label="subsample (defaults to the size of the smallest group)"
+              help="Should not exceed the number of sequences in any group"/>
+       <param name="iters" type="integer" value="" optional="true" label="iters - Number of iterations to try (default 1000)">
+         <validator type="in_range" message="Number of iterations must be positive" min="1"/>
+       </param>
+      </when>
+      <when value="no"/>
+     </conditional> <!-- subsampling -->
    </when>
   </conditional>
   <param name="calc" type="select" label="calc - Calculators (Uses defaults if none selected)" multiple="true">
--- a/mothur/tools/mothur/trim.flows.xml	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tools/mothur/trim.flows.xml	Wed May 16 13:12:05 2012 -0500
@@ -1,4 +1,4 @@
-<tool id="mothur_trim_flows" name="Trim.flows" version="1.24.0" force_history_refresh="True">
+<tool id="mothur_trim_flows" name="Trim.flows" version="1.22.0" force_history_refresh="True">
  <description>partition by barcode, trim to length, cull by lenght and mismatches</description>
  <command interpreter="python">
   mothur_wrapper.py 
--- a/mothur/tools/mothur/unifrac.unweighted.xml	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tools/mothur/unifrac.unweighted.xml	Wed May 16 13:12:05 2012 -0500
@@ -1,9 +1,16 @@
-<tool id="mothur_unifrac_unweighted" name="unifrac.unweighted" version="1.19.0">
+<tool id="mothur_unifrac_unweighted" name="unifrac.unweighted" version="1.25.0">
  <description>Describes whether two or more communities have the same structure</description>
  <command interpreter="python">
   mothur_wrapper.py 
+  #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__, "'^\S+\.uwsummary$:'" + $summary.__str__]
+  #set results = $results + ["'^\S+\.unweighted$:'" + $unweighted.__str__]
+  #set results = $results + ["'^\S+\.unweighted\.(column\.|phylip\.)?dist$:'" + $dist.__str__]
+  #set results = $results + ["'^\S+\.unweighted\.ave\.dist$:'" + $ave_dist.__str__]
+  #set results = $results + ["'^\S+\.unweighted\.std\.dist$:'" + $std_dist.__str__]
+  #set results = $results + ["'^\S+\.unweighted\.cons\.tre$:'" + $cons_tree.__str__]
+  #set results = $results + ["'^\S+\.unweighted\.all\.tre$:'" + $all_tree.__str__]
   --cmd='unifrac.unweighted'
-  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.uwsummary$:'$summary,'^\S+\.unweighted\.(column\.|phylip\.)?dist$:'$dist,'^\S+\.unweighted$:'$unweighted
+  ## --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.uwsummary$:'$summary,'^\S+\.unweighted\.(column\.|phylip\.)?dist$:'$dist,'^\S+\.unweighted$:'$unweighted,'^\S+\.unweighted\.ave\.dist$:'$ave_dist,'^\S+\.unweighted\.std\.dist$:'$std_dist,'^\S+\.unweighted\.all\.tre$:'$all_tree,'^\S+\.unweighted\.cons\.tre$:'$cons_tree,
   --outputdir='$logfile.extra_files_path'
   --tree=$tree
   #if $group.__str__ != "None" and len($group.__str__) > 0:
@@ -18,15 +25,24 @@
   #if int($iters.__str__) > 0:
    --iters=$iters
   #end if
-  $random
+  #if $subsampling.use:
+   #if len($subsampling.subsample.__str__) > 0 and int($subsampling.subsample.__str__) > 0:
+    --subsample=$subsampling.subsample
+   #else
+    --subsample=T
+   #end if
+   $subsampling.consensus
+  #else
+   $random
+  #end if
   #if $distance.__str__ != "false":
    --distance=$distance
   #end if
   $root
-  --processors=8
+  --result=#echo ','.join($results)
+  --processors=1
  </command>
  <inputs>
-  <!-- list,group  or shared -->
   <param name="tree" type="data" format="tre" label="tree - Tree"/>
   <param name="group" type="data" format="groups" label="group - Group file for the tree"/>
   <param name="groups" type="select" label="groups - Select groups for pairwise comparisons" multiple="true">
@@ -38,6 +54,18 @@
   <param name="name" type="data" format="names" optional="true" label="name - Names file for the tree"/>
   <param name="iters" type="integer" value="1000" label="iters - Number of iterations to try (default 1000)"/>
   <param name="random" type="boolean" truevalue="--random=true" falsevalue="" checked="false" label="random - Compare your trees with randomly generated trees" />
+  <!-- NOTE: random cannot be used with subsample option, handle in command logic -->
+  <conditional name="subsampling">
+   <param name="use" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="use subsampling of groups"
+          help="(instead of randomly generated comparisons)"/>
+   <when value="yes">
+    <param name="subsample" type="integer" value="" optional="true" label="subsample - (defaults to the size of the smallest group)"
+           help="the size per group of the sample"/>
+    <param name="consensus" type="boolean" truevalue="--consensus=true" falsevalue="" checked="false" label="consensus - " 
+           help="The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F"/>
+   </when>
+   <when value="no"/>
+  </conditional> <!-- subsampling -->
   <param name="distance" type="select" label="distance - Create a distance matrix for your history">
    <option value="false">None</option>
    <option value="lt">Phylip Lower Triangle Matrix</option>
@@ -51,7 +79,7 @@
   <data format="tabular" name="summary" label="${tool.name} on ${on_string}: summary">
   </data>
   <data format="tabular" name="unweighted" label="${tool.name} on ${on_string}: unweighted">
-   <filter>(random == True)</filter>
+   <filter>(random == True and subsampling['use'] == False)</filter>
   </data>
   <data format="lower.dist" name="dist" label="${tool.name} on ${on_string}: dist">
    <filter>distance != 'false'</filter>
@@ -60,6 +88,26 @@
     <when input="distance" value="column" format="pair.dist" />
    </change_format>
   </data>
+  <data format="lower.dist" name="ave_dist" label="${tool.name} on ${on_string}: ave.dist">
+   <filter>subsampling['use'] == True</filter>
+   <change_format>
+    <when input="distance" value="square" format="square.dist" />
+    <when input="distance" value="column" format="pair.dist" />
+   </change_format>
+  </data>
+  <data format="lower.dist" name="std_dist" label="${tool.name} on ${on_string}: std.dist">
+   <filter>subsampling['use'] == True</filter>
+   <change_format>
+    <when input="distance" value="square" format="square.dist" />
+    <when input="distance" value="column" format="pair.dist" />
+   </change_format>
+  </data>
+  <data format="tre" name="all_tree" label="${tool.name} on ${on_string}: all.tre">
+   <filter>(subsampling['use'] == True and subsampling['consensus'] == True)</filter>
+  </data>
+  <data format="tre" name="cons_tree" label="${tool.name} on ${on_string}: cons.tre">
+   <filter>(subsampling['use'] == True and subsampling['consensus'] == True)</filter>
+  </data>
  </outputs>
  <requirements>
   <requirement type="binary">mothur</requirement>
--- a/mothur/tools/mothur/unifrac.weighted.xml	Wed May 16 12:28:44 2012 -0500
+++ b/mothur/tools/mothur/unifrac.weighted.xml	Wed May 16 13:12:05 2012 -0500
@@ -1,9 +1,16 @@
-<tool id="mothur_unifrac_weighted" name="unifrac.weighted" version="1.19.0">
+<tool id="mothur_unifrac_weighted" name="unifrac.weighted" version="1.25.0">
  <description>Describes whether two or more communities have the same structure</description>
  <command interpreter="python">
   mothur_wrapper.py 
+  #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__, "'^\S+\.wsummary$:'" + $summary.__str__]
+  #set results = $results + ["'^\S+\.weighted$:'" + $weighted.__str__]
+  #set results = $results + ["'^\S+\.weighted\.(column\.|phylip\.)?dist$:'" + $dist.__str__]
+  #set results = $results + ["'^\S+\.weighted\.ave\.dist$:'" + $ave_dist.__str__]
+  #set results = $results + ["'^\S+\.weighted\.std\.dist$:'" + $std_dist.__str__]
+  #set results = $results + ["'^\S+\.weighted\.cons\.tre$:'" + $cons_tree.__str__]
+  #set results = $results + ["'^\S+\.weighted\.all\.tre$:'" + $all_tree.__str__]
   --cmd='unifrac.weighted'
-  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.wsummary$:'$summary,'^\S+\.weighted\.(column\.|phylip\.)?dist$:'$dist,'^\S+\.weighted$:'$weighted
+  ## --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.wsummary$:'$summary,'^\S+\.weighted\.(column\.|phylip\.)?dist$:'$dist,'^\S+\.weighted$:'$weighted
   --outputdir='$logfile.extra_files_path'
   --tree=$tree
   #if $group.__str__ != "None" and len($group.__str__) > 0:
@@ -18,11 +25,21 @@
   #if int($iters.__str__) > 0:
    --iters=$iters
   #end if
-  $random
+  #if $subsampling.use:
+   #if len($subsampling.subsample.__str__) > 0 and int($subsampling.subsample.__str__) > 0:
+    --subsample=$subsampling.subsample
+   #else
+    --subsample=T
+   #end if
+   $subsampling.consensus
+  #else
+   $random
+  #end if
   #if $distance.__str__ != "false":
    --distance=$distance
   #end if
   $root
+  --result=#echo ','.join($results)
   --processors=8
  </command>
  <inputs>
@@ -37,7 +54,18 @@
   <param name="name" type="data" format="names" optional="true" label="name - Names file for the tree"/>
   <param name="iters" type="integer" value="1000" label="iters - Number of iterations to try (default 1000)"/>
   <param name="random" type="boolean" truevalue="--random=true" falsevalue="" checked="false" label="random - Compare your trees with randomly generated trees" />
-  <param name="distance" type="boolean" truevalue="--distance=true" falsevalue="" checked="false" label="distance - Add the distance matrix to your history" />
+  <!-- NOTE: random cannot be used with subsample option, handle in command logic -->
+  <conditional name="subsampling">
+   <param name="use" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="use subsampling of groups"
+          help="(instead of randomly generated comparisons)"/>
+   <when value="yes">
+    <param name="subsample" type="integer" value="" optional="true" label="subsample - (defaults to the size of the smallest group)"
+           help="the size per group of the sample"/>
+    <param name="consensus" type="boolean" truevalue="--consensus=true" falsevalue="" checked="false" label="consensus - " 
+           help="The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F"/>
+   </when>
+   <when value="no"/>
+  </conditional> <!-- subsampling -->
   <param name="distance" type="select" label="distance - Create a distance matrix for your history">
    <option value="false">None</option>
    <option value="lt">Phylip Lower Triangle Matrix</option>
@@ -51,7 +79,7 @@
   <data format="tabular" name="summary" label="${tool.name} on ${on_string}: summary">
   </data>
   <data format="tabular" name="weighted" label="${tool.name} on ${on_string}: weighted">
-   <filter>(random == True)</filter>
+   <filter>(random == True and subsampling['use'] == False)</filter>
   </data>
   <data format="lower.dist" name="dist" label="${tool.name} on ${on_string}: dist">
    <filter>distance != 'false'</filter>
@@ -60,10 +88,26 @@
     <when input="distance" value="column" format="pair.dist" />
    </change_format>
   </data>
-  <!-- random uses input prompts, not sure how to model that
-  <data format="tabular" name="random" label="${tool.name} on ${on_string}: random">
+  <data format="lower.dist" name="ave_dist" label="${tool.name} on ${on_string}: ave.dist">
+   <filter>subsampling['use'] == True</filter>
+   <change_format>
+    <when input="distance" value="square" format="square.dist" />
+    <when input="distance" value="column" format="pair.dist" />
+   </change_format>
   </data>
-  -->
+  <data format="lower.dist" name="std_dist" label="${tool.name} on ${on_string}: std.dist">
+   <filter>subsampling['use'] == True</filter>
+   <change_format>
+    <when input="distance" value="square" format="square.dist" />
+    <when input="distance" value="column" format="pair.dist" />
+   </change_format>
+  </data>
+  <data format="tre" name="all_tree" label="${tool.name} on ${on_string}: all.tre">
+   <filter>(subsampling['use'] == True and subsampling['consensus'] == True)</filter>
+  </data>
+  <data format="tre" name="cons_tree" label="${tool.name} on ${on_string}: cons.tre">
+   <filter>(subsampling['use'] == True and subsampling['consensus'] == True)</filter>
+  </data>
  </outputs>
  <requirements>
   <requirement type="binary">mothur</requirement>