changeset 15:a6189f58fedb

Mothur - updated for Mothur version 1.22.0
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 11:45:32 -0600
parents ee59e5cff3ba
children 541e3c97c240
files mothur/README mothur/lib/galaxy/datatypes/metagenomics.py mothur/tool-data/mothur_aligndb.loc mothur/tool-data/mothur_aligndb.loc.sample mothur/tool-data/mothur_calculators.loc mothur/tool-data/mothur_calculators.loc.sample mothur/tool-data/mothur_calulators.loc mothur/tool-data/mothur_lookup.loc.sample mothur/tool-data/mothur_map.loc mothur/tool-data/mothur_map.loc.sample mothur/tool-data/mothur_taxonomy.loc mothur/tool-data/mothur_taxonomy.loc.sample mothur/tools/mothur/chimera.slayer.xml mothur/tools/mothur/chimera.uchime.xml mothur/tools/mothur/cluster.split.xml mothur/tools/mothur/count.groups.xml mothur/tools/mothur/make.shared.xml mothur/tools/mothur/mothur_wrapper.py mothur/tools/mothur/pre.cluster.xml mothur/tools/mothur/seq.error.xml mothur/tools/mothur/sffinfo.xml mothur/tools/mothur/shhh.flows.xml mothur/tools/mothur/sub.sample.xml mothur/tools/mothur/summary.tax.xml mothur/tools/mothur/trim.flows.xml mothur/tools/mothur/unifrac.weighted.xml
diffstat 26 files changed, 894 insertions(+), 129 deletions(-) [+]
line wrap: on
line diff
--- a/mothur/README	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/README	Tue Nov 08 11:45:32 2011 -0600
@@ -1,7 +1,9 @@
 Provides galaxy tools for the Mothur metagenomics package -  http://www.mothur.org/wiki/Main_Page 
 
-Install mothur v.1.20.0 on your galaxy system so galaxy can execute the mothur command
-  ( This version of wrappers is designed for Mothur version 1.19 - it may work on later versions )
+(The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of cpu processors used by mothur commands)
+
+Install mothur v.1.22.0 on your galaxy system so galaxy can execute the mothur command
+  ( This version of wrappers is designed for Mothur version 1.22 - it may work on later versions )
   http://www.mothur.org/wiki/Download_mothur
   http://www.mothur.org/wiki/Installation
   ( This Galaxy Mothur wrapper will invoke Mothur in command line mode: http://www.mothur.org/wiki/Command_line_mode )
@@ -45,6 +47,8 @@
   SILVA-compatible mask:
      - lane1349.silva.filter - Pat Schloss's transcription of the mask from the Lane paper
        http://www.mothur.org/w/images/6/6d/Lane1349.silva.filter
+ Lookup Files for sff flow analysis using shhh.flows:
+  http://www.mothur.org/wiki/Alignment_database
 
  Example from UMN installation: (We also made these available in a Galaxy public data library)
     /project/db/galaxy/mothur/Silva.bacteria.zip
@@ -117,6 +121,9 @@
         <datatype extension="pair.dist" type="galaxy.datatypes.metagenomics:PairwiseDistanceMatrix" display_in_upload="true"/>
         <datatype extension="square.dist" type="galaxy.datatypes.metagenomics:SquareDistanceMatrix" display_in_upload="true"/>
         <datatype extension="lower.dist" type="galaxy.datatypes.metagenomics:LowerTriangleDistanceMatrix" display_in_upload="true"/>
+        <datatype extension="ref.taxonomy" type="galaxy.datatypes.metagenomics:RefTaxonomy" display_in_upload="true">
+            <converter file="ref_to_seq_taxonomy_converter.xml" target_datatype="seq.taxonomy"/>
+        </datatype>
         <datatype extension="seq.taxonomy" type="galaxy.datatypes.metagenomics:SequenceTaxonomy" display_in_upload="true"/>
         <datatype extension="rdp.taxonomy" type="galaxy.datatypes.metagenomics:RDPSequenceTaxonomy" display_in_upload="true"/>
         <datatype extension="cons.taxonomy" type="galaxy.datatypes.metagenomics:ConsensusTaxonomy" display_in_upload="true"/>
@@ -127,6 +134,7 @@
         <datatype extension="masked.quan" type="galaxy.datatypes.metagenomics:MaskedQuantile" display_in_upload="true"/>
         <datatype extension="filtered.masked.quan" type="galaxy.datatypes.metagenomics:FilteredMaskedQuantile" display_in_upload="true"/>
         <datatype extension="axes" type="galaxy.datatypes.metagenomics:Axes" display_in_upload="true"/>
+        <datatype extension="sff.flow" type="galaxy.datatypes.metagenomics:SffFlow" display_in_upload="true"/>
         <datatype extension="tre" type="galaxy.datatypes.data:Newick" display_in_upload="true"/>
         <!-- End Mothur Datatypes -->
 
@@ -138,10 +146,13 @@
       <tool file="mothur/get.groups.xml"/>
       <tool file="mothur/remove.groups.xml"/>
       <tool file="mothur/merge.groups.xml"/>
+      <tool file="mothur/count.groups.xml"/>
       <tool file="mothur/make.design.xml"/>
       <tool file="mothur/sub.sample.xml"/>
     <label text="Mothur Sequence Analysis" id="mothur_sequence_analysis"/>
       <tool file="mothur/sffinfo.xml"/>
+      <tool file="mothur/trim.flows.xml"/>
+      <tool file="mothur/shhh.flows.xml"/>
       <tool file="mothur/make.fastq.xml"/>
       <tool file="mothur/fastq.info.xml"/>
       <tool file="mothur/summary.seqs.xml"/>
@@ -169,6 +180,7 @@
       <tool file="mothur/nmds.xml"/>
       <tool file="mothur/corr.axes.xml"/>
       <tool file="mothur/classify.seqs.xml"/>
+      <tool file="mothur/seq.error.xml"/>
     <label text="Mothur Sequence Chimera Detection" id="mothur_sequence_chimera"/>
       <tool file="mothur/chimera.bellerophon.xml"/>
       <tool file="mothur/chimera.ccode.xml"/>
@@ -200,6 +212,7 @@
       <tool file="mothur/get.group.xml"/>
       <tool file="mothur/bin.seqs.xml"/>
       <tool file="mothur/get.sharedseqs.xml"/>
+      <tool file="mothur/summary.tax.xml"/>
     <label text="Mothur Single Sample Analysis" id="mothur_single_sample_analysis"/>
       <tool file="mothur/collect.single.xml"/>
       <tool file="mothur/rarefaction.single.xml"/>
@@ -235,10 +248,11 @@
       <tool file="mothur/TreeVector.xml"/>
   </section> <!-- metagenomics_mothur -->
 
-
 ############ DESIGN NOTES #########################################################################################################
 Each mothur command has its own tool_config (.xml) file, but all call the same python wrapper code: mothur_wrapper.py
 
+  (The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of cpu processors used by mothur commands)
+
 * Every mothur tool will call mothur_wrapper.py script with a --cmd= parameter that gives the mothur command name.
 * Every tool will produce the logfile of the mothur run as an output.
 * When the outputs of a mothur command could be determined in advance, they are included in the --result= parameter to mothur_wrapper.py
--- a/mothur/lib/galaxy/datatypes/metagenomics.py	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/lib/galaxy/datatypes/metagenomics.py	Tue Nov 08 11:45:32 2011 -0600
@@ -722,7 +722,57 @@
             close(fh)
         return False
 
-class SequenceTaxonomy(Tabular):
+class RefTaxonomy(Tabular):
+    file_ext = 'ref.taxonomy'
+    """
+        A table with 2 or 3 columns:
+        - SequenceName
+        - Taxonomy (semicolon-separated taxonomy in descending order)
+        - integer ?
+        Example: 2-column ( http://www.mothur.org/wiki/Taxonomy_outline )
+          X56533.1        Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma;
+          X97975.1        Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida;
+          AF052717.1      Eukaryota;Parabasalidea;
+        Example: 3-column ( http://vamps.mbl.edu/resources/databases.php )
+          v3_AA008	Bacteria;Firmicutes;Bacilli;Lactobacillales;Streptococcaceae;Streptococcus	5
+          v3_AA016	Bacteria	120
+          v3_AA019	Archaea;Crenarchaeota;Marine_Group_I	1
+    """
+    def __init__(self, **kwd):
+        Tabular.__init__( self, **kwd )
+        self.column_names = ['name','taxonomy']
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is a SequenceTaxonomy
+        """
+        try:
+            pat = '^([^ \t\n\r\x0c\x0b;]+([(]\\d+[)])?(;[^ \t\n\r\x0c\x0b;]+([(]\\d+[)])?)*(;)?)$'
+            fh = open( filename )
+            count = 0
+            while True:
+                line = fh.readline()
+                if not line:
+                    break #EOF
+                line = line.strip()
+                if line:
+                    fields = line.split('\t')
+                    if 2 <= len(fields) <= 3:
+                        return False
+                    if not re.match(pat,fields[1]):
+                        return False
+                    count += 1
+                    if count > 10:
+                        break
+            if count > 0:
+                return True
+        except:
+            pass
+        finally:
+            fh.close()
+        return False
+
+class SequenceTaxonomy(RefTaxonomy):
     file_ext = 'seq.taxonomy'
     """
         A table with 2 columns:
@@ -933,6 +983,57 @@
             fh.close()
         return False
 
+class SffFlow(Tabular):
+    MetadataElement( name="flow_values", default="", no_value="", optional=True , desc="Total number of flow values", readonly=True)
+    MetadataElement( name="flow_order", default="TACG", no_value="TACG", desc="Total number of flow values", readonly=False)
+    file_ext = 'sff.flow'
+    """
+        The first line is the total number of flow values - 800 for Titanium data. For GS FLX it would be 400. 
+        Following lines contain:
+        - SequenceName
+        - the number of useable flows as defined by 454's software
+        - the flow intensity for each base going in the order of TACG.
+        Example:
+          800
+          GQY1XT001CQL4K 85 1.04 0.00 1.00 0.02 0.03 1.02 0.05 ...
+          GQY1XT001CQIRF 84 1.02 0.06 0.98 0.06 0.09 1.05 0.07 ... 
+          GQY1XT001CF5YW 88 1.02 0.02 1.01 0.04 0.06 1.02 0.03 ...
+    """
+    def __init__(self, **kwd):
+        Tabular.__init__( self, **kwd )
+
+    def set_meta( self, dataset, overwrite = True, skip = 1, max_data_lines = None, **kwd ):
+        Tabular.set_meta(self, dataset, overwrite, 1, max_data_lines)
+        try:
+            fh = open( filename )
+            line = fh.readline()
+            line = line.strip()
+            flow_values = int(line)
+            dataset.metadata.flow_values = flow_values
+        finally:
+            fh.close()
+
+    def make_html_table( self, dataset, skipchars=[] ):
+        """Create HTML table, used for displaying peek"""
+        out = ['<table cellspacing="0" cellpadding="3">']
+        comments = []
+        try:
+            # Generate column header
+            out.append('<tr>')
+            out.append( '<th>%d. Name</th>' % 1 )
+            out.append( '<th>%d. Flows</th>' % 2 )
+            for i in range( 3, dataset.metadata.columns+1 ):
+                base = dataset.metadata.flow_order[(i+1)%4]
+                out.append( '<th>%d. %d %s</th>' % (i-2,base) )
+            out.append('</tr>')
+            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
+            out.append( '</table>' )
+            out = "".join( out )
+        except Exception, exc:
+            out = "Can't create peek %s" % str( exc )
+        return out
+
+
 ## Qiime Classes
 
 class QiimeMetadataMapping(Tabular):
--- a/mothur/tool-data/mothur_aligndb.loc	Wed Oct 05 10:37:11 2011 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of metagenomics files.  
-#file has this format (white space characters are TAB characters):
-#
-#<dbname>	<file_base>
-#
-greengenes	/project/db/galaxy/mothur/core_set_aligned.imputed.fasta
-silva archaea	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.fasta
-silva bacteria	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.fasta
-silva eukarya	/project/db/galaxy/mothur/silva.eukarya.fasta
-silva archaea nogap	/project/db/galaxy/mothur/Silva.archaea/nogap.archaea.fasta
-silva bacteria nogap	/project/db/galaxy/mothur/silva.bacteria/nogap.bacteria.fasta
-silva eukarya nogap	/project/db/galaxy/mothur/nogap.eukarya.fasta
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tool-data/mothur_aligndb.loc.sample	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,16 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters):
+#
+# Reference Alignments:  http://www.mothur.org/wiki/Alignment_database
+#
+#<dbname>	<file_base>
+#
+greengenes	/project/db/galaxy/mothur/core_set_aligned.imputed.fasta
+silva archaea	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.fasta
+silva bacteria	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.fasta
+silva eukarya	/project/db/galaxy/mothur/silva.eukarya.fasta
+silva archaea nogap	/project/db/galaxy/mothur/Silva.archaea/nogap.archaea.fasta
+silva bacteria nogap	/project/db/galaxy/mothur/silva.bacteria/nogap.bacteria.fasta
+silva eukarya nogap	/project/db/galaxy/mothur/nogap.eukarya.fasta
+
--- a/mothur/tool-data/mothur_calculators.loc	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tool-data/mothur_calculators.loc	Tue Nov 08 11:45:32 2011 -0600
@@ -1,6 +1,8 @@
 #This is a sample file distributed with Galaxy that enables 
 # Mothur tools to present a choice of values for calculators
+#   (There aren't any local files to point to, so this doesn't need to be modified)
 # source -  http://www.mothur.org/wiki/Calculators
+# 
 #file has this format (fields are separated by TAB characters):
 # 
 ##
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tool-data/mothur_calculators.loc.sample	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,98 @@
+#This is a sample file distributed with Galaxy that enables 
+# Mothur tools to present a choice of values for calculators
+#   (There aren't any local files to point to, so this doesn't need to be modified)
+# source -  http://www.mothur.org/wiki/Calculators
+# 
+#file has this format (fields are separated by TAB characters):
+# 
+##
+# collect.single	ace, bergerparker, boneh, bootstrap, bstick, chao, coverage, default, efron, geometric, goodscoverage, heip, invsimpson, jack, logseries, npshannon, nseqs, qstat, shannon, shannoneven, shen, simpson, simpsoneven, smithwilson, sobs, solow
+# summary.single	ace, bergerparker, boneh, bootstrap, bstick, chao, coverage, default, efron, geometric, goodscoverage, heip, invsimpson, jack, logseries, npshannon, nseqs, qstat, shannon, shannoneven, shen, simpson, simpsoneven, smithwilson, sobs, solow
+# rarefaction.single	ace,                      bootstrap,         chao, coverage, default,                                  heip, invsimpson, jack,            npshannon, nseqs,        shannon, shannoneven,       simpson, simpsoneven, smithwilson, sobs
+##
+# collect.shared	anderberg, braycurtis, canberra, default, gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker
+# summary.shared	anderberg, braycurtis, canberra, default, gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker
+# dist.shared		anderberg, braycurtis, canberra,          gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker
+# tree.shared		anderberg, braycurtis, canberra,          gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker
+# heatmap.sim		           braycurtis,                                               jabund, jclass, jest,                                                                                                     morisitahorn,                                                                        sorabund, sorclass, sorest,                                                                                                      thetan, thetayc
+##
+# venn	sobs,chao,ace	sharedsobs,sharedchao,sharedace
+# rarefaction.shared	sharednseqs,sharedobserved
+#
+##
+#<calculator>	<mult>	<mult2>	<category>	<description>
+#
+##Community richness	
+ace	single	sing	Community richness	the ACE estimator
+bootstrap	single	sing	Community richness	the bootstrap estimator
+chao	single	sing	Community richness	the Chao1 estimator
+jack	single	sing	Community richness	the jackknife estimator
+sobs	single	sing	Community richness	the observed richness
+##Community evenness
+simpsoneven	single	sing	Community evenness	a Simpson index-based measure of evenness
+shannoneven	single	sing	Community evenness	a Shannon index-based measure of evenness
+heip	single	sing	Community evenness	Heip's metric of community evenness
+smithwilson	single	sing	Community evenness	Smith and Wilson's metric of community evenness
+##Community diversity
+bergerparker	single	xxxx	Community diversity	the Berger-Parker index
+coverage	single	sing	Community diversity	the sampling coverage 
+goodscoverage	single	sing	Community diversity	the Good's estimate of sampling coverage 
+invsimpson	single	sing	Community diversity	the Simpson index
+npshannon	single	sing	Community diversity	the non-parametric Shannon index
+qstat	single	xxxx	Community diversity	the Q statistic
+shannon	single	sing	Community diversity	the Shannon index
+simpson	single	sing	Community diversity	the Simpson index
+##Estimates of number of additional OTUs observed with extra sampling
+boneh	single	xxxx	Estimator	Boneh's estimator
+efron	single	xxxx	Estimator	Efron's estimator
+shen	single	xxxx	Estimator	Shen's estimator
+solow	single	xxxx	Estimator	Solow's estimator
+##Statistical distributions
+logseries	single	xxxx	Statistical distribution	tests whether observed data follow the log series distribution
+geometric	single	xxxx	Statistical distribution	tests whether observed data follow the geometric series distribution
+bstick	single	xxxx	Statistical distribution	tests whether observed data follow the broken stick distribution
+## Shared community richness
+sharedsobs	shared	xxxx	Shared community richness	the observed richness shared between two or more samples
+sharedchao	shared	xxxx	Shared community richness	the two or more sample shared Chao1 richness estimator
+sharedace	shared	xxxx	Shared community richness	the two sample shared ACE richness estimator
+##Similarity in community membership
+anderberg	shared	xxxx	Community Membership Similarity	the Anderberg similarity coefficient
+jclass	shared	shar	Community Membership Similarity	the traditional Jaccard similarity coefficient based on the observed richness
+jest	shared	shar	Community Membership Similarity	the Jaccard similarity coefficient based on the Chao1 estimated richnesses
+kulczynski	shared	xxxx	Community Membership Similarity	the Kulczynski similarity coefficient
+kulczynskicody	shared	xxxx	Community Membership Similarity	the Kulczynski-Cody similarity coefficient
+kstest	shared	xxxx	Kolmogorov-Smirnov test 
+lennon	shared	xxxx	Community Membership Similarity	the Lennon similarity coefficient
+ochiai	shared	xxxx	Community Membership Similarity	the Ochiai similarity coefficient
+sorclass	shared	shar	Community Membership Similarity	the Sorenson similarity coefficient based on the observed richness
+sorest	shared	shar	Community Membership Similarity	the Sorenson similarity coefficient based on the Chao1 estimated richnesses
+whittaker	shared	xxxx	Community Membership Similarity	the Whittaker similarity coefficient
+hamming	shared	xxxx	Community Membership Similarity	-
+memchi2	shared	xxxx	Community Membership Similarity	-
+memchord	shared	xxxx	Community Membership Similarity	-
+memeuclidean	shared	xxxx	Community Membership Similarity	-
+mempearson	shared	xxxx	Community Membership Similarity	-
+##Similarity in community structure
+braycurtis	shared	shar	Community Structure Similarity	the Bray-Curtis similarity coefficient
+jabund	shared	shar	Community Structure Similarity	the abundance-based Jaccard similarity coefficient
+morisitahorn	shared	shar	Community Structure Similarity	the Morisita-Horn similarity coefficient
+sorabund	shared	shar	Community Structure Similarity	the abundance-based Sorenson similarity coefficient
+thetan	shared	shar	Community Structure Similarity	the Smith theta similarity coefficient
+thetayc	shared	shar	Community Structure Similarity	the Yue & Clayton theta similarity coefficient
+canberra	shared	xxxx	Community Structure Similarity	-
+gower	shared	xxxx	Community Structure Similarity	-
+hellinger	shared	xxxx	Community Structure Similarity	-
+manhattan	shared	xxxx	Community Structure Similarity	-
+odum	shared	xxxx	Community Structure Similarity	-
+soergel	shared	xxxx	Community Structure Similarity	-
+spearman	shared	xxxx	Community Structure Similarity	-
+speciesprofile	shared	xxxx	Community Structure Similarity	-
+structchi2	shared	xxxx	Community Structure Similarity	-
+structchord	shared	xxxx	Community Structure Similarity	-
+structeuclidean	shared	xxxx	Community Structure Similarity	-
+structkulczynski	shared	xxxx	Community Structure Similarity	-
+structpearson	shared	xxxx	Community Structure Similarity	-
+##Utility calculators
+nseqs	single	sing	Utility	the number of sequences in a sample
+sharednseqs	shared	rare	Utility	the number of sequences in two samples
+sharedobserved	shared	rare	Utility	the number of sequences in two samples
--- a/mothur/tool-data/mothur_calulators.loc	Wed Oct 05 10:37:11 2011 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#This is a sample file distributed with Galaxy that enables 
-# Mothur tools to present a choice of values for calculators
-# source -  http://www.mothur.org/wiki/Calculators
-#file has this format (fields are separated by TAB characters):
-# 
-# venn	sobs,chao,ace	sharedsobs,sharedchao,sharedace
-# rarefaction.shared	sharednseqs,sharedobserved
-# rarefaction.single	ace,bootstrap,chao,coverage,heip,invsimpson,jack,npshannon,nseqs,shannon,shannoneven,simpson,simpsoneven,smithwilson,sobs
-# dist.shared	braycurtis,jabund,jclass,jest,morisitahorn,sorabund,sorclass,sorest,thetan,thetayc
-# tree.shared	braycurtis,jabund,jclass,jest,morisitahorn,sorabund,sorclass,sorest,thetan,thetayc
-# heatmap.sim	braycurtis,jabund,jclass,jest,morisitahorn,sorabund,sorclass,sorest,thetan,thetayc
-# collect.shared	anderberg,braycurtis,jabund,jclass,jest,kstest,kulczynski,kulczynskicody,lennon,morisitahorn,ochiai,sharedace,sharedchao,sharednseqs,sharedsobs,sorabund,sorclass,sorest,thetan,thetayc,whittaker
-# summary.shared	anderberg,braycurtis,jabund,jclass,jest,kstest,kulczynski,kulczynskicody,lennon,morisitahorn,ochiai,sharedace,sharedchao,sharednseqs,sharedsobs,sorabund,sorclass,sorest,thetan,thetayc,whittaker
-# collect.single	ace,bergerparker,boneh,bootstrap,bstick,chao,coverage,efron,geometric,goodscoverage,heip,invsimpson,jack,logseries,npshannon,nseqs,qstat,shannon,shannoneven,shen,simpson,simpsoneven,smithwilson,sobs,solow
-# summary.single	ace,bergerparker,boneh,bootstrap,bstick,chao,coverage,efron,geometric,goodscoverage,heip,invsimpson,jack,logseries,npshannon,nseqs,qstat,shannon,shannoneven,shen,simpson,simpsoneven,smithwilson,sobs,solow
-#
-#<calculator>	<mult>	<mult2>	<category>	<description>
-#
-#Community richness	
-sobs	single	sing	Community richness	the observed richness
-chao	single	sing	Community richness	the Chao1 estimator
-ace	single	sing	Community richness	the ACE estimator
-jack	single	sing	Community richness	the jackknife estimator
-bootstrap	single	sing	Community richness	the bootstrap estimator
-#Community diversity
-bergerparker	single	xxxx	Community diversity	the Berger-Parker index
-shannon	single	sing	Community diversity	the Shannon index
-npshannon	single	sing	Community diversity	the non-parametric Shannon index
-simpson	single	sing	Community diversity	the Simpson index
-simpsoneven	single	sing	Community diversity	the Simpson index
-invsimpson	single	sing	Community diversity	the Simpson index
-coverage	single	sing	Community diversity	the sampling coverage coverage
-qstat	single	xxxx	Community diversity	the Q statistic
-#Estimates of number of additional OTUs observed with extra sampling
-boneh	single	xxxx	Estimator	Boneh's estimator
-efron	single	xxxx	Estimator	Efron's estimator
-shen	single	xxxx	Estimator	Shen's estimator
-solow	single	xxxx	Estimator	Solow's estimator
-#Statistical distributions
-logseries	single	xxxx	Statistical distribution	tests whether observed data follow the log series distribution
-geometric	single	xxxx	Statistical distribution	tests whether observed data follow the geometric series distribution
-bstick	single	xxxx	Statistical distribution	tests whether observed data follow the broken stick distribution
-# Shared community richness
-sharedsobs	shared	xxxx	Shared community richness	the observed richness shared between two or more samples
-sharedchao	shared	xxxx	Shared community richness	the two or more sample shared Chao1 richness estimator
-sharedace	shared	xxxx	Shared community richness	the two sample shared ACE richness estimator
-#Similarity in community membership
-anderberg	shared	xxxx	Community Membership Similarity	the Anderberg similarity coefficient
-jclass	shared	shar	Community Membership Similarity	the traditional Jaccard similarity coefficient based on the observed richness
-jest	shared	shar	Community Membership Similarity	the Jaccard similarity coefficient based on the Chao1 estimated richnesses
-kulczynski	shared	xxxx	Community Membership Similarity	the Kulczynski similarity coefficient
-kulczynskicody	shared	xxxx	Community Membership Similarity	the Kulczynski-Cody similarity coefficient
-lennon	shared	xxxx	Community Membership Similarity	the Lennon similarity coefficient
-ochiai	shared	xxxx	Community Membership Similarity	the Ochiai similarity coefficient
-sorclass	shared	shar	Community Membership Similarity	the Sorenson similarity coefficient based on the observed richness
-sorest	shared	shar	Community Membership Similarity	the Sorenson similarity coefficient based on the Chao1 estimated richnesses
-whittaker	shared	xxxx	Community Membership Similarity	the Whittaker similarity coefficient
-#Similarity in community structure
-braycurtis	shared	shar	Community Structure Similarity	the Bray-Curtis similarity coefficient
-jabund	shared	shar	Community Structure Similarity	the abundance-based Jaccard similarity coefficient
-morisitahorn	shared	shar	Community Structure Similarity	the Morisita-Horn similarity coefficient
-sorabund	shared	shar	Community Structure Similarity	the abundance-based Sorenson similarity coefficient
-thetan	shared	shar	Community Structure Similarity	the Smith theta similarity coefficient
-thetayc	shared	shar	Community Structure Similarity	the Yue & Clayton theta similarity coefficient
-#Utility calculators
-nseqs	single	sing	Utility	the number of sequences in a sample
-sharednseqs	shared	rare	Utility	the number of sequences in two samples
-sharedobserved	shared	rare	Utility	the number of sequences in two samples
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tool-data/mothur_lookup.loc.sample	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,13 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters):
+#
+# lookup files from:   http://www.mothur.org/wiki/Lookup_files
+#
+#<name>	<file_base>
+#
+GS20	/project/db/galaxy/mothur/lookup/LookUp_GS20.pat
+GSFLX	/project/db/galaxy/mothur/lookup/LookUp_GSFLX.pat
+Titanium	/project/db/galaxy/mothur/lookup/LookUp_Titanium.pat
+
+
--- a/mothur/tool-data/mothur_map.loc	Wed Oct 05 10:37:11 2011 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of metagenomics files.  
-#file has this format (white space characters are TAB characters):
-#
-#<name>	<file_base>
-#
-greengenes	/project/db/galaxy/mothur/gg.ss.map
-silva	/project/db/galaxy/mothur/silva.ss.map
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tool-data/mothur_map.loc.sample	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,10 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters):
+#
+# Secondary structure maps:    http://www.mothur.org/wiki/Secondary_structure_map
+#
+#<name>	<file_base>
+#
+greengenes	/project/db/galaxy/mothur/gg.ss.map
+silva	/project/db/galaxy/mothur/silva.ss.map
--- a/mothur/tool-data/mothur_taxonomy.loc	Wed Oct 05 10:37:11 2011 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of metagenomics files.  
-#file has this format (white space characters are TAB characters):
-#
-#<taxonomyname>	<file_base>
-#
-archaea.gg	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.gg.tax
-archaea.silva	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.silva.tax
-archaea.rdp	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.rdp.tax
-archaea.ncbi	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.ncbi.tax
-bacteria.gg	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.gg.tax
-bacteria.silva	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.silva.tax
-bacteria.ncbi	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.ncbi.tax
-bacteria.rdp	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp.tax
-bacteria.rdp6	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp6.tax
-eukarya.silva	/project/db/galaxy/mothur/silva.eukarya.silva.tax
-eukarya.ncbi	/project/db/galaxy/mothur/silva.eukarya.ncbi.tax
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tool-data/mothur_taxonomy.loc.sample	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,20 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters):
+#
+# Silva reference files:    http://www.mothur.org/wiki/Silva_reference_files
+#
+#<taxonomyname>	<file_base>
+#
+archaea.gg	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.gg.tax
+archaea.silva	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.silva.tax
+archaea.rdp	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.rdp.tax
+archaea.ncbi	/project/db/galaxy/mothur/Silva.archaea/silva.archaea.ncbi.tax
+bacteria.gg	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.gg.tax
+bacteria.silva	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.silva.tax
+bacteria.ncbi	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.ncbi.tax
+bacteria.rdp	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp.tax
+bacteria.rdp6	/project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp6.tax
+eukarya.silva	/project/db/galaxy/mothur/silva.eukarya.silva.tax
+eukarya.ncbi	/project/db/galaxy/mothur/silva.eukarya.ncbi.tax
+
--- a/mothur/tools/mothur/chimera.slayer.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/chimera.slayer.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -1,4 +1,4 @@
-<tool id="mothur_chimera_slayer" name="Chimera.slayer" version="1.21.0">
+<tool id="mothur_chimera_slayer" name="Chimera.slayer" version="1.22.0">
  <description>Find putative chimeras using slayer</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -12,6 +12,9 @@
    #if $alignment.name.__str__ != "None" and len($alignment.name.__str__) > 0:
     --name=$alignment.name
    #end if
+   #if $alignment.group.__str__ != '':
+    --group=$alignment.group
+   #end if
   #else:
    --reference=$alignment.template
   #end if
@@ -56,6 +59,8 @@
    </when>
    <when value="self">
     <param name="name" type="data" format="names" optional="true" label="names - Sequences Names"/>
+    <param name="group" type="data" format="groups" optional="true" label="group - Sequences Group reference"
+           help="use the more abundant sequences from the same sample to check the query sequence"/>
    </when>
   </conditional>
   <conditional name="options">
--- a/mothur/tools/mothur/chimera.uchime.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/chimera.uchime.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -1,4 +1,4 @@
-<tool id="mothur_chimera_uchime" name="Chimera.uchime" version="1.20.0">
+<tool id="mothur_chimera_uchime" name="Chimera.uchime" version="1.22.0">
  <description>Find putative chimeras using uchime</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -23,6 +23,9 @@
    #if float($template.abskew.__str__) > 0:
     --abskew=$template.abskew
    #end if
+   #if $template.group.__str__ != '':
+    --group=$template.group
+   #end if
   #elif $template.source == 'names':
    --name=$template.name
   #end if
@@ -95,6 +98,9 @@
    </when>
    <when value="self">
     <param name="abskew" type="float" value="1.9" label="abskew - Abundance skew (default 1.9)" help="Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query)"/>
+    <param name="group" type="data" format="groups" optional="true" label="group - Sequences Group reference"
+           help="use the more abundant sequences from the same sample to check the query sequence"/>
+
    </when>
    <when value="names">
     <param name="name" type="data" format="names" label="name - Sequence names"/>
--- a/mothur/tools/mothur/cluster.split.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/cluster.split.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -1,4 +1,4 @@
-<tool id="mothur_cluster_split" name="Cluster.split" version="1.19.0">
+<tool id="mothur_cluster_split" name="Cluster.split" version="1.22.0">
  <description>Assign sequences to OTUs (Operational Taxonomic Unit) splits large matrices</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -80,7 +80,7 @@
     <param name="fasta" type="data" format="fasta" label="fasta - Sequences"/>
     <param name="name" type="data" format="names" label="name - Sequences Name reference"/>
     <param name="taxonomy" type="data" format="seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/>
-    <param name="taxlevel" type="integer" value="1" label="taxlevel - taxonomy level for split (default=1)" 
+    <param name="taxlevel" type="integer" value="3" label="taxlevel - taxonomy level for split (default=3)" 
            help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/>
    </when>
   </conditional> <!-- splitby -->
@@ -90,7 +90,7 @@
    <option value="average" selected="true">Average neighbor</option>
   </param>
   <param name="cutoff" type="float" value="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0" 
-                 help="Ignore pairwise distances larger than this, a common value would be 0.10"/>
+                 help="Ignore pairwise distances larger than this, a common value would be 0.25"/>
   <param name="hard" type="boolean" checked="true" truevalue="--hard=true" falsevalue="--hard=true" label="hard - Use hard cutoff instead of rounding" 
                  help=""/>
   <param name="precision" type="select" optional="true" label="precision - Precision for rounding distance values"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/count.groups.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,73 @@
+<tool id="mothur_count_groups" name="Count.groups" version="1.22.0" >
+ <description>counts the number of sequences represented by a specific group or set of groups</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  --cmd='count.groups'
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.groups.count$:'$grp_count
+  --outputdir='$logfile.extra_files_path'
+  #if isinstance($group.datatype, $__app__.datatypes_registry.get_datatype_by_extension('shared').__class__):
+   --shared=$group
+  #else:
+   --group=$group
+  #end if
+  #if $groupnames.source == 'groups':
+   #if $groupnames.groups.__str__ != "None" and len($groupnames.groups.__str__) > 0:
+    --groups=$groupnames.groups
+   #end if
+  #elif $groupnames.source == 'accnos':
+   #if $groupnames.accnos.__str__ != "None" and len($groupnames.accnos.__str__) > 0:
+    --accnos=$groupnames.accnos
+   #end if
+  #end if
+ </command>
+ <inputs>
+  <param name="group" type="data" format="groups,shared" label="group or shared - Group file for sequence count"/>
+  <conditional name="groupnames">
+   <param name="source" type="select" label="Filter group names">
+    <option value="none">Report on All Groups</option>
+    <option value="groups">A List of Group Names</option>
+    <option value="accnos">A History Group Name Accnos Dataset</option>
+   </param>
+   <when value="groups">
+    <param name="groups" type="select" label="groups - Pick groups to include" multiple="true">
+     <help>All groups displayed if none are selected.</help>
+     <options from_dataset="group">
+      <column name="name" index="1"/>
+      <column name="value" index="1"/>
+      <filter type="unique_value" name="unq_grp" column="1" />
+     </options>
+    </param>
+   </when>
+   <when value="accnos">
+    <param name="accnos" type="data" format="accnos" optional="true" label="accnos - Group Names from your history"/>
+   </when>
+  </conditional>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="tabular" name="grp_count" label="${tool.name} on ${on_string}: group.count" />
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The count.groups_ command counts sequences from a specific group or set of groups from a group_ or shared_ file.
+
+.. _shared: http://www.mothur.org/wiki/Shared_file
+.. _group: http://www.mothur.org/wiki/Group_file
+.. _count.groups: http://www.mothur.org/wiki/Count.groups
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/make.shared.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/make.shared.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -1,4 +1,4 @@
-<tool id="mothur_make_shared" name="Make.shared" version="1.19.0" force_history_refresh="True">
+<tool id="mothur_make_shared" name="Make.shared" version="1.22.0" force_history_refresh="True">
  <description>Make a shared file from a list and a group</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -18,9 +18,11 @@
   #if $groups.__str__ != "None" and len($groups.__str__) > 0:
     --groups=$groups
   #end if
+  #*
   #if $ordergroup.__str__ != "None" and len($ordergroup.__str__) > 0:
    --ordergroup=$ordergroup
   #end if
+  *#
  </command>
  <inputs>
   <param name="list" type="data" format="list" label="list - OTU List"/>
@@ -39,7 +41,9 @@
      <filter type="unique_value" name="unq_grp" column="1" />
    </options>
   </param>
+  <!--
   <param name="ordergroup" type="data" format="groups" optional="true" label="ordergroup - one column list of group names in desired order"/>
+  -->
   <param name="as_datasets" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Create a new history dataset for each group rabund"/>
  </inputs>
  <outputs>
--- a/mothur/tools/mothur/mothur_wrapper.py	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/mothur_wrapper.py	Tue Nov 08 11:45:32 2011 -0600
@@ -4,7 +4,7 @@
 http://www.mothur.org/
 
 Supports mothur version 
-mothur v.1.20.0
+mothur v.1.22.0
 
 Class encapsulating Mothur galaxy tool.
 Expect each invocation to include:
@@ -186,8 +186,8 @@
     cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','reference'], 'optional' : ['filter','mask','window','numwanted','save','processors']})
     cmd_dict['chimera.check'] = dict({'required' : ['fasta','reference'], 'optional' : ['ksize','svg','name','increment','save','processors']})
     cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','reference'], 'optional' : ['conservation','quantile','filter','mask','window','increment','save','processors']})
-    cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','blastlocation','save','processors']})
-    cmd_dict['chimera.uchime'] = dict({'required' : ['fasta'], 'optional' : ['name','reference','abskew','chimealns','minh','mindiv','xn','dn','xa','chunks','minchunk','idsmoothwindow','minsmoothid','maxp','skipgaps','skipgaps2','minlen','maxlen','ucl','queryfract','processors']})
+    cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','group','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','blastlocation','save','processors']})
+    cmd_dict['chimera.uchime'] = dict({'required' : ['fasta'], 'optional' : ['name','group','reference','abskew','chimealns','minh','mindiv','xn','dn','xa','chunks','minchunk','idsmoothwindow','minsmoothid','maxp','skipgaps','skipgaps2','minlen','maxlen','ucl','queryfract','processors']})
     cmd_dict['chop.seqs'] = dict({'required' : ['fasta','numbases'],  'optional' : ['countgaps','keep','short']})
     cmd_dict['classify.otu'] = dict({'required' : ['list','taxonomy'],'optional' : ['name','cutoff','label','group','probs','basis','reftaxonomy']})
     cmd_dict['classify.seqs'] = dict({'required' : ['fasta','reference','taxonomy'],'optional' : ['name','search','ksize','method','match','mismatch','gapopen','gapextend','numwanted','probs','save','processors']})
@@ -201,6 +201,7 @@
     cmd_dict['collect.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','size','label','freq']})
     cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label','cutoff']})
     cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']})
+    cmd_dict['count.groups'] = dict({'required' : [['group','shared']], 'optional' : ['accnos','groups']})
     cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups']})
     cmd_dict['degap.seqs'] = dict({'required' : ['fasta']})
     cmd_dict['deunique.seqs'] = dict({'required' : ['fasta','name'],  'optional' : []})
@@ -229,7 +230,7 @@
     cmd_dict['list.seqs'] = dict({'required' : [['fasta','name','group','list','alignreport','taxonomy']]})
     cmd_dict['make.fastq'] = dict({'required' : ['fasta','qfile'] ,  'optional' : []})
     cmd_dict['make.group'] = dict({'required' : ['fasta','groups'],  'optional' : []})
-    cmd_dict['make.shared'] = dict({'required' : ['list','group'],  'optional' : ['label','groups','ordergroup']})
+    cmd_dict['make.shared'] = dict({'required' : ['list','group'],  'optional' : ['label','groups']})
     cmd_dict['mantel'] = dict({'required' : ['phylip','phylip2'] ,  'optional' : ['method','iters']})
     cmd_dict['merge.files'] = dict({'required' : ['input','output']})
     cmd_dict['merge.groups'] = dict({'required' : ['shared','design'],  'optional' : ['groups', 'label']})
@@ -244,7 +245,7 @@
     cmd_dict['pcoa'] = dict({'required' : ['phylip'], 'optional' : ['metric']})
     cmd_dict['phylo.diversity'] = dict({'required' : ['tree'],'optional' : ['group','name','groups','iters','freq','scale','rarefy','collect','summary','processors']})
     cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']})
-    cmd_dict['pre.cluster'] = dict({'required' : ['fasta'],  'optional' : ['name','diffs']})
+    cmd_dict['pre.cluster'] = dict({'required' : ['fasta'],  'optional' : ['name','diffs','group']})
     cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble']})
     cmd_dict['rarefaction.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','iters','label','freq','processors']})
     cmd_dict['remove.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','shared','taxonomy']})
@@ -255,14 +256,22 @@
     cmd_dict['reverse.seqs'] = dict({'required' : ['fasta']})
     cmd_dict['screen.seqs'] = dict({'required' : ['fasta'],  'optional' : ['start','end','maxambig','maxhomop','minlength','maxlength','criteria','optimize','name','group','alignreport','processors']})
     cmd_dict['sens.spec'] = dict({'required' : ['list',['column','phylip']] , 'optional' : ['label','cutoff','hard','precision']})
+
+    cmd_dict['seq.error'] = dict({'required' : ['fasta','reference'] , 'optional' : ['name','qfile','report','ignorechimeras','threshold','processors']})
+
     cmd_dict['sffinfo'] = dict({'required' : [['sff','sfftxt']], 'optional' : ['fasta','qfile','trim','sfftxt','flow','accnos']})
+
+    cmd_dict['shhh.flows'] = dict({'required' : [['flow','files']], 'optional' : ['lookup','maxiter','mindelta','cutoff','sigma','order','processors']})
+
     cmd_dict['split.abund'] = dict({'required' : ['fasta',['name','list']], 'optional' : ['cutoff','group','groups','label','accnos']})
     cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : ['name','groups']})
     cmd_dict['sub.sample'] = dict({'required' : [['fasta','list','sabund','rabund','shared']], 'optional' : ['name','group','groups','label','size','persample']})
     cmd_dict['summary.seqs'] = dict({'required' : ['fasta'], 'optional' : ['name','processors']})
     cmd_dict['summary.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','all','distance','processors']})
     cmd_dict['summary.single'] = dict({'required' : [['list','sabund','rabund','shared']], 'optional' : ['calc','abund','size','label','groupmode']})
+    cmd_dict['summary.tax'] = dict({'required' : ['taxonomy'], 'optional' : ['name','group','reftaxonomy']})
     cmd_dict['tree.shared'] = dict({'required' : [['shared','phylip','column']], 'optional' : ['name','groups','calc','cutoff','precision','label']})
+    cmd_dict['trim.flows'] = dict({'required' : ['flow'],  'optional' : ['oligos','bdiffs','pdiffs','tdiffs','minflows','maxflows','fasta','signal','noise','maxhomop','order','processors']})
     cmd_dict['trim.seqs'] = dict({'required' : ['fasta'],  'optional' : ['name','group','oligos','qfile','qaverage','qthreshold','qwindowaverage','qwindowsize','rollaverage','qstepsize','qtrim','flip','maxambig','maxhomop','minlength','maxlength','bdiffs','pdiffs','tdiffs','allfiles','keepfirst','removelast','processors']})
     cmd_dict['unifrac.unweighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']})
     cmd_dict['unifrac.weighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']})
@@ -316,6 +325,7 @@
     parser.add_option( '--rollaverage', dest='rollaverage', type="int", help='Remove sequences that have a average quality below the value in a rolling window' )
     parser.add_option( '--qstepsize', dest='qstepsize', type="int", help='Distance to move a rolling window for each step' )
     parser.add_option( '--qtrim', dest='qtrim', help='For sequence below qthreshold, false to scrap file, true to trimmed and in trim file' )
+    parser.add_option( '--ignorechimeras', dest='ignorechimeras', help='ignorechimeras' )
     parser.add_option( '--flip', dest='flip', help='If true, reverse complement the sequences' )
     parser.add_option( '--maxambig', dest='maxambig', type="int", help='Number of ambiguous base calls to allow' )
     parser.add_option( '--maxhomop', dest='maxhomop', type="int", help='Maximun homopolymer length allowed' )
@@ -333,6 +343,7 @@
     parser.add_option( '--group', dest='group', help='A file containing a list of names' )
     parser.add_option( '--list', dest='list', help='A file containing a list of names' )
     parser.add_option( '--alignreport', dest='alignreport', help='A align.report file ' )
+    parser.add_option( '--report', dest='report', help='' )
     parser.add_option( '--taxonomy', dest='taxonomy', help='A Taxonomy file' )
     parser.add_option( '--reftaxonomy', dest='reftaxonomy', help='A Taxonomy file' )
     parser.add_option( '--taxon', dest='taxon',  help='A Taxon' )
@@ -383,6 +394,7 @@
     parser.add_option( '--all', dest='all', help='Calculate for all' )
     parser.add_option( '--freq', dest='freq', type="float", help='Frequency of sequences to choose, as fraction is 0.0 - 1.0 or iteration if int > 1' )
     parser.add_option( '--iters', dest='iters', type='int', help='Iterations of randomizations' )
+    parser.add_option( '--maxiter', dest='maxiter', type='int', help='Iterations' )
     parser.add_option( '--maxiters', dest='maxiters', type='int', help='Iterations of randomizations' )
     parser.add_option( '--jumble', dest='jumble',  help='If false, just a collector curve across the samples' )
     parser.add_option( '--conservation', dest='conservation',  help='Template frequency information' )
@@ -400,6 +412,15 @@
     parser.add_option( '--svg', dest='svg',  help='SVG' )
     parser.add_option( '--sfftxt', dest='sfftxt',  help='Generate a sff.txt file' )
     parser.add_option( '--flow', dest='flow',  help='Generate a flowgram file' )
+    parser.add_option( '--minflows', dest='minflows', type='int', help='the minimum number of flows that each sequence must contain' )
+    parser.add_option( '--maxflows', dest='maxflows', type='int', help='the number of flows after which all other flows should be ignored.' )
+    parser.add_option( '--signal', dest='signal', type='float', help='threshold for intensity to be signal' )
+    parser.add_option( '--noise', dest='noise', type='float', help='threshold for intensity to be noise' )
+    parser.add_option( '--mindelta', dest='mindelta', type='float', help='threshold for determining how much change in the flowgram correction' )
+    parser.add_option( '--sigma', dest='sigma', type='float', help='sigma option is used to set the dispersion of the data in the expectation-maximization' )
+    parser.add_option( '--order', dest='order', help='flow order e.g. TACG' )
+    parser.add_option( '--lookup', dest='lookup', help='lookup file that are needed to run shhh.seqs' )
+    
     parser.add_option( '--trim', dest='trim', help='Whether sequences and quality scores are trimmed to the clipQualLeft and clipQualRight values' )
     parser.add_option( '--input', dest='input', help='' )
     parser.add_option( '--phylip', dest='phylip', help='' )
--- a/mothur/tools/mothur/pre.cluster.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/pre.cluster.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -1,4 +1,4 @@
-<tool id="mothur_pre_cluster" name="Pre.cluster" version="1.20.0">
+<tool id="mothur_pre_cluster" name="Pre.cluster" version="1.22.0">
  <description>Remove sequences due to pyrosequencing errors</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -13,6 +13,9 @@
   #if $name.__str__ != "None" and len($name.__str__) > 0:
    --name=$name
   #end if
+  #if $group.__str__ != "None" and len($group.__str__) > 0:
+   --group=$group
+  #end if
   #if 20 >= int($diffs.__str__) >= 0:
    --diffs=$diffs
   #end if
@@ -21,6 +24,7 @@
  <inputs>
   <param name="fasta" type="data" format="fasta" label="fasta - Sequence Fasta"/>
   <param name="name" type="data" format="names" optional="true" label="name - Sequences Name reference"/>
+  <param name="group" type="data" format="groups" optional="true" label="group - Sequences Group reference"/>
   <param name="diffs" type="integer" value="1" label="diffs - Number of mismatched bases to allow between sequences in a group (default 1)"/>
  </inputs>
  <outputs>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/seq.error.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,172 @@
+<tool id="mothur_seq_error" name="Seq.error" version="1.22.0">
+ <description>assess error rates in sequencing data</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  #import re, os.path
+  #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__]
+  #if 'summary' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.summary$:'" + $summary_out.__str__]
+  #end if
+  #if 'seq' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.seq$:'" + $seq_out.__str__]
+  #end if
+  #if 'seq_forward' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.seq.forward$:'" + $seq_forward_out.__str__]
+  #end if
+  #if 'seq_reverse' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.seq.reverse$:'" + $seq_reverse_out.__str__]
+  #end if
+  #if 'chimera' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.chimera$:'" + $chimera_out.__str__]
+  #end if
+  #if 'count' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.count$:'" + $count_out.__str__]
+  #end if
+  #if 'matrix' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.matrix$:'" + $matrix_out.__str__]
+  #end if
+  #if 'ref_query' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.ref-query$:'" + $ref_query_out.__str__]
+  #end if
+  --cmd='seq.error'
+  --outputdir='$logfile.extra_files_path'
+  --fasta=$fasta_in
+  --reference=$alignment.template
+  #if $name_in.__str__ != "None" and len($name_in.__str__) > 0:
+   --name=$name_in
+  #end if
+  #if $qual.use.__str__ == 'yes':
+   --qfile=$qfile_in
+   --alignreport=$alignreport_in
+   #if 'quality' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.quality$:'" + $quality_out.__str__]
+   #end if
+   #if 'qual_forward' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.qual.forward$:'" + $qual_forward_out.__str__]
+   #end if
+   #if 'qual_reverse' in $output_sel.__str__.split(','):
+    #set results = $results + ["'^\S+.error.qual.reverse$:'" + $qual_reverse_out.__str__]
+   #end if
+  #end if
+  #if $threshold.__str__ != '':
+   --threshold=$threshold
+  #end if
+  $ignorechimeras
+  --result=#echo ','.join($results)
+  --processors=8
+ </command>
+ <inputs>
+  <param name="fasta_in" type="data" format="align" label="fasta - Candidate Sequences"/>
+  <conditional name="alignment">
+   <param name="source" type="select" label="Select Reference Template from" help="">
+    <option value="ref">Cached Reference</option>
+    <option value="history">Your History</option>
+   </param>
+   <when value="ref">
+    <param name="template" type="select" label="reference - Select an alignment database " help="">
+     <options from_file="mothur_aligndb.loc">
+      <column name="name" index="0" />
+      <column name="value" index="1" />
+     </options>
+    </param>
+   </when>
+   <when value="history">
+    <param name="template" type="data" format="align" label="reference - Reference to align with" help=""/>
+   </when>
+  </conditional>
+
+  <param name="name_in" type="data" format="names" optional="true" label="name - Sequences Name reference"/>
+
+  <conditional name="qual">
+   <param name="use" type="select" label="Include a quality file and an alignment report as inputs" help="">
+    <option value="no">No</option>
+    <option value="yes">Yes</option>
+   </param>
+   <when value="yes"> 
+    <param name="qfile_in" type="data" format="qual" label="qfile - Fasta Quality"/>
+    <param name="alignreport_in" type="data" format="align.report" label="alignreport - Align Report"/>
+   </when>
+   <when value="no"/> 
+  </conditional>
+
+  <param name="threshold" type="float" value="" optional="true" label="threshold - error rate at which to report (default 1.)"
+         help="">
+   <validator type="in_range" message="error rate threshold between 0. and 1." min="0.0" max="1.0"/>
+  </param>
+  <param name="ignorechimeras" type="boolean" truevalue="" falsevalue="--ignorechimeras=false" checked="true" label="ignorechimeras - " />
+
+  <param name="output_sel" type="select" multiple="true" display="checkboxes" label="Outputs as history datasets" >
+    <option value="summary">error.summary</option>
+    <option value="seq">error.seq</option>
+    <option value="seq_forward">error.seq.forward</option>
+    <option value="seq_reverse">error.seq.reverse</option>
+    <option value="chimera">error.chimera</option>
+    <option value="count">error.count</option>
+    <option value="matrix">error.matrix</option>
+    <option value="quality">error.quality</option>
+    <option value="qual_forward">error.qual.forward</option>
+    <option value="qual_reverse">error.qual.reverse</option>
+    <option value="ref_query">error.ref-query</option>
+  </param>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="tabular" name="summary_out" label="${tool.name} on ${on_string}: error.summary" >
+   <filter>('summary' in output_sel)</filter>
+  </data>
+  <data format="fasta" name="seq_out" label="${tool.name} on ${on_string}: error.seq" >
+   <filter>('seq' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="seq_forward_out" label="${tool.name} on ${on_string}: error.seq.forward" >
+   <filter>('seq_forward' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="seq_reverse_out" label="${tool.name} on ${on_string}: error.seq.reverse" >
+   <filter>('seq_reverse' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="chimera_out" label="${tool.name} on ${on_string}: error.chimera" >
+   <filter>('chimera' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="count_out" label="${tool.name} on ${on_string}: error.count" >
+   <filter>('count' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="matrix_out" label="${tool.name} on ${on_string}: error.matrix" >
+   <filter>('matrix' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="quality_out" label="${tool.name} on ${on_string}: error.quality" >
+   <filter>('quality' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="qual_forward_out" label="${tool.name} on ${on_string}: error.qual.forward" >
+   <filter>('qual_forward' in output_sel)</filter>
+  </data>
+  <data format="tabular" name="qual_reverse_out" label="${tool.name} on ${on_string}: error.qual.reverse" >
+   <filter>('qual_reverse' in output_sel)</filter>
+  </data>
+  <data format="align" name="ref_query_out" label="${tool.name} on ${on_string}: error.ref-query" >
+   <filter>('ref_query' in output_sel)</filter>
+  </data>
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The seq.error_ command evaluates error rate for sequences by comparing to the fasta-formatted template_alignment_.
+This is demonstrated in http://www.mothur.org/wiki/Schloss_SOP#Error_analysis
+
+.. _template_alignment: http://www.mothur.org/wiki/Alignment_database
+.. _seq.error: http://www.mothur.org/wiki/Seq.error
+
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/sffinfo.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/sffinfo.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -48,7 +48,7 @@
   <data format="txt" name="out_sfftxt" label="${tool.name} on ${on_string}: sff.txt">
    <filter>sfftxt == True</filter>
   </data>
-  <data format="txt" name="out_flow" label="${tool.name} on ${on_string}: flowgram">
+  <data format="sff.flow" name="out_flow" label="${tool.name} on ${on_string}: flowgram">
    <filter>flow == True</filter>
   </data>
  </outputs>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/shhh.flows.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,103 @@
+<tool id="mothur_shhh_flows" name="Shhh.flows" version="1.22.0" force_history_refresh="True">
+ <description>Denoise flowgrams (PyroNoise algorithm)</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  #import re, os.path
+  --cmd='shhh.flows'
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.shhh\.fasta$:'$shhh_fasta,'^\S+\.shhh\.qual$:'$shhh_qual,'^\S+\.shhh\.names$:'$shhh_names,'^\S+\.shhh\.groups$:'$shhh_groups,'^\S+\.shhh\.counts$:'$shhh_counts
+  --outputdir='$logfile.extra_files_path'
+  --flow=$flow
+  --lookup=$prob.lookup
+  #if $maxiter.__str__ != '':
+   --maxiter=$maxiter 
+  #end if
+  #if $mindelta.__str__ != '':
+   --mindelta=$mindelta 
+  #end if
+  #if $cutoff.__str__ != '':
+   --cutoff=$cutoff 
+  #end if
+  #if $sigma.__str__ != '':
+   --sigma=$sigma 
+  #end if
+  #if $order.__str__.strip() != '':
+   --order=$order 
+  #end if
+  --processors=8
+ </command>
+ <inputs>
+  <param name="flow" type="data" format="sff.flow" label="flow - flowgram data" 
+         help="Use sffinfo to generate flow data from an sff file and usually trimmed by trim.flows"/>
+  <conditional name="prob">
+   <param name="source" type="select" label="Select Taxonomy from" help="">
+    <option value="ref">Cached Reference</option>
+    <option value="hist">History</option>
+   </param>
+   <when value="ref">
+    <param name="lookup" type="select" format="tabular" label="lookup - intensity value per homopolymer length"
+     help="table of the probability of observing an intensity value for a given homopolymer length">
+     <options from_file="mothur_lookup.loc">
+      <column name="name" index="0" />
+      <column name="value" index="1" />
+     </options>
+    </param>
+   </when>
+   <when value="hist">
+    <param name="lookup" type="data" format="tabular" label="lookup - intensity value per homopolymer length"
+           help="from http://www.mothur.org/wiki/Lookup_files"/>
+   </when>
+  </conditional>
+
+  <param name="maxiter" type="integer" value="1000" optional="true" label="maxiter - maximum iterations to run (default 1000)" help="if the delta value does not first drop below the mindelta value. ">
+      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
+  </param>
+
+  <param name="mindelta" type="float" value="" optional="true" label="mindelta - threshold for determining how much change in the flowgram correction is allowed" 
+         help="default .0000001 (10^-6)">
+   <validator type="in_range" message="mindelta between 0. and .1" min="0.0" max="0.1"/>
+  </param>
+
+  <param name="cutoff" type="float" value="" optional="true" label="cutoff - seed the expectation-maximizaton step" 
+         help="default .01 (usually doesn't need to be changed)">
+   <validator type="in_range" message="cutoff between 0. and 1." min="0.0" max="1.0"/>
+  </param>
+
+  <param name="sigma" type="float" value="" optional="true" label="sigma - the dispersion of the data in the expectation-maximization step of the algorithm" 
+         help="default .06 (usually doesn't need to be changed)">
+   <validator type="in_range" message="sigma between 0. and 1." min="0.0" max="1.0"/>
+  </param>
+
+  <param name="order" type="text" value="" label="order - flow order for nucleotides in the sequencer" 
+         help="default is TACG"/>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format_source="fasta" name="shhh_fasta" label="${tool.name} on ${on_string}: shhh.fasta"/>
+  <data format_source="qual454" name="shhh_qual" label="${tool.name} on ${on_string}: shhh.qual"/>
+  <data format_source="names" name="shhh_names" label="${tool.name} on ${on_string}: shhh.names"/>
+  <data format_source="groups" name="shhh_groups" label="${tool.name} on ${on_string}: shhh.groups"/>
+  <data format_source="tabular" name="shhh_counts" label="${tool.name} on ${on_string}: shhh.counts"/>
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**mothur overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The shhh.flows_ command is Pat Schloss's translation of Chris Quince's PyroNoise algorithm [1] from C to C++ with the incorporation of mothur's bells and whistles. Based on processing of test datasets provided by Quince, shhh.flows gives the same/similar output to AmpliconNoise. shhh.flows uses a expectation-maximization algorithm to correct flowgrams to identify the idealized form of each flowgram and translate that flowgram to a DNA sequence. Our testing has shown that when Titanium data are trimmed to 450 flows using trim.flows, shhh.flows provides the highest quality data for any other method available. In contrast, when we use the min/max number of flows suggested by Quince of 360/720, the error rate is not that great. This much improved error rate does come at a computational cost. Whereas the features in trim.seqs take on the order of minutes, shhh.flows can take on the order of hours.  You will also need a lookup file that tells shhh.flows the probability of observing an intensity value for a given homopolymer length. You can get mothur-compatible files at: http://www.mothur.org/wiki/Lookup_files 
+
+.. _shhh.flows: http://www.mothur.org/wiki/Shhh.flows
+
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/sub.sample.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/sub.sample.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -1,4 +1,4 @@
-<tool id="mothur_sub_sample" name="Sub.sample" version="1.21.0">
+<tool id="mothur_sub_sample" name="Sub.sample" version="1.22.0">
  <description>Create a sub sample</description>
  <command interpreter="python">
   mothur_wrapper.py 
@@ -12,7 +12,7 @@
    #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1\\.subsample\\.\2',$os.path.basename($input.fasta_in.__str__)) + ":'" + $fasta_out.__str__]
    #if $input.name_in.__str__ != "None" and len($input.name_in.__str__) > 0:
     --name=$input.name_in
-    ## #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1\\.subsample\\.\2',$os.path.basename($input.name_in.__str__)) + ":'" + $names_out.__str__]
+    #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1\\.subsample\\.\2',$os.path.basename($input.name_in.__str__)) + ":'" + $names_out.__str__]
    #end if
    #if $input.use_group.to_filter == "yes":
     #if $input.use_group.group_in.__str__ != "None" and len($input.use_group.group_in.__str__) > 0:
@@ -183,10 +183,10 @@
    <filter>input['format'] == 'rabund'</filter>
   </data>
   <!--  This doesn't appear to be generated even though the documentation says it is
+  -->
   <data format="names" name="names_out" label="${tool.name} on ${on_string}: subsample.names">
    <filter>(input['format'] == 'fasta' and input['name_in'] != None)</filter>
   </data>
-  -->
   <data format="groups" name="group_out" label="${tool.name} on ${on_string}: subsample.groups">
    <filter>((input['format'] == 'fasta' or input['format'] == 'list') and input['use_group'] == 'yes')</filter>
   </data>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/summary.tax.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,84 @@
+<tool id="mothur_summary_tax" name="Summary.tax" version="1.22.0">
+ <description>Assign sequences to taxonomy</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  --cmd='summary.tax'
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.tax\.summary$:'$tax_summary
+  --outputdir='$logfile.extra_files_path'
+  --taxonomy=$tax.taxonomy
+  #if $name.__str__ != "None" and len($name.__str__) > 0:
+   --name='$name'
+  #end if
+  #if $group.__str__ != "None" and len($group.__str__) > 0:
+   --group='$group'
+  #end if
+  #if $reftax.source != 'none' and len($reftax.taxonomy.__str__) > 0:
+   --reftaxonomy=$reftax.taxonomy
+  #end if
+ </command>
+ <inputs>
+  <conditional name="tax">
+   <param name="source" type="select" label="Select Taxonomy from" help="">
+    <option value="hist">History</option>
+    <option value="ref">Cached Reference</option>
+   </param>
+   <when value="ref">
+    <param name="taxonomy" type="select" format="seq.taxonomy" label="taxonomy - Taxonomy Reference">
+     <options from_file="mothur_taxonomy.loc">
+      <column name="name" index="0" />
+      <column name="value" index="1" />
+     </options>
+    </param>
+   </when>
+   <when value="hist">
+    <param name="taxonomy" type="data" format="seq.taxonomy" label="taxonomy - Taxonomy Reference"/>
+   </when>
+  </conditional>
+  <param name="name" type="data" format="names" optional="true" label="name - taxonomy sequence names"/>
+  <param name="group" type="data" format="groups" optional="true" label="group - Groups for summary file"/>
+  <conditional name="reftax">
+   <param name="source" type="select" label="Select Reference Taxonomy used in Summary.seqs from" help="Including the reference taxonomy file used when you classified your sequences keeps the rankIDs in the summary file static.">
+    <option value="none">Selection is Optional</option>
+    <option value="hist">History</option>
+    <option value="ref">Cached Reference</option>
+   </param>
+   <when value="none"/>
+   <when value="ref">
+    <param name="taxonomy" type="select" format="seq.taxonomy" label="reftaxonomy - Taxonomy Reference used when sequences were classified">
+     <options from_file="mothur_taxonomy.loc">
+      <column name="name" index="0" />
+      <column name="value" index="1" />
+     </options>
+    </param>
+   </when>
+   <when value="hist">
+    <param name="taxonomy" type="data" format="seq.taxonomy" label="reftaxonomy - Taxonomy Reference used when sequences were classified"/>
+   </when>
+  </conditional>
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="summary" name="tax_summary" label="${tool.name} on ${on_string}: summary" />
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**Mothur Overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The summary.tax_ command reads a taxonomy file and an optional name and or group file, and summarizes the taxonomy information.
+
+.. _summary.tax: http://www.mothur.org/wiki/Summary.tax
+
+ </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mothur/tools/mothur/trim.flows.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -0,0 +1,127 @@
+<tool id="mothur_trim_flows" name="Trim.flows" version="1.22.0" force_history_refresh="True">
+ <description>partition by barcode, trim to length, cull by length and mismatches</description>
+ <command interpreter="python">
+  mothur_wrapper.py 
+  #import re, os.path
+  --cmd='trim.flows'
+  ## #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__]
+  ## #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($flow.__str__)) + ":'" + $trim_flow.__str__]
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.trim\.flow$:'$trim_flow, '^\S+\.scrap\.flow$:'$scrap_flow,'^\S+\.flow\.files$:'$flow_files,'^\S+\.flow\.fasta$:'$flow_fasta
+  --outputdir='$logfile.extra_files_path'
+  --flow=$flow
+  #if $minflows.__str__ != '':
+   --minflows=$minflows 
+  #end if
+  #if $maxflows.__str__ != '':
+   --maxflows=$maxflows 
+  #end if
+  #if $maxhomop.__str__ != '':
+   --maxhomop=$maxhomop 
+  #end if
+  #if $order.__str__.strip() != '':
+   --order=$order 
+  #end if
+  #if $signal.__str__ != ''
+   --signal=$signal 
+  #end if
+  #if $noise.__str__ != ''
+   --noise=$noise 
+  #end if
+  #if $oligo.add == "yes":
+   --oligos=$oligo.oligos
+   #if int($oligo.bdiffs.__str__) > 0:
+    --bdiffs=$oligo.bdiffs
+   #end if
+   #if int($oligo.pdiffs.__str__) > 0:
+    --pdiffs=$oligo.pdiffs
+   #end if
+   #if int($oligo.tdiffs.__str__) > 0:
+    --tdiffs=$oligo.tdiffs
+   #end if
+   --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
+   --new_datasets='^\S+?\.(\S+\.flow)$:sff.flow'
+  #end if
+  $fasta
+  --processors=8
+ </command>
+ <inputs>
+  <param name="flow" type="data" format="sff.flow" label="flow - flowgram data" 
+         help="Use sffinfo to generate flow data from an sff file"/>
+
+  <conditional name="oligo">
+   <param name="add" type="select" label="Trim with an oligos file?" 
+    help="a file that can contain the sequences of the forward and reverse primers and barcodes and their sample identifier. 
+         Each line of the oligos file can start with the key words &quot;forward&quot;, &quot;reverse&quot;, 
+         and &quot;barcode&quot; or it can start with a &quot;#&quot; to tell mothur to ignore that line of the oligos file.  ">
+    <option value="no">no</option>
+    <option value="yes">yes</option>
+   </param>
+   <when value="no"/>
+   <when value="yes">
+    <param name="oligos" type="data" format="oligos" label="oligos - barcodes and primers"/>
+    <param name="bdiffs" type="integer" value="0" label="bdiffs - number of differences to allow in the barcode (default 0)">
+      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
+    </param>
+    <param name="pdiffs" type="integer" value="0" label="pdiffs - number of differences to allow in the primer (default 0)">
+      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
+    </param>
+    <param name="tdiffs" type="integer" value="0" label="tdiffs - total number of differences to allow in primer and barcode (ignored if &#060; 1)">
+      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
+    </param>
+   </when>
+  </conditional>
+
+  <param name="minflows" type="integer" value="" optional="true" label="minflows - Minimum number of flows that each sequence must contain to make it in to a &quot;trim&quot; file. (default 450)" help="(Quince uses 360)"/>
+  <param name="maxflows" type="integer" value="" optional="true" label="maxflows - Maximum number of flows after which all other flows should be ignored (default 450)" help="(Quince uses 360 for GSFLX and 720 for Titanium)"/>
+
+  <param name="maxhomop" type="integer" value="" optional="true" label="maxhomop - Maximum homopolymers" 
+         help=""/>
+
+  <param name="signal" type="float" value="" optional="true" label="signal - treat any intensity signal greater than this threshold as a real signal" 
+         help="default .5">
+   <validator type="in_range" message="signal between 0. and 1." min="0.0" max="1.0"/>
+  </param>
+  <param name="noise" type="float" value="" optional="true" label="noise - treat any intensity signal less than this threshold as noise" 
+         help="default .7">
+   <validator type="in_range" message="signal between 0. and 1." min="0.0" max="1.0"/>
+  </param>
+  <param name="order" type="text" value="" label="order - flow order for nucleotides in the sequencer" 
+         help="default is TACG"/>
+
+  <param name="fasta" type="boolean" truevalue="--fasta=true" falsevalue="" checked="false" label="fasta - translate the flowgram data to fasta sequence format"/>
+
+ </inputs>
+ <outputs>
+  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
+  <data format="sff.flow" name="trim_flow" label="${tool.name} on ${on_string}: trim.flow"/>
+  <data format="sff.flow" name="scrap_flow" label="${tool.name} on ${on_string}: scrap.flow"/>
+  <data format="tabular" name="flow_files" label="${tool.name} on ${on_string}: flow.files">
+   <filter>oligos != None</filter>
+  </data>
+  <data format_source="fasta" name="flow_fasta" label="${tool.name} on ${on_string}: flow.fasta">
+   <filter>fasta == True</filter>
+  </data>
+ </outputs>
+ <requirements>
+  <requirement type="binary">mothur</requirement>
+ </requirements>
+ <tests>
+ </tests>
+ <help>
+**mothur overview**
+
+Mothur_, initiated by Dr. Patrick Schloss and his software development team
+in the Department of Microbiology and Immunology at The University of Michigan,
+provides bioinformatics for the microbial ecology community.
+
+.. _Mothur: http://www.mothur.org/wiki/Main_Page
+
+**Command Documentation**
+
+The trim.flows_ command is analogous to the trim.seqs command, except that it uses the flowgram data that comes bundled in the sff file that is generated by 454 sequencing. Its primary usage is as a preliminary step to running shhh.seqs. Chris Quince has a series of perl scripts that fulfill a similar role [1]. This command will allow you to partition your flowgram data by sample based on the barcode, trim the flows to a specified length range, and cull sequences that are too short or have too many mismatches to barcodes and primers.
+
+.. _trim.flows: http://www.mothur.org/wiki/Trim.flows
+
+
+ </help>
+</tool>
--- a/mothur/tools/mothur/unifrac.weighted.xml	Wed Oct 05 10:37:11 2011 -0500
+++ b/mothur/tools/mothur/unifrac.weighted.xml	Tue Nov 08 11:45:32 2011 -0600
@@ -23,7 +23,7 @@
    --distance=$distance
   #end if
   $root
-  --processors=2
+  --processors=8
  </command>
  <inputs>
   <param name="tree" type="data" format="tre" label="tree - Tree"/>