Mercurial > repos > jjohnson > mothur_toolsuite
changeset 15:a6189f58fedb
Mothur - updated for Mothur version 1.22.0
line wrap: on
line diff
--- a/mothur/README Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/README Tue Nov 08 11:45:32 2011 -0600 @@ -1,7 +1,9 @@ Provides galaxy tools for the Mothur metagenomics package - http://www.mothur.org/wiki/Main_Page -Install mothur v.1.20.0 on your galaxy system so galaxy can execute the mothur command - ( This version of wrappers is designed for Mothur version 1.19 - it may work on later versions ) +(The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of cpu processors used be mothur commands) + +Install mothur v.1.22.0 on your galaxy system so galaxy can execute the mothur command + ( This version of wrappers is designed for Mothur version 1.22 - it may work on later versions ) http://www.mothur.org/wiki/Download_mothur http://www.mothur.org/wiki/Installation ( This Galaxy Mothur wrapper will invoke Mothur in command line mode: http://www.mothur.org/wiki/Command_line_mode ) @@ -45,6 +47,8 @@ SILVA-compatible mask: - lane1349.silva.filter - Pat Schloss's transcription of the mask from the Lane paper http://www.mothur.org/w/images/6/6d/Lane1349.silva.filter + Lookup Files for sff flow analysis using shhh.flows: + http://www.mothur.org/wiki/Alignment_database Example from UMN installation: (We also made these available in a Galaxy public data library) /project/db/galaxy/mothur/Silva.bacteria.zip @@ -117,6 +121,9 @@ <datatype extension="pair.dist" type="galaxy.datatypes.metagenomics:PairwiseDistanceMatrix" display_in_upload="true"/> <datatype extension="square.dist" type="galaxy.datatypes.metagenomics:SquareDistanceMatrix" display_in_upload="true"/> <datatype extension="lower.dist" type="galaxy.datatypes.metagenomics:LowerTriangleDistanceMatrix" display_in_upload="true"/> + <datatype extension="ref.taxonomy" type="galaxy.datatypes.metagenomics:RefTaxonomy" display_in_upload="true"> + <converter file="ref_to_seq_taxonomy_converter.xml" target_datatype="seq.taxonomy"/> + </datatype> <datatype extension="seq.taxonomy" 
type="galaxy.datatypes.metagenomics:SequenceTaxonomy" display_in_upload="true"/> <datatype extension="rdp.taxonomy" type="galaxy.datatypes.metagenomics:RDPSequenceTaxonomy" display_in_upload="true"/> <datatype extension="cons.taxonomy" type="galaxy.datatypes.metagenomics:ConsensusTaxonomy" display_in_upload="true"/> @@ -127,6 +134,7 @@ <datatype extension="masked.quan" type="galaxy.datatypes.metagenomics:MaskedQuantile" display_in_upload="true"/> <datatype extension="filtered.masked.quan" type="galaxy.datatypes.metagenomics:FilteredMaskedQuantile" display_in_upload="true"/> <datatype extension="axes" type="galaxy.datatypes.metagenomics:Axes" display_in_upload="true"/> + <datatype extension="sff.flow" type="galaxy.datatypes.metagenomics:SffFlow" display_in_upload="true"/> <datatype extension="tre" type="galaxy.datatypes.data:Newick" display_in_upload="true"/> <!-- End Mothur Datatypes --> @@ -138,10 +146,13 @@ <tool file="mothur/get.groups.xml"/> <tool file="mothur/remove.groups.xml"/> <tool file="mothur/merge.groups.xml"/> + <tool file="mothur/count.groups.xml"/> <tool file="mothur/make.design.xml"/> <tool file="mothur/sub.sample.xml"/> <label text="Mothur Sequence Analysis" id="mothur_sequence_analysis"/> <tool file="mothur/sffinfo.xml"/> + <tool file="mothur/trim.flows.xml"/> + <tool file="mothur/shhh.flows.xml"/> <tool file="mothur/make.fastq.xml"/> <tool file="mothur/fastq.info.xml"/> <tool file="mothur/summary.seqs.xml"/> @@ -169,6 +180,7 @@ <tool file="mothur/nmds.xml"/> <tool file="mothur/corr.axes.xml"/> <tool file="mothur/classify.seqs.xml"/> + <tool file="mothur/seq.error.xml"/> <label text="Mothur Sequence Chimera Detection" id="mothur_sequence_chimera"/> <tool file="mothur/chimera.bellerophon.xml"/> <tool file="mothur/chimera.ccode.xml"/> @@ -200,6 +212,7 @@ <tool file="mothur/get.group.xml"/> <tool file="mothur/bin.seqs.xml"/> <tool file="mothur/get.sharedseqs.xml"/> + <tool file="mothur/summary.tax.xml"/> <label text="Mothur Single Sample Analysis" 
id="mothur_single_sample_analysis"/> <tool file="mothur/collect.single.xml"/> <tool file="mothur/rarefaction.single.xml"/> @@ -235,10 +248,11 @@ <tool file="mothur/TreeVector.xml"/> </section> <!-- metagenomics_mothur --> - ############ DESIGN NOTES ######################################################################################################### Each mothur command has its own tool_config (.xml) file, but all call the same python wrapper code: mothur_wrapper.py + (The environment variable MOTHUR_MAX_PROCESSORS can be used to limit the number of cpu processors used by mothur commands) + + * Every mothur tool will call mothur_wrapper.py script with a --cmd= parameter that gives the mothur command name. * Every tool will produce the logfile of the mothur run as an output. * When the outputs of a mothur command could be determined in advance, they are included in the --result= parameter to mothur_wrapper.py
--- a/mothur/lib/galaxy/datatypes/metagenomics.py Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/lib/galaxy/datatypes/metagenomics.py Tue Nov 08 11:45:32 2011 -0600 @@ -722,7 +722,57 @@ close(fh) return False -class SequenceTaxonomy(Tabular): +class RefTaxonomy(Tabular): + file_ext = 'ref.taxonomy' + """ + A table with 2 or 3 columns: + - SequenceName + - Taxonomy (semicolon-separated taxonomy in descending order) + - integer ? + Example: 2-column ( http://www.mothur.org/wiki/Taxonomy_outline ) + X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma; + X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida; + AF052717.1 Eukaryota;Parabasalidea; + Example: 3-column ( http://vamps.mbl.edu/resources/databases.php ) + v3_AA008 Bacteria;Firmicutes;Bacilli;Lactobacillales;Streptococcaceae;Streptococcus 5 + v3_AA016 Bacteria 120 + v3_AA019 Archaea;Crenarchaeota;Marine_Group_I 1 + """ + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + self.column_names = ['name','taxonomy'] + + def sniff( self, filename ): + """ + Determines whether the file is a SequenceTaxonomy + """ + try: + pat = '^([^ \t\n\r\x0c\x0b;]+([(]\\d+[)])?(;[^ \t\n\r\x0c\x0b;]+([(]\\d+[)])?)*(;)?)$' + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + if not line: + break #EOF + line = line.strip() + if line: + fields = line.split('\t') + if 2 <= len(fields) <= 3: + return False + if not re.match(pat,fields[1]): + return False + count += 1 + if count > 10: + break + if count > 0: + return True + except: + pass + finally: + fh.close() + return False + +class SequenceTaxonomy(RefTaxonomy): file_ext = 'seq.taxonomy' """ A table with 2 columns: @@ -933,6 +983,57 @@ fh.close() return False +class SffFlow(Tabular): + MetadataElement( name="flow_values", default="", no_value="", optional=True , desc="Total number of flow values", readonly=True) + MetadataElement( 
name="flow_order", default="TACG", no_value="TACG", desc="Total number of flow values", readonly=False) + file_ext = 'sff.flow' + """ + The first line is the total number of flow values - 800 for Titanium data. For GS FLX it would be 400. + Following lines contain: + - SequenceName + - the number of useable flows as defined by 454's software + - the flow intensity for each base going in the order of TACG. + Example: + 800 + GQY1XT001CQL4K 85 1.04 0.00 1.00 0.02 0.03 1.02 0.05 ... + GQY1XT001CQIRF 84 1.02 0.06 0.98 0.06 0.09 1.05 0.07 ... + GQY1XT001CF5YW 88 1.02 0.02 1.01 0.04 0.06 1.02 0.03 ... + """ + def __init__(self, **kwd): + Tabular.__init__( self, **kwd ) + + def set_meta( self, dataset, overwrite = True, skip = 1, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, 1, max_data_lines) + try: + fh = open( filename ) + line = fh.readline() + line = line.strip() + flow_values = int(line) + dataset.metadata.flow_values = flow_values + finally: + fh.close() + + def make_html_table( self, dataset, skipchars=[] ): + """Create HTML table, used for displaying peek""" + out = ['<table cellspacing="0" cellpadding="3">'] + comments = [] + try: + # Generate column header + out.append('<tr>') + out.append( '<th>%d. Name</th>' % 1 ) + out.append( '<th>%d. Flows</th>' % 2 ) + for i in range( 3, dataset.metadata.columns+1 ): + base = dataset.metadata.flow_order[(i+1)%4] + out.append( '<th>%d. %d %s</th>' % (i-2,base) ) + out.append('</tr>') + out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) ) + out.append( '</table>' ) + out = "".join( out ) + except Exception, exc: + out = "Can't create peek %s" % str( exc ) + return out + + ## Qiime Classes class QiimeMetadataMapping(Tabular):
--- a/mothur/tool-data/mothur_aligndb.loc Wed Oct 05 10:37:11 2011 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of metagenomics files. -#file has this format (white space characters are TAB characters): -# -#<dbname> <file_base> -# -greengenes /project/db/galaxy/mothur/core_set_aligned.imputed.fasta -silva archaea /project/db/galaxy/mothur/Silva.archaea/silva.archaea.fasta -silva bacteria /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.fasta -silva eukarya /project/db/galaxy/mothur/silva.eukarya.fasta -silva archaea nogap /project/db/galaxy/mothur/Silva.archaea/nogap.archaea.fasta -silva bacteria nogap /project/db/galaxy/mothur/silva.bacteria/nogap.bacteria.fasta -silva eukarya nogap /project/db/galaxy/mothur/nogap.eukarya.fasta -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tool-data/mothur_aligndb.loc.sample Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,16 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of metagenomics files. +#file has this format (white space characters are TAB characters): +# +# Reference Alignments: http://www.mothur.org/wiki/Alignment_database +# +#<dbname> <file_base> +# +greengenes /project/db/galaxy/mothur/core_set_aligned.imputed.fasta +silva archaea /project/db/galaxy/mothur/Silva.archaea/silva.archaea.fasta +silva bacteria /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.fasta +silva eukarya /project/db/galaxy/mothur/silva.eukarya.fasta +silva archaea nogap /project/db/galaxy/mothur/Silva.archaea/nogap.archaea.fasta +silva bacteria nogap /project/db/galaxy/mothur/silva.bacteria/nogap.bacteria.fasta +silva eukarya nogap /project/db/galaxy/mothur/nogap.eukarya.fasta +
--- a/mothur/tool-data/mothur_calculators.loc Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tool-data/mothur_calculators.loc Tue Nov 08 11:45:32 2011 -0600 @@ -1,6 +1,8 @@ #This is a sample file distributed with Galaxy that enables # Mothur tools to present a choice of values for calculators +# (There aren't any local files to point to, so this doesn't need to be modified) # source - http://www.mothur.org/wiki/Calculators +# #file has this format (fields are separated by TAB characters): # ##
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tool-data/mothur_calculators.loc.sample Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,98 @@ +#This is a sample file distributed with Galaxy that enables +# Mothur tools to present a choice of values for calculators +# (There aren't any local files to point to, so this doesn't need to be modified) +# source - http://www.mothur.org/wiki/Calculators +# +#file has this format (fields are separated by TAB characters): +# +## +# collect.single ace, bergerparker, boneh, bootstrap, bstick, chao, coverage, default, efron, geometric, goodscoverage, heip, invsimpson, jack, logseries, npshannon, nseqs, qstat, shannon, shannoneven, shen, simpson, simpsoneven, smithwilson, sobs, solow +# summary.single ace, bergerparker, boneh, bootstrap, bstick, chao, coverage, default, efron, geometric, goodscoverage, heip, invsimpson, jack, logseries, npshannon, nseqs, qstat, shannon, shannoneven, shen, simpson, simpsoneven, smithwilson, sobs, solow +# rarefaction.single ace, bootstrap, chao, coverage, default, heip, invsimpson, jack, npshannon, nseqs, shannon, shannoneven, simpson, simpsoneven, smithwilson, sobs +## +# collect.shared anderberg, braycurtis, canberra, default, gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker +# summary.shared anderberg, braycurtis, canberra, default, gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, 
structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker +# dist.shared anderberg, braycurtis, canberra, gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker +# tree.shared anderberg, braycurtis, canberra, gower, hamming, hellinger, jabund, jclass, jest, kstest, kulczynski, kulczynskicody, lennon, manhattan, memchi2, memchord, memeuclidean, mempearson, morisitahorn, ochiai, odum, sharedace, sharedchao, sharednseqs, sharedsobs, soergel, sorabund, sorclass, sorest, spearman, speciesprofile, structchi2, structchord, structeuclidean, structkulczynski, structpearson, thetan, thetayc, whittaker +# heatmap.sim braycurtis, jabund, jclass, jest, morisitahorn, sorabund, sorclass, sorest, thetan, thetayc +## +# venn sobs,chao,ace sharedsobs,sharedchao,sharedace +# rarefaction.shared sharednseqs,sharedobserved +# +## +#<calculator> <mult> <mult2> <category> <description> +# +##Community richness +ace single sing Community richness the ACE estimator +bootstrap single sing Community richness the bootstrap estimator +chao single sing Community richness the Chao1 estimator +jack single sing Community richness the jackknife estimator +sobs single sing Community richness the observed richness +##Community evenness +simpsoneven single sing Community evenness a Simpson index-based measure of evenness +shannoneven single sing Community evenness a Shannon index-based measure of evenness +heip single sing Community evenness Heip's metric of community evenness +smithwilson single sing Community evenness Smith and Wilson's metric of community evenness +##Community diversity +bergerparker single xxxx Community diversity the Berger-Parker 
index +coverage single sing Community diversity the sampling coverage +goodscoverage single sing Community diversity the Good's estimate of sampling coverage +invsimpson single sing Community diversity the Simpson index +npshannon single sing Community diversity the non-parametric Shannon index +qstat single xxxx Community diversity the Q statistic +shannon single sing Community diversity the Shannon index +simpson single sing Community diversity the Simpson index +##Estimates of number of additional OTUs observed with extra sampling +boneh single xxxx Estimator Boneh's estimator +efron single xxxx Estimator Efron's estimator +shen single xxxx Estimator Shen's estimator +solow single xxxx Estimator Solow's estimator +##Statistical distributions +logseries single xxxx Statistical distribution tests whether observed data follow the log series distribution +geometric single xxxx Statistical distribution tests whether observed data follow the geometric series distribution +bstick single xxxx Statistical distribution tests whether observed data follow the broken stick distribution +## Shared community richness +sharedsobs shared xxxx Shared community richness the observed richness shared between two or more samples +sharedchao shared xxxx Shared community richness the two or more sample shared Chao1 richness estimator +sharedace shared xxxx Shared community richness the two sample shared ACE richness estimator +##Similarity in community membership +anderberg shared xxxx Community Membership Similarity the Anderberg similarity coefficient +jclass shared shar Community Membership Similarity the traditional Jaccard similarity coefficient based on the observed richness +jest shared shar Community Membership Similarity the Jaccard similarity coefficient based on the Chao1 estimated richnesses +kulczynski shared xxxx Community Membership Similarity the Kulczynski similarity coefficient +kulczynskicody shared xxxx Community Membership Similarity the Kulczynski-Cody similarity 
coefficient +kstest shared xxxx Kolmogorov-Smirnov test +lennon shared xxxx Community Membership Similarity the Lennon similarity coefficient +ochiai shared xxxx Community Membership Similarity the Ochiai similarity coefficient +sorclass shared shar Community Membership Similarity the Sorenson similarity coefficient based on the observed richness +sorest shared shar Community Membership Similarity the Sorenson similarity coefficient based on the Chao1 estimated richnesses +whittaker shared xxxx Community Membership Similarity the Whittaker similarity coefficient +hamming shared xxxx Community Membership Similarity - +memchi2 shared xxxx Community Membership Similarity - +memchord shared xxxx Community Membership Similarity - +memeuclidean shared xxxx Community Membership Similarity - +mempearson shared xxxx Community Membership Similarity - +##Similarity in community structure +braycurtis shared shar Community Structure Similarity the Bray-Curtis similarity coefficient +jabund shared shar Community Structure Similarity the abundance-based Jaccard similarity coefficient +morisitahorn shared shar Community Structure Similarity the Morisita-Horn similarity coefficient +sorabund shared shar Community Structure Similarity the abundance-based Sorenson similarity coefficient +thetan shared shar Community Structure Similarity the Smith theta similarity coefficient +thetayc shared shar Community Structure Similarity the Yue & Clayton theta similarity coefficient +canberra shared xxxx Community Structure Similarity - +gower shared xxxx Community Structure Similarity - +hellinger shared xxxx Community Structure Similarity - +manhattan shared xxxx Community Structure Similarity - +odum shared xxxx Community Structure Similarity - +soergel shared xxxx Community Structure Similarity - +spearman shared xxxx Community Structure Similarity - +speciesprofile shared xxxx Community Structure Similarity - +structchi2 shared xxxx Community Structure Similarity - +structchord shared xxxx 
Community Structure Similarity - +structeuclidean shared xxxx Community Structure Similarity - +structkulczynski shared xxxx Community Structure Similarity - +structpearson shared xxxx Community Structure Similarity - +##Utility calculators +nseqs single sing Utility the number of sequences in a sample +sharednseqs shared rare Utility the number of sequences in two samples +sharedobserved shared rare Utility the number of sequences in two samples
--- a/mothur/tool-data/mothur_calulators.loc Wed Oct 05 10:37:11 2011 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -#This is a sample file distributed with Galaxy that enables -# Mothur tools to present a choice of values for calculators -# source - http://www.mothur.org/wiki/Calculators -#file has this format (fields are separated by TAB characters): -# -# venn sobs,chao,ace sharedsobs,sharedchao,sharedace -# rarefaction.shared sharednseqs,sharedobserved -# rarefaction.single ace,bootstrap,chao,coverage,heip,invsimpson,jack,npshannon,nseqs,shannon,shannoneven,simpson,simpsoneven,smithwilson,sobs -# dist.shared braycurtis,jabund,jclass,jest,morisitahorn,sorabund,sorclass,sorest,thetan,thetayc -# tree.shared braycurtis,jabund,jclass,jest,morisitahorn,sorabund,sorclass,sorest,thetan,thetayc -# heatmap.sim braycurtis,jabund,jclass,jest,morisitahorn,sorabund,sorclass,sorest,thetan,thetayc -# collect.shared anderberg,braycurtis,jabund,jclass,jest,kstest,kulczynski,kulczynskicody,lennon,morisitahorn,ochiai,sharedace,sharedchao,sharednseqs,sharedsobs,sorabund,sorclass,sorest,thetan,thetayc,whittaker -# summary.shared anderberg,braycurtis,jabund,jclass,jest,kstest,kulczynski,kulczynskicody,lennon,morisitahorn,ochiai,sharedace,sharedchao,sharednseqs,sharedsobs,sorabund,sorclass,sorest,thetan,thetayc,whittaker -# collect.single ace,bergerparker,boneh,bootstrap,bstick,chao,coverage,efron,geometric,goodscoverage,heip,invsimpson,jack,logseries,npshannon,nseqs,qstat,shannon,shannoneven,shen,simpson,simpsoneven,smithwilson,sobs,solow -# summary.single ace,bergerparker,boneh,bootstrap,bstick,chao,coverage,efron,geometric,goodscoverage,heip,invsimpson,jack,logseries,npshannon,nseqs,qstat,shannon,shannoneven,shen,simpson,simpsoneven,smithwilson,sobs,solow -# -#<calculator> <mult> <mult2> <category> <description> -# -#Community richness -sobs single sing Community richness the observed richness -chao single sing Community richness the Chao1 estimator -ace 
single sing Community richness the ACE estimator -jack single sing Community richness the jackknife estimator -bootstrap single sing Community richness the bootstrap estimator -#Community diversity -bergerparker single xxxx Community diversity the Berger-Parker index -shannon single sing Community diversity the Shannon index -npshannon single sing Community diversity the non-parametric Shannon index -simpson single sing Community diversity the Simpson index -simpsoneven single sing Community diversity the Simpson index -invsimpson single sing Community diversity the Simpson index -coverage single sing Community diversity the sampling coverage coverage -qstat single xxxx Community diversity the Q statistic -#Estimates of number of additional OTUs observed with extra sampling -boneh single xxxx Estimator Boneh's estimator -efron single xxxx Estimator Efron's estimator -shen single xxxx Estimator Shen's estimator -solow single xxxx Estimator Solow's estimator -#Statistical distributions -logseries single xxxx Statistical distribution tests whether observed data follow the log series distribution -geometric single xxxx Statistical distribution tests whether observed data follow the geometric series distribution -bstick single xxxx Statistical distribution tests whether observed data follow the broken stick distribution -# Shared community richness -sharedsobs shared xxxx Shared community richness the observed richness shared between two or more samples -sharedchao shared xxxx Shared community richness the two or more sample shared Chao1 richness estimator -sharedace shared xxxx Shared community richness the two sample shared ACE richness estimator -#Similarity in community membership -anderberg shared xxxx Community Membership Similarity the Anderberg similarity coefficient -jclass shared shar Community Membership Similarity the traditional Jaccard similarity coefficient based on the observed richness -jest shared shar Community Membership Similarity the Jaccard 
similarity coefficient based on the Chao1 estimated richnesses -kulczynski shared xxxx Community Membership Similarity the Kulczynski similarity coefficient -kulczynskicody shared xxxx Community Membership Similarity the Kulczynski-Cody similarity coefficient -lennon shared xxxx Community Membership Similarity the Lennon similarity coefficient -ochiai shared xxxx Community Membership Similarity the Ochiai similarity coefficient -sorclass shared shar Community Membership Similarity the Sorenson similarity coefficient based on the observed richness -sorest shared shar Community Membership Similarity the Sorenson similarity coefficient based on the Chao1 estimated richnesses -whittaker shared xxxx Community Membership Similarity the Whittaker similarity coefficient -#Similarity in community structure -braycurtis shared shar Community Structure Similarity the Bray-Curtis similarity coefficient -jabund shared shar Community Structure Similarity the abundance-based Jaccard similarity coefficient -morisitahorn shared shar Community Structure Similarity the Morisita-Horn similarity coefficient -sorabund shared shar Community Structure Similarity the abundance-based Sorenson similarity coefficient -thetan shared shar Community Structure Similarity the Smith theta similarity coefficient -thetayc shared shar Community Structure Similarity the Yue & Clayton theta similarity coefficient -#Utility calculators -nseqs single sing Utility the number of sequences in a sample -sharednseqs shared rare Utility the number of sequences in two samples -sharedobserved shared rare Utility the number of sequences in two samples
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tool-data/mothur_lookup.loc.sample Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,13 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of metagenomics files. +#file has this format (white space characters are TAB characters): +# +# lookup files from: http://www.mothur.org/wiki/Lookup_files +# +#<name> <file_base> +# +GS20 /project/db/galaxy/mothur/lookup/LookUp_GS20.pat +GSFLX /project/db/galaxy/mothur/lookup/LookUp_GSFLX.pat +Titanium /project/db/galaxy/mothur/lookup/LookUp_Titanium.pat + +
--- a/mothur/tool-data/mothur_map.loc Wed Oct 05 10:37:11 2011 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of metagenomics files. -#file has this format (white space characters are TAB characters): -# -#<name> <file_base> -# -greengenes /project/db/galaxy/mothur/gg.ss.map -silva /project/db/galaxy/mothur/silva.ss.map
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tool-data/mothur_map.loc.sample Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,10 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of metagenomics files. +#file has this format (white space characters are TAB characters): +# +# Secondary structure maps: http://www.mothur.org/wiki/Secondary_structure_map +# +#<name> <file_base> +# +greengenes /project/db/galaxy/mothur/gg.ss.map +silva /project/db/galaxy/mothur/silva.ss.map
--- a/mothur/tool-data/mothur_taxonomy.loc Wed Oct 05 10:37:11 2011 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of metagenomics files. -#file has this format (white space characters are TAB characters): -# -#<taxonomyname> <file_base> -# -archaea.gg /project/db/galaxy/mothur/Silva.archaea/silva.archaea.gg.tax -archaea.silva /project/db/galaxy/mothur/Silva.archaea/silva.archaea.silva.tax -archaea.rdp /project/db/galaxy/mothur/Silva.archaea/silva.archaea.rdp.tax -archaea.ncbi /project/db/galaxy/mothur/Silva.archaea/silva.archaea.ncbi.tax -bacteria.gg /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.gg.tax -bacteria.silva /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.silva.tax -bacteria.ncbi /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.ncbi.tax -bacteria.rdp /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp.tax -bacteria.rdp6 /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp6.tax -eukarya.silva /project/db/galaxy/mothur/silva.eukarya.silva.tax -eukarya.ncbi /project/db/galaxy/mothur/silva.eukarya.ncbi.tax -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tool-data/mothur_taxonomy.loc.sample Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,20 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of metagenomics files. +#file has this format (white space characters are TAB characters): +# +# Silva reference files: http://www.mothur.org/wiki/Silva_reference_files +# +#<taxonomyname> <file_base> +# +archaea.gg /project/db/galaxy/mothur/Silva.archaea/silva.archaea.gg.tax +archaea.silva /project/db/galaxy/mothur/Silva.archaea/silva.archaea.silva.tax +archaea.rdp /project/db/galaxy/mothur/Silva.archaea/silva.archaea.rdp.tax +archaea.ncbi /project/db/galaxy/mothur/Silva.archaea/silva.archaea.ncbi.tax +bacteria.gg /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.gg.tax +bacteria.silva /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.silva.tax +bacteria.ncbi /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.ncbi.tax +bacteria.rdp /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp.tax +bacteria.rdp6 /project/db/galaxy/mothur/silva.bacteria/silva.bacteria.rdp6.tax +eukarya.silva /project/db/galaxy/mothur/silva.eukarya.silva.tax +eukarya.ncbi /project/db/galaxy/mothur/silva.eukarya.ncbi.tax +
--- a/mothur/tools/mothur/chimera.slayer.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/chimera.slayer.xml Tue Nov 08 11:45:32 2011 -0600 @@ -1,4 +1,4 @@ -<tool id="mothur_chimera_slayer" name="Chimera.slayer" version="1.21.0"> +<tool id="mothur_chimera_slayer" name="Chimera.slayer" version="1.22.0"> <description>Find putative chimeras using slayer</description> <command interpreter="python"> mothur_wrapper.py @@ -12,6 +12,9 @@ #if $alignment.name.__str__ != "None" and len($alignment.name.__str__) > 0: --name=$alignment.name #end if + #if $alignment.group.__str__ != '': + --group=$alignment.group + #end if #else: --reference=$alignment.template #end if @@ -56,6 +59,8 @@ </when> <when value="self"> <param name="name" type="data" format="names" optional="true" label="names - Sequences Names"/> + <param name="group" type="data" format="groups" optional="true" label="group - Sequences Name reference" + help="use the more abundant sequences from the same sample to check the query sequence"/> </when> </conditional> <conditional name="options">
--- a/mothur/tools/mothur/chimera.uchime.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/chimera.uchime.xml Tue Nov 08 11:45:32 2011 -0600 @@ -1,4 +1,4 @@ -<tool id="mothur_chimera_uchime" name="Chimera.uchime" version="1.20.0"> +<tool id="mothur_chimera_uchime" name="Chimera.uchime" version="1.22.0"> <description>Find putative chimeras using uchime</description> <command interpreter="python"> mothur_wrapper.py @@ -23,6 +23,9 @@ #if float($template.abskew.__str__) > 0: --abskew=$template.abskew #end if + #if $template.group.__str__ != '': + --group=$template.group + #end if #elif $template.source == 'names': --name=$template.name #end if @@ -95,6 +98,9 @@ </when> <when value="self"> <param name="abskew" type="float" value="1.9" label="abskew - Abundance skew (default 1.9)" help="Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query)"/> + <param name="group" type="data" format="groups" optional="true" label="group - Sequences Name reference" + help="use the more abundant sequences from the same sample to check the query sequence"/> + </when> <when value="names"> <param name="name" type="data" format="names" label="name - Sequence names"/>
--- a/mothur/tools/mothur/cluster.split.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/cluster.split.xml Tue Nov 08 11:45:32 2011 -0600 @@ -1,4 +1,4 @@ -<tool id="mothur_cluster_split" name="Cluster.split" version="1.19.0"> +<tool id="mothur_cluster_split" name="Cluster.split" version="1.22.0"> <description>Assign sequences to OTUs (Operational Taxonomic Unit) splits large matrices</description> <command interpreter="python"> mothur_wrapper.py @@ -80,7 +80,7 @@ <param name="fasta" type="data" format="fasta" label="fasta - Sequences"/> <param name="name" type="data" format="names" label="name - Sequences Name reference"/> <param name="taxonomy" type="data" format="seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/> - <param name="taxlevel" type="integer" value="1" label="taxlevel - taxonomy level for split (default=1)" + <param name="taxlevel" type="integer" value="3" label="taxlevel - taxonomy level for split (default=3)" help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/> </when> </conditional> <!-- splitby --> @@ -90,7 +90,7 @@ <option value="average" selected="true">Average neighbor</option> </param> <param name="cutoff" type="float" value="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0" - help="Ignore pairwise distances larger than this, a common value would be 0.10"/> + help="Ignore pairwise distances larger than this, a common value would be 0.25"/> <param name="hard" type="boolean" checked="true" truevalue="--hard=true" falsevalue="--hard=true" label="hard - Use hard cutoff instead of rounding" help=""/> <param name="precision" type="select" optional="true" label="precision - Precision for rounding distance values"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tools/mothur/count.groups.xml Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,73 @@ +<tool id="mothur_count_groups" name="Count.groups" version="1.22.0" > + <description>counts the number of sequences represented by a specific group or set of groups</description> + <command interpreter="python"> + mothur_wrapper.py + --cmd='count.groups' + --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.groups.count$:'$grp_count + --outputdir='$logfile.extra_files_path' + #if isinstance($group.datatype, $__app__.datatypes_registry.get_datatype_by_extension('shared').__class__): + --shared=$group + #else: + --group=$group + #end if + #if $groupnames.source == 'groups': + #if $groupnames.groups.__str__ != "None" and len($groupnames.groups.__str__) > 0: + --groups=$groupnames.groups + #end if + #elif $groupnames.source == 'accnos': + #if $groupnames.accnos.__str__ != "None" and len($groupnames.accnos.__str__) > 0: + --accnos=$groupnames.accnos + #end if + #end if + </command> + <inputs> + <param name="group" type="data" format="groups,shared" label="group or shared - Group file for sequence count"/> + <conditional name="groupnames"> + <param name="source" type="select" label="Filter group names"> + <option value="none">Report on All Groups</option> + <option value="groups">A List of Group Names</option> + <option value="accnos">A History Group Name Accnos Dataset</option> + </param> + <when value="groups"> + <param name="groups" type="select" label="groups - Pick groups to include" multiple="true"> + <help>All groups displayed if none are selected.</help> + <options from_dataset="group"> + <column name="name" index="1"/> + <column name="value" index="1"/> + <filter type="unique_value" name="unq_grp" column="1" /> + </options> + </param> + </when> + <when value="accnos"> + <param name="accnos" type="data" format="accnos" optional="true" label="accnos - Group Names from your history"/> + </when> + </conditional> + </inputs> + 
<outputs> + <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" /> + <data format="tabular" name="grp_count" label="${tool.name} on ${on_string}: group.count" /> + </outputs> + <requirements> + <requirement type="binary">mothur</requirement> + </requirements> + <tests> + </tests> + <help> +**Mothur Overview** + +Mothur_, initiated by Dr. Patrick Schloss and his software development team +in the Department of Microbiology and Immunology at The University of Michigan, +provides bioinformatics for the microbial ecology community. + +.. _Mothur: http://www.mothur.org/wiki/Main_Page + +**Command Documentation** + +The count.groups_ command counts sequences from a specific group or set of groups from a group_ or shared_ file. + +.. _shared: http://www.mothur.org/wiki/Shared_file +.. _group: http://www.mothur.org/wiki/Group_file +.. _count.groups: http://www.mothur.org/wiki/Count.groups + + </help> +</tool>
--- a/mothur/tools/mothur/make.shared.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/make.shared.xml Tue Nov 08 11:45:32 2011 -0600 @@ -1,4 +1,4 @@ -<tool id="mothur_make_shared" name="Make.shared" version="1.19.0" force_history_refresh="True"> +<tool id="mothur_make_shared" name="Make.shared" version="1.22.0" force_history_refresh="True"> <description>Make a shared file from a list and a group</description> <command interpreter="python"> mothur_wrapper.py @@ -18,9 +18,11 @@ #if $groups.__str__ != "None" and len($groups.__str__) > 0: --groups=$groups #end if + #* #if $ordergroup.__str__ != "None" and len($ordergroup.__str__) > 0: --ordergroup=$ordergroup #end if + *# </command> <inputs> <param name="list" type="data" format="list" label="list - OTU List"/> @@ -39,7 +41,9 @@ <filter type="unique_value" name="unq_grp" column="1" /> </options> </param> + <!-- <param name="ordergroup" type="data" format="groups" optional="true" label="ordergroup - one column list of group names in desired order"/> + --> <param name="as_datasets" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Create a new history dataset for each group rabund"/> </inputs> <outputs>
--- a/mothur/tools/mothur/mothur_wrapper.py Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/mothur_wrapper.py Tue Nov 08 11:45:32 2011 -0600 @@ -4,7 +4,7 @@ http://www.mothur.org/ Supports mothur version -mothur v.1.20.0 +mothur v.1.22.0 Class encapsulating Mothur galaxy tool. Expect each invocation to include: @@ -186,8 +186,8 @@ cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','reference'], 'optional' : ['filter','mask','window','numwanted','save','processors']}) cmd_dict['chimera.check'] = dict({'required' : ['fasta','reference'], 'optional' : ['ksize','svg','name','increment','save','processors']}) cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','reference'], 'optional' : ['conservation','quantile','filter','mask','window','increment','save','processors']}) - cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','blastlocation','save','processors']}) - cmd_dict['chimera.uchime'] = dict({'required' : ['fasta'], 'optional' : ['name','reference','abskew','chimealns','minh','mindiv','xn','dn','xa','chunks','minchunk','idsmoothwindow','minsmoothid','maxp','skipgaps','skipgaps2','minlen','maxlen','ucl','queryfract','processors']}) + cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','group','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','blastlocation','save','processors']}) + cmd_dict['chimera.uchime'] = dict({'required' : ['fasta'], 'optional' : ['name','group','reference','abskew','chimealns','minh','mindiv','xn','dn','xa','chunks','minchunk','idsmoothwindow','minsmoothid','maxp','skipgaps','skipgaps2','minlen','maxlen','ucl','queryfract','processors']}) cmd_dict['chop.seqs'] = dict({'required' : ['fasta','numbases'], 
'optional' : ['countgaps','keep','short']}) cmd_dict['classify.otu'] = dict({'required' : ['list','taxonomy'],'optional' : ['name','cutoff','label','group','probs','basis','reftaxonomy']}) cmd_dict['classify.seqs'] = dict({'required' : ['fasta','reference','taxonomy'],'optional' : ['name','search','ksize','method','match','mismatch','gapopen','gapextend','numwanted','probs','save','processors']}) @@ -201,6 +201,7 @@ cmd_dict['collect.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','size','label','freq']}) cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label','cutoff']}) cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']}) + cmd_dict['count.groups'] = dict({'required' : ['group','shared'], 'optional' : ['accnos','groups']}) cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups']}) cmd_dict['degap.seqs'] = dict({'required' : ['fasta']}) cmd_dict['deunique.seqs'] = dict({'required' : ['fasta','name'], 'optional' : []}) @@ -229,7 +230,7 @@ cmd_dict['list.seqs'] = dict({'required' : [['fasta','name','group','list','alignreport','taxonomy']]}) cmd_dict['make.fastq'] = dict({'required' : ['fasta','qfile'] , 'optional' : []}) cmd_dict['make.group'] = dict({'required' : ['fasta','groups'], 'optional' : []}) - cmd_dict['make.shared'] = dict({'required' : ['list','group'], 'optional' : ['label','groups','ordergroup']}) + cmd_dict['make.shared'] = dict({'required' : ['list','group'], 'optional' : ['label','groups']}) cmd_dict['mantel'] = dict({'required' : ['phylip','phylip2'] , 'optional' : ['method','iters']}) cmd_dict['merge.files'] = dict({'required' : ['input','output']}) cmd_dict['merge.groups'] = dict({'required' : ['shared','design'], 'optional' : ['groups', 'label']}) @@ -244,7 +245,7 @@ cmd_dict['pcoa'] = dict({'required' : ['phylip'], 'optional' : ['metric']}) 
cmd_dict['phylo.diversity'] = dict({'required' : ['tree'],'optional' : ['group','name','groups','iters','freq','scale','rarefy','collect','summary','processors']}) cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']}) - cmd_dict['pre.cluster'] = dict({'required' : ['fasta'], 'optional' : ['name','diffs']}) + cmd_dict['pre.cluster'] = dict({'required' : ['fasta'], 'optional' : ['name','diffs','group']}) cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble']}) cmd_dict['rarefaction.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','iters','label','freq','processors']}) cmd_dict['remove.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','shared','taxonomy']}) @@ -255,14 +256,22 @@ cmd_dict['reverse.seqs'] = dict({'required' : ['fasta']}) cmd_dict['screen.seqs'] = dict({'required' : ['fasta'], 'optional' : ['start','end','maxambig','maxhomop','minlength','maxlength','criteria','optimize','name','group','alignreport','processors']}) cmd_dict['sens.spec'] = dict({'required' : ['list',['column','phylip']] , 'optional' : ['label','cutoff','hard','precision']}) + + cmd_dict['seq.error'] = dict({'required' : ['fasta','reference'] , 'optional' : ['name','qfile','report','ignorechimeras','threshold','processors']}) + cmd_dict['sffinfo'] = dict({'required' : [['sff','sfftxt']], 'optional' : ['fasta','qfile','trim','sfftxt','flow','accnos']}) + + cmd_dict['shhh.flows'] = dict({'required' : [['flow','files']], 'optional' : ['lookup','maxiter','mindelta','cutoff','sigma','order','processors']}) + cmd_dict['split.abund'] = dict({'required' : ['fasta',['name','list']], 'optional' : ['cutoff','group','groups','label','accnos']}) cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : ['name','groups']}) cmd_dict['sub.sample'] = dict({'required' : 
[['fasta','list','sabund','rabund','shared']], 'optional' : ['name','group','groups','label','size','persample']}) cmd_dict['summary.seqs'] = dict({'required' : ['fasta'], 'optional' : ['name','processors']}) cmd_dict['summary.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','all','distance','processors']}) cmd_dict['summary.single'] = dict({'required' : [['list','sabund','rabund','shared']], 'optional' : ['calc','abund','size','label','groupmode']}) + cmd_dict['summary.tax'] = dict({'required' : ['taxonomy'], 'optional' : ['name','group','reftaxonomy']}) cmd_dict['tree.shared'] = dict({'required' : [['shared','phylip','column']], 'optional' : ['name','groups','calc','cutoff','precision','label']}) + cmd_dict['trim.flows'] = dict({'required' : ['flow'], 'optional' : ['oligos','bdiffs','pdiffs','tdiffs','minflows','maxflows','fasta','signal','noise','maxhomop','order','processors']}) cmd_dict['trim.seqs'] = dict({'required' : ['fasta'], 'optional' : ['name','group','oligos','qfile','qaverage','qthreshold','qwindowaverage','qwindowsize','rollaverage','qstepsize','qtrim','flip','maxambig','maxhomop','minlength','maxlength','bdiffs','pdiffs','tdiffs','allfiles','keepfirst','removelast','processors']}) cmd_dict['unifrac.unweighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']}) cmd_dict['unifrac.weighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']}) @@ -316,6 +325,7 @@ parser.add_option( '--rollaverage', dest='rollaverage', type="int", help='Remove sequences that have a average quality below the value in a rolling window' ) parser.add_option( '--qstepsize', dest='qstepsize', type="int", help='Distance to move a rolling window for each step' ) parser.add_option( '--qtrim', dest='qtrim', help='For sequence below qthreshold, false to scrap file, true to trimmed and in trim file' ) + 
parser.add_option( '--ignorechimeras', dest='ignorechimeras', help='ignorechimeras' ) parser.add_option( '--flip', dest='flip', help='If true, reverse complement the sequences' ) parser.add_option( '--maxambig', dest='maxambig', type="int", help='Number of ambiguous base calls to allow' ) parser.add_option( '--maxhomop', dest='maxhomop', type="int", help='Maximun homopolymer length allowed' ) @@ -333,6 +343,7 @@ parser.add_option( '--group', dest='group', help='A file containing a list of names' ) parser.add_option( '--list', dest='list', help='A file containing a list of names' ) parser.add_option( '--alignreport', dest='alignreport', help='A align.report file ' ) + parser.add_option( '--report', dest='report', help='' ) parser.add_option( '--taxonomy', dest='taxonomy', help='A Taxonomy file' ) parser.add_option( '--reftaxonomy', dest='reftaxonomy', help='A Taxonomy file' ) parser.add_option( '--taxon', dest='taxon', help='A Taxon' ) @@ -383,6 +394,7 @@ parser.add_option( '--all', dest='all', help='Calculate for all' ) parser.add_option( '--freq', dest='freq', type="float", help='Frequency of sequences to choose, as fraction is 0.0 - 1.0 or iteration if int > 1' ) parser.add_option( '--iters', dest='iters', type='int', help='Iterations of randomizations' ) + parser.add_option( '--maxiter', dest='maxiter', type='int', help='Iterations' ) parser.add_option( '--maxiters', dest='maxiters', type='int', help='Iterations of randomizations' ) parser.add_option( '--jumble', dest='jumble', help='If false, just a collector curve across the samples' ) parser.add_option( '--conservation', dest='conservation', help='Template frequency information' ) @@ -400,6 +412,15 @@ parser.add_option( '--svg', dest='svg', help='SVG' ) parser.add_option( '--sfftxt', dest='sfftxt', help='Generate a sff.txt file' ) parser.add_option( '--flow', dest='flow', help='Generate a flowgram file' ) + parser.add_option( '--minflows', dest='minflows', type='int', help='the minimum number of flows that 
each sequence must contain' ) + parser.add_option( '--maxflows', dest='maxflows', type='int', help='the number of flows after which all other flows should be ignored.' ) + parser.add_option( '--signal', dest='signal', type='float', help='threshold for intensity to be signal' ) + parser.add_option( '--noise', dest='noise', type='float', help='threshold for intensity to be noise' ) + parser.add_option( '--mindelta', dest='mindelta', type='float', help='threshold for determining how much change in the flowgram correction' ) + parser.add_option( '--sigma', dest='sigma', type='float', help='sigma option is used to set the dispersion of the data in the expectation-maximization' ) + parser.add_option( '--order', dest='order', help='flow order e.g. TACG' ) + parser.add_option( '--lookup', dest='lookup', help='lookup file that are needed to run shhh.seqs' ) + parser.add_option( '--trim', dest='trim', help='Whether sequences and quality scores are trimmed to the clipQualLeft and clipQualRight values' ) parser.add_option( '--input', dest='input', help='' ) parser.add_option( '--phylip', dest='phylip', help='' )
--- a/mothur/tools/mothur/pre.cluster.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/pre.cluster.xml Tue Nov 08 11:45:32 2011 -0600 @@ -1,4 +1,4 @@ -<tool id="mothur_pre_cluster" name="Pre.cluster" version="1.20.0"> +<tool id="mothur_pre_cluster" name="Pre.cluster" version="1.22.0"> <description>Remove sequences due to pyrosequencing errors</description> <command interpreter="python"> mothur_wrapper.py @@ -13,6 +13,9 @@ #if $name.__str__ != "None" and len($name.__str__) > 0: --name=$name #end if + #if $group.__str__ != "None" and len($group.__str__) > 0: + --group=$group + #end if #if 20 >= int($diffs.__str__) >= 0: --diffs=$diffs #end if @@ -21,6 +24,7 @@ <inputs> <param name="fasta" type="data" format="fasta" label="fasta - Sequence Fasta"/> <param name="name" type="data" format="names" optional="true" label="name - Sequences Name reference"/> + <param name="group" type="data" format="groups" optional="true" label="group - Sequences Name reference"/> <param name="diffs" type="integer" value="1" label="diffs - Number of mismatched bases to allow between sequences in a group (default 1)"/> </inputs> <outputs>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tools/mothur/seq.error.xml Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,172 @@ +<tool id="mothur_seq_error" name="Seq.error" version="1.22.0"> + <description>assess error rates in sequencing data</description> + <command interpreter="python"> + mothur_wrapper.py + #import re, os.path + #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__] + #if 'summary' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.summary$:'" + $summary_out.__str__] + #end if + #if 'seq' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.seq$:'" + $seq_out.__str__] + #end if + #if 'seq_forward' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.seq.forward$:'" + $seq_forward_out.__str__] + #end if + #if 'seq_reverse' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.seq.reverse$:'" + $seq_reverse_out.__str__] + #end if + #if 'chimera' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.chimera$:'" + $chimera_out.__str__] + #end if + #if 'count' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.count$:'" + $count_out.__str__] + #end if + #if 'matrix' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.matrix$:'" + $matrix_out.__str__] + #end if + #if 'ref_query' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.ref-query$:'" + $ref_query_out.__str__] + #end if + --cmd='seq.error' + --outputdir='$logfile.extra_files_path' + --fasta=$fasta_in + --reference=$alignment.template + #if $name_in.__str__ != "None" and len($name_in.__str__) > 0: + --name=$name_in + #end if + #if $qual.use.__str__ == 'yes': + --qfile=$qfile_in + --alignreport=$alignreport_in + #if 'quality' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.quality$:'" + $quality_out.__str__] + #end if + #if 'qual_forward' in 
$output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.qual.forward$:'" + $qual_forward_out.__str__] + #end if + #if 'qual_reverse' in $output_sel.__str__.split(','): + #set results = $results + ["'^\S+.error.qual.reverse$:'" + $qual_reverse_out.__str__] + #end if + #end if + #if $threshold.__str__ != '': + --threshold=$threshold + #end if + $ignorechimeras + --result=#echo ','.join($results) + --processors=8 + </command> + <inputs> + <param name="fasta_in" type="data" format="align" label="fasta - Candiate Sequences"/> + <conditional name="alignment"> + <param name="source" type="select" label="Select Reference Template from" help=""> + <option value="ref">Cached Reference</option> + <option value="history">Your History</option> + </param> + <when value="ref"> + <param name="template" type="select" label="reference - Select an alignment database " help=""> + <options from_file="mothur_aligndb.loc"> + <column name="name" index="0" /> + <column name="value" index="1" /> + </options> + </param> + </when> + <when value="history"> + <param name="template" type="data" format="align" label="reference - Reference to align with" help=""/> + </when> + </conditional> + + <param name="name_in" type="data" format="names" optional="true" label="name - Sequences Name reference"/> + + <conditional name="qual"> + <param name="use" type="select" label="Include a quality file and an alignment report as inputs" help=""> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <param name="qfile_in" type="data" format="qual" label="qfile - Fasta Quality"/> + <param name="alignreport_in" type="data" format="align.report" label="alignreport - Align Report"/> + </when> + <when value="no"/> + </conditional> + + <param name="threshold" type="float" value="" optional="true" label="threshold - error rate at which to report (default 1.)" + help=""> + <validator type="in_range" message="error rate threshold between 0. and 1." 
min="0.0" max="1.0"/> + </param> + <param name="ignorechimeras" type="boolean" truevalue="" falsevalue="--ignorechimeras=false" checked="true" label="ignorechimeras - " /> + + <param name="output_sel" type="select" multiple="true" display="checkboxes" label="Outputs as history datasets" > + <option value="summary">error.summary</option> + <option value="seq">error.seq</option> + <option value="seq_forward">error.seq.forward</option> + <option value="seq_reverse">error.seq.reverse</option> + <option value="chimera">error.chimera</option> + <option value="count">error.count</option> + <option value="matrix">error.matrix</option> + <option value="quality">error.quality</option> + <option value="qual_forward">error.qual.forward</option> + <option value="qual_reverse">error.qual.reverse</option> + <option value="ref_query">error.ref-query</option> + </param> + </inputs> + <outputs> + <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" /> + <data format="tabular" name="summary_out" label="${tool.name} on ${on_string}: error.summary" > + <filter>('summary' in output_sel)</filter> + </data> + <data format="fasta" name="seq_out" label="${tool.name} on ${on_string}: error.seq" > + <filter>('seq' in output_sel)</filter> + </data> + <data format="tabular" name="seq_forward_out" label="${tool.name} on ${on_string}: error.seq.forward" > + <filter>('seq_forward' in output_sel)</filter> + </data> + <data format="tabular" name="seq_reverse_out" label="${tool.name} on ${on_string}: error.seq.reverse" > + <filter>('seq_reverse' in output_sel)</filter> + </data> + <data format="tabular" name="chimera_out" label="${tool.name} on ${on_string}: error.chimera" > + <filter>('chimera' in output_sel)</filter> + </data> + <data format="tabular" name="count_out" label="${tool.name} on ${on_string}: error.count" > + <filter>('count' in output_sel)</filter> + </data> + <data format="tabular" name="matrix_out" label="${tool.name} on ${on_string}: error.matrix" > + 
<filter>('matrix' in output_sel)</filter> + </data> + <data format="tabular" name="quality_out" label="${tool.name} on ${on_string}: error.quality" > + <filter>('quality' in output_sel)</filter> + </data> + <data format="tabular" name="qual_forward_out" label="${tool.name} on ${on_string}: error.qual.forward" > + <filter>('qual_forward' in output_sel)</filter> + </data> + <data format="tabular" name="qual_reverse_out" label="${tool.name} on ${on_string}: error.qual.reverse" > + <filter>('qual_reverse' in output_sel)</filter> + </data> + <data format="align" name="ref_query_out" label="${tool.name} on ${on_string}: error.ref-query" > + <filter>('ref_query' in output_sel)</filter> + </data> + </outputs> + <requirements> + <requirement type="binary">mothur</requirement> + </requirements> + <tests> + </tests> + <help> +**Mothur Overview** + +Mothur_, initiated by Dr. Patrick Schloss and his software development team +in the Department of Microbiology and Immunology at The University of Michigan, +provides bioinformatics for the microbial ecology community. + +.. _Mothur: http://www.mothur.org/wiki/Main_Page + +**Command Documenation** + +The seq.error_ command evaluates error rate for sequences by comparing to the fasta-formatted template_alignment_. +This is demonstrated in http://www.mothur.org/wiki/Schloss_SOP#Error_analysis + +.. _template_alignment: http://www.mothur.org/wiki/Alignment_database +.. _seq.error: http://www.mothur.org/wiki/Seq.error + + + </help> +</tool>
--- a/mothur/tools/mothur/sffinfo.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/sffinfo.xml Tue Nov 08 11:45:32 2011 -0600 @@ -48,7 +48,7 @@ <data format="txt" name="out_sfftxt" label="${tool.name} on ${on_string}: sff.txt"> <filter>sfftxt == True</filter> </data> - <data format="txt" name="out_flow" label="${tool.name} on ${on_string}: flowgram"> + <data format="sff.flow" name="out_flow" label="${tool.name} on ${on_string}: flowgram"> <filter>flow == True</filter> </data> </outputs>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tools/mothur/shhh.flows.xml Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,103 @@ +<tool id="mothur_shhh_flows" name="Shhh.flows" version="1.22.0" force_history_refresh="True"> + <description>Denoise flowgrams (PyroNoise algorithm)</description> + <command interpreter="python"> + mothur_wrapper.py + #import re, os.path + --cmd='shhh.flows' + --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.shhh\.fasta$:'$shhh_fasta,'^\S+\.shhh\.qual$:'$shhh_qual,'^\S+\.shhh\.names$:'$shhh_names,'^\S+\.shhh\.groups$:'$shhh_groups,'^\S+\.shhh\.counts$:'$shhh_counts + --outputdir='$logfile.extra_files_path' + --flow=$flow + --lookup=$prob.lookup + #if $maxiter.__str__ != '': + --maxiter=$maxiter + #end if + #if $mindelta.__str__ != '': + --mindelta=$mindelta + #end if + #if $cutoff.__str__ != '': + --cutoff=$cutoff + #end if + #if $sigma.__str__ != '': + --sigma=$sigma + #end if + #if $order.__str__.strip() != '': + --order=$order + #end if + --processors=8 + </command> + <inputs> + <param name="flow" type="data" format="sff.flow" label="flow - flowgram data" + help="Use sffinfo to generate flow data from an sff file and usually trimmed by trim.flows"/> + <conditional name="prob"> + <param name="source" type="select" label="Select Taxonomy from" help=""> + <option value="ref">Cached Reference</option> + <option value="hist">History</option> + </param> + <when value="ref"> + <param name="lookup" type="select" format="tabular" label="lookup - intensity value per homopolymer length" + help="table of the probability of observing an intensity value for a given homopolymer length"> + <options from_file="mothur_lookup.loc"> + <column name="name" index="0" /> + <column name="value" index="1" /> + </options> + </param> + </when> + <when value="hist"> + <param name="lookup" type="data" format="tabular" label="lookup - intensity value per homopolymer length" + help="from http://www.mothur.org/wiki/Lookup_files"/> + </when> + </conditional> + + 
<param name="maxiter" type="integer" value="1000" optional="true" label="maxiter - maximum iterations to run (default 1000)" help="if the delta value does not first drop below the mindelta value. "> + <validator type="in_range" message="Number of differences can't be negative" min="0"/> + </param> + + <param name="mindelta" type="float" value="" optional="true" label="mindelta - threshold for determining how much change in the flowgram correction is allowed" + help="default .0000001 (10^-6)"> + <validator type="in_range" message="mindelta between 0. and .1" min="0.0" max="0.1"/> + </param> + + <param name="cutoff" type="float" value="" optional="true" label="cutoff - seed the expectation-maximization step" + help="default .01 (usually doesn't need to be changed)"> + <validator type="in_range" message="cutoff between 0. and 1." min="0.0" max="1.0"/> + </param> + + <param name="sigma" type="float" value="" optional="true" label="sigma - the dispersion of the data in the expectation-maximization step of the algorithm" + help="default .06 (usually doesn't need to be changed)"> + <validator type="in_range" message="sigma between 0. and 1."
min="0.0" max="1.0"/> + </param> + + <param name="order" type="text" value="" label="order - flow order for nucleotides in the sequencer" + help="default is TACG"/> + </inputs> + <outputs> + <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" /> + <data format_source="fasta" name="shhh_fasta" label="${tool.name} on ${on_string}: shhh.fasta"/> + <data format_source="qual454" name="shhh_qual" label="${tool.name} on ${on_string}: shhh.qual"/> + <data format_source="names" name="shhh_names" label="${tool.name} on ${on_string}: shhh.names"/> + <data format_source="groups" name="shhh_groups" label="${tool.name} on ${on_string}: shhh.groups"/> + <data format_source="tabular" name="shhh_counts" label="${tool.name} on ${on_string}: shhh.counts"/> + </outputs> + <requirements> + <requirement type="binary">mothur</requirement> + </requirements> + <tests> + </tests> + <help> +**mothur overview** + +Mothur_, initiated by Dr. Patrick Schloss and his software development team +in the Department of Microbiology and Immunology at The University of Michigan, +provides bioinformatics for the microbial ecology community. + +.. _Mothur: http://www.mothur.org/wiki/Main_Page + +**Command Documenation** + +The shhh.flows_ command is Pat Schloss's translation of Chris Quince's PyroNoise algorithm [1] from C to C++ with the incorporation of mothur's bells and whistles. Based on processing of test datasets provided by Quince, shhh.flows gives the same/similar output to AmpliconNoise. shhh.flows uses a expectation-maximization algorithm to correct flowgrams to identify the idealized form of each flowgram and translate that flowgram to a DNA sequence. Our testing has shown that when Titanium data are trimmed to 450 flows using trim.flows, shhh.flows provides the highest quality data for any other method available. In contrast, when we use the min/max number of flows suggested by Quince of 360/720, the error rate is not that great. 
This much improved error rate does come at a computational cost. Whereas the features in trim.seqs take on the order of minutes, shhh.flows can take on the order of hours. You will also need a lookup file that tells shhh.flows the probability of observing an intensity value for a given homopolymer length. You can get mothur-compatible files at: http://www.mothur.org/wiki/Lookup_files + +.. _shhh.flows: http://www.mothur.org/wiki/Shhh.flows + + + </help> +</tool>
--- a/mothur/tools/mothur/sub.sample.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/sub.sample.xml Tue Nov 08 11:45:32 2011 -0600 @@ -1,4 +1,4 @@ -<tool id="mothur_sub_sample" name="Sub.sample" version="1.21.0"> +<tool id="mothur_sub_sample" name="Sub.sample" version="1.22.0"> <description>Create a sub sample</description> <command interpreter="python"> mothur_wrapper.py @@ -12,7 +12,7 @@ #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1\\.subsample\\.\2',$os.path.basename($input.fasta_in.__str__)) + ":'" + $fasta_out.__str__] #if $input.name_in.__str__ != "None" and len($input.name_in.__str__) > 0: --name=$input.name_in - ## #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1\\.subsample\\.\2',$os.path.basename($input.name_in.__str__)) + ":'" + $names_out.__str__] + #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)',r'\1\\.subsample\\.\2',$os.path.basename($input.name_in.__str__)) + ":'" + $names_out.__str__] #end if #if $input.use_group.to_filter == "yes": #if $input.use_group.group_in.__str__ != "None" and len($input.use_group.group_in.__str__) > 0: @@ -183,10 +183,10 @@ <filter>input['format'] == 'rabund'</filter> </data> <!-- This doesn't appear to be generated even though the documentation says it is + --> <data format="names" name="names_out" label="${tool.name} on ${on_string}: subsample.names"> <filter>(input['format'] == 'fasta' and input['name_in'] != None)</filter> </data> - --> <data format="groups" name="group_out" label="${tool.name} on ${on_string}: subsample.groups"> <filter>((input['format'] == 'fasta' or input['format'] == 'list') and input['use_group'] == 'yes')</filter> </data>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tools/mothur/summary.tax.xml Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,84 @@ +<tool id="mothur_summary_tax" name="Summary.tax" version="1.22.0"> + <description>Assign sequences to taxonomy</description> + <command interpreter="python"> + mothur_wrapper.py + --cmd='summary.tax' + --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.tax\.summary$:'$tax_summary + --outputdir='$logfile.extra_files_path' + --taxonomy=$tax.taxonomy + #if $name.__str__ != "None" and len($name.__str__) > 0: + --name='$name' + #end if + #if $group.__str__ != "None" and len($group.__str__) > 0: + --group='$group' + #end if + #if $reftax.source != 'none' and len($reftax.taxonomy.__str__) > 0: + --reftaxonomy=$reftax.taxonomy + #end if + </command> + <inputs> + <conditional name="tax"> + <param name="source" type="select" label="Select Taxonomy from" help=""> + <option value="hist">History</option> + <option value="ref">Cached Reference</option> + </param> + <when value="ref"> + <param name="taxonomy" type="select" format="seq.taxonomy" label="taxonomy - Taxonomy Reference"> + <options from_file="mothur_taxonomy.loc"> + <column name="name" index="0" /> + <column name="value" index="1" /> + </options> + </param> + </when> + <when value="hist"> + <param name="taxonomy" type="data" format="seq.taxonomy" label="taxonomy - Taxonomy Reference"/> + </when> + </conditional> + <param name="name" type="data" format="names" optional="true" label="name - taxonomy sequence names"/> + <param name="group" type="data" format="groups" optional="true" label="group - Groups for summary file"/> + <conditional name="reftax"> + <param name="source" type="select" label="Select Reference Taxonomy used in Summary.seqs from" help="Including the reference taxonomy file used when you classified your sequences keep the rankIDs in the summary file static."> + <option value="none">Selection is Optional</option> + <option value="hist">History</option> + <option 
value="ref">Cached Reference</option> + </param> + <when value="none"/> + <when value="ref"> + <param name="taxonomy" type="select" format="seq.taxonomy" label="reftaxonomy - Taxonomy Reference used when sequences were classified"> + <options from_file="mothur_taxonomy.loc"> + <column name="name" index="0" /> + <column name="value" index="1" /> + </options> + </param> + </when> + <when value="hist"> + <param name="taxonomy" type="data" format="seq.taxonomy" label="reftaxonomy - Taxonomy Reference used when sequences were classified"/> + </when> + </conditional> + </inputs> + <outputs> + <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" /> + <data format="summary" name="tax_summary" label="${tool.name} on ${on_string}: summary" /> + </outputs> + <requirements> + <requirement type="binary">mothur</requirement> + </requirements> + <tests> + </tests> + <help> +**Mothur Overview** + +Mothur_, initiated by Dr. Patrick Schloss and his software development team +in the Department of Microbiology and Immunology at The University of Michigan, +provides bioinformatics for the microbial ecology community. + +.. _Mothur: http://www.mothur.org/wiki/Main_Page + +**Command Documentation** + +The summary.tax_ command reads a taxonomy file and an optional name and/or group file, and summarizes the taxonomy information. + +.. _summary.tax: http://www.mothur.org/wiki/Summary.tax + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mothur/tools/mothur/trim.flows.xml Tue Nov 08 11:45:32 2011 -0600 @@ -0,0 +1,127 @@ +<tool id="mothur_trim_flows" name="Trim.flows" version="1.22.0" force_history_refresh="True"> + <description>partition by barcode, trim to length, cull by length and mismatches</description> + <command interpreter="python"> + mothur_wrapper.py + #import re, os.path + --cmd='trim.flows' + ## #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__] + ## #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($flow.__str__)) + ":'" + $trim_flow.__str__] + --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.trim\.flow$:'$trim_flow,'^\S+\.scrap\.flow$:'$scrap_flow,'^\S+\.flow\.files$:'$flow_files,'^\S+\.flow\.fasta$:'$flow_fasta + --outputdir='$logfile.extra_files_path' + --flow=$flow + #if $minflows.__str__ != '': + --minflows=$minflows + #end if + #if $maxflows.__str__ != '': + --maxflows=$maxflows + #end if + #if $maxhomop.__str__ != '': + --maxhomop=$maxhomop + #end if + #if $order.__str__.strip() != '': + --order=$order + #end if + #if $signal.__str__ != '' + --signal=$signal + #end if + #if $noise.__str__ != '' + --noise=$noise + #end if + #if $oligo.add == "yes": + --oligos=$oligo.oligos + #if int($oligo.bdiffs.__str__) > 0: + --bdiffs=$oligo.bdiffs + #end if + #if int($oligo.pdiffs.__str__) > 0: + --pdiffs=$oligo.pdiffs + #end if + #if int($oligo.tdiffs.__str__) > 0: + --tdiffs=$oligo.tdiffs + #end if + --datasetid='$logfile.id' --new_file_path='$__new_file_path__' + --new_datasets='^\S+?\.(\S+\.flow)$:sff.flow' + #end if + $fasta + --processors=8 + </command> + <inputs> + <param name="flow" type="data" format="sff.flow" label="flow - flowgram data" + help="Use sffinfo to generate flow data from an sff file"/> + + <conditional name="oligo"> + <param name="add" type="select" label="Trim with an oligos file?" 
+ help="a file that can contain the sequences of the forward and reverse primers and barcodes and their sample identifier. + Each line of the oligos file can start with the key words &quot;forward&quot;, &quot;reverse&quot;, + and &quot;barcode&quot; or it can start with a &quot;#&quot; to tell mothur to ignore that line of the oligos file. "> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="oligos" type="data" format="oligos" label="oligos - barcodes and primers"/> + <param name="bdiffs" type="integer" value="0" label="bdiffs - number of differences to allow in the barcode (default 0)"> + <validator type="in_range" message="Number of differences can't be negative" min="0"/> + </param> + <param name="pdiffs" type="integer" value="0" label="pdiffs - number of differences to allow in the primer (default 0)"> + <validator type="in_range" message="Number of differences can't be negative" min="0"/> + </param> + <param name="tdiffs" type="integer" value="0" label="tdiffs - total number of differences to allow in primer and barcode (ignored if &lt; 1)"> + <validator type="in_range" message="Number of differences can't be negative" min="0"/> + </param> + </when> + </conditional> + + <param name="minflows" type="integer" value="" optional="true" label="minflows - Minimum number of flows that each sequence must contain to make it in to a &quot;trim&quot; file. 
(default 450)" help="(Quince uses 360)"/> + <param name="maxflows" type="integer" value="" optional="true" label="maxflows - Maximum number of flows after which all other flows should be ignored (default 450)" help="(Quince uses 360 for GSFLX and 720 for Titanium)"/> + + <param name="maxhomop" type="integer" value="" optional="true" label="maxhomop - Maximum homopolymers" + help=""/> + + <param name="signal" type="float" value="" optional="true" label="signal - treat any intensity signal greater than this threshold as a real signal" + help="default .5"> + <validator type="in_range" message="signal between 0. and 1." min="0.0" max="1.0"/> + </param> + <param name="noise" type="float" value="" optional="true" label="noise - treat any intensity signal less than this threshold as noise" + help="default .7"> + <validator type="in_range" message="signal between 0. and 1." min="0.0" max="1.0"/> + </param> + <param name="order" type="text" value="" label="order - flow order for nucleotides in the sequencer" + help="default is TACG"/> + + <param name="fasta" type="boolean" truevalue="--fasta=true" falsevalue="" checked="false" label="fasta - translate the flowgram data to fasta sequence format"/> + + </inputs> + <outputs> + <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" /> + <data format="sff.flow" name="trim_flow" label="${tool.name} on ${on_string}: trim.flow"/> + <data format="sff.flow" name="scrap_flow" label="${tool.name} on ${on_string}: scrap.flow"/> + <data format="tabular" name="flow_files" label="${tool.name} on ${on_string}: flow.files"> + <filter>oligos != None</filter> + </data> + <data format_source="fasta" name="flow_fasta" label="${tool.name} on ${on_string}: flow.fasta"> + <filter>fasta == True</filter> + </data> + </outputs> + <requirements> + <requirement type="binary">mothur</requirement> + </requirements> + <tests> + </tests> + <help> +**mothur overview** + +Mothur_, initiated by Dr. 
Patrick Schloss and his software development team +in the Department of Microbiology and Immunology at The University of Michigan, +provides bioinformatics for the microbial ecology community. + +.. _Mothur: http://www.mothur.org/wiki/Main_Page + +**Command Documentation** + +The trim.flows_ command is analogous to the trim.seqs command, except that it uses the flowgram data that comes bundled in the sff file that is generated by 454 sequencing. Its primary usage is as a preliminary step to running shhh.seqs. Chris Quince has a series of perl scripts that fulfill a similar role [1]. This command will allow you to partition your flowgram data by sample based on the barcode, trim the flows to a specified length range, and cull sequences that are too short or have too many mismatches to barcodes and primers. + +.. _trim.flows: http://www.mothur.org/wiki/Trim.flows + + + </help> +</tool>
--- a/mothur/tools/mothur/unifrac.weighted.xml Wed Oct 05 10:37:11 2011 -0500 +++ b/mothur/tools/mothur/unifrac.weighted.xml Tue Nov 08 11:45:32 2011 -0600 @@ -23,7 +23,7 @@ --distance=$distance #end if $root - --processors=2 + --processors=8 </command> <inputs> <param name="tree" type="data" format="tre" label="tree - Tree"/>