view mothur/tools/mothur/shhh.flows.xml @ 31:a3eed59297ea

Patches courtesy of Peter Briggs, Bioinformatics Core Facility University of Manchester make.contigs.xml.patch:# make.contigs.xml.patch make.contigs.xml.patch:# make.contigs.xml.patch:# 1. Fix cosmetic typo in <description> (forard -> forward) make.contigs.xml.patch:# 2. Address error due to having 'mismatch' as the name for both an input and an output parameter: make.contigs.xml.patch:# rename output parameter to 'cmismatch' make.contigs.xml.patch:# 3. Remove 'threshold' parameter: make.contigs in mothur doesn't support a 'threshold' parameter metagenomics.py.patch:# metagenomics.py.patch metagenomics.py.patch:# metagenomics.py.patch:# 1. Groups class: names were being taken from the wrong field (affected shhh.flows tool) metagenomics.py.patch:# 2. Axes class: make 'sniff' method more sensitive to try and restrict arbitrary tabular metagenomics.py.patch:# data uploads being sniffed as this type mothur_wrapper.py.patch:# mothur_wrapper.py.patch mothur_wrapper.py.patch:# mothur_wrapper.py.patch:# 1. Update 'cmd_dict' settings for shhh.flows and shhh.seqs (otherwise these functions will mothur_wrapper.py.patch:# fail on execution) mothur_wrapper.py.patch:# 2. Fix add_option calls defining '--match' and '--mismatch' command line options (otherwise mothur_wrapper.py.patch:# syntax error causes immediate failure) screen.seqs.xml.patch:# screen.seqs.xml.patch screen.seqs.xml.patch:# screen.seqs.xml.patch:# Replace pattern for align.report output file in definiting of 'results' parameter in screen.seqs.xml.patch:# <command> section (otherwise output_alignreport data item is empty). shhh.flows.xml.patch:# shhh.flows.xml.patch shhh.flows.xml.patch:# shhh.flows.xml.patch:# Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.flows.xml.patch:# correctly assigned to output datasets) shhh.seqs.xml.patch:# shhh.seqs.xml.patch shhh.seqs.xml.patch:# shhh.seqs.xml.patch:# 1. 
Fix patterns in --result (in <command> section) for shhh_seqs.fasta and shhh_seqs.names shhh.seqs.xml.patch:# output files (otherwise files are not collected and associated data items are empty) shhh.seqs.xml.patch:# 2. Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.seqs.xml.patch:# correctly assigned to output datasets) trim.flows.xml.patch:# trim.flows.xml.patch trim.flows.xml.patch:# trim.flows.xml.patch:# Remove erroneous space from --result definition in <command> section (otherwise causes tool trim.flows.xml.patch:# failure) trim.seqs.xml.patch:# trim.seqs.xml.patch trim.seqs.xml.patch:# trim.seqs.xml.patch:# 1. Remove reference to undefined 'oligo.allvalues' varible in <command> section (otherwise trim.seqs.xml.patch:# causes failure on execution) trim.seqs.xml.patch:# 2. Fix format for input parameter 'names' (format should be 'names' not 'name') trim.seqs.xml.patch:# 3. Add output parameter 'scrap_names' (to ensure consistent collection of all outputs) trim.seqs.xml.patch:# 4. Update --result definition in <command> section to collect both trim.names and scrap.names
author Jim Johnson <jj@umn.edu>
date Tue, 30 Jul 2013 09:26:31 -0500
parents 49058b1f8d3f
children ec8df51e841a
line wrap: on
line source

<tool id="mothur_shhh_flows" name="Shhh.flows" version="1.26.0" force_history_refresh="True">
 <description>Denoise flowgrams (PyroNoise algorithm)</description>
 <command interpreter="python">
  mothur_wrapper.py 
  #import re, os.path
  ## Invoke the mothur shhh.flows command through the wrapper script.
  --cmd='shhh.flows'
  ## Each result entry pairs a filename regex with the dataset that collects the matching output file.
  ## Literal dots in the patterns are escaped so they cannot match arbitrary characters.
  --result='^mothur\.\S+\.logfile$:'$logfile,'^\S+\.shhh\.fasta$:'$shhh_fasta,'^\S+\.shhh\.qual$:'$shhh_qual,'^\S+\.shhh\.names$:'$shhh_names,'^\S+\.shhh\.groups$:'$shhh_groups,'^\S+\.shhh\.counts$:'$shhh_counts
  --outputdir='$logfile.extra_files_path'
  --flow=$flow
  ## The lookup value comes from whichever branch of the prob conditional is selected.
  --lookup=$prob.lookup
  ## Optional parameters are passed only when the user supplied a value.
  #if $maxiter.__str__ != '':
   --maxiter=$maxiter 
  #end if
  #if $mindelta.__str__ != '':
   --mindelta=$mindelta 
  #end if
  #if $cutoff.__str__ != '':
   --cutoff=$cutoff 
  #end if
  #if $sigma.__str__ != '':
   --sigma=$sigma 
  #end if
  ## The flow order is free text, so surrounding whitespace is ignored when testing for emptiness.
  #if $order.__str__.strip() != '':
   --order=$order 
  #end if
  #if $large.__str__ != '':
   --large=$large 
  #end if
  ## NOTE(review): the processor count is fixed at 8 regardless of the resources given to the job; confirm this is intended.
  --processors=8
 </command>
 <inputs>
  <!-- Flowgram file to denoise. -->
  <param name="flow" type="data" format="sff.flow" label="flow - flowgram data" 
         help="Use sffinfo to generate flow data from an sff file and usually trimmed by trim.flows"/>

  <!-- Source of the lookup table: an entry listed in mothur_lookup.loc or a dataset
       from the history. Both branches expose the choice as $prob.lookup. -->
  <conditional name="prob">
   <param name="source" type="select" label="Select Lookup from" help="">
    <option value="ref">Cached Reference</option>
    <option value="hist">History</option>
   </param>
   <when value="ref">
    <param name="lookup" type="select" format="tabular" label="lookup - intensity value per homopolymer length"
     help="table of the probability of observing an intensity value for a given homopolymer length">
     <!-- Column 0 of the .loc file is the display name, column 1 the value passed to the command. -->
     <options from_file="mothur_lookup.loc">
      <column name="name" index="0" />
      <column name="value" index="1" />
     </options>
    </param>
   </when>
   <when value="hist">
    <param name="lookup" type="data" format="tabular" label="lookup - intensity value per homopolymer length"
           help="from http://www.mothur.org/wiki/Lookup_files"/>
   </when>
  </conditional>

  <!-- Optional tuning parameters. A parameter left empty is omitted from the command line. -->
  <param name="maxiter" type="integer" value="1000" optional="true" label="maxiter - maximum iterations to run (default 1000)" help="if the delta value does not first drop below the mindelta value. ">
      <validator type="in_range" message="maxiter can't be negative" min="0"/>
  </param>

  <param name="mindelta" type="float" value="" optional="true" label="mindelta - threshold for determining how much change in the flowgram correction is allowed" 
         help="default .000001 (10^-6)">
   <validator type="in_range" message="mindelta between 0. and .1" min="0.0" max="0.1"/>
  </param>

  <param name="cutoff" type="float" value="" optional="true" label="cutoff - seed the expectation-maximization step" 
         help="default .01 (usually doesn't need to be changed)">
   <validator type="in_range" message="cutoff between 0. and 1." min="0.0" max="1.0"/>
  </param>

  <param name="sigma" type="float" value="" optional="true" label="sigma - the dispersion of the data in the expectation-maximization step of the algorithm" 
         help="default .06 (usually doesn't need to be changed)">
   <validator type="in_range" message="sigma between 0. and 1." min="0.0" max="1.0"/>
  </param>

  <param name="order" type="text" value="" label="order - flow order for nucleotides in the sequencer" 
         help="default is TACG"/>

  <param name="large" type="integer" value="10000" optional="true" label="large - split your flow file and process the pieces separately (default 10000)" help="">
      <validator type="in_range" message="large value must be positive" min="1"/>
  </param>
 </inputs>
 <outputs>
  <!-- Every dataset declared here is referenced by name in the result mapping of the command section. -->

  <!-- mothur log; its extra files path is also used as the output directory. -->
  <data name="logfile"
        format="html"
        label="${tool.name} on ${on_string}: logfile"/>

  <!-- Denoised results, one dataset per shhh.* file pattern. -->
  <data name="shhh_fasta"
        format="fasta"
        label="${tool.name} on ${on_string}: shhh.fasta"/>
  <data name="shhh_qual"
        format="qual454"
        label="${tool.name} on ${on_string}: shhh.qual"/>
  <data name="shhh_names"
        format="names"
        label="${tool.name} on ${on_string}: shhh.names"/>
  <data name="shhh_groups"
        format="groups"
        label="${tool.name} on ${on_string}: shhh.groups"/>
  <data name="shhh_counts"
        format="tabular"
        label="${tool.name} on ${on_string}: shhh.counts"/>
 </outputs>
 <!-- Declares the mothur package, version 1.27, as a dependency of this tool.
      NOTE(review): the tool element itself carries version 1.26.0; confirm the two are meant to differ. -->
 <requirements>
  <requirement version="1.27" type="package">mothur</requirement>
 </requirements>
 <!-- No functional tests are defined for this tool. -->
 <tests/>
 <help>
**mothur overview**

Mothur_, initiated by Dr. Patrick Schloss and his software development team
in the Department of Microbiology and Immunology at The University of Michigan,
provides bioinformatics for the microbial ecology community.

.. _Mothur: http://www.mothur.org/wiki/Main_Page

**Command Documentation**

The shhh.flows_ command is Pat Schloss's translation of Chris Quince's PyroNoise algorithm [1] from C to C++ with the incorporation of mothur's bells and whistles. Based on processing of test datasets provided by Quince, shhh.flows gives the same/similar output to AmpliconNoise. shhh.flows uses an expectation-maximization algorithm to correct flowgrams to identify the idealized form of each flowgram and translate that flowgram to a DNA sequence. Our testing has shown that when Titanium data are trimmed to 450 flows using trim.flows, shhh.flows provides the highest quality data of any method available. In contrast, when we use the min/max number of flows suggested by Quince of 360/720, the error rate is not that great. This much improved error rate does come at a computational cost. Whereas the features in trim.seqs take on the order of minutes, shhh.flows can take on the order of hours.  You will also need a lookup file that tells shhh.flows the probability of observing an intensity value for a given homopolymer length. You can get mothur-compatible files at: http://www.mothur.org/wiki/Lookup_files

.. _shhh.flows: http://www.mothur.org/wiki/Shhh.flows


 </help>
</tool>