view mothur/tools/mothur/trim.flows.xml @ 31:a3eed59297ea

Patches courtesy of Peter Briggs, Bioinformatics Core Facility University of Manchester make.contigs.xml.patch:# make.contigs.xml.patch make.contigs.xml.patch:# make.contigs.xml.patch:# 1. Fix cosmetic typo in <description> (forard -> forward) make.contigs.xml.patch:# 2. Address error due to having 'mismatch' as the name for both an input and an output parameter: make.contigs.xml.patch:# rename output parameter to 'cmismatch' make.contigs.xml.patch:# 3. Remove 'threshold' parameter: make.contigs in mothur doesn't support a 'threshold' parameter metagenomics.py.patch:# metagenomics.py.patch metagenomics.py.patch:# metagenomics.py.patch:# 1. Groups class: names were being taken from the wrong field (affected shhh.flows tool) metagenomics.py.patch:# 2. Axes class: make 'sniff' method more sensitive to try and restrict arbitrary tabular metagenomics.py.patch:# data uploads being sniffed as this type mothur_wrapper.py.patch:# mothur_wrapper.py.patch mothur_wrapper.py.patch:# mothur_wrapper.py.patch:# 1. Update 'cmd_dict' settings for shhh.flows and shhh.seqs (otherwise these functions will mothur_wrapper.py.patch:# fail on execution) mothur_wrapper.py.patch:# 2. Fix add_option calls defining '--match' and '--mismatch' command line options (otherwise mothur_wrapper.py.patch:# syntax error causes immediate failure) screen.seqs.xml.patch:# screen.seqs.xml.patch screen.seqs.xml.patch:# screen.seqs.xml.patch:# Replace pattern for align.report output file in definiting of 'results' parameter in screen.seqs.xml.patch:# <command> section (otherwise output_alignreport data item is empty). shhh.flows.xml.patch:# shhh.flows.xml.patch shhh.flows.xml.patch:# shhh.flows.xml.patch:# Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.flows.xml.patch:# correctly assigned to output datasets) shhh.seqs.xml.patch:# shhh.seqs.xml.patch shhh.seqs.xml.patch:# shhh.seqs.xml.patch:# 1. Fix patterns in --result (in <command> section) for shhh_seqs.fasta and shhh_seqs.names shhh.seqs.xml.patch:# output files (otherwise files are not collected and associated data items are empty) shhh.seqs.xml.patch:# 2. Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.seqs.xml.patch:# correctly assigned to output datasets) trim.flows.xml.patch:# trim.flows.xml.patch trim.flows.xml.patch:# trim.flows.xml.patch:# Remove erroneous space from --result definition in <command> section (otherwise causes tool trim.flows.xml.patch:# failure) trim.seqs.xml.patch:# trim.seqs.xml.patch trim.seqs.xml.patch:# trim.seqs.xml.patch:# 1. Remove reference to undefined 'oligo.allvalues' varible in <command> section (otherwise trim.seqs.xml.patch:# causes failure on execution) trim.seqs.xml.patch:# 2. Fix format for input parameter 'names' (format should be 'names' not 'name') trim.seqs.xml.patch:# 3. Add output parameter 'scrap_names' (to ensure consistent collection of all outputs) trim.seqs.xml.patch:# 4. Update --result definition in <command> section to collect both trim.names and scrap.names
author Jim Johnson <jj@umn.edu>
date Tue, 30 Jul 2013 09:26:31 -0500
parents 49058b1f8d3f
children ec8df51e841a
line wrap: on
line source

<tool id="mothur_trim_flows" name="Trim.flows" version="1.22.0" force_history_refresh="True">
 <description>partition by barcode, trim to length, cull by lenght and mismatches</description>
 <command interpreter="python">
  mothur_wrapper.py 
  #import re, os.path
  --cmd='trim.flows'
  ## #set results = ["'^mothur.\S+\.logfile$:'" + $logfile.__str__]
  ## #set results = $results + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'\1.good.\2',$os.path.basename($flow.__str__)) + ":'" + $trim_flow.__str__]
  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.trim\.flow$:'$trim_flow,'^\S+\.scrap\.flow$:'$scrap_flow,'^\S+\.flow\.files$:'$flow_files,'^\S+\.flow\.fasta$:'$flow_fasta
  --outputdir='$logfile.extra_files_path'
  --flow=$flow
  #if $minflows.__str__ != '':
   --minflows=$minflows 
  #end if
  #if $maxflows.__str__ != '':
   --maxflows=$maxflows 
  #end if
  #if $maxhomop.__str__ != '':
   --maxhomop=$maxhomop 
  #end if
  #if $order.__str__.strip() != '':
   --order=$order 
  #end if
  #if $signal.__str__ != ''
   --signal=$signal 
  #end if
  #if $noise.__str__ != ''
   --noise=$noise 
  #end if
  #if $oligo.add == "yes":
   --oligos=$oligo.oligos
   #if $oligo.bdiffs.__str__ != '' and int($oligo.bdiffs.__str__) > 0:
    --bdiffs=$oligo.bdiffs
   #end if
   #if $oligo.pdiffs.__str__ != '' and int($oligo.pdiffs.__str__) > 0:
    --pdiffs=$oligo.pdiffs
   #end if
   #if $oligo.tdiffs.__str__ != '' and int($oligo.tdiffs.__str__) > 0:
    --tdiffs=$oligo.tdiffs
   #end if
   #if $oligo.ldiffs.__str__ != '' and int($oligo.ldiffs.__str__) > 0:
    --ldiffs=$oligo.ldiffs
   #end if
   #if $oligo.sdiffs.__str__ != '' and int($oligo.sdiffs.__str__) > 0:
    --sdiffs=$oligo.sdiffs
   #end if
   --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
   --new_datasets='^\S+?\.(\S+\.flow)$:sff.flow'
  #end if
  $fasta
  --processors=8
 </command>
 <inputs>
  <param name="flow" type="data" format="sff.flow" label="flow - flowgram data" 
         help="Use sffinfo to generate flow data from an sff file"/>

  <conditional name="oligo">
   <param name="add" type="select" label="Trim with an oligos file?" 
    help="a file that can contain the sequences of the forward and reverse primers and barcodes and their sample identifier. 
         Each line of the oligos file can start with the key words &quot;forward&quot;, &quot;reverse&quot;, 
         and &quot;barcode&quot; or it can start with a &quot;#&quot; to tell mothur to ignore that line of the oligos file.  ">
    <option value="no">no</option>
    <option value="yes">yes</option>
   </param>
   <when value="no"/>
   <when value="yes">
    <param name="oligos" type="data" format="oligos" label="oligos - barcodes and primers"/>
    <param name="bdiffs" type="integer" value="0" label="bdiffs - number of differences to allow in the barcode (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="pdiffs" type="integer" value="0" label="pdiffs - number of differences to allow in the primer (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="tdiffs" type="integer" value="0" label="tdiffs - total number of differences to allow in primer and barcode (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="ldiffs" type="integer" value="0" optional="true" label="ldiffs - total number of differences to allow in linker sequence (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="sdiffs" type="integer" value="0" optional="true" label="sdiffs - total number of differences to allow in spacer sequence (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
   </when>
  </conditional>

  <param name="minflows" type="integer" value="" optional="true" label="minflows - Minimum number of flows that each sequence must contain to make it in to a &quot;trim&quot; file. (default 450)" help="(Quince uses 360)"/>
  <param name="maxflows" type="integer" value="" optional="true" label="maxflows - Maximum number of flows after which all other flows should be ignored (default 450)" help="(Quince uses 360 for GSFLX and 720 for Titanium)"/>

  <param name="maxhomop" type="integer" value="" optional="true" label="maxhomop - Maximum homopolymers" 
         help=""/>

  <param name="signal" type="float" value="" optional="true" label="signal - treat any intensity signal greater than this threshold as a real signal" 
         help="default .5">
   <validator type="in_range" message="signal between 0. and 1." min="0.0" max="1.0"/>
  </param>
  <param name="noise" type="float" value="" optional="true" label="noise - treat any intensity signal less than this threshold as noise" 
         help="default .7">
   <validator type="in_range" message="signal between 0. and 1." min="0.0" max="1.0"/>
  </param>
  <param name="order" type="text" value="" label="order - flow order for nucleotides in the sequencer" 
         help="default is TACG"/>

  <param name="fasta" type="boolean" truevalue="--fasta=true" falsevalue="" checked="false" label="fasta - translate the flowgram data to fasta sequence format"/>

 </inputs>
 <outputs>
  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
  <data format="sff.flow" name="trim_flow" label="${tool.name} on ${on_string}: trim.flow"/>
  <data format="sff.flow" name="scrap_flow" label="${tool.name} on ${on_string}: scrap.flow"/>
  <data format="tabular" name="flow_files" label="${tool.name} on ${on_string}: flow.files">
   <filter>oligos != None</filter>
  </data>
  <data format_source="fasta" name="flow_fasta" label="${tool.name} on ${on_string}: flow.fasta">
   <filter>fasta == True</filter>
  </data>
 </outputs>
 <requirements>
  <requirement type="package" version="1.27">mothur</requirement>
 </requirements>
 <tests>
 </tests>
 <help>
**mothur overview**

Mothur_, initiated by Dr. Patrick Schloss and his software development team
in the Department of Microbiology and Immunology at The University of Michigan,
provides bioinformatics for the microbial ecology community.

.. _Mothur: http://www.mothur.org/wiki/Main_Page

**Command Documenation**

The trim.flows_ command is analogous to the trim.seqs command, except that it uses the flowgram data that comes bundled in the sff file that is generated by 454 sequencing. It's primary usage is as a preliminary step to running shhh.seqs. Chris Quince has a series of perl scripts that fulfill a similar role [1]. This command will allow you to partition your flowgram data by sample based on the barcode, trim the flows to a specified length range, and cull sequences that are too short or have too many mismatches to barcodes and primers.

.. _trim.flows: http://www.mothur.org/wiki/Trim.flows


 </help>
</tool>