view mothur/tools/mothur/trim.seqs.xml @ 31:a3eed59297ea

Patches courtesy of Peter Briggs, Bioinformatics Core Facility University of Manchester make.contigs.xml.patch:# make.contigs.xml.patch make.contigs.xml.patch:# make.contigs.xml.patch:# 1. Fix cosmetic typo in <description> (forard -> forward) make.contigs.xml.patch:# 2. Address error due to having 'mismatch' as the name for both an input and an output parameter: make.contigs.xml.patch:# rename output parameter to 'cmismatch' make.contigs.xml.patch:# 3. Remove 'threshold' parameter: make.contigs in mothur doesn't support a 'threshold' parameter metagenomics.py.patch:# metagenomics.py.patch metagenomics.py.patch:# metagenomics.py.patch:# 1. Groups class: names were being taken from the wrong field (affected shhh.flows tool) metagenomics.py.patch:# 2. Axes class: make 'sniff' method more sensitive to try and restrict arbitrary tabular metagenomics.py.patch:# data uploads being sniffed as this type mothur_wrapper.py.patch:# mothur_wrapper.py.patch mothur_wrapper.py.patch:# mothur_wrapper.py.patch:# 1. Update 'cmd_dict' settings for shhh.flows and shhh.seqs (otherwise these functions will mothur_wrapper.py.patch:# fail on execution) mothur_wrapper.py.patch:# 2. Fix add_option calls defining '--match' and '--mismatch' command line options (otherwise mothur_wrapper.py.patch:# syntax error causes immediate failure) screen.seqs.xml.patch:# screen.seqs.xml.patch screen.seqs.xml.patch:# screen.seqs.xml.patch:# Replace pattern for align.report output file in definiting of 'results' parameter in screen.seqs.xml.patch:# <command> section (otherwise output_alignreport data item is empty). shhh.flows.xml.patch:# shhh.flows.xml.patch shhh.flows.xml.patch:# shhh.flows.xml.patch:# Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.flows.xml.patch:# correctly assigned to output datasets) shhh.seqs.xml.patch:# shhh.seqs.xml.patch shhh.seqs.xml.patch:# shhh.seqs.xml.patch:# 1. 
Fix patterns in --result (in <command> section) for shhh_seqs.fasta and shhh_seqs.names shhh.seqs.xml.patch:# output files (otherwise files are not collected and associated data items are empty) shhh.seqs.xml.patch:# 2. Replace 'format_source' with 'format' for output parameters (otherwise formats are not shhh.seqs.xml.patch:# correctly assigned to output datasets) trim.flows.xml.patch:# trim.flows.xml.patch trim.flows.xml.patch:# trim.flows.xml.patch:# Remove erroneous space from --result definition in <command> section (otherwise causes tool trim.flows.xml.patch:# failure) trim.seqs.xml.patch:# trim.seqs.xml.patch trim.seqs.xml.patch:# trim.seqs.xml.patch:# 1. Remove reference to undefined 'oligo.allvalues' variable in <command> section (otherwise trim.seqs.xml.patch:# causes failure on execution) trim.seqs.xml.patch:# 2. Fix format for input parameter 'names' (format should be 'names' not 'name') trim.seqs.xml.patch:# 3. Add output parameter 'scrap_names' (to ensure consistent collection of all outputs) trim.seqs.xml.patch:# 4. Update --result definition in <command> section to collect both trim.names and scrap.names
author Jim Johnson <jj@umn.edu>
date Tue, 30 Jul 2013 09:26:31 -0500
parents 49058b1f8d3f
children 95d75b35e4d2
line wrap: on
line source

<tool id="mothur_trim_seqs" name="Trim.seqs" version="1.24.0" force_history_refresh="True">
 <description>Trim sequences - primers, barcodes, quality</description>
 <!-- Cheetah template for the command line. It runs mothur_wrapper.py with cmd 'trim.seqs'
      and appends each optional switch only when the matching input parameter is set.
      The script name is deliberately kept as the first token of the template. -->
 <command interpreter="python">
  mothur_wrapper.py 
  --cmd='trim.seqs'
  ## result: comma-separated 'regex:dataset' pairs; each regex names a file written to the
  ## output directory and the value after the colon is the output dataset that receives it.
  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.trim\.fasta$:'$trim_fasta,'^\S+\.trim\.names$:'$trim_names,'^\S+\.trim\.qual$:'$trim_qual,'^\S+\.scrap\.fasta$:'$scrap_fasta,'^\S+\.scrap\.names$:'$scrap_names,'^\S+\.scrap\.qual$:'$scrap_qual,'^\S+\.groups$:'$groups_file
  --outputdir='$logfile.extra_files_path'
  ## Length and composition limits: a value at its "ignored" default is not passed at all.
  #if int($minlength.__str__) > 0:
   --minlength=$minlength 
  #end if
  #if int($maxlength.__str__) > 0:
   --maxlength=$maxlength 
  #end if
  ## maxambig uses -1 as its "off" value, so 0 is a real setting and is passed through.
  #if int($maxambig.__str__) >= 0:
   --maxambig=$maxambig 
  #end if
  #if int($maxhomop.__str__) > 0:
   --maxhomop=$maxhomop 
  #end if
  #if int($keepfirst.__str__) > 0:
   --keepfirst=$keepfirst 
  #end if
  #if int($removelast.__str__) > 0:
   --removelast=$removelast 
  #end if
  ## Options that only apply when an oligos file is supplied.
  #if $oligo.add == "yes":
   --oligos=$oligo.oligos
   ## The diffs parameters may render as an empty string, so test for that before int().
   #if $oligo.bdiffs.__str__ != '' and int($oligo.bdiffs.__str__) > 0:
    --bdiffs=$oligo.bdiffs
   #end if
   #if $oligo.pdiffs.__str__ != '' and int($oligo.pdiffs.__str__) > 0:
    --pdiffs=$oligo.pdiffs
   #end if
   #if $oligo.tdiffs.__str__ != '' and int($oligo.tdiffs.__str__) > 0:
    --tdiffs=$oligo.tdiffs
   #end if
   #if $oligo.ldiffs.__str__ != '' and int($oligo.ldiffs.__str__) > 0:
    --ldiffs=$oligo.ldiffs
   #end if
   #if $oligo.sdiffs.__str__ != '' and int($oligo.sdiffs.__str__) > 0:
    --sdiffs=$oligo.sdiffs
   #end if
   ## Boolean inputs render as their truevalue (the full switch) or as an empty string.
   $oligo.keepforward
   ## Emit the allfiles switch itself. It was previously never passed: the only emission
   ## line was a commented-out reference to the misspelled name "allvalues".
   $oligo.allfiles
   ## Test the rendered string rather than comparing the parameter with a Python bool.
   ## NOTE(review): assumes the wrapper accepts the allfiles switch the same way it
   ## accepts keepforward; confirm against mothur_wrapper.py.
   #if $oligo.allfiles.__str__ != '':
    --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
    --new_datasets='^\S+?\.(\S+\.fasta)$:${fasta.ext}','^\S+?\.(\S+\.groups)$:groups'
   #end if
  #end if
  ## Options that only apply when a quality file is supplied.
  #if $qual.add == "yes":
   --qfile=$qual.qfile
   #if int($qual.qaverage.__str__) > 0:
    --qaverage=$qual.qaverage
   #end if
   #if int($qual.qthreshold.__str__) > 0:
    --qthreshold=$qual.qthreshold
   #end if
   #if int($qual.qwindowaverage.__str__) > 0:
    --qwindowaverage=$qual.qwindowaverage
   #end if
   #if int($qual.qwindowsize.__str__) > 0:
    --qwindowsize=$qual.qwindowsize
   #end if
   #if int($qual.rollaverage.__str__) > 0:
    --rollaverage=$qual.rollaverage
   #end if
   #if int($qual.qstepsize.__str__) > 0:
    --qstepsize=$qual.qstepsize
   #end if
   $qual.qtrim
  #end if
  $flip 
  --fasta=$fasta
  ## The names input is optional; an unset dataset renders as the string "None".
  #if $names.__str__ != "None" and len($names.__str__) > 0:
   --name=$names
  #end if
  ## NOTE(review): the processor count is hard-coded here.
  --processors=8
 </command>
 <!-- Input parameters. Top-level integers use a sentinel default that the command template
      treats as "not set". The two conditionals (oligo, qual) reveal their extra parameters
      only when the user answers "yes". -->
 <inputs>
  <param name="fasta" type="data" format="fasta" label="fasta - Sequences"/>
  <param name="names" type="data" format="names" optional="true" label="name - Sequence representative name list"/>
  <param name="minlength" type="integer" value="0" label="minlength - Minimum Sequence Length (default 0, ignored if &#060; 1 )"/>
  <param name="maxlength" type="integer" value="0" label="maxlength - Maximum Sequence Length (default 0, ignored if &#060; 1)"/>
  <param name="maxambig" type="integer" value="-1" label="maxambig - Maximum ambiguous bases (default -1, ignored if &#060; 0)"/>
  <param name="maxhomop" type="integer" value="0" label="maxhomop - Maximum homopolymers (default 0, ignored if &#060; 1)"/>
  <!-- keepfirst and removelast are only passed to the command when greater than 0,
       so their labels state "ignored if < 1" like the other length parameters. -->
  <param name="keepfirst" type="integer" value="0" label="keepfirst - number of leading bases to keep (default 0, ignored if &#060; 1)" 
         help="trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements"/>
  <param name="removelast" type="integer" value="0" label="removelast - number of trailing bases to remove (default 0, ignored if &#060; 1)" 
         help="removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements."/>
  <!-- Barcode and primer trimming driven by an oligos file. -->
  <conditional name="oligo">
   <param name="add" type="select" label="Trim with an oligos file?" help="">
    <option value="no">no</option>
    <option value="yes">yes</option>
   </param>
   <when value="no"/>
   <when value="yes">
    <param name="oligos" type="data" format="oligos" label="oligos - barcodes and primers"/>
    <param name="bdiffs" type="integer" value="0" label="bdiffs - number of differences to allow in the barcode (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="pdiffs" type="integer" value="0" label="pdiffs - number of differences to allow in the primer (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="tdiffs" type="integer" value="0" label="tdiffs - total number of differences to allow in primer and barcode (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="ldiffs" type="integer" value="0" optional="true" label="ldiffs - total number of differences to allow in linker sequence (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <param name="sdiffs" type="integer" value="0" optional="true" label="sdiffs - total number of differences to allow in spacer sequence (default 0)">
      <validator type="in_range" message="Number of differences can't be negative" min="0"/>
    </param>
    <!-- Booleans render as the complete command-line switch when checked, else as nothing. -->
    <param name="keepforward" type="boolean" truevalue="--keepforward=true" falsevalue="" checked="false" label="keepforward - keep the primer"/>
    <param name="allfiles" type="boolean" truevalue="--allfiles=true" falsevalue="" checked="false" label="allfiles - separate into file per barcode"/>
   </when>
  </conditional>
  <!-- Quality-based trimming driven by a 454 quality file. -->
  <conditional name="qual">
   <param name="add" type="select" label="Trim based on a quality file?" help="">
    <option value="no">no</option>
    <option value="yes">yes</option>
   </param>
   <when value="no"/>
   <when value="yes">
    <param name="qfile" type="data" format="qual454" label="qfile - 454 quality file"/>
    <param name="qaverage" type="integer" value="0" label="qaverage - remove sequences that have an average base quality below this value (ignored if &#060; 1)"/>
    <param name="qthreshold" type="integer" value="0" label="qthreshold - remove sequences that have any base quality below this value (ignored if &#060; 1)"/>
    <param name="qwindowaverage" type="integer" value="0" label="qwindowaverage - remove sequences that have an average base quality below this value over a window (ignored if &#060; 1)"/>
    <param name="qwindowsize" type="integer" value="50" label="qwindowsize - number of bases in a window. Default=50."/>
    <param name="rollaverage" type="integer" value="0" label="rollaverage - remove sequences that have an average base quality below this value in a rolling window (ignored if &#060; 1)"/>
    <param name="qstepsize" type="integer" value="1" label="qstepsize - number of bases to move the window over. Default=1."/>
    <param name="qtrim" type="boolean" truevalue="--qtrim=true" falsevalue="" checked="false" label="qtrim - trim sequences below qthreshold and put in trim output, else put in scrap "/>
   </when>
  </conditional>
  <param name="flip" type="boolean" truevalue="--flip=true" falsevalue="" checked="false" label="flip - reverse complement the trimmed sequences"/>
 </inputs>
 <!-- Output datasets. The logfile and the trim/scrap fasta files are always declared;
      the remaining outputs are filtered on the inputs that cause them to be produced. -->
 <outputs>
  <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
  <data format_source="fasta" name="trim_fasta" label="${tool.name} on ${on_string}: trim.fasta"/>
  <!-- names outputs exist only when the optional names input was supplied -->
  <data format_source="names" name="trim_names" label="${tool.name} on ${on_string}: trim.names">
   <filter>names != None</filter>
  </data>
  <!-- Filters are plain Python expressions. The qfile input is required inside the "yes"
       branch of the qual conditional, so the selector value alone decides whether quality
       outputs exist. The former len(...__str__) test took the length of a bound method.
       NOTE(review): format_source names a parameter nested in a conditional; confirm that
       the resulting datasets receive the quality datatype. -->
  <data format_source="qfile" name="trim_qual" label="${tool.name} on ${on_string}: trim.qual">
   <filter>qual['add'] == 'yes'</filter>
  </data>
  <data format_source="fasta" name="scrap_fasta" label="${tool.name} on ${on_string}: scrap.fasta"/>
  <data format_source="names" name="scrap_names" label="${tool.name} on ${on_string}: scrap.names">
   <filter>names != None</filter>
  </data>
  <data format_source="qfile" name="scrap_qual" label="${tool.name} on ${on_string}: scrap.qual">
   <filter>qual['add'] == 'yes'</filter>
  </data>
  <!-- The oligos input is likewise required inside the "yes" branch of the oligo
       conditional, so no length test on the dataset object is needed. -->
  <data format="groups" name="groups_file" label="${tool.name} on ${on_string}: groups">
   <filter>oligo['add'] == 'yes'</filter>
  </data>
 </outputs>
 <!-- External dependency: the mothur package, version 1.27. -->
 <requirements>
  <requirement version="1.27" type="package">mothur</requirement>
 </requirements>
 <!-- No functional tests are defined for this tool. -->
 <tests/>
 <help>
**mothur overview**

Mothur_, initiated by Dr. Patrick Schloss and his software development team
in the Department of Microbiology and Immunology at The University of Michigan,
provides bioinformatics for the microbial ecology community.

.. _Mothur: http://www.mothur.org/wiki/Main_Page

**Command Documentation**

The trim.seqs_ command provides the preprocessing features needed to screen and sort pyrosequences.  The command will enable you to trim off primer sequences and barcodes, use the barcode information to generate a group file and split a fasta file into sub-files, screen sequences based on the qual file that comes from 454 sequencers, cull sequences based on sequence length and the presence of ambiguous bases and get the reverse complement of your sequences. While this analysis is clearly geared towards pyrosequencing collections, it can also be used with traditional Sanger sequences. 

.. _trim.seqs: http://www.mothur.org/wiki/Trim.seqs


 </help>
</tool>