view mothur/tools/mothur/chimera.slayer.xml @ 2:e990ac8a0f58

Migrated tool version 1.19.0 from old tool shed archive to new tool shed repository
author jjohnson
date Tue, 07 Jun 2011 17:39:06 -0400
parents fcc0778f6987
children ce6e81622c6a
line wrap: on
line source

<tool id="mothur_chimera_slayer" name="Chimera.slayer" version="1.19.0">
 <description>Find putative chimeras using slayer</description>
 <!--
  Command line for mothur_wrapper.py running mothur's chimera.slayer command.
  The result option pairs output file name patterns (regular expressions) with the
  Galaxy datasets declared in the outputs section. Dataset and reference paths are
  single-quoted so that a path containing spaces or shell metacharacters is still
  passed as one argument, matching how outputdir and tmpdir were already quoted.
 -->
 <command interpreter="python">
  mothur_wrapper.py
  --cmd='chimera.slayer'
  ## pattern:dataset pairs; every literal dot in the patterns is escaped
  --result='^mothur\.\S+\.logfile$:'$logfile,'^\S+\.slayer\.chimeras$:'$out_file,'^\S+\.slayer\.accnos$:'$out_accnos
  --outputdir='$logfile.extra_files_path'
  --tmpdir='${logfile.extra_files_path}/input'
  --fasta='$fasta'
  #if $alignment.source == 'self':
   ## the candidate sequences serve as their own reference; a names file is optional
   --reference='self'
   #if $alignment.name.__str__ != "None" and len($alignment.name.__str__) > 0:
    --name='$alignment.name'
   #end if
  #else:
   ## both the history and the cached branch expose the reference as "template"
   --reference='$alignment.template'
  #end if
  #if $options.setby == 'user':
   --search=$options.search
   --window=$options.window
   --increment=$options.increment
   --match=$options.match
   --mismatch=$options.mismatch
   --numwanted=$options.numwanted
   --parents=$options.parents
   --minsim=$options.minsim
   --mincov=$options.mincov
   --iters=$options.iters
   --minbs=$options.minbs
   --minsnp=$options.minsnp
   --divergence=$options.divergence
   ## boolean params expand to a complete option string when checked, or to nothing
   $options.trim
   $options.split
  #end if
  ## processor count is fixed here and is not exposed on the form
  --processors=2
 </command>
 <!--
  Tool form. Parameter names here are read by the Cheetah template in the command
  element (fasta, alignment.*, options.*), so the two must be kept in sync.
 -->
 <inputs>
  <param name="fasta" type="data" format="fasta" label="fasta - Candidate Sequences"/>
  <!-- Source of the reference: the history and cached branches both define "template";
       the self branch instead offers an optional names file. -->
  <conditional name="alignment">
   <param name="source" type="select" label="Select Reference Template from" help="">
    <option value="hist">History</option>
    <option value="ref">Cached Reference</option>
    <option value="self">Self - Use abundant sequences from the input Candidate Sequences fasta</option>
   </param>
   <when value="ref">
    <param name="template" type="select" label="reference - Select an alignment database" help="">
     <options from_file="mothur_aligndb.loc">
      <column name="name" index="0" />
      <column name="value" index="1" />
     </options>
    </param>
   </when>
   <when value="hist">
    <param name="template" type="data" format="fasta" label="reference - Reference to align with" help=""/>
   </when>
   <when value="self">
    <param name="name" type="data" format="names" optional="true" label="names - Sequences Names"/>
   </when>
  </conditional>
  <!-- Advanced settings are only added to the command line when "user" is selected. -->
  <conditional name="options">
   <param name="setby" type="select" label="Slayer Options" help="">
    <option value="default">Use default settings</option>
    <option value="user">Manually set options</option>
   </param>
   <when value="default"/>
   <when value="user">
    <param name="search" type="select" label="search - Search method for finding the closest parent" help="">
     <option value="distance" selected="true">distance</option>
     <option value="kmer">kmer</option>
     <option value="blast">blast</option>
    </param>
    <!-- Range limits make the form reject values that cannot be meaningful (negative
         counts, a zero-width window or slide, percentages outside 0-100) instead of
         handing them to mothur. match and mismatch are scores and stay unrestricted. -->
    <param name="window" type="integer" value="50" min="1" label="window - Window size for searching for chimeras (default 50)" />
    <param name="increment" type="integer" value="5" min="1" label="increment - Increment for window slide on each iteration (default 5)" />
    <param name="match" type="integer" value="5" label="match - Reward matched bases (default 5)"/>
    <param name="mismatch" type="integer" value="-4" label="mismatch - Penalty for mismatched bases (default -4)"/>
    <param name="numwanted" type="integer" value="15" min="0" label="numwanted - Number of potential parents to compare with query sequence (default 15)"/>
    <param name="parents" type="integer" value="3" min="0" label="parents - Number of potential parents to investigate from the numwanted best matches (default 3)"/>
    <param name="minsim" type="integer" value="90" min="0" max="100" label="minsim - Minimum similarity % between the query and parent (default 90)"/>
    <param name="mincov" type="integer" value="70" min="0" max="100" label="mincov - Minimum coverage % of closest matches in reference and the query (default 70)"/>
    <param name="iters" type="integer" value="100" min="0" label="iters - Number of bootstrap iterations to try (default 100)"/>
    <param name="minbs" type="integer" value="90" min="0" max="100" label="minbs - Minimum bootstrap support % for calling a sequence chimeric (default 90)"/>
    <param name="minsnp" type="integer" value="100" min="0" max="100" label="minsnp - Percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default 100)"/>
    <param name="divergence" type="float" value="1.007" label="divergence - Divergence cutoff for chimera determination (default 1.007)"/>
    <!-- Checked boxes expand to the full option string; unchecked boxes expand to nothing. -->
    <param name="trim" type="boolean" truevalue="--trim=True" falsevalue="" checked="false" label="trim - include chimeric sequences trimmed to their longest piece" />
    <param name="split" type="boolean" truevalue="--split=True" falsevalue="" checked="false" label="split - detect tri- and quadmeras" help="if a sequence comes back as non-chimeric, mothur will test the two sides to see if they are chimeric."/>
   </when>
  </conditional>
 </inputs>
 <!-- Datasets produced by a run; each name is referenced in the result mapping of the command. -->
 <outputs>
  <data name="logfile"
        format="html"
        label="${tool.name} on ${on_string}: logfile"/>
  <data name="out_file"
        format="txt"
        label="${tool.name} on ${on_string}: slayer.chimeras"/>
  <data name="out_accnos"
        format="accnos"
        label="${tool.name} on ${on_string}: slayer.accnos"/>
 </outputs>
 <!-- External program this tool depends on. -->
 <requirements>
  <requirement type="binary">mothur</requirement>
 </requirements>
 <!-- No functional tests are defined for this tool. -->
 <tests/>
 <help>
**Mothur Overview**

Mothur_, initiated by Dr. Patrick Schloss and his software development team
in the Department of Microbiology and Immunology at The University of Michigan,
provides bioinformatics for the microbial ecology community.

.. _Mothur: http://www.mothur.org/wiki/Main_Page

**Command Documentation**

The chimera.slayer_ command identifies putative chimeras using the slayer approach.

ChimeraSlayer_ is a chimeric sequence detection utility, compatible with near-full length Sanger sequences and shorter 454-FLX sequences (~500 bp).

Chimera Slayer involves the following series of steps that operate to flag chimeric 16S rRNA sequences: 

 (A) the ends of a query sequence are searched against an included database of reference chimera-free 16S sequences to identify potential parents of a chimera; 
 (B) candidate parents of a chimera are selected as those that form a branched best scoring alignment to the NAST-formatted query sequence; 
 (C) the NAST alignment of the query sequence is improved in a 'chimera-aware' profile-based NAST realignment to the selected reference parent sequences; and 
 (D) an evolutionary framework is used to flag query sequences found to exhibit greater sequence homology to an in silico chimera formed between any two of the selected reference parent sequences.

Note: 
It is not recommended to blindly discard all sequences flagged as chimeras. Some may represent naturally formed chimeras that do not represent PCR artifacts. Sequences flagged may warrant further investigation.


.. _ChimeraSlayer: http://microbiomeutil.sourceforge.net/
.. _chimera.slayer: http://www.mothur.org/wiki/Chimera.slayer


 </help>
</tool>