view mothur/tools/mothur/filter.seqs.xml @ 2:e990ac8a0f58

Migrated tool version 1.19.0 from old tool shed archive to new tool shed repository
author jjohnson
date Tue, 07 Jun 2011 17:39:06 -0400
parents fcc0778f6987
children 4f797d3eee3a
line wrap: on
line source

<tool id="mothur_filter_seqs" name="Filter.seqs" version="1.19.0" force_history_refresh="True">
 <description>removes columns from alignments</description>
 <command interpreter="python">
  ## Cheetah template that assembles the mothur_wrapper.py command line for mothur's filter.seqs.
  ## Each entry passed to the result option is a 'regex:path' pair naming an output file pattern
  ## and the Galaxy dataset path it is paired with.
  mothur_wrapper.py
  #import re, os.path
  --cmd='filter.seqs'
  ## #set fasta_result = "'" + "^\S+\.filter.fasta$:'" + $out_fasta.__str__
  ## Pattern for the filtered primary alignment: the input's basename with its extension replaced
  ## by ".filter.fasta". It is anchored with ^ so it is consistent with the other result patterns
  ## and with the new_datasets patterns built below.
  #set fasta_result = "'^" + $re.sub(r'(^.*)\.(.*?)$',r'\1.filter.fasta$',$os.path.basename($fasta.__str__)) + ":'" + $out_fasta.__str__
  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.filter$:'$out_filter,$fasta_result
  --outputdir='$logfile.extra_files_path'
  ## filter.seqs creates a temp file, this can fail if the path contains dashes (common on MacOSX temp files)
  ## so specify an tmpdir for the working dir, and the wrapper will symlink inputs into that dir
  --tmpdir='${logfile.extra_files_path}/input'
  ## The primary alignment followed by every additional alignment, comma separated.
  --fasta=$fasta#for i in $inputs#,${i.fasta}#end for#
  #if $inputs != None and len($inputs) > 0:
   ## One 'regex:align' entry per additional alignment, so that each extra filtered file
   ## can be picked up as a new dataset of type align.
   --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
   #set datasets = []
   #for i in $inputs:
    #set datasets = $datasets + ["'" + $re.sub(r'(^.*)\.(.*?)$',r'^\1.filter.fasta$',$os.path.basename($i.fasta.__str__)) + ":align'"]
   #end for
   --new_datasets=#echo ','.join($datasets)
  #end if
  ## Expands to nothing when checked and to the vertical=false option when unchecked.
  $vertical
  ## Compare against the string "None" (as done for hard below): the stringified value can
  ## never equal the None object, so the previous test could not filter out an unset value.
  #if $trump.__str__ != "None" and len($trump.__str__) > 0:
   --trump=$trump
  #end if
  ## Convert to int before the range check so the comparison is numeric rather than a
  ## comparison against the parameter wrapper object. The default of -1 lies outside 0-100,
  ## which leaves the option off.
  #if 100 >= int($soft.__str__) >= 0:
   --soft='$soft'
  #end if
  #if $hard.__str__ != "None" and len($hard.__str__) > 0:
   --hard=$hard
  #end if
  --processors=2
 </command>
 <inputs>
  <!-- Primary alignment to filter. -->
  <param name="fasta"
         type="data"
         format="align"
         label="fasta - Alignment Fasta"/>
  <!-- Zero or more further alignments; the command template appends each one to the fasta list. -->
  <repeat name="inputs"
          title="Additional Alignment File">
   <param name="fasta"
          type="data"
          format="align"
          label="fasta - Alignment Fasta"/>
  </repeat>
  <!-- Checked (the default) contributes an empty string to the command line;
       unchecked contributes the falsevalue option. -->
  <param name="vertical"
         type="boolean"
         checked="true"
         truevalue=""
         falsevalue="--vertical=false"
         label="vertical - Vertical column"
         help="Ignore any column that only contains gap characters (i.e. '-' or '.')"/>
  <!-- The empty "Off" value makes the command template omit the trump option. -->
  <param name="trump"
         type="select"
         label="trump - Trump character"
         help="Remove a column if the trump character is found at that position in any sequence of the alignment.">
   <option value="">Off</option>
   <option value=".">.</option>
   <option value="-">-</option>
   <option value="N">N</option>
  </param>
  <!-- The default of -1 is outside the 0 to 100 range tested by the command template,
       so the soft option is omitted unless the user enters a percentage. -->
  <param name="soft"
         type="integer"
         value="-1"
         label="soft - percentage required to retain column. (0-100)"
         help="Removes any column where the dominant base (i.e. A, T, G, C, or U) does not occur in at least a designated percentage of sequences."/>
  <!-- Optional filter file; the command template passes it only when one is selected. -->
  <param name="hard"
         type="data"
         format="filter"
         optional="True"
         label="hard - Hard Column Filter"
         help="A file should only contain one line consisting of 0's and 1's. "/>
 </inputs>
 <outputs>
  <!-- HTML log; the command template also uses this dataset's extra_files_path
       as the output directory and as the parent of the temporary input directory. -->
  <data name="logfile"
        format="html"
        label="${tool.name} on ${on_string}: logfile"/>
  <!-- The filter file, paired in the command template with the result pattern ending in ".filter". -->
  <data name="out_filter"
        format="filter"
        label="${tool.name} on ${on_string}: filter"/>
  <!-- The filtered version of the primary alignment. -->
  <data name="out_fasta"
        format="align"
        label="${tool.name} on ${on_string}: filtered fasta"/>
 </outputs>
 <requirements>
  <!-- Declares the mothur binary as a requirement of this tool. -->
  <requirement type="binary">mothur</requirement>
 </requirements>
 <!-- No functional tests are defined for this tool yet. -->
 <tests/>
 <help>
**Mothur Overview**

Mothur_, initiated by Dr. Patrick Schloss and his software development team
in the Department of Microbiology and Immunology at The University of Michigan,
provides bioinformatics for the microbial ecology community.

.. _Mothur: http://www.mothur.org/wiki/Main_Page

**Command Documentation**

The filter.seqs_ command removes columns from alignments based on criteria defined by the user. For example, alignments generated against reference alignments (e.g. from RDP, SILVA, or greengenes) often have columns where every character is either a '.' or a '-'. These columns are not included in calculating distances because they have no information in them. By removing these columns, the calculation of a large number of distances is accelerated. People also like to mask their sequences to remove variable regions using a soft or hard mask (e.g. Lane's mask). This type of masking is only encouraged for deep-level phylogenetic analysis, not fine-level analysis such as that needed when calculating OTUs.

.. _filter.seqs: http://www.mothur.org/wiki/Filter.seqs


 </help>
</tool>