changeset 0:f3ac34855f5e default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:30:11 -0400
parents
children
files blast/README blast/blastdb.py blast/blastdb_wrapper.sh blast/blastn.xml blast/blastp.xml blast/blastx.xml blast/dustmasker.xml blast/makeblastdb.xml blast/suite_config.xml blast/tblastn.xml blast/tblastx.xml
diffstat 11 files changed, 1359 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/README	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,5 @@
+the blast xml files are just modified versions of these included with the galaxy distribution; they add support for blastdb files so that users can create a database and resuse them.
+the makeblastdb tool was added for this purpose.  dustmasker was also added to allow masking of low complexity sequences.
+additional ncbi blast+ tools will be added in the near future.
+
+blastdb.py goes in lib/galaxy/datatypes and must be registered in both the datatypes_conf.xml file and registry.py files.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastdb.py	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,50 @@
+"""
+BLAST Database classes
+"""
+
+import data
+import logging
+import re
+import string
+from cgi import escape
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+import galaxy.model
+from galaxy import util
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+class BlastDb( data.Data ):
+    """Class for BLAST database files"""
+
+    file_ext = 'blastdb'
+    composite_type='basic'
+
+    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
+
+    def __init__(self,**kwd):
+        data.Data.__init__(self, **kwd)
+        self.add_composite_file('blastdb.nhr')
+        self.add_composite_file('blastdb.nin')
+        self.add_composite_file('blastdb.nsq')
+        self.add_composite_file('blastdb.nhd', optional=True)
+        self.add_composite_file('blastdb.nsi', optional=True)
+        self.add_composite_file('blastdb.nhi', optional=True)
+        self.add_composite_file('blastdb.nog', optional=True)
+        self.add_composite_file('blastdb.nsd', optional=True)
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek  = "Folder of multiple files"
+            dataset.blurb = "Folder of multiple files"
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Folder of multiple files"
+    def get_mime(self):
+        """Returns the mime type of the datatype"""
+        return 'text/plain'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastdb_wrapper.sh	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+if [ -z $1 ]
+then
+    echo "Missing arguments" 1>&2
+    exit 1
+fi
+mkdir $1
+shift
+OUT=`$* 2>&1`
+if [ $? != 0 ]
+then
+    echo $OUT 1>&2
+    exit 1
+else
+    echo $OUT
+    exit 0
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastn.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,212 @@
+<tool id="blastn" name="blastn" version="0.0.1">
+<description>Search nucleotide database with nucleotide query sequence(s)</description>
+<command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastn
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+  -db ${os.path.join($db_opts.db.extra_files_path,'blastdb')}
+#else:
+  -subject "$db_opts.subject"
+#end if
+-task $blast_type
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">Precompiled BLAST Database</option>
+              <option value="user_db">BLAST Database in your History</option>
+              <option value="fasta">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Precompiled Nucleotide BLAST database">
+                    <!-- The BLAST loc file has three columns:
+                         column 0 is an identifier (not used here, see legacy megablast wrapper),
+                         column 1 is the caption (show this to the user),
+                         column 2 is the database path (given to BLAST+) -->
+                    <options from_file="blastdb.loc">
+                      <column name="name" index="1"/>
+                      <column name="value" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="user_db">
+                <param name="database" type="hidden" value="" /> 
+                <param name="db" type="data" format="blastdb" label="Blast DB" />
+            </when>
+            <when value="fasta">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="megablast">megablast</option>
+            <option value="blastn">blastn</option>
+            <option value="blastn-short">blastn-short</option>
+            <option value="dc-megablast">dc-megablast</option>
+            <!-- Using BLAST 2.2.24+ this gives an error:
+            BLAST engine error: Program type 'vecscreen' not supported
+            <option value="vecscreen">vecscreen</option>
+            -->
+        </param>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq">Tabular (extended 22 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" />
+                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+                    <option value="-strand both">Both</option>
+                    <option value="-strand plus">Plus (forward)</option>
+                    <option value="-strand minus">Minus (reverse complement)</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastn</requirement>
+    </requirements>
+    <tests>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *nucleotide database* using a *nucleotide query*,
+using the NCBI BLAST+ blastn command line tool.
+Algorithms include blastn, megablast, and discontiguous megablast.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastp.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,259 @@
+<tool id="blastp" name="blastp" version="0.0.1">
+    <description>Search protein database with protein query sequence(s)</description>
+<command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastp
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+  -db "$db_opts.db"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-task $blast_type
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">Precompiled BLAST Database</option>
+              <option value="user_db">BLAST Database in your History</option>
+              <option value="fasta">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Precompiled Protein BLAST database">
+                    <!-- The BLAST loc file has three columns:
+                         column 0 is an identifier (not used),
+                         column 1 is the caption (show this to the user),
+                         column 2 is the database path (given to BLAST+) -->
+                    <options from_file="blastdb_p.loc">
+                      <column name="name" index="1"/>
+                      <column name="value" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="user_db">
+                <param name="database" type="hidden" value="" /> 
+                <param name="db" type="data" format="blastdb" label="Blast DB" />
+            </when>
+            <when value="fasta">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="blast_type" type="select" display="radio" label="Type of BLAST">
+            <option value="blastp">blastp</option>
+            <option value="blastp-short">blastp-short</option>
+        </param>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq">Tabular (extended 22 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!--
+                Can't use '-ungapped' on its own, error back is:
+                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+                Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+                -->
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastp</requirement>
+    </requirements>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="True" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="True" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="True" />
+            <output name="output1" file="blastp_four_human_vs_rhodopsin_22c.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8" />
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *protein query*,
+using the NCBI BLAST+ blastp command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastx.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,211 @@
+<tool id="blastx" name="blastx" version="0.0.1">
+    <description>Search protein database with translated nucleotide query sequence(s)</description>
+<command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastx
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+  -db "$db_opts.db"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">Precompiled BLAST Database</option>
+              <option value="user_db">BLAST Database in your History</option>
+              <option value="fasta">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Precompiled Protein BLAST database">
+                    <!-- The BLAST loc file has three columns:
+                         column 0 is an identifier (not used),
+                         column 1 is the caption (show this to the user),
+                         column 2 is the database path (given to BLAST+) -->
+                    <options from_file="blastdb_p.loc">
+                      <column name="name" index="1"/>
+                      <column name="value" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="user_db">
+                <param name="database" type="hidden" value="" /> 
+                <param name="db" type="data" format="blastdb" label="Blast DB" />
+            </when>
+            <when value="fasta">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq">Tabular (extended 22 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+                    <option value="-strand both">Both</option>
+                    <option value="-strand plus">Plus (forward)</option>
+                    <option value="-strand minus">Minus (reverse complement)</option>
+                </param>
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for blastx -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="blastx on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">blastx</requirement>
+    </requirements>
+    <tests>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *translated nucleotide query*,
+using the NCBI BLAST+ blastx command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/dustmasker.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,44 @@
+<tool id="dustmasker" name="dustmasker" version="0.0.1">
+<description>Low complexity region masker</description>
+<command interpreter='bash'>blastdb_wrapper.sh $outfile.extra_files_path
+dustmasker -outfmt seqloc_asn1_text -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+#if $in.fmt == 'blastdb':
+-infmt blastdb -in ${os.path.join($in.file.extra_files_path,'blastdb')}
+#else:
+-infmt fasta -in $in.file -parse_seqids
+#end if
+-window $window -level $level -linker $linker
+</command>
+<inputs>
+    <conditional name="in">
+        <param name="fmt" type="select" label="Input format">
+            <option value="blastdb">BLAST Database</option>
+            <option value="fasta">Fasta Database</option>
+        </param>
+        <when value="blastdb">
+            <param name="file" type="data" format="blastdb" label="BLAST database" />
+        </when>
+        <when value="fasta">
+            <param name="file" type="data" format="fasta" label="FASTA file" />
+        </when>
+    </conditional>
+    <param name="window" type="integer" value="64" label="DUST window length" />
+    <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" />
+    <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" />
+</inputs>
+<outputs>
+    <data name="outfile" format="asn1" />
+</outputs>
+<requirements>
+    <binary>dustmasker</binary>
+</requirements>
+<help>
+**What it does**
+
+Low complexity region masker based on Symmetric DUST algorithm
+
+**Documentation**
+
+http://www.ncbi.nlm.nih.gov/books/NBK1763/
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/makeblastdb.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,85 @@
+<tool id="makeblastdb" name="makeblastdb" version="0.0.1">
+<description>Make BLAST database</description>
+<command interpreter="bash">blastdb_wrapper.sh $outfile.extra_files_path
+makeblastdb -logfile $outfile -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+-parse_seqids
+$hash_index
+#set $sep = '-in '
+#for $i in $in
+$sep${i.file}
+#set $set = ', '
+#end for
+-title $title
+-dbtype $dbtype 
+#set $sep = '-mask_data '
+#for $i in $mask_data
+$sep${i.file}
+#set $set = ', '
+#end for 
+#set $sep = '-gi_mask -gi_mask_name '
+#for $i in $gi_mask
+$sep${i.file}
+#set $set = ', '
+#end for 
+#if $tax.select == 'id':
+-taxid $tax.id
+#elsif $tax.select == 'map':
+-taxid_map $tax.map
+#end if
+</command>
+<inputs>
+    <repeat name="in" title="Blast or Fasta Database">
+        <param name="file" type="data" format="fasta,blastdb" label="Blast or Fasta database" />
+    </repeat>
+    <param name="title" type="text" value="" label="Title for BLAST database" />
+    <param name="dbtype" type="select" display="radio" label="[-dbtype] Molecule type of input">
+        <option value="prot">protein</option>
+        <option value="nucl">nucleotide</option>
+    </param>
+    <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="[-hash_index] Enables the creation of sequence hash values. These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
+
+    <!-- SEQUENCE MASKING OPTIONS -->
+    <repeat name="mask_data" title="[-mask_data] Provide one or more files containing masking data">
+        <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
+    </repeat>
+    <repeat name="gi_mask" title="[-gi_mask_name] Create GI indexed masking data">
+        <param name="file" type="data" format="asnb" label="Masking data output file" />
+    </repeat>
+
+    <!-- TAXONOMY OPTIONS -->
+    <conditional name="tax">
+        <param name="select" type="select" label="Taxonomy options">
+            <option value="">Do not assign sequences to Taxonomy IDs</option>
+            <option value="id">[-taxid] Assign all sequences to one Taxonomy ID</option>
+            <option value="map">[-taxid_map] Supply text file mapping sequence IDs to taxnomy IDs</option>
+        </param>
+        <when value="">
+        </when>
+        <when value="id">
+            <param name="id" type="integer" value="" label="NCBI taxonomy ID" help="Integer &gt;=0" />
+        </when>
+        <when value="map">
+            <param name="file" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" />
+        </when>
+    </conditional>
+</inputs>
+<outputs>
+    <data name="outfile" format="blastdb" />
+</outputs>
+<requirements>
+</requirements>
+<help>
+**What it does**
+
+Make BLAST database from one or more FASTA files and/or BLAST databases.
+This application serves as a replacement for formatdb.
+
+Applying masks to an existing BLAST database will not change the original database; a new database will be created.
+For this reason, it's best to apply all masks at once to minimize the number of unnecessary intermediate databases.
+
+
+**Documentation**
+
+http://www.ncbi.nlm.nih.gov/books/NBK1763/
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/suite_config.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,24 @@
+<suite id="blast" name="Modified NCBI Blast+ Tools" version="1.0.0">
+	<description>Modified Galaxy wrappers add support for makeblastdb files and add dustmasker</description>
+	<tool id="blastn" name="blastn" version="0.0.1">
+		<description>blastn with blastdb support</description>
+	</tool>
+	<tool id="blastp" name="blastp" version="0.0.1">
+		<description>blastp with blastdb support</description>
+	</tool>
+	<tool id="blastx" name="blastx" version="0.0.1">
+		<description>blastx with blastdb support</description>
+	</tool>
+	<tool id="tblastn" name="tblastn" version="0.0.1">
+		<description>tblastn with blastdb support</description>
+	</tool>
+	<tool id="tblastx" name="tblastx" version="0.0.1">
+		<description>tblastx with blastdb support</description>
+	</tool>
+	<tool id="makeblastdb" name="makeblastdb" version="0.0.1">
+		<description>Make blast Db file</description>
+	</tool>
+	<tool id="dustmasker" name="dustmasker" version="0.0.1">
+		<description>dust masking of blast db file</description>
+	</tool>
+</suite>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/tblastn.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,242 @@
+<tool id="tblastn" name="tblastn" version="0.0.1">
+    <description>Search translated nucleotide database with protein query sequence(s)</description>
+<command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+tblastn
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+  -db "$db_opts.db"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">Precompiled BLAST Database</option>
+              <option value="user_db">BLAST Database in your History</option>
+              <option value="fasta">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Precompiled Nucleotide BLAST database">
+                    <!-- The BLAST loc file has three columns:
+                         column 0 is an identifier (not used here, see legacy megablast wrapper),
+                         column 1 is the caption (show this to the user),
+                         column 2 is the database path (given to BLAST+) -->
+                    <options from_file="blastdb.loc">
+                      <column name="name" index="1"/>
+                      <column name="value" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="user_db">
+                <param name="database" type="hidden" value="" /> 
+                <param name="db" type="data" format="blastdb" label="Blast DB" />
+            </when>
+            <when value="fasta">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq">Tabular (extended 22 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!--
+                Can't use '-ungapped' on its own, error back is:
+                Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
+                Tried using '-ungapped -comp_based_stats F' and tblastn crashed with 'Attempt to access NULL pointer.'
+                <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
+                -->
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="tblastn on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">tblastn</requirement>
+    </requirements>
+    <tests>
+        <test>
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="false" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <!-- Same as above, but parse deflines -->
+            <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="filter_query" value="false" />
+            <param name="matrix" value="BLOSUM80" />
+            <param name="max_hits" value="0" />
+            <param name="word_size" value="0" />
+            <param name="parse_deflines" value="true" />
+            <output name="output1" file="tblastn_four_human_vs_rhodopsin_parse_deflines.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *protein query*,
+using the NCBI BLAST+ tblastn command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/tblastx.xml	Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,209 @@
+<tool id="tblastx" name="tblastx" version="0.0.1">
+    <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
+<command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+tblastx
+-query "$query"
+#if $db_opts.db_opts_selector == "db":
+  -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+  -db "$db_opts.db"
+#else:
+  -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+    </command>
+    <inputs>
+        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Subject database/sequences">
+              <option value="db" selected="True">Precompiled BLAST Database</option>
+              <option value="user_db">BLAST Database in your History</option>
+              <option value="file">FASTA file</option>
+            </param>
+            <when value="db">
+                <param name="database" type="select" label="Precompiled Nucleotide BLAST database">
+                    <!-- The BLAST loc file has three columns:
+                         column 0 is an identifier (not used here, see legacy megablast wrapper),
+                         column 1 is the caption (show this to the user),
+                         column 2 is the database path (given to BLAST+) -->
+                    <options from_file="blastdb.loc">
+                      <column name="name" index="1"/>
+                      <column name="value" index="2"/>
+                    </options>
+                </param>
+                <param name="subject" type="hidden" value="" /> 
+            </when>
+            <when value="user_db">
+                <param name="database" type="hidden" value="" /> 
+                <param name="db" type="data" format="blastdb" label="Blast DB" />
+            </when>
+            <when value="fasta">
+                <param name="database" type="hidden" value="" /> 
+                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> 
+            </when>
+        </conditional>
+        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
+        <param name="out_format" type="select" label="Output format">
+            <option value="6" selected="True">Tabular (standard 12 columns)</option>
+            <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq">Tabular (extended 22 columns)</option>
+            <option value="5">BLAST XML</option>
+            <option value="0">Pairwise text</option>
+            <option value="0 -html">Pairwise HTML</option>
+            <option value="2">Query-anchored text</option>
+            <option value="2 -html">Query-anchored HTML</option>
+            <option value="4">Flat query-anchored text</option>
+            <option value="4 -html">Flat query-anchored HTML</option>
+            <!--
+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
+            -->
+        </param>
+        <conditional name="adv_opts">
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+              <option value="basic" selected="True">Hide Advanced Options</option>
+              <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
+                <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
+                <param name="strand" type="select" label="Query strand(s) to search against database/subject">
+                    <option value="-strand both">Both</option>
+                    <option value="-strand plus">Plus (forward)</option>
+                    <option value="-strand minus">Minus (reverse complement)</option>
+                </param>
+                <param name="matrix" type="select" label="Scoring matrix">
+                    <option value="BLOSUM90">BLOSUM90</option>
+                    <option value="BLOSUM80">BLOSUM80</option>
+                    <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
+                    <option value="BLOSUM50">BLOSUM50</option> 
+                    <option value="BLOSUM45">BLOSUM45</option>
+                    <option value="PAM250">PAM250</option>
+                    <option value="PAM70">PAM70</option>
+                    <option value="PAM30">PAM30</option>
+                </param>
+                <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer -->
+                <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
+                    <validator type="in_range" min="0" />
+                </param>
+                <!-- I'd like word_size to be optional, with minimum 2 for tblastx -->
+                <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
+                    <validator type="in_range" min="0" />
+                </param>
+                <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="tblastx on ${db_opts.db_opts_selector}">
+            <change_format>
+                <when input="out_format" value="0" format="txt"/>
+                <when input="out_format" value="0 -html" format="html"/>
+                <when input="out_format" value="2" format="txt"/>
+                <when input="out_format" value="2 -html" format="html"/>
+                <when input="out_format" value="4" format="txt"/>
+                <when input="out_format" value="4 -html" format="html"/>
+                <when input="out_format" value="5" format="blastxml"/>
+            </change_format>
+        </data>
+    </outputs>
+    <requirements>
+        <requirement type="binary">tblastx</requirement>
+    </requirements>
+    <tests>
+    </tests>
+    <help>
+    
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.  
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *protein query*,
+using the NCBI BLAST+ tblastx command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+    </help>
+</tool>