# HG changeset patch
# User edward-kirton
# Date 1307482211 14400
# Node ID f3ac34855f5effab54a6e228746edaafdd9067fd
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
diff -r 000000000000 -r f3ac34855f5e blast/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/README Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,5 @@
+the blast xml files are just modified versions of those included with the galaxy distribution; they add support for blastdb files so that users can create a database and reuse it.
+the makeblastdb tool was added for this purpose. dustmasker was also added to allow masking of low complexity sequences.
+additional ncbi blast+ tools will be added in the near future.
+
+blastdb.py goes in lib/galaxy/datatypes and must be registered in both datatypes_conf.xml and registry.py.
diff -r 000000000000 -r f3ac34855f5e blast/blastdb.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastdb.py Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,50 @@
+"""
+BLAST Database classes
+"""
+
+import data
+import logging
+import re
+import string
+from cgi import escape
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+import galaxy.model
+from galaxy import util
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+class BlastDb( data.Data ):
+ """Class for BLAST database files"""
+
+ file_ext = 'blastdb'
+ composite_type='basic'
+
+ MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
+
+ def __init__(self,**kwd):
+ data.Data.__init__(self, **kwd)
+ self.add_composite_file('blastdb.nhr')
+ self.add_composite_file('blastdb.nin')
+ self.add_composite_file('blastdb.nsq')
+ self.add_composite_file('blastdb.nhd', optional=True)
+ self.add_composite_file('blastdb.nsi', optional=True)
+ self.add_composite_file('blastdb.nhi', optional=True)
+ self.add_composite_file('blastdb.nog', optional=True)
+ self.add_composite_file('blastdb.nsd', optional=True)
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ dataset.peek = "Folder of multiple files"
+ dataset.blurb = "Folder of multiple files"
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+ def display_peek( self, dataset ):
+ try:
+ return dataset.peek
+ except:
+ return "Folder of multiple files"
+ def get_mime(self):
+ """Returns the mime type of the datatype"""
+ return 'text/plain'
diff -r 000000000000 -r f3ac34855f5e blast/blastdb_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastdb_wrapper.sh Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Usage: blastdb_wrapper.sh OUTPUT_DIR COMMAND [ARGS...] -- create OUTPUT_DIR, then run COMMAND.
+if [ -z "$1" ]
+then
+    echo "Missing arguments" 1>&2
+    exit 1
+fi
+mkdir -p "$1" || exit 1   # -p: an already existing directory is not an error
+shift
+OUT=$("$@" 2>&1)          # "$@" forwards every argument intact (no re-splitting, no globbing)
+if [ $? -ne 0 ]
+then
+    echo "$OUT" 1>&2      # quoted so multi-line output keeps its line breaks
+    exit 1
+else
+    echo "$OUT"
+    exit 0
+fi
diff -r 000000000000 -r f3ac34855f5e blast/blastn.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastn.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,212 @@
+
+Search nucleotide database with nucleotide query sequence(s)
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastn
+-query "$query"
+## Three search targets: a preconfigured database, a user-built blastdb dataset, or a subject file.
+#if $db_opts.db_opts_selector == "db":
+ -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+## For a blastdb dataset, -db is the base name 'blastdb' inside the dataset's extra_files_path.
+ -db ${os.path.join($db_opts.db.extra_files_path,'blastdb')}
+#else:
+ -subject "$db_opts.subject"
+#end if
+-task $blast_type
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ blastn
+
+
+
+
+
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.
+
+-----
+
+**What it does**
+
+Search a *nucleotide database* using a *nucleotide query*,
+using the NCBI BLAST+ blastn command line tool.
+Algorithms include blastn, megablast, and discontiguous megablast.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+ 1 qseqid Query Seq-id (ID of your sequence)
+ 2 sseqid Subject Seq-id (ID of the database hit)
+ 3 pident Percentage of identical matches
+ 4 length Alignment length
+ 5 mismatch Number of mismatches
+ 6 gapopen Number of gap openings
+ 7 qstart Start of alignment in query
+ 8 qend End of alignment in query
+ 9 sstart Start of alignment in subject (database hit)
+ 10 send End of alignment in subject (database hit)
+ 11 evalue Expectation value (E-value)
+ 12 bitscore Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name Description
+------ ------------- -------------------------------------------
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
+ 14 score Raw score
+ 15 nident Number of identical matches
+ 16 positive Number of positive-scoring matches
+ 17 gaps Total number of gaps
+ 18 ppos Percentage of positive-scoring matches
+ 19 qframe Query frame
+ 20 sframe Subject frame
+ 21 qseq Aligned part of query sequence
+ 22 sseq Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214.
+
+
+
diff -r 000000000000 -r f3ac34855f5e blast/blastp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastp.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,259 @@
+
+ Search protein database with protein query sequence(s)
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastp
+-query "$query"
+## Three search targets: a preconfigured database, a user-built blastdb dataset, or a subject file.
+#if $db_opts.db_opts_selector == "db":
+ -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+## A blastdb dataset keeps its files in extra_files_path under the base name 'blastdb',
+## so -db must point there rather than at the dataset file itself.
+ -db "${os.path.join($db_opts.db.extra_files_path,'blastdb')}"
+#else:
+ -subject "$db_opts.subject"
+#end if
+-task $blast_type
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ blastp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *protein query*,
+using the NCBI BLAST+ blastp command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+ 1 qseqid Query Seq-id (ID of your sequence)
+ 2 sseqid Subject Seq-id (ID of the database hit)
+ 3 pident Percentage of identical matches
+ 4 length Alignment length
+ 5 mismatch Number of mismatches
+ 6 gapopen Number of gap openings
+ 7 qstart Start of alignment in query
+ 8 qend End of alignment in query
+ 9 sstart Start of alignment in subject (database hit)
+ 10 send End of alignment in subject (database hit)
+ 11 evalue Expectation value (E-value)
+ 12 bitscore Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name Description
+------ ------------- -------------------------------------------
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
+ 14 score Raw score
+ 15 nident Number of identical matches
+ 16 positive Number of positive-scoring matches
+ 17 gaps Total number of gaps
+ 18 ppos Percentage of positive-scoring matches
+ 19 qframe Query frame
+ 20 sframe Subject frame
+ 21 qseq Aligned part of query sequence
+ 22 sseq Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+
+
diff -r 000000000000 -r f3ac34855f5e blast/blastx.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/blastx.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,211 @@
+
+ Search protein database with translated nucleotide query sequence(s)
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+blastx
+-query "$query"
+## Three search targets: a preconfigured database, a user-built blastdb dataset, or a subject file.
+#if $db_opts.db_opts_selector == "db":
+ -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+## A blastdb dataset keeps its files in extra_files_path under the base name 'blastdb',
+## so -db must point there rather than at the dataset file itself.
+ -db "${os.path.join($db_opts.db.extra_files_path,'blastdb')}"
+#else:
+ -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ blastx
+
+
+
+
+
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.
+
+-----
+
+**What it does**
+
+Search a *protein database* using a *translated nucleotide query*,
+using the NCBI BLAST+ blastx command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+ 1 qseqid Query Seq-id (ID of your sequence)
+ 2 sseqid Subject Seq-id (ID of the database hit)
+ 3 pident Percentage of identical matches
+ 4 length Alignment length
+ 5 mismatch Number of mismatches
+ 6 gapopen Number of gap openings
+ 7 qstart Start of alignment in query
+ 8 qend End of alignment in query
+ 9 sstart Start of alignment in subject (database hit)
+ 10 send End of alignment in subject (database hit)
+ 11 evalue Expectation value (E-value)
+ 12 bitscore Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name Description
+------ ------------- -------------------------------------------
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
+ 14 score Raw score
+ 15 nident Number of identical matches
+ 16 positive Number of positive-scoring matches
+ 17 gaps Total number of gaps
+ 18 ppos Percentage of positive-scoring matches
+ 19 qframe Query frame
+ 20 sframe Subject frame
+ 21 qseq Aligned part of query sequence
+ 22 sseq Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+
+
diff -r 000000000000 -r f3ac34855f5e blast/dustmasker.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/dustmasker.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,44 @@
+
+Low complexity region masker
+blastdb_wrapper.sh $outfile.extra_files_path
+## The wrapper receives the output directory first; the dustmasker command line follows it.
+dustmasker -outfmt seqloc_asn1_text -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+## Input is either an existing blastdb dataset (base name 'blastdb' in its extra_files_path) or a FASTA file.
+#if $in.fmt == 'blastdb':
+-infmt blastdb -in ${os.path.join($in.file.extra_files_path,'blastdb')}
+#else:
+-infmt fasta -in $in.file -parse_seqids
+#end if
+-window $window -level $level -linker $linker
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dustmasker
+
+
+**What it does**
+
+Low complexity region masker based on Symmetric DUST algorithm
+
+**Documentation**
+
+http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+
diff -r 000000000000 -r f3ac34855f5e blast/makeblastdb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/makeblastdb.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,85 @@
+
+Make BLAST database
+blastdb_wrapper.sh $outfile.extra_files_path
+makeblastdb -logfile $outfile -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+-parse_seqids
+$hash_index
+## Repeated inputs are collected into lists and joined on a single template line:
+## emitting one item per line would leave a space between items once Galaxy
+## turns newlines into spaces, and would repeat the option flag for every item.
+#set $in_files = []
+#for $i in $in
+#silent $in_files.append(str($i.file))
+#end for
+#if $in_files:
+## NOTE(review): makeblastdb expects several inputs as ONE quoted, space-separated -in value;
+## the quoting only survives if blastdb_wrapper.sh forwards its arguments unsplit -- verify.
+-in "${' '.join($in_files)}"
+#end if
+-title $title
+-dbtype $dbtype
+## -mask_data takes one comma-separated list of files.
+#set $mask_files = []
+#for $i in $mask_data
+#silent $mask_files.append(str($i.file))
+#end for
+#if $mask_files:
+-mask_data ${','.join($mask_files)}
+#end if
+## -gi_mask_name likewise takes one comma-separated list.
+#set $gi_mask_files = []
+#for $i in $gi_mask
+#silent $gi_mask_files.append(str($i.file))
+#end for
+#if $gi_mask_files:
+-gi_mask -gi_mask_name ${','.join($gi_mask_files)}
+#end if
+## Taxonomy is optional: a single taxid, a taxid map file, or nothing.
+#if $tax.select == 'id':
+-taxid $tax.id
+#elif $tax.select == 'map':
+-taxid_map $tax.map
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Make BLAST database from one or more FASTA files and/or BLAST databases.
+This application serves as a replacement for formatdb.
+
+Applying masks to an existing BLAST database will not change the original database; a new database will be created.
+For this reason, it's best to apply all masks at once to minimize the number of unnecessary intermediate databases.
+
+
+**Documentation**
+
+http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+
diff -r 000000000000 -r f3ac34855f5e blast/suite_config.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/suite_config.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,24 @@
+
+ Modified Galaxy wrappers add support for makeblastdb files and add dustmasker
+
+ blastn with blastdb support
+
+
+ blastp with blastdb support
+
+
+ blastx with blastdb support
+
+
+ tblastn with blastdb support
+
+
+ tblastx with blastdb support
+
+
+ Make blast Db file
+
+
+ dust masking of blast db file
+
+
diff -r 000000000000 -r f3ac34855f5e blast/tblastn.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/tblastn.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,242 @@
+
+ Search translated nucleotide database with protein query sequence(s)
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+tblastn
+-query "$query"
+## Three search targets: a preconfigured database, a user-built blastdb dataset, or a subject file.
+#if $db_opts.db_opts_selector == "db":
+ -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+## A blastdb dataset keeps its files in extra_files_path under the base name 'blastdb',
+## so -db must point there rather than at the dataset file itself.
+ -db "${os.path.join($db_opts.db.extra_files_path,'blastdb')}"
+#else:
+ -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+##Ungapped disabled for now - see comments below
+##$adv_opts.ungapped
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ tblastn
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *protein query*,
+using the NCBI BLAST+ tblastn command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+ 1 qseqid Query Seq-id (ID of your sequence)
+ 2 sseqid Subject Seq-id (ID of the database hit)
+ 3 pident Percentage of identical matches
+ 4 length Alignment length
+ 5 mismatch Number of mismatches
+ 6 gapopen Number of gap openings
+ 7 qstart Start of alignment in query
+ 8 qend End of alignment in query
+ 9 sstart Start of alignment in subject (database hit)
+ 10 send End of alignment in subject (database hit)
+ 11 evalue Expectation value (E-value)
+ 12 bitscore Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name Description
+------ ------------- -------------------------------------------
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
+ 14 score Raw score
+ 15 nident Number of identical matches
+ 16 positive Number of positive-scoring matches
+ 17 gaps Total number of gaps
+ 18 ppos Percentage of positive-scoring matches
+ 19 qframe Query frame
+ 20 sframe Subject frame
+ 21 qseq Aligned part of query sequence
+ 22 sseq Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+
+
diff -r 000000000000 -r f3ac34855f5e blast/tblastx.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blast/tblastx.xml Tue Jun 07 17:30:11 2011 -0400
@@ -0,0 +1,209 @@
+
+ Search translated nucleotide database with translated nucleotide query sequence(s)
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+tblastx
+-query "$query"
+## Three search targets: a preconfigured database, a user-built blastdb dataset, or a subject file.
+#if $db_opts.db_opts_selector == "db":
+ -db "$db_opts.database"
+#elif $db_opts.db_opts_selector == "user_db":
+## A blastdb dataset keeps its files in extra_files_path under the base name 'blastdb',
+## so -db must point there rather than at the dataset file itself.
+ -db "${os.path.join($db_opts.db.extra_files_path,'blastdb')}"
+#else:
+ -subject "$db_opts.subject"
+#end if
+-evalue $evalue_cutoff
+-out $output1
+-outfmt "$out_format"
+#if $adv_opts.adv_opts_selector=="advanced":
+$adv_opts.filter_query
+$adv_opts.strand
+-matrix $adv_opts.matrix
+## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
+## Note -max_target_seqs overrides -num_descriptions and -num_alignments
+#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
+-max_target_seqs $adv_opts.max_hits
+#end if
+#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):
+-word_size $adv_opts.word_size
+#end if
+$adv_opts.parse_deflines
+## End of advanced options:
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ tblastx
+
+
+
+
+
+.. class:: warningmark
+
+**Note**. Database searches may take a substantial amount of time.
+For large input datasets it is advisable to allow overnight processing.
+
+-----
+
+**What it does**
+
+Search a *translated nucleotide database* using a *translated nucleotide query*,
+using the NCBI BLAST+ tblastx command line tool.
+
+-----
+
+**Output format**
+
+Because Galaxy focuses on processing tabular data, the default output of this
+tool is tabular. The standard BLAST+ tabular output contains 12 columns:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+ 1 qseqid Query Seq-id (ID of your sequence)
+ 2 sseqid Subject Seq-id (ID of the database hit)
+ 3 pident Percentage of identical matches
+ 4 length Alignment length
+ 5 mismatch Number of mismatches
+ 6 gapopen Number of gap openings
+ 7 qstart Start of alignment in query
+ 8 qend End of alignment in query
+ 9 sstart Start of alignment in subject (database hit)
+ 10 send End of alignment in subject (database hit)
+ 11 evalue Expectation value (E-value)
+ 12 bitscore Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 22 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name Description
+------ ------------- -------------------------------------------
+ 13 sallseqid All subject Seq-id(s), separated by a ';'
+ 14 score Raw score
+ 15 nident Number of identical matches
+ 16 positive Number of positive-scoring matches
+ 17 gaps Total number of gaps
+ 18 ppos Percentage of positive-scoring matches
+ 19 qframe Query frame
+ 20 sframe Subject frame
+ 21 qseq Aligned part of query sequence
+ 22 sseq Aligned part of subject sequence
+====== ============= ===========================================
+
+The third option is BLAST XML output, which is designed to be parsed by
+another program, and is understood by some Galaxy tools.
+
+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+
+