# HG changeset patch
# User peterjc
# Date 1352462035 18000
# Node ID 393a7a35383cd19dd11d1c528bc5c7529d67b35d
# Parent 9d5beacae92b094c22b439cf0eb9e51edada34b9
Uploaded v0.0.14 adding local BLAST database support.
This *requires* the matching update to the blast_datatypes repository. This adds basic wrappers for makeblastdb and blastdbinfo.
This update includes work by Edward Kirton.
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_blast_plus.txt
--- a/tools/ncbi_blast_plus/ncbi_blast_plus.txt Wed Sep 19 13:08:31 2012 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blast_plus.txt Fri Nov 09 06:53:55 2012 -0500
@@ -42,6 +42,8 @@
e-values
v0.0.13 - Use the new error handling options in Galaxy (the previously
bundled hide_stderr.py script is no longer needed).
+v0.0.14 - Support for makeblastdb and local BLAST databases in the history
+ (using work from Edward Kirton).
Developers
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Nov 09 06:53:55 2012 -0500
@@ -0,0 +1,63 @@
+
+ Show BLAST database information from blastdbcmd
+
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out $info
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ blastdbcmd
+
+
+
+**What it does**
+
+Calls the NCBI BLAST+ blastdbcmd command line tool with the -info
+switch to give summary information about a BLAST database, such as
+the size (number of sequences and total length) and date.
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Nov 09 06:53:55 2012 -0500
@@ -0,0 +1,135 @@
+
+ Extract sequence(s) from BLAST database
+
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting with hash hash (##) are comments. Galaxy will turn newlines into spaces
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}"
+
+##TODO: What about -ctrl_a and -target_only as advanced options?
+
+#if $id_opts.id_type=="file":
+-entry_batch "$id_opts.entries"
+#else:
+##Perform some simple search/replaces to remove whitespace
+##and make it comma separated, and escape any pipe characters
+-entry "$id_opts.entries.replace('\r',',').replace('\n',',').replace(' ','').replace(',,',',').replace(',,',',').strip(',').replace('|','\|')"
+#end if
+
+##When building a BLAST database, to ensure unique IDs makeblastdb will
+##do things like turning a FASTA entry with ID of ERP44 into lcl|ERP44
+##(if using -parse_seqids) or simply assign it an ID using the record
+##number like gnl|BL_ORD_ID|123 (to cope with duplicate IDs in the FASTA
+##file). In -parse_seqids mode, a duplicate FASTA ID gives an error.
+##
+##The BLAST plain text and XML output will contain these BLAST IDs, but
+##the tabular output does not (at least, not in BLAST 2.2.25+).
+##Therefore in general, Galaxy users won't care about the (internal)
+##BLAST identifiers.
+##
+##The blastdbcmd FASTA output will also contain these IDs, but in the
+##context of the BLAST tabular output they are not helpful. Therefore
+##to recover the original ID as used in the FASTA file for makeblastdb
+##we need a little post processing.
+##
+##We remove the NCBI's lcl|... or gnl|BL_ORD_ID|123 prefixes
+##using sed, however the exact syntax differs for Mac OS X's sed
+
+#if str($outfmt)=="blastid":
+-out "$seq"
+#else if sys.platform == "darwin":
+| sed -E 's/^>(lcl\||gnl\|BL_ORD_ID\|[0-9]* )/>/1' > "$seq"
+#else:
+| sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq"
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ blastdbcmd
+
+
+
+**What it does**
+
+Extracts FASTA formatted sequences from a BLAST database
+using the NCBI BLAST+ blastdbcmd command line tool.
+
+.. class:: warningmark
+
+**BLAST assigned identifiers**
+
+When a BLAST database is constructed from a FASTA file, the
+original identifiers can be replaced with BLAST assigned
+identifiers, partly to ensure uniqueness. For example, sometimes
+a prefix of 'lcl|' is added (lcl is short for local),
+or an arbitrary name starting 'gnl|BL_ORD_ID|' is created.
+
+If you are using the tabular output from BLAST, it will contain
+the original identifiers - not the BLAST assigned identifiers
+suitable for use with the blastdbcmd tool.
+
+If you are using the XML or plain text output, this will also
+contain the BLAST assigned identifiers. However, extracting a
+list of BLAST assigned identifiers from it isn't straightforward.
+
+-------
+
+**References**
+
+Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
+
+Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
+
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Wed Sep 19 13:08:31 2012 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Nov 09 06:53:55 2012 -0500
@@ -1,7 +1,7 @@
-
+Search nucleotide database with nucleotide query sequence(s)
-
+ blastn -version
## The command is a Cheetah template which allows some Python based syntax.
@@ -10,6 +10,8 @@
-query "$query"
#if $db_opts.db_opts_selector == "db":
-db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
#else:
-subject "$db_opts.subject"
#end if
@@ -40,15 +42,20 @@
#end if
+
-
+
+
+
+
-
+
+
@@ -58,10 +65,17 @@
+
+
+
+
+
+
-
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Wed Sep 19 13:08:31 2012 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Nov 09 06:53:55 2012 -0500
@@ -1,7 +1,7 @@
-
+Search protein database with protein query sequence(s)
-
+ blastp -version
## The command is a Cheetah template which allows some Python based syntax.
@@ -10,6 +10,8 @@
-query "$query"
#if $db_opts.db_opts_selector == "db":
-db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
#else:
-subject "$db_opts.subject"
#end if
@@ -41,15 +43,20 @@
#end if
+
-
+
+
+
+
-
+
+
@@ -59,10 +66,17 @@
+
+
+
+
+
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Wed Sep 19 13:08:31 2012 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Nov 09 06:53:55 2012 -0500
@@ -1,7 +1,7 @@
-
+Search protein database with translated nucleotide query sequence(s)
-
+ blastx -version
## The command is a Cheetah template which allows some Python based syntax.
@@ -10,6 +10,8 @@
-query "$query"
#if $db_opts.db_opts_selector == "db":
-db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
#else:
-subject "$db_opts.subject"
#end if
@@ -41,15 +43,20 @@
#end if
+
-
+
+
+
+
-
+
+
@@ -59,10 +66,17 @@
-
+
+
+
+
+
+
+
-
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Nov 09 06:53:55 2012 -0500
@@ -0,0 +1,121 @@
+
+Make BLAST database
+makeblastdb -version
+
+makeblastdb -out ${os.path.join($outfile.extra_files_path,'blastdb')}
+$parse_seqids
+$hash_index
+## Single call to -in with multiple filenames space separated with outer quotes
+## (presumably any filenames with spaces would be a problem). Note this gives
+## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy:
+-in "
+#for $i in $in
+${i.file} #end for
+"
+#if $title:
+-title "$title"
+#else:
+##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
+-title "BLAST Database"
+#end if
+-dbtype $dbtype
+## #set $sep = '-mask_data '
+## #for $i in $mask_data
+## $sep${i.file}
+## #set $sep = ', '
+## #end for
+## #set $sep = '-gi_mask -gi_mask_name '
+## #for $i in $gi_mask
+## $sep${i.file}
+## #set $sep = ', '
+## #end for
+## #if $tax.select == 'id':
+## -taxid $tax.id
+## #else if $tax.select == 'map':
+## -taxid_map $tax.map
+## #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ makeblastdb
+
+
+**What it does**
+
+Make BLAST database from one or more FASTA files and/or BLAST databases.
+
+This is a wrapper for the NCBI BLAST+ tool 'makeblastdb', which is the
+replacement for the 'formatdb' tool in the NCBI 'legacy' BLAST suite.
+
+
+
+**Documentation**
+
+http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Wed Sep 19 13:08:31 2012 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Nov 09 06:53:55 2012 -0500
@@ -1,7 +1,7 @@
-
+Search translated nucleotide database with protein query sequence(s)
-
+ tblastn -version
## The command is a Cheetah template which allows some Python based syntax.
@@ -10,6 +10,8 @@
-query "$query"
#if $db_opts.db_opts_selector == "db":
-db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
#else:
-subject "$db_opts.subject"
#end if
@@ -41,15 +43,20 @@
#end if
+
-
+
+
+
+
-
+
+
@@ -59,10 +66,17 @@
-
+
+
+
+
+
+
+
-
+
+
diff -r 9d5beacae92b -r 393a7a35383c tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Wed Sep 19 13:08:31 2012 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Nov 09 06:53:55 2012 -0500
@@ -1,7 +1,7 @@
-
+Search translated nucleotide database with translated nucleotide query sequence(s)
-
+ tblastx -version
## The command is a Cheetah template which allows some Python based syntax.
@@ -10,6 +10,8 @@
-query "$query"
#if $db_opts.db_opts_selector == "db":
-db "${db_opts.database.fields.path}"
+#elif $db_opts.db_opts_selector == "histdb":
+ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
#else:
-subject "$db_opts.subject"
#end if
@@ -41,15 +43,20 @@
#end if
+
-
+
+
+
+
-
+
+
@@ -59,10 +66,17 @@
-
+
+
+
+
+
+
+
-
+
+