changeset 5:b3a3ba0c1d47 draft

Uploaded v0.0.15, which updates the BLAST database definitions. It fixes a MetadataElement bug and includes more of the optional BLAST database files (contribution from Nicola Soranzo).
author peterjc
date Wed, 20 Mar 2013 10:39:27 -0400
parents f9a7783ed7b6
children a04cf51612f1
files blast.py blast_datatypes.txt
diffstat 2 files changed, 41 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/blast.py	Fri Nov 09 06:50:05 2012 -0500
+++ b/blast.py	Wed Mar 20 10:39:27 2013 -0400
@@ -7,6 +7,7 @@
 from galaxy.datatypes.xml import GenericXml
 from galaxy.datatypes.metadata import MetadataElement
 
+
 class BlastXml( GenericXml ):
     """NCBI Blast XML Output data"""
     file_ext = "blastxml"
@@ -174,18 +175,24 @@
     """Class for nucleotide BLAST database files."""
     file_ext = 'blastdbn'
     composite_type ='basic'
-    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
 
-    def __init__(self,**kwd):
+    def __init__(self, **kwd):
         Data.__init__(self, **kwd)
-        self.add_composite_file('blastdb.nhr')
-        self.add_composite_file('blastdb.nin')
-        self.add_composite_file('blastdb.nsq')
-        self.add_composite_file('blastdb.nhd', optional=True)
-        self.add_composite_file('blastdb.nsi', optional=True)
-        self.add_composite_file('blastdb.nhi', optional=True)
-        self.add_composite_file('blastdb.nog', optional=True)
-        self.add_composite_file('blastdb.nsd', optional=True)
+        self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers
+        self.add_composite_file('blastdb.nin', is_binary=True) # index file
+        self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences
+        self.add_composite_file('blastdb.nal', is_binary=False, optional=True) # alias ( -gi_mask option of makeblastdb)
+        self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) # sorted sequence hash values ( -hash_index option of makeblastdb)
+        self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) # index of sequence hash values ( -hash_index option of makeblastdb)
+        self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
+        self.add_composite_file('blastdb.nni', is_binary=True, optional=True) # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
+        self.add_composite_file('blastdb.nog', is_binary=True, optional=True) # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
+#        self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data
+#        self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column
+#        self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column
+# The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
 
     def display_data(self, trans, data, preview=False, filename=None,
                      to_ext=None, size=None, offset=None, **kwd):
@@ -195,25 +202,29 @@
         """
         return "This is a BLAST nucleotide database."
 
+
 class BlastProtDb( _BlastDb, Data ):
     """Class for protein BLAST database files."""
     file_ext = 'blastdbp'
     composite_type ='basic'
-    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )
 
-    def __init__(self,**kwd):
+    def __init__(self, **kwd):
         Data.__init__(self, **kwd)
-        self.add_composite_file('blastdb.phr')
-        self.add_composite_file('blastdb.pin')
-        self.add_composite_file('blastdb.psq')
-        self.add_composite_file('blastdb.pnd', optional=True)
-        self.add_composite_file('blastdb.pni', optional=True)
-        self.add_composite_file('blastdb.psd', optional=True)
-        self.add_composite_file('blastdb.psi', optional=True)
-        self.add_composite_file('blastdb.psq', optional=True)
-        self.add_composite_file('blastdb.phd', optional=True)
-        self.add_composite_file('blastdb.phi', optional=True)
-        self.add_composite_file('blastdb.pog', optional=True)
+# Component file comments are as in BlastNucDb except where noted
+        self.add_composite_file('blastdb.phr', is_binary=True)
+        self.add_composite_file('blastdb.pin', is_binary=True)
+        self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences
+        self.add_composite_file('blastdb.phd', is_binary=True, optional=True)
+        self.add_composite_file('blastdb.phi', is_binary=True, optional=True)
+        self.add_composite_file('blastdb.pnd', is_binary=True, optional=True)
+        self.add_composite_file('blastdb.pni', is_binary=True, optional=True)
+        self.add_composite_file('blastdb.pog', is_binary=True, optional=True)
+        self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
+        self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
+#        self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
+#        self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
+#        self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
+# The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
 
     def display_data(self, trans, data, preview=False, filename=None,
                      to_ext=None, size=None, offset=None, **kwd):
--- a/blast_datatypes.txt	Fri Nov 09 06:50:05 2012 -0500
+++ b/blast_datatypes.txt	Wed Mar 20 10:39:27 2013 -0400
@@ -1,8 +1,11 @@
 Galaxy datatypes for NCBI BLAST+ suite
 ======================================
 
-These Galaxy datatypes are copyright 2010-2012 by Peter Cock, The James Hutton
+These Galaxy datatypes are copyright 2010-2013 by Peter Cock, The James Hutton
 Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+Contributions/revisions copyright 2012 Edward Kirton. All rights reserved.
+Contributions/revisions copyright 2013 Nicola Soranzo. All rights reserved.
+
 See the licence text below.
 
 Note that these files (and the associated BLAST+ wrappers) were originally
@@ -23,6 +26,8 @@
 v0.0.13 - Uses blast.py instead of xml.py to define the datatypes
 v0.0.14 - Includes datatypes for protein and nucleotide BLAST databases
           (based on work by Edward Kirton)
+v0.0.15 - Fixes a MetadataElement bug and includes more of the optional
+          BLAST database files (contribution from Nicola Soranzo)
 
 
 Installation
@@ -61,7 +66,7 @@
 BLAST+ datatypes and wrappers, and other tools are being developed on the
 following hg branch: http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball I use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
 the following command from the Galaxy tools/ncbi_blast_plus folder:
 
 $ tar -czf blast_datatypes.tar.gz blast_datatypes.txt datatypes_conf.xml blast.py