# HG changeset patch
# User devteam
# Date 1345209031 14400
# Node ID e1c29f3023014106d9997a433c9a4948f83af4a1
Uploaded
diff -r 000000000000 -r e1c29f302301 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Fri Aug 17 09:10:31 2012 -0400
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r e1c29f302301 xml.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xml.py Fri Aug 17 09:10:31 2012 -0400
@@ -0,0 +1,124 @@
+"""
+BlastXml class
+"""
+
+from galaxy.datatypes.data import get_file_peek
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.xml import GenericXml
+
+class BlastXml( GenericXml ):
+    """NCBI Blast XML Output data"""
+    file_ext = "blastxml"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text (placeholder text if the dataset was purged)"""
+        if not dataset.dataset.purged:
+            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'NCBI Blast XML data'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def sniff( self, filename ):
+        """
+        Determines whether the file is blastxml (XML declaration, DOCTYPE, root tag)
+
+        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
+        >>> BlastXml().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' )
+        >>> BlastXml().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'interval.interval' )
+        >>> BlastXml().sniff( fname )
+        False
+        """
+        #TODO - Use a context manager on Python 2.5+ to close handle
+        handle = open(filename)
+        line = handle.readline()
+        if line.strip() != '<?xml version="1.0"?>':
+            handle.close()
+            return False
+        line = handle.readline()
+        if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
+                                '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
+            handle.close()
+            return False
+        line = handle.readline()
+        if line.strip() != '<BlastOutput>':
+            handle.close()
+            return False
+        handle.close()
+        return True
+
+    def merge(split_files, output_file):
+        """Merge BLAST XML files: keep the first header, append every file's iterations; ValueError on bad input."""
+        if len(split_files) == 1:
+            #For one file only, use base class method (move/copy)
+            return Text.merge(split_files, output_file)
+        out = open(output_file, "w")
+        old_header = None #header of the first file, later files must match it
+        for f in split_files:
+            h = open(f)
+            #The header is everything up to and including the first <Iteration> line
+            header = h.readline()
+            if not header:
+                out.close()
+                h.close()
+                raise ValueError("BLAST XML file %s was empty" % f)
+            if header.strip() != '<?xml version="1.0"?>':
+                out.write(header) #for diagnosis
+                out.close()
+                h.close()
+                raise ValueError("%s is not an XML file!" % f)
+            line = h.readline()
+            header += line
+            if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
+                                    '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
+                out.write(header) #for diagnosis
+                out.close()
+                h.close()
+                raise ValueError("%s is not a BLAST XML file!" % f)
+            while True:
+                line = h.readline()
+                if not line:
+                    out.write(header) #for diagnosis
+                    out.close()
+                    h.close()
+                    raise ValueError("BLAST XML file %s ended prematurely" % f)
+                header += line
+                if "<Iteration>" in line:
+                    break
+                if len(header) > 10000:
+                    #Something has gone wrong, don't load too much into memory!
+                    #Write what we have to the merged file for diagnostics
+                    out.write(header)
+                    out.close()
+                    h.close()
+                    raise ValueError("BLAST XML file %s has too long a header!" % f)
+            if "<BlastOutput>" not in header:
+                out.close()
+                h.close()
+                raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
+            if f == split_files[0]:
+                out.write(header)
+                old_header = header
+            elif old_header[:300] != header[:300]:
+                #Enough to check and match
+                out.close()
+                h.close()
+                raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n" \
+                                 % (split_files[0], f, old_header[:300], header[:300]))
+            else:
+                out.write("    <Iteration>\n")
+            for line in h:
+                if "</BlastOutput_iterations>" in line:
+                    break
+                #TODO - Increment <Iteration_iter-num> and if required automatic query names
+                #like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
+                out.write(line)
+            h.close()
+        out.write("  </BlastOutput_iterations>\n")
+        out.write("</BlastOutput>\n")
+        out.close()
+    merge = staticmethod(merge)
+