lib/galaxy/datatypes/rtg.py @ 1:8593828f91e7 (default, tip)
Full galaxy wrapper
author: diego
date:   Sat, 21 Apr 2012 21:36:15 -0400
1 """
2 rtg datatypes
3 """
4
5 import data
6 from galaxy.datatypes import sequence
7 import logging, os, sys, time, tempfile, shutil, string, glob, re, subprocess
8 import galaxy.model
9 from galaxy.datatypes import metadata
10 from galaxy.datatypes.metadata import MetadataElement
11 from galaxy import util
12 from galaxy.datatypes.images import Html
13 from galaxy.datatypes.sequence import Sequence
14 from galaxy.datatypes.binary import Binary
15 from sniff import *
16 from pprint import pprint
17 from ConfigParser import ConfigParser
18
19 log = logging.getLogger(__name__)
20 basepath = os.path.dirname(__file__)
21 rtgcfg = os.path.abspath(os.path.join(basepath, "..", "..", "..", "tools", "rtg", "rtg-galaxy.cfg"))
22
class FakeSecHead(object):
    """File wrapper that prepends a fake [asection] header so ConfigParser
    can read the section-less rtg-galaxy.cfg file."""
    def __init__(self, fp):
        self.fp = fp
        self.sechead = '[asection]\n'

    def readline(self):
        if self.sechead:
            try:
                return self.sechead
            finally:
                self.sechead = None
        else:
            return self.fp.readline()

cfg = ConfigParser()
cfg.readfp(FakeSecHead(open(rtgcfg)))
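
# Illustrative only: rtg-galaxy.cfg is assumed to be a plain key=value file with
# no section headers (which is why FakeSecHead injects one). The only key read
# in this module is 'rtg', the path to the RTG executable, e.g.:
#
#   rtg=/usr/local/rtg/rtg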

class Sdf( Html ):
    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    file_ext = 'sdf'

    MetadataElement(name="sdfId", desc="SDF Id", readonly="true", param=metadata.MetadataParameter)
    MetadataElement(name="source", desc="Source", readonly="true", values=[('UNKNOWN', 'Unknown'), ('CG', 'Complete Genomics'), ('SOLEXA', 'Solexa')], param=metadata.SelectParameter)
    MetadataElement(name="sequences", desc="Number of Sequences", readonly="true", param=metadata.MetadataParameter)
    MetadataElement(name="hasQuality", desc="Has Quality", readonly="true", values=[('FALSE', 'False'), ('TRUE', 'True')], param=metadata.SelectParameter)
    MetadataElement(name="type", desc="Type", readonly="true", values=[('DNA', 'DNA'), ('PROTEIN', 'Protein')], param=metadata.SelectParameter)
    MetadataElement(name="paired", desc="Paired-End", readonly="true", values=[('FALSE', 'False'), ('TRUE', 'True')], param=metadata.SelectParameter)
    MetadataElement(name="maxLength", desc="Maximum sequence length", readonly="true", param=metadata.MetadataParameter)
    MetadataElement(name="minLength", desc="Minimum sequence length", readonly="true", param=metadata.MetadataParameter)

    def __init__( self, **kwd ):
        Html.__init__( self, **kwd )
        log.debug( "Rtg log info %s" % ' __init__' )
        self.add_composite_file( 'format.log', mimetype = 'text/plain', description = 'Log', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'done', mimetype = 'text/plain', description = 'Completion', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'progress', mimetype = 'text/plain', description = 'Progress', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'mainIndex', mimetype = 'application/octet-stream', description = 'Index', substitute_name_with_metadata = None, is_binary = True )
        self.add_composite_file( 'nameIndex0', mimetype = 'application/octet-stream', description = 'Index', substitute_name_with_metadata = None, is_binary = True )
        self.add_composite_file( 'namedata0', mimetype = 'application/octet-stream', description = 'Index', substitute_name_with_metadata = None, is_binary = True )
        self.add_composite_file( 'namepointer0', mimetype = 'application/octet-stream', description = 'Index', substitute_name_with_metadata = None, is_binary = True )
        self.add_composite_file( 'seqdata0', mimetype = 'application/octet-stream', description = 'Index', substitute_name_with_metadata = None, is_binary = True )
        self.add_composite_file( 'seqpointer0', mimetype = 'application/octet-stream', description = 'Index', substitute_name_with_metadata = None, is_binary = True )

    def generate_primary_file( self, dataset = None ):
        log.debug( "Rtg log info %s %s" % ( 'generate_primary_file', dataset ) )
        rval = ['<html><head><title>RTG SDF Dataset</title></head><p/>']
        rval.append( '<div>This SDF dataset is composed of the following files:<p/><ul>' )
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
            fn = composite_name
            log.debug( "Rtg log info %s %s %s" % ( 'generate_primary_file', fn, composite_file ) )
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            if composite_file.get( 'description' ):
                rval.append( '<li><a href="%s" type="application/octet-stream">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get( 'description' ), opt_text ) )
            else:
                rval.append( '<li><a href="%s" type="application/octet-stream">%s</a>%s</li>' % ( fn, fn, opt_text ) )
        rval.append( '</ul></div></html>' )
        return "\n".join( rval )

    def regenerate_primary_file( self, dataset ):
        """
        Regenerate the primary (HTML index) file; cannot do this until we are setting metadata.
        """
        log.debug( "Rtg log info %s %s" % ( 'regenerate_primary_file', dataset ) )
        bn = dataset.metadata.base_name
        flist = os.listdir( dataset.extra_files_path )
        rval = ['<html><head><title>Files for RTG SDF Dataset %s</title></head><p/>Comprises the following files:<p/><ul>' % ( bn )]
        for fname in flist:
            sfname = os.path.split( fname )[-1]
            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
        rval.append( '</ul></html>' )
        f = open( dataset.file_name, 'w' )
        f.write( "\n".join( rval ) )
        f.write( '\n' )
        f.close()

    def set_meta( self, dataset, **kwd ):
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file( dataset )
        if os.path.isdir( dataset.extra_files_path + '/left' ):
            # Paired-end SDF: reads live in 'left'/'right' subdirectories
            sdfDir = dataset.extra_files_path + '/left'
            dataset.metadata.paired = 'TRUE'
        else:
            sdfDir = dataset.extra_files_path
            dataset.metadata.paired = 'FALSE'
        # Parse the output of 'rtg sdfstats' to populate the metadata
        p = os.popen( cfg.get( 'asection', 'rtg' ) + ' sdfstats ' + sdfDir, "r" )
        for line in p:
            if line.startswith( 'SDF-ID' ):
                dataset.metadata.sdfId = line.split( ':', 1 )[1].strip()
            elif line.startswith( 'Number of sequences' ):
                dataset.metadata.sequences = line.split( ':', 1 )[1].strip()
            elif line.startswith( 'Type' ):
                dataset.metadata.type = line.split( ':', 1 )[1].strip()
            elif line.startswith( 'Source' ):
                dataset.metadata.source = line.split( ':', 1 )[1].strip()
            elif line.startswith( 'Quality scores available' ):
                dataset.metadata.hasQuality = 'TRUE'
            elif line.startswith( 'Maximum length' ):
                dataset.metadata.maxLength = line.split( ':', 1 )[1].strip()
            elif line.startswith( 'Minimum length' ):
                dataset.metadata.minLength = line.split( ':', 1 )[1].strip()
        p.close()
        if dataset.metadata.hasQuality != 'TRUE':
            dataset.metadata.hasQuality = 'FALSE'

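# Illustrative sketch of 'rtg sdfstats' output parsed by Sdf.set_meta above
# (values and exact layout are made up; only the line prefixes matter here):
#
#   SDF-ID              : 8a2c1f-example
#   Number of sequences : 1000000
#   Type                : DNA
#   Source              : SOLEXA
#   Maximum length      : 100
#   Minimum length      : 100
#   Quality scores available ...
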
if __name__ == '__main__':
    import doctest, sys
    doctest.testmod( sys.modules[__name__] )

class Cgtsv( Sequence ):
    """Class representing a generic Complete Genomics (CG) TSV read file"""
    file_ext = "tsvcg"

    def set_meta( self, dataset, **kwd ):
        """
        Set the number of sequences (data lines) in the dataset.
        """
        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
            dataset.metadata.sequences = None
            return
        sequences = 0
        for line in open( dataset.file_name ):
            line = line.strip()
            if line:
                if line.startswith( '#' ) or line.startswith( '>' ):
                    # We don't count comment or header lines for sequence data types
                    continue
                sequences += 1
        dataset.metadata.sequences = sequences

    def sniff( self, filename ):
        """
        Determine whether the file is in CG TSV format.
        For details, see http://media.completegenomics.com/documents/DataFileFormats.pdf
        """
        # Reads column should contain only unambiguous bases or N
        bases_regexp = re.compile( "^[NGTAC]+$" )
        headers = get_headers( filename, '\t' )
        try:
            count = 0
            if len( headers ) < 2:
                return False
            for hdr in headers:
                if len( hdr ) > 1 and hdr[0]:
                    if hdr[0].startswith( '#' ):
                        continue
                    if len( hdr ) != 3:
                        return False
                    if hdr[0].startswith( '>' ):
                        # Header line must declare the 'flags' and 'reads' columns
                        if hdr[0] != ">flags":
                            return False
                        if hdr[1] != "reads":
                            return False
                    else:
                        try:
                            # First column is a numeric flags field
                            int( hdr[0] )
                            if not bases_regexp.match( hdr[1] ):
                                return False
                        except:
                            return False
                        count += 1
                        if count >= 5:
                            return True
            # Do other necessary checking here...
        except:
            return False
        # If we haven't yet returned False, then...
        return True

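# Illustrative sketch of a CG TSV fragment that Cgtsv.sniff would accept.
# Columns are tab-separated; the 'flags' and 'reads' header names are checked
# above, while the third column name and all values below are assumptions:
#
#   >flags  reads                 scores
#   5       ACGTACGTACGTNACGTACG  <quality string>
#   3       TTGACCGTAAGGNACGTACG  <quality string>
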
class Samix( Binary ):
    """Class describing a tabix-ed SAM file"""
    file_ext = "sam.gz"
    MetadataElement( name="sam_index", desc="SAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True )

    def init_meta( self, dataset, copy_from=None ):
        Binary.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """ Creates the tabix index for the SAM file. """
        # These metadata values are not accessible by users, always overwrite
        index_file = dataset.metadata.sam_index
        if not index_file:
            index_file = dataset.metadata.spec['sam_index'].param.new_file( dataset = dataset )
        # Create the SAM index with 'rtg index'
        stderr_name = tempfile.NamedTemporaryFile( prefix = "sam_index_stderr" ).name
        command = cfg.get( 'asection', 'rtg' ) + ( ' index -f sam %s' % ( dataset.file_name ) )
        proc = subprocess.Popen( args=command, shell=True, stderr=open( stderr_name, 'wb' ) )
        exit_code = proc.wait()
        # Did the index succeed?
        stderr = open( stderr_name ).read().strip()
        if stderr:
            if exit_code != 0:
                os.unlink( stderr_name )  # clean up
                raise Exception( "Error Setting tabix-ed SAM Metadata: %s" % stderr )
            else:
                print stderr
        # Move the generated .tbi file into the metadata index file
        shutil.move( dataset.file_name + '.tbi', index_file.file_name )
        dataset.metadata.sam_index = index_file
        # Remove temp file
        os.unlink( stderr_name )
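
    # Note (illustrative paths only): for a dataset stored at, say,
    # /galaxy/database/files/000/dataset_42.dat the indexing step above runs
    #   <rtg> index -f sam /galaxy/database/files/000/dataset_42.dat
    # and the resulting dataset_42.dat.tbi is moved into the metadata index file.
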
    def set_peek( self, dataset, is_multi_byte=False ):
        if not dataset.dataset.purged:
            dataset.peek = "Tabix-ed sam alignments file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        try:
            return dataset.peek
        except:
            return "Tabix-ed sam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) )
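
# Minimal registration sketch (an assumption, not part of this changeset): these
# datatypes would typically be wired into Galaxy via datatypes_conf.xml, e.g.:
#
#   <datatype extension="sdf"    type="galaxy.datatypes.rtg:Sdf"   mimetype="text/html" display_in_upload="true"/>
#   <datatype extension="tsvcg"  type="galaxy.datatypes.rtg:Cgtsv" display_in_upload="true"/>
#   <datatype extension="sam.gz" type="galaxy.datatypes.rtg:Samix" display_in_upload="true"/>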