view rsem.py @ 0:ca988deacfd1

Uploaded
author jjohnson
date Fri, 07 Feb 2014 08:07:29 -0500
parents
children
line wrap: on
line source

"""
RSEM datatypes
"""
import os,os.path,re,sys
import galaxy.datatypes.data
from galaxy.datatypes.images import Html
from galaxy.datatypes.metadata import MetadataElement

class RsemReference( Html ):
    """
    Class describing an RSEM reference.

    This is a composite datatype: the primary file is a small HTML page
    and the reference files themselves live in the dataset's
    extra_files_path.  The reference name is kept in the
    ``reference_name`` metadata element and is derived from the
    ``<reference_name>.grp`` file (see set_meta).
    """
    MetadataElement( name='reference_name', default=None, desc='RSEM Reference Name', readonly=True, visible=True, no_value=None )

    file_ext = 'rsem_ref'
    is_binary = True
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def generate_primary_file( self, dataset = None ):
        """
        Return the placeholder HTML used as the primary file.

        This is called only at upload to write the html file;
        the datasets cannot be renamed here - they come with the default
        names.  The ``dataset`` argument is not used.
        """
        return '<html><head></head><body>AutoGenerated Primary File for RSEM Reference Composite Dataset</body></html>'

    def regenerate_primary_file( self, dataset ):
        """
        Rewrite the primary HTML file as an index of the extra files.

        The page title carries ``dataset.metadata.reference_name`` and the
        body links to every entry found in ``dataset.extra_files_path``.
        This cannot be done until metadata is being set, because the
        reference name is not known before then.

        Raises whatever ``os.listdir`` / ``open`` raise if the extra files
        directory cannot be listed or the primary file cannot be written.
        """
        refname = dataset.metadata.reference_name
        # os.listdir() returns entries in arbitrary order; sort them so the
        # generated page is the same on every run.
        flist = sorted( os.listdir( dataset.extra_files_path ) )
        rval = ['<html><head><title>RSEM Reference %s</title></head><p/>Comprises the following files:<p/><ul>' % ( refname )]
        for fname in flist:
            sfname = os.path.basename( fname )
            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
        rval.append( '</ul></html>' )
        # open() instead of the Python 2-only file() builtin; the with-block
        # guarantees the handle is closed even if a write fails.
        with open( dataset.file_name, 'w' ) as f:
            f.write( "\n".join( rval ) )
            f.write( '\n' )

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set ``dataset.peek`` and ``dataset.blurb`` from the reference name.

        A different pair of messages is used when the underlying dataset
        has been purged.  ``is_multi_byte`` is accepted but not used.
        """
        refname = dataset.metadata.reference_name
        if not dataset.dataset.purged:
            dataset.peek = "RSEM Reference (%s)" % ( refname )
            dataset.blurb = "RSEM Reference (%s)" % ( refname )
        else:
            dataset.peek = 'RSEM Reference (%s) does not exist' % ( refname )
            dataset.blurb = 'RSEM Reference (%s) purged from disk' % ( refname )

    def display_peek( self, dataset ):
        """
        Return ``dataset.peek``, or a generic label if it cannot be read.
        """
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; Exception (rather than a bare except)
            # lets KeyboardInterrupt / SystemExit propagate.
            return "RSEM Reference"

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Derive ``reference_name`` from the extra files and rebuild the
        primary HTML file.

        Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt

        The reference name is the part of the ``.grp`` file name before the
        extension.  If no ``.grp`` file is present the metadata value is
        left unchanged.  ``overwrite`` and ``**kwd`` are accepted but not
        used by this implementation.
        """
        pat = re.compile( r'^(.*)\.grp$' )
        efp = dataset.extra_files_path
        # Sorted so that, should several .grp files exist, the same one is
        # picked on every run (os.listdir order is arbitrary).
        for fname in sorted( os.listdir( efp ) ):
            m = pat.match( fname )
            if m:
                dataset.metadata.reference_name = m.group( 1 )
                break
        self.regenerate_primary_file( dataset )