view prims_proteomics_datatypes.py @ 12:a4d11b23377b

new release, including better permgen solution for msfilt
author pieter.lukasse@wur.nl
date Fri, 07 Mar 2014 14:51:54 +0100
parents d50f079096ee
children
line wrap: on
line source

"""
PRIMS proteomics classes for types defined in datatypes_conf.xml
"""
import logging
import re
from galaxy.datatypes.data import *
from galaxy.datatypes.xml import *
from galaxy.datatypes.sniff import *
from galaxy.datatypes.binary import *
from galaxy.datatypes.interval import *

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)


class ProteomicsXml(GenericXml):
    """Enhanced XML datatype shared by several proteomic/mass-spec datatypes.

    The methods below read two attributes from the instance:
    ``root`` (name of the document's root element, interpolated into the
    sniffing regex) and ``blurb`` (short description stored on the dataset).
    (This part of the code is taken from the protk proteomics datatypes
    package.)
    """

    def sniff(self, filename):
        """Determine whether the file is the correct XML type.

        Leading lines that start with ``<?`` (XML declaration / processing
        instructions) are skipped; the first line that does not start with
        ``<?`` is then tested against the expected root element.

        :param filename: path of the file to inspect
        :returns: True if that line starts with ``<root`` or ``<ns:root``
                  (for any namespace prefix ``ns``), otherwise False
        """
        with open(filename, 'r') as contents:
            line = contents.readline()
            # readline() returns '' at end of file (never None); '' does not
            # start with '<?', so the loop also stops on an exhausted file.
            while line.startswith('<?'):
                line = contents.readline()
            # Raw string so that \w reaches the regex engine without relying
            # on Python passing an unknown string escape through unchanged.
            # Pattern matches <root or <ns:root for any ns string.
            pattern = r'^<(\w*:)?%s' % self.root
        return re.match(pattern, line) is not None

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text on *dataset*.

        :param dataset: object exposing ``dataset.purged``, ``file_name``,
                        ``peek`` and ``blurb``
        :param is_multi_byte: forwarded unchanged to ``data.get_file_peek``
        """
        if dataset.dataset.purged:
            # Nothing left on disk to preview.
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
        dataset.blurb = self.blurb

class Apml(ProteomicsXml):
    """Datatype for PRIMS APML proteomics data."""
    # File extension associated with this datatype.
    file_ext = 'apml'
    # Short description of the data held by this datatype.
    blurb = "PRIMS APML proteomics data"
    # Name of the XML root element for this datatype.
    root = 'apml'