Repository 'proteomics_datatypes'
hg clone https://toolshed.g2.bx.psu.edu/repos/iracooke/proteomics_datatypes

Changeset 8:58469754fd82 (2014-06-05)
Previous changeset 7:b82d4034e0f8 (2014-02-11) Next changeset 9:6ca516faacfc (2014-06-05)
Commit message:
Uploaded
removed:
README
datatypes_conf.xml
display_applications/proteomics/PepXml.xml
display_applications/proteomics/ProtGff.xml
display_applications/proteomics/ProtXml.xml
display_applications/proteomics/mzML.xml
proteomics.py
tool-data/proteogenomics_display_site.txt.sample
tool-data/protk_display_site.txt.sample
b
diff -r b82d4034e0f8 -r 58469754fd82 README
--- a/README Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,9 +0,0 @@
-## What is it?
-Galaxy datatype and display-application definitions for Proteomics data
-
-## Installation
-Install into your local galaxy instance from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/
-
-To visualize data you will need to install the protviz visualization web application.  This is available at
-[https://bitbucket.org/Andrew_Brock/proteomics-visualise](https://bitbucket.org/Andrew_Brock/proteomics-visualise)
-
b
diff -r b82d4034e0f8 -r 58469754fd82 datatypes_conf.xml
--- a/datatypes_conf.xml Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,59 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-  <datatype_files>
-    <datatype_file name="proteomics.py"/>
-  </datatype_files>
-  <registration display_path="display_applications">
-    <datatype extension="prot_gff" type="galaxy.datatypes.proteomics:ProtGff" mimetype="application/xml" display_in_upload="true">
-       <display file="proteomics/ProtGff.xml" />
-    </datatype>
-    <datatype extension="pepxml" type="galaxy.datatypes.proteomics:PepXml" mimetype="application/xml" display_in_upload="true">
-      <display file="proteomics/PepXml.xml" />
-    </datatype>
-    <datatype extension="raw_pepxml" type="galaxy.datatypes.proteomics:PepXml" subclass="true">
-      <display file="proteomics/PepXml.xml" />
-    </datatype>
-    <datatype extension="peptideprophet_pepxml" type="galaxy.datatypes.proteomics:PepXml" subclass="true">
-      <display file="proteomics/PepXml.xml" />
-    </datatype>
-    <datatype extension="interprophet_pepxml" type="galaxy.datatypes.proteomics:PepXml" subclass="true">
-      <display file="proteomics/PepXml.xml" />
-    </datatype>
-    <datatype extension="protxml" type="galaxy.datatypes.proteomics:ProtXML" display_in_upload="true" >
-      <display file="proteomics/ProtXml.xml"/>
-    </datatype>
-    <datatype extension="pepxml.tsv" type="galaxy.datatypes.proteomics:PepXmlReport" display_in_upload="true" />
-    <datatype extension="protxml.tsv" type="galaxy.datatypes.proteomics:ProtXmlReport" display_in_upload="true" />
-    <datatype extension="mascotdat" type="galaxy.datatypes.proteomics:MascotDat" display_in_upload="false" />
-    <datatype extension="mzml" type="galaxy.datatypes.proteomics:MzML" mimetype="application/xml" display_in_upload="true">
-      <display file="proteomics/mzML.xml"/>
-      <display file="proteomics/protvis_mzml.xml"/>
-    </datatype>
-    <datatype extension="mgf" type="galaxy.datatypes.proteomics:Mgf" display_in_upload="true" />
-    <datatype extension="xls" type="galaxy.datatypes.proteomics:Xls" display_in_upload="true" />
-    <datatype extension="idpdb" type="galaxy.datatypes.proteomics:IdpDB" display_in_upload="true" />
-    <datatype extension="mzxml" type="galaxy.datatypes.proteomics:MzXML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="mzq" type="galaxy.datatypes.proteomics:MzQuantML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="mzid" type="galaxy.datatypes.proteomics:MzIdentML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="traml" type="galaxy.datatypes.proteomics:TraML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="featurexml" type="galaxy.datatypes.proteomics:FeatureXML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="consensusxml" type="galaxy.datatypes.proteomics:ConsensusXML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="idxml" type="galaxy.datatypes.proteomics:IdXML" mimetype="application/xml" display_in_upload="true" />
-    <datatype extension="raw" type="galaxy.datatypes.proteomics:RAW" display_in_upload="true" />
-    <datatype extension="msp" type="galaxy.datatypes.proteomics:Msp" display_in_upload="true" />
-    <datatype extension="ms2" type="galaxy.datatypes.proteomics:Ms2" display_in_upload="true" />
-    <datatype extension="hlf" type="galaxy.datatypes.proteomics:XHunterAslFormat" display_in_upload="true" />
-  </registration>
-  <sniffers>
-    <sniffer type="galaxy.datatypes.proteomics:ProtGff"/>
-    <sniffer type="galaxy.datatypes.proteomics:MzML"/>
-    <sniffer type="galaxy.datatypes.proteomics:PepXml"/>
-    <sniffer type="galaxy.datatypes.proteomics:Mgf"/>
-    <sniffer type="galaxy.datatypes.proteomics:ProtXML"/>
-    <sniffer type="galaxy.datatypes.proteomics:MzXML"/>
-    <sniffer type="galaxy.datatypes.proteomics:TraML"/>
-    <sniffer type="galaxy.datatypes.proteomics:MzIdentML"/>
-    <sniffer type="galaxy.datatypes.proteomics:MzQuantML"/>
-    <sniffer type="galaxy.datatypes.proteomics:Xls"/>
-  </sniffers>
-</datatypes>
b
diff -r b82d4034e0f8 -r 58469754fd82 display_applications/proteomics/PepXml.xml
--- a/display_applications/proteomics/PepXml.xml Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-<display id="proteomics_pepxml" version="1.0.0" name="view pepXML in">
- <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
-        <!-- Define parameters by column from file -->
-        <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="site_url" value="1"/>
-        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=pepxml</url>
-        <param type="data" name="pep_file" viewable="False" format="pepXML"/>
-        <param type="data" dataset="pep_file" name="pepxml_file" format="pepXML" viewable="False" />
-        <param type="template" name="encoded_filename" strip="True" >
-            #import binascii
-            ${binascii.hexlify( $pepxml_file.file_name )}
-        </param>
-        <param type="template" name="galaxy_url" strip="True" >
-                ${BASE_URL.split(":")[1][2:]}
-        </param>
-    </dynamic_links>
-</display>
b
diff -r b82d4034e0f8 -r 58469754fd82 display_applications/proteomics/ProtGff.xml
--- a/display_applications/proteomics/ProtGff.xml Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-<display id="proteomics_gff" version="1.0.0" name="view gff in">
-        <dynamic_links from_file="tool-data/proteogenomics_display_site.txt" skip_startswith="#" id="0" name="0">
-        <!-- Define parameters by column from file -->
-        <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="site_url" value="1"/>
-        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=protgff</url>
-        <param type="data" name="prot_file" viewable="False" format="protgff"/>
-        <param type="data" dataset="prot_file" name="protgff_file" format="protgff" viewable="False" />
-        <param type="template" name="encoded_filename" strip="True" >
-            #import binascii
-            ${binascii.hexlify( $protgff_file.file_name )}
-        </param>
-        <param type="template" name="galaxy_url" strip="True" >
-                ${BASE_URL.split(":")[1][2:]}
-        </param>
-    </dynamic_links>
-</display>
\ No newline at end of file
b
diff -r b82d4034e0f8 -r 58469754fd82 display_applications/proteomics/ProtXml.xml
--- a/display_applications/proteomics/ProtXml.xml Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-<display id="proteomics_protxml" version="1.0.0" name="view protXML in">
- <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
-        <!-- Define parameters by column from file -->
-        <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="site_url" value="1"/>
-        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=protxml</url>
-        <param type="data" name="prot_file" viewable="False" format="protXML"/>
-        <param type="data" dataset="prot_file" name="protxml_file" format="protXML" viewable="False" />
-        <param type="template" name="encoded_filename" strip="True" >
-            #import binascii
-            ${binascii.hexlify( $protxml_file.file_name )}
-        </param>
-        <param type="template" name="galaxy_url" strip="True" >
-                ${BASE_URL.split(":")[1][2:]}
-        </param>
-    </dynamic_links>
-</display>
\ No newline at end of file
b
diff -r b82d4034e0f8 -r 58469754fd82 display_applications/proteomics/mzML.xml
--- a/display_applications/proteomics/mzML.xml Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-<display id="proteomics_mzml" version="1.0.2" name="view mzML data">
- <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
-        <!-- Define parameters by column from file -->
-        <dynamic_param name="site_id" value="0"/>
-        <dynamic_param name="site_url" value="1"/>
-        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
-        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=mzml</url>
-        <param type="data" name="raw_file" viewable="False" format="mzML"/>
-        <param type="data" dataset="raw_file" name="mzml_file" format="mzML" viewable="False" />
-        <param type="template" name="encoded_filename" strip="True" >
-            #import binascii
-            ${binascii.hexlify( $mzml_file.file_name )}
-        </param>
-        <param type="template" name="galaxy_url" strip="True" >
-                ${BASE_URL.split(":")[1][2:]}
-        </param>
-    </dynamic_links>
-</display>
\ No newline at end of file
b
diff -r b82d4034e0f8 -r 58469754fd82 proteomics.py
--- a/proteomics.py Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,330 +0,0 @@\n-"""\n-Proteomics format classes\n-"""\n-import logging\n-import re\n-import binascii\n-\n-from galaxy.datatypes.sniff import *\n-from galaxy.datatypes import data\n-from galaxy.datatypes.data import Text\n-from galaxy.datatypes.xml import GenericXml\n-from galaxy.datatypes.binary import Binary\n-from galaxy.datatypes.tabular import Tabular\n-from galaxy.datatypes.interval import Gff\n-\n-log = logging.getLogger(__name__)\n-\n-class ProtGff( Gff ):\n-    """Tab delimited data in Gff format"""\n-    file_ext = "prot_gff"\n-    def set_peek( self, dataset, is_multi_byte=False ):\n-        """Set the peek and blurb text"""\n-        if not dataset.dataset.purged:\n-            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )\n-            dataset.blurb = \'Proteogenomics GFF\'\n-        else:\n-            dataset.peek = \'file does not exist\'\n-            dataset.blurb = \'file purged from disk\'\n-\n-    def sniff( self, filename ):\n-        handle = open(filename)\n-        xmlns_re = re.compile("^##gff-version")\n-        for i in range(3):\n-            line = handle.readline()\n-            if xmlns_re.match(line.strip()):\n-                handle.close()\n-                return True\n-\n-        handle.close()\n-        return False\n-\n-\n-class Xls( Binary ):\n-    """Class describing a binary excel spreadsheet file"""\n-    file_ext = "xls"\n-\n-    def set_peek( self, dataset, is_multi_byte=False ):\n-        if not dataset.dataset.purged:\n-            dataset.peek  = "Excel Spreadsheet file"\n-            dataset.blurb = data.nice_size( dataset.get_size() )\n-        else:\n-            dataset.peek = \'file does not exist\'\n-            dataset.blurb = \'file purged from disk\'\n-    def display_peek( self, dataset ):\n-        try:\n-            return dataset.peek\n-        except:\n-            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )\n-\n-class IdpDB( Binary ):\n-    file_ext = "idpDB"\n-\n-if hasattr(Binary, \'register_unsniffable_binary_ext\'):\n-    Binary.register_unsniffable_binary_ext(\'idpDB\')\n-\n-\n-class PepXmlReport( Tabular ):\n-    """pepxml converted to tabular report"""\n-    file_ext = "tsv"\n-\n-    def __init__(self, **kwd):\n-        Tabular.__init__( self, **kwd )\n-        self.column_names = [\'Protein\', \'Peptide\', \'Assumed Charge\', \'Neutral Pep Mass (calculated)\', \'Neutral Mass\', \'Retention Time\', \'Start Scan\', \'End Scan\', \'Search Engine\', \'PeptideProphet Probability\', \'Interprophet Probabaility\']\n-\n-    def display_peek( self, dataset ):\n-        """Returns formated html of peek"""\n-        return Tabular.make_html_table( self, dataset, column_names=self.column_names )\n-\n-\n-class ProtXmlReport( Tabular ):\n-    """protxml converted to tabular report"""\n-    file_ext = "tsv"\n-    comment_lines = 1\n-    \n-    def __init__(self, **kwd):\n-        Tabular.__init__( self, **kwd )\n-        self.column_names = ["Entry Number", "Group Probability", "Protein", "Protein Link", "Protein Probability", "Percent Coverage", "Number of Unique Peptides", "Total Independent Spectra", "Percent Share of Spectrum ID\'s", "Description", "Protein Molecular Weight", "Protein Length", "Is Nondegenerate Evidence", "Weight", "Precursor Ion Charge", "Peptide sequence", "Peptide Link", "NSP Adjusted Probability", "Initial Probability", "Number of Total Termini", "Number of Sibling Peptides Bin", "Number of Instances", "Peptide Group Designator", "Is Evidence?"]\n-\n-    def display_peek( self, dataset ):\n-        """Returns formated html of peek"""\n-        return Tabular.make_html_table( self, dataset, column_names=self.column_names )\n-\n-class ProteomicsXml( GenericXml ):\n-    """ An enhanced XML datatype used to reuse code across several\n-    proteomic/mass-spec datatypes. """\n-\n-    def sniff(self, filename):\n-        """ Determines whether the file is the correct XML type. """\n-        with open(filename, \'r\') as contents:\n-            while True:\n-                line = contents.readline()\n-                if li'..b'  mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"\n-        max_lines=10\n-\n-        for i, line in enumerate( file( filename ) ):\n-            line = line.rstrip( \'\\n\\r\' )\n-            if line==mime_version:\n-                return True\n-            if i>max_lines:\n-                return False\n-\n-\n-class RAW( Binary ):\n-    """Class describing a Thermo Finnigan binary RAW file"""\n-    file_ext = "raw"\n-    def sniff( self, filename ):\n-        # Thermo Finnigan RAW format is proprietary and hence not well documented.\n-        # Files start with 2 bytes that seem to differ followed by F\\0i\\0n\\0n\\0i\\0g\\0a\\0n\n-        # This combination represents 17 bytes, but to play safe we read 20 bytes from \n-        # the start of the file.\n-        try:\n-            header = open( filename ).read(20)\n-            hexheader = binascii.b2a_hex( header )\n-            finnigan  = binascii.hexlify( \'F\\0i\\0n\\0n\\0i\\0g\\0a\\0n\' )\n-            if hexheader.find(finnigan) != -1:\n-                return True\n-            return False\n-        except:\n-            return False\n-    def set_peek( self, dataset, is_multi_byte=False ):\n-        if not dataset.dataset.purged:\n-            dataset.peek  = "Thermo Finnigan RAW file"\n-            dataset.blurb = data.nice_size( dataset.get_size() )\n-        else:\n-            dataset.peek = \'file does not exist\'\n-            dataset.blurb = \'file purged from disk\'\n-    def display_peek( self, dataset ):\n-        try:\n-            return dataset.peek\n-        except:\n-            return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )\n-\n-\n-if hasattr(Binary, \'register_sniffable_binary_format\'):\n-    Binary.register_sniffable_binary_format(\'raw\', \'raw\', RAW)\n-\n-\n-class Msp( Text ):\n-    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """\n-    file_ext = "msp"\n-    \n-    @staticmethod\n-    def next_line_starts_with(contents, prefix):\n-        next_line = contents.readline()\n-        return next_line != None and next_line.startswith(prefix)\n-\n-    def sniff(self, filename):\n-        """ Determines whether the file is a NIST MSP output file. \n-\n-        >>> fname = get_test_fname(\'test.msp\')  \n-        >>> Msp().sniff(fname)\n-        True\n-        >>> fname = get_test_fname(\'test.mzXML\')\n-        >>> Msp().sniff(fname)\n-        False\n-        """\n-        with open(filename, \'r\') as contents:\n-            return Msp.next_line_starts_with(contents, "Name:") and Msp.next_line_starts_with(contents, "MW:")\n-\n-class Ms2(Text):\n-    file_ext = "ms2"\n-    \n-    def sniff(self, filename):\n-        """ Determines whether the file is a valid ms2 file. \n-\n-        >>> fname = get_test_fname(\'test.msp\')  \n-        >>> Ms2().sniff(fname)\n-        False\n-        >>> fname = get_test_fname(\'test.ms2\')\n-        >>> Ms2().sniff(fname)\n-        True\n-        """\n-\n-        with open(filename, \'r\') as contents:\n-            header_lines = []\n-            while True:\n-                line = contents.readline()\n-                if line == None or len(line) == 0:\n-                    pass\n-                elif line.startswith(\'H\\t\'):\n-                    header_lines.append(line)\n-                else:\n-                    break\n-        for header_field in [\'CreationDate\', \'Extractor\', \'ExtractorVersion\', \'ExtractorOptions\']:\n-            found_header = False\n-            for header_line in header_lines:\n-                if header_line.startswith(\'H\\t%s\' % (header_field)):\n-                    found_header = True\n-                    break\n-            if not found_header:\n-                return False\n-\n-        return True\n-\n-# unsniffable binary format, should do something about this\n-class XHunterAslFormat( Binary ):\n-    """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """\n-    file_ext = "hlf"\n-\n-if hasattr(Binary, \'register_unsniffable_binary_ext\'):\n-    Binary.register_unsniffable_binary_ext(\'hlf\')\n'
b
diff -r b82d4034e0f8 -r 58469754fd82 tool-data/proteogenomics_display_site.txt.sample
--- a/tool-data/proteogenomics_display_site.txt.sample Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-#Proteomic Visualization application should be hosted on the same server as galaxy
-#Entries in this file are of the format "site_id" site_url
-Proteogenomics Browser http://127.0.0.1:8600
b
diff -r b82d4034e0f8 -r 58469754fd82 tool-data/protk_display_site.txt.sample
--- a/tool-data/protk_display_site.txt.sample Tue Feb 11 17:51:48 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-#Proteomic Visualization application should be hosted on the same server as galaxy
-#Entries in this file are of the format "site_id" site_url
-Proteomics Visualize http://127.0.0.1:8500