diff augment_maxquant_mods.py @ 0:d4b6c9eae635 draft

Initial commit.
author galaxyp
date Fri, 10 May 2013 17:22:51 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/augment_maxquant_mods.py	Fri May 10 17:22:51 2013 -0400
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+"""
+Usage:
+    python augment_maxquant_mods.py
+
+Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications
+file (modifications.xml) are in this same directory, this script will
+create a new MaxQuant modifications file (extended_modifications.xml)
+with an a new modification for each unimod entry. These new entires
+will be suffixed with [Unimod] to distinguish them from existing
+MaxQuant entries. This file should be copied to
+<MaxQuant Path>\bin\conf\modifications.xml
+
+"""
+import xml.etree.ElementTree as ET
+import re
+
+FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"
+
+POSITION_MAP = {
+    "Anywhere": "anywhere",
+    "Any N-term": "anyNterm",
+    "Any C-term": "anyCterm",
+    "Protein N-term": "proteinNterm",
+    "Protein C-term": "proteinCterm",
+}
+
+unimod_tree = ET.parse('unimod.xml')
+unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'
+unimod_modifications_el = unimod_tree.getroot().find('%smodifications' % unimod_ns)
+mq_tree = ET.parse("modifications.xml")
+mq_root = mq_tree.getroot()
+
+
+def to_label(title, site):
+    return "%s (%s) [Unimod]" % (title, site)
+
+
+def copy_modification(unimod_modification):
+    if unimod_modification.hidden:
+        return False
+    if unimod_modification.delta_el is None:
+        return False
+    comp_array = unimod_modification.composition_array
+    for aa, count in comp_array:
+        if len(aa) > 1 and aa not in COMP_REPLACES.keys():
+            # Complex stuff like Hep, that I cannot translate into MaxQuant.
+            return False
+    return True
+
+
+COMP_REPLACES = {
+    "15N": "Nx",
+    "13C": "Cx",
+    "18O": "Ox",
+    "2H": "Hx",
+}
+
+## HEP?
+
+
+def convert_composition(unimod_composition):
+    """
+    Convert Unimod representation of composition to MaxQuant
+    """
+    composition = unimod_composition
+    for key, value in COMP_REPLACES.iteritems():
+        composition = composition.replace(key, value)
+    print composition
+    return composition
+
+
+def populate_modification(modification, unimod_modification):
+    """
+    Copy unimod entry ``unimod_modification`` to MaxQuant entry ``modification``.
+    """
+    attrib = modification.attrib
+    attrib["create_date"] = FAKE_DATE
+    attrib["last_modified_date"] = FAKE_DATE
+    attrib["reporterCorrectionM1"] = str(0)
+    attrib["reporterCorrectionM2"] = str(0)
+    attrib["reporterCorrectionP1"] = str(0)
+    attrib["reporterCorrectionP2"] = str(0)
+    attrib["user"] = "build_mods_script"
+    label = unimod_modification.label
+    attrib["title"] = label
+    attrib["description"] = label
+    attrib["composition"] = convert_composition(unimod_modification.raw_composition)
+    unimod_position = unimod_modification.position
+    maxquant_position = POSITION_MAP[unimod_position]
+    assert maxquant_position != None
+    position_el = ET.SubElement(modification, "position")
+    position_el.text = maxquant_position
+    modification_site_el = ET.SubElement(modification, "modification_site")
+    modification_site_el.attrib["index"] = "0"
+    unimod_site = unimod_modification.site
+    modification_site_el.attrib["site"] = "-" if len(unimod_site) > 1 else unimod_site
+    type_el = ET.SubElement(modification, "type")
+    type_el.text = "standard"
+    return modification
+
+
+class UnimodModification:
+
+    def __init__(self, modification, specificity):
+        self.modification = modification
+        self.specificity = specificity
+
+    @property
+    def title(self):
+        return self.modification.attrib["title"]
+
+    @property
+    def site(self):
+        return self.specificity.attrib["site"]
+
+    @property
+    def label(self):
+        return "%s (%s) [Unimod]" % (self.title, self.site)
+
+    @property
+    def delta_el(self):
+        return self.modification.find("%sdelta" % unimod_ns)
+
+    @property
+    def raw_composition(self):
+        return self.delta_el.attrib["composition"]
+
+    @property
+    def composition_array(self):
+        raw_composition = self.raw_composition
+        aa_and_counts = re.split("\s+", raw_composition)
+        comp_array = []
+        for aa_and_count in aa_and_counts:
+            match = re.match(r"(\w+)(\((-?\d+)\))?", aa_and_count)
+            aa = match.group(1)
+            count = match.group(3) or 1
+            comp_array.append((aa, count))
+        return comp_array
+
+    @property
+    def position(self):
+        return self.specificity.attrib["position"]
+
+    @property
+    def hidden(self):
+        return self.specificity.attrib["hidden"] == "true"
+
+unimod_modifications = []
+for mod in unimod_modifications_el.findall('%smod' % unimod_ns):
+    for specificity in mod.findall('%sspecificity' % unimod_ns):
+        unimod_modifications.append(UnimodModification(mod, specificity))
+
+max_index = 0
+for modification in mq_root.getchildren():
+    index = int(modification.attrib["index"])
+    max_index = max(max_index, index)
+
+for unimod_modification in unimod_modifications:
+    if copy_modification(unimod_modification):
+        print unimod_modification.composition_array
+        max_index += 1
+        modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
+        populate_modification(modification, unimod_modification)
+
+mq_tree.write("extended_modifications.xml")