comparison augment_maxquant_mods.py @ 0:d4b6c9eae635 draft

Initial commit.
author galaxyp
date Fri, 10 May 2013 17:22:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d4b6c9eae635
1 #!/usr/bin/env python
2 """
3 Usage:
4 python augment_maxquant_mods.py
5
6 Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications
7 file (modifications.xml) are in this same directory, this script will
8 create a new MaxQuant modifications file (extended_modifications.xml)
9 with a new modification for each unimod entry. These new entries
10 will be suffixed with [Unimod] to distinguish them from existing
11 MaxQuant entries. This file should be copied to
12 <MaxQuant Path>\bin\conf\modifications.xml
13
14 """
15 import xml.etree.ElementTree as ET
16 import re
17
# Fixed placeholder timestamp; populate_modification() writes it into the
# create_date and last_modified_date attributes of every generated entry.
FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"

# Translation table from the position names found on Unimod specificity
# entries to the position names written into the MaxQuant file.
POSITION_MAP = dict([
    ("Anywhere", "anywhere"),
    ("Any N-term", "anyNterm"),
    ("Any C-term", "anyCterm"),
    ("Protein N-term", "proteinNterm"),
    ("Protein C-term", "proteinCterm"),
])
27
# ElementTree spells namespaced tags as "{uri}tag"; this is the prefix used
# for every Unimod element lookup in this script.
unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'

# Unimod input, narrowed down to its <modifications> element.
unimod_tree = ET.parse('unimod.xml')
unimod_root = unimod_tree.getroot()
unimod_modifications_el = unimod_root.find('%smodifications' % unimod_ns)

# Stock MaxQuant input; new entries are appended to its root element.
mq_tree = ET.parse("modifications.xml")
mq_root = mq_tree.getroot()
33
34
def to_label(title, site):
    """
    Build the display label for a Unimod modification at a given site,
    e.g. ``to_label("Acetyl", "K")`` gives ``"Acetyl (K) [Unimod]"``.
    """
    template = "%s (%s) [Unimod]"
    label_parts = (title, site)
    return template % label_parts
37
38
def copy_modification(unimod_modification):
    """
    Decide whether a Unimod entry should be copied into the MaxQuant file.

    Returns ``False`` for hidden entries, for entries without a delta
    element, and for entries whose composition contains a symbol longer than
    one character that is not a key of ``COMP_REPLACES``. Returns ``True``
    otherwise.
    """
    if unimod_modification.hidden or unimod_modification.delta_el is None:
        return False
    # Multi-character symbols outside COMP_REPLACES are complex building
    # blocks (e.g. Hep) that cannot be translated into MaxQuant.
    untranslatable = any(
        len(symbol) > 1 and symbol not in COMP_REPLACES
        for symbol, _count in unimod_modification.composition_array
    )
    return not untranslatable
50
51
# Heavy-isotope symbols as they appear in Unimod compositions, mapped to the
# symbols written into the MaxQuant composition string.
COMP_REPLACES = {
    "15N": "Nx",
    "13C": "Cx",
    "18O": "Ox",
    "2H": "Hx",
}

# TODO(review): the original author left an open "HEP?" note here; there is
# no mapping for Hep, so copy_modification() skips entries that contain it.


def convert_composition(unimod_composition):
    """
    Convert Unimod representation of composition to MaxQuant.

    Every occurrence of a ``COMP_REPLACES`` key in ``unimod_composition`` is
    replaced by the corresponding value; everything else is left untouched.
    The converted string is printed and returned.
    """
    composition = unimod_composition
    # .items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    for unimod_symbol, maxquant_symbol in COMP_REPLACES.items():
        composition = composition.replace(unimod_symbol, maxquant_symbol)
    # Progress output kept from the original script, written as a call so
    # the module also parses on Python 3.
    print(composition)
    return composition
71
72
def populate_modification(modification, unimod_modification):
    """
    Copy unimod entry ``unimod_modification`` to MaxQuant entry ``modification``.

    Sets the date, reporter-correction, user, title, description and
    composition attributes on ``modification`` and appends ``position``,
    ``modification_site`` and ``type`` child elements to it.

    Raises ``KeyError`` when the Unimod position has no entry in
    ``POSITION_MAP``. Returns ``modification``.
    """
    attrib = modification.attrib
    attrib["create_date"] = FAKE_DATE
    attrib["last_modified_date"] = FAKE_DATE
    attrib["reporterCorrectionM1"] = "0"
    attrib["reporterCorrectionM2"] = "0"
    attrib["reporterCorrectionP1"] = "0"
    attrib["reporterCorrectionP2"] = "0"
    attrib["user"] = "build_mods_script"

    # The same label serves as both title and description.
    label = unimod_modification.label
    attrib["title"] = label
    attrib["description"] = label
    attrib["composition"] = convert_composition(unimod_modification.raw_composition)

    unimod_position = unimod_modification.position
    if unimod_position not in POSITION_MAP:
        # Same exception type a plain lookup would raise, but the message
        # names the offending position.
        raise KeyError(
            "no MaxQuant position for Unimod position %r" % (unimod_position,)
        )
    position_el = ET.SubElement(modification, "position")
    position_el.text = POSITION_MAP[unimod_position]

    modification_site_el = ET.SubElement(modification, "modification_site")
    modification_site_el.attrib["index"] = "0"
    unimod_site = unimod_modification.site
    # Sites longer than one character are written as "-".
    modification_site_el.attrib["site"] = "-" if len(unimod_site) > 1 else unimod_site

    type_el = ET.SubElement(modification, "type")
    type_el.text = "standard"
    return modification
101
102
class UnimodModification(object):
    """
    Read-only view over one Unimod modification element paired with one of
    its specificity elements.

    ``modification`` and ``specificity`` are ElementTree elements; every
    property reads from their attributes or children on each access.
    """

    # One composition token: a symbol (group 1) optionally followed by a
    # signed count in parentheses (group 3), e.g. "H(-3)" or "13C".
    _TOKEN_RE = re.compile(r"(\w+)(\((-?\d+)\))?")

    def __init__(self, modification, specificity):
        self.modification = modification
        self.specificity = specificity

    @property
    def title(self):
        """The ``title`` attribute of the modification element."""
        return self.modification.attrib["title"]

    @property
    def site(self):
        """The ``site`` attribute of the specificity element."""
        return self.specificity.attrib["site"]

    @property
    def label(self):
        """Title and site combined into the ``[Unimod]``-suffixed label."""
        return "%s (%s) [Unimod]" % (self.title, self.site)

    @property
    def delta_el(self):
        """The modification's ``delta`` child element, or ``None`` if absent."""
        return self.modification.find("%sdelta" % unimod_ns)

    @property
    def raw_composition(self):
        """The ``composition`` attribute of the delta element, unparsed."""
        return self.delta_el.attrib["composition"]

    @property
    def composition_array(self):
        """
        The composition as a list of ``(symbol, count)`` tuples.

        ``count`` is always an ``int``; a symbol without an explicit
        ``(n)`` suffix counts as 1. Raises ``ValueError`` for a token that
        does not start with a symbol.
        """
        raw_composition = self.raw_composition
        comp_array = []
        # str.split() ignores leading/trailing whitespace, so no empty
        # token ever reaches the regex.
        for token in raw_composition.split():
            match = self._TOKEN_RE.match(token)
            if match is None:
                raise ValueError(
                    "cannot parse composition token %r in %r"
                    % (token, raw_composition)
                )
            count_text = match.group(3)
            count = int(count_text) if count_text is not None else 1
            comp_array.append((match.group(1), count))
        return comp_array

    @property
    def position(self):
        """The ``position`` attribute of the specificity element."""
        return self.specificity.attrib["position"]

    @property
    def hidden(self):
        """
        Whether the specificity is flagged as hidden.

        NOTE(review): a missing attribute is treated as "not hidden" and
        "1" is accepted alongside "true" -- confirm against the Unimod
        schema.
        """
        return self.specificity.attrib.get("hidden", "false") in ("true", "1")
148
# One UnimodModification per (mod, specificity) pair found in the Unimod file.
unimod_modifications = [
    UnimodModification(mod, specificity)
    for mod in unimod_modifications_el.findall('%smod' % unimod_ns)
    for specificity in mod.findall('%sspecificity' % unimod_ns)
]

# New entries are numbered after the highest index already present in the
# stock MaxQuant file (or from 1 if it has no entries).
max_index = 0
# Iterate the element directly: Element.getchildren() is gone in Python 3.9+.
for modification in mq_root:
    max_index = max(max_index, int(modification.attrib["index"]))

for unimod_modification in unimod_modifications:
    if not copy_modification(unimod_modification):
        continue
    # Progress output kept from the original script, written as a call so
    # the module also parses on Python 3.
    print(unimod_modification.composition_array)
    max_index += 1
    modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
    populate_modification(modification, unimod_modification)

mq_tree.write("extended_modifications.xml")