annotate augment_maxquant_mods.py @ 0:d4b6c9eae635 draft

Initial commit.
author galaxyp
date Fri, 10 May 2013 17:22:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
2 """
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
3 Usage:
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
4 python augment_maxquant_mods.py
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
5
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
6 Assuming Unimod XML file (unimod.xml) and stock MaxQuant modifications
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
7 file (modifications.xml) are in this same directory, this script will
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
8 create a new MaxQuant modifications file (extended_modifications.xml)
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
9 with a new modification for each Unimod entry. These new entries
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
10 will be suffixed with [Unimod] to distinguish them from existing
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
11 MaxQuant entries. This file should be copied to
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
12 <MaxQuant Path>\bin\conf\modifications.xml
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
13
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
14 """
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
15 import xml.etree.ElementTree as ET
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
16 import re
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
17
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
# Fixed timestamp written into both the creation and last-modified date
# attributes of every generated entry.
FAKE_DATE = "2012-06-11T21:21:24.4946343+02:00"

# Unimod position name -> MaxQuant position name.
POSITION_MAP = dict([
    ("Anywhere", "anywhere"),
    ("Any N-term", "anyNterm"),
    ("Any C-term", "anyCterm"),
    ("Protein N-term", "proteinNterm"),
    ("Protein C-term", "proteinCterm"),
])
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
27
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
# Parse the Unimod database (path is relative, so it is resolved against the
# current working directory).
unimod_tree = ET.parse('unimod.xml')

# Namespace prefix, in ElementTree's "{uri}tag" notation, used for every
# Unimod element lookup in this script.
unimod_ns = '{http://www.unimod.org/xmlns/schema/unimod_2}'

# The <modifications> container directly under the Unimod root element.
unimod_root = unimod_tree.getroot()
unimod_modifications_el = unimod_root.find(unimod_ns + 'modifications')
del unimod_root

# Parse the stock MaxQuant modifications file; generated entries are later
# appended to its root element.
mq_tree = ET.parse("modifications.xml")
mq_root = mq_tree.getroot()
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
33
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
34
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
def to_label(title, site):
    """
    Build the display label for a Unimod-derived entry.

    The result is the title, the site in parentheses and a trailing
    ``[Unimod]`` tag, e.g. ``"Acetyl (K) [Unimod]"``.
    """
    template = "%s (%s) [Unimod]"
    return template % (title, site)
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
37
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
38
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
def copy_modification(unimod_modification):
    """
    Decide whether a Unimod entry should be copied into the MaxQuant file.

    ``unimod_modification`` must expose ``hidden``, ``delta_el`` and
    ``composition_array`` (as ``UnimodModification`` does).

    Returns ``False`` for hidden entries, for entries without a delta
    element, and for entries whose composition contains a symbol longer than
    one character that has no entry in ``COMP_REPLACES``. Returns ``True``
    otherwise.
    """
    if unimod_modification.hidden:
        return False
    if unimod_modification.delta_el is None:
        return False
    for symbol, _count in unimod_modification.composition_array:
        # Complex stuff like Hep, that I cannot translate into MaxQuant.
        # Membership is tested on the dict itself: on Python 2 ``.keys()``
        # would build a throwaway list for every symbol.
        if len(symbol) > 1 and symbol not in COMP_REPLACES:
            return False
    return True
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
50
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
51
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
# Unimod composition symbols and the spelling written to the MaxQuant file
# in their place.
COMP_REPLACES = {
    "15N": "Nx",
    "13C": "Cx",
    "18O": "Ox",
    "2H": "Hx",
}

## HEP?


def convert_composition(unimod_composition):
    """
    Convert Unimod representation of composition to MaxQuant.

    Every occurrence of a ``COMP_REPLACES`` key in ``unimod_composition`` is
    replaced by the corresponding value; all other text is left untouched.
    The converted string is printed to standard output and then returned.
    """
    composition = unimod_composition
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only.
    for unimod_symbol, maxquant_symbol in COMP_REPLACES.items():
        composition = composition.replace(unimod_symbol, maxquant_symbol)
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(composition)
    return composition
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
71
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
72
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
def populate_modification(modification, unimod_modification):
    """
    Copy unimod entry ``unimod_modification`` to MaxQuant entry ``modification``.

    Sets the date, reporter-correction, user, title, description and
    composition attributes on ``modification`` and appends ``position``,
    ``modification_site`` and ``type`` child elements to it.

    Returns ``modification``, which is modified in place.

    Raises ``KeyError`` when the Unimod position has no entry in
    ``POSITION_MAP``.
    """
    attrib = modification.attrib
    attrib["create_date"] = FAKE_DATE
    attrib["last_modified_date"] = FAKE_DATE
    for correction in ("M1", "M2", "P1", "P2"):
        attrib["reporterCorrection" + correction] = "0"
    attrib["user"] = "build_mods_script"

    label = unimod_modification.label
    attrib["title"] = label
    attrib["description"] = label
    attrib["composition"] = convert_composition(unimod_modification.raw_composition)

    unimod_position = unimod_modification.position
    try:
        maxquant_position = POSITION_MAP[unimod_position]
    except KeyError:
        # The lookup itself is the only thing that can fail here, so report
        # which position was unmapped instead of asserting afterwards.
        raise KeyError(
            "No MaxQuant position known for Unimod position %r" % (unimod_position,)
        )
    position_el = ET.SubElement(modification, "position")
    position_el.text = maxquant_position

    modification_site_el = ET.SubElement(modification, "modification_site")
    modification_site_el.attrib["index"] = "0"
    unimod_site = unimod_modification.site
    # Sites longer than one character are written as "-"; single-character
    # sites are copied through unchanged.
    modification_site_el.attrib["site"] = "-" if len(unimod_site) > 1 else unimod_site

    type_el = ET.SubElement(modification, "type")
    type_el.text = "standard"
    return modification
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
101
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
102
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
class UnimodModification(object):
    """
    One Unimod ``mod`` element paired with one of its ``specificity`` children.

    ``modification`` supplies the title and the ``delta`` child (composition);
    ``specificity`` supplies the site, position and hidden flag.
    """

    # One composition token: a word-character symbol, optionally followed by
    # a signed integer count in parentheses, e.g. ``H``, ``C(2)``, ``H(-3)``.
    _TOKEN_RE = re.compile(r"(\w+)(\((-?\d+)\))?")

    def __init__(self, modification, specificity):
        self.modification = modification
        self.specificity = specificity

    @property
    def title(self):
        """The ``title`` attribute of the ``mod`` element."""
        return self.modification.attrib["title"]

    @property
    def site(self):
        """The ``site`` attribute of the ``specificity`` element."""
        return self.specificity.attrib["site"]

    @property
    def label(self):
        """Title and site combined, tagged with ``[Unimod]``."""
        return "%s (%s) [Unimod]" % (self.title, self.site)

    @property
    def delta_el(self):
        """The namespaced ``delta`` child of the ``mod`` element, or ``None``."""
        return self.modification.find("%sdelta" % unimod_ns)

    @property
    def raw_composition(self):
        """The unparsed ``composition`` attribute of the ``delta`` element."""
        return self.delta_el.attrib["composition"]

    @property
    def composition_array(self):
        """
        The composition parsed into a list of ``(symbol, count)`` tuples.

        ``count`` is always an ``int``; a token without a parenthesised count
        counts as 1. An empty composition yields an empty list.

        Raises ``ValueError`` for a token that does not start with a symbol.
        """
        comp_array = []
        # ``split()`` without arguments splits on whitespace runs and never
        # yields empty tokens, so blank or padded compositions are safe.
        for token in self.raw_composition.split():
            match = self._TOKEN_RE.match(token)
            if match is None:
                raise ValueError("Cannot parse composition token %r" % (token,))
            raw_count = match.group(3)
            count = 1 if raw_count is None else int(raw_count)
            comp_array.append((match.group(1), count))
        return comp_array

    @property
    def position(self):
        """The ``position`` attribute of the ``specificity`` element."""
        return self.specificity.attrib["position"]

    @property
    def hidden(self):
        """Whether the ``specificity`` element is flagged as hidden."""
        # An XML boolean may be spelled "true" or "1"; accept both so the
        # flag is honoured whichever form the file uses.
        # NOTE(review): Unimod files are believed to use "0"/"1" - confirm
        # against the unimod.xml in use.
        return self.specificity.attrib["hidden"] in ("true", "1")
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
148
d4b6c9eae635 Initial commit.
galaxyp
parents:
diff changeset
# Pair every Unimod <mod> element with each of its <specificity> children;
# each pair becomes one candidate MaxQuant entry.
unimod_modifications = []
for mod in unimod_modifications_el.findall('%smod' % unimod_ns):
    for specificity in mod.findall('%sspecificity' % unimod_ns):
        unimod_modifications.append(UnimodModification(mod, specificity))

# Highest "index" attribute already present in the MaxQuant file (0 if the
# root has no children). Iterating the element directly yields its children;
# Element.getchildren() no longer exists on Python 3.9+.
max_index = 0
for modification in mq_root:
    index = int(modification.attrib["index"])
    max_index = max(max_index, index)

# Append one new <modification> per accepted Unimod entry, numbering them
# consecutively after the existing entries.
for unimod_modification in unimod_modifications:
    if copy_modification(unimod_modification):
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(unimod_modification.composition_array)
        max_index += 1
        modification = ET.SubElement(mq_root, "modification", attrib={"index": str(max_index)})
        populate_modification(modification, unimod_modification)

mq_tree.write("extended_modifications.xml")