comparison mqparam.py @ 4:dcd39bcc7481 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/maxquant commit da342a782ccc391b87fb4fead956b7b3cbd21258"
author galaxyp
date Sat, 11 Apr 2020 11:49:19 -0400
parents 175e062b6a17
children 2133b0be850a
comparison
equal deleted inserted replaced
3:175e062b6a17 4:dcd39bcc7481
1 """ 1 """
2 Create a project-specific MaxQuant parameter file. 2 Create a project-specific MaxQuant parameter file.
3
4 TODO: check validity of parsed experimental design template
5 add support for parameter groups
6 add reporter ion MS2
7 add label free quantification
8 don't hardcode parse rules for fasta files
9
10 Author: Damian Glaetzer <d.glaetzer@mailbox.org>
11 """ 3 """
12 4
5 import copy
13 import ntpath 6 import ntpath
14 import os 7 import os
15 import re 8 import re
9 import yaml
16 import xml.etree.ElementTree as ET 10 import xml.etree.ElementTree as ET
17 from itertools import zip_longest 11 from itertools import zip_longest
18 from xml.dom import minidom 12 from xml.dom import minidom
13
14
def et_add_child(el, name, text, attrib=None):
    """Append a new child element to an xml.etree.ElementTree.Element.

    Args:
        el: parent element that receives the new child
        name: (string) tag of the new child
        text: content of the new child; it is converted with str()
        attrib: optional dict of XML attributes for the new child
    Returns:
        the newly created child element
    """
    attributes = attrib or {}
    new_node = ET.SubElement(el, name, attrib=attributes)
    new_node.text = str(text)
    return new_node
20
21
class ParamGroup:
    """Represents one parameter group.

    All setters operate on a private deep copy of the element passed to the
    constructor, so the element handed in is never modified.
    """

    def __init__(self, root):
        """Initialize with its xml.etree.ElementTree root Element.

        Args:
            root: the element holding the parameters of this group;
                it is deep-copied
        """
        self._root = copy.deepcopy(root)

    def _find(self, key):
        """Return the element matching key below the group root.

        Args:
            key: (string) tag (or ElementTree path) of the element
        Returns:
            the matching element
        Raises:
            ValueError: if no element matches
        """
        node = self._root.find(key)
        if node is None:
            raise ValueError('Element {} not found in parameter file'
                             .format(key))
        return node

    def set_list_param(self, key, vals):
        """Set a list parameter.

        Replaces the content of the element with one <string> child per value.

        Args:
            key: (string) tag of the list element
            vals: iterable of values
        Returns:
            None
        Raises:
            ValueError: if the element does not exist
        """
        node = self._find(key)
        # clear() drops children, attributes and text but keeps the tag,
        # so the tag must not be overwritten with the lookup key.
        node.clear()
        for e in vals:
            et_add_child(node, name='string', text=e)

    def set_simple_param(self, key, value):
        """Set a simple parameter.

        Args:
            key: (string) tag of the parameter
            value: new text of the element, converted with str()
        Returns:
            None
        Raises:
            ValueError: if the element does not exist
        """
        self._find(key).text = str(value)

    def set_silac(self, light_labels, medium_labels, heavy_labels):
        """Set label modifications.

        Multiplicity is 3 when medium labels are given, 2 when only heavy
        (and optionally light) labels are given and 1 otherwise. The
        'labelMods' element ends up with exactly one <string> per channel.

        Args:
            light_labels: list of label names or None
            medium_labels: list of label names or None
            heavy_labels: list of label names or None
        Returns:
            None
        Raises:
            ValueError: if medium labels are given without any light or
                heavy labels, or if a required element is missing
        """
        # medium labels on their own (neither light nor heavy) are rejected
        if medium_labels and not (heavy_labels or light_labels):
            raise ValueError("Incorrect SILAC specification. Use medium only together with light and heavy labels.")

        def joined(labels):
            return ';'.join(labels) if labels else ''

        multiplicity = 3 if medium_labels else 2 if heavy_labels else 1
        max_label = max(len(labels) if labels else 0
                        for labels in (light_labels, medium_labels, heavy_labels))

        # look up everything first so nothing is modified if a node is missing
        multiplicity_node = self._find('multiplicity')
        max_label_node = self._find('maxLabeledAa')
        label_node = self._find('labelMods')

        multiplicity_node.text = str(multiplicity)
        max_label_node.text = str(max_label)

        # keep the first entry for the light channel and drop any further
        # ones, so repeated calls do not accumulate entries
        existing = list(label_node)
        if existing:
            for extra in existing[1:]:
                label_node.remove(extra)
            existing[0].text = joined(light_labels)
        else:
            et_add_child(label_node, name='string', text=joined(light_labels))
        if multiplicity == 3:
            et_add_child(label_node, name='string', text=joined(medium_labels))
        if multiplicity > 1:
            et_add_child(label_node, name='string', text=joined(heavy_labels))

    def set_isobaric_label(self, internalLabel, terminalLabel,
                           cm2, cm1, cp1, cp2, tmtLike):
        """Add isobaric label info.

        Args:
            internalLabel: string
            terminalLabel: string
            cm2: (float) correction factor
            cm1: (float) correction factor
            cp1: (float) correction factor
            cp2: (float) correction factor
            tmtLike: bool or string
        Returns:
            None
        Raises:
            ValueError: if the 'isobaricLabels' element is missing or a
                correction factor cannot be converted to a number
        """
        iso_labels_node = self._find('isobaricLabels')
        # Convert before touching the tree: numeric strings are accepted and
        # a bad value does not leave a half-built label behind.
        factors = []
        for num, factor in (('M2', cm2), ('M1', cm1), ('P1', cp1), ('P2', cp2)):
            value = float(factor)
            # whole numbers are written without a decimal part
            factors.append((num, str(value if value % 1 else int(value))))
        label = et_add_child(iso_labels_node, 'IsobaricLabelInfo', '')
        et_add_child(label, 'internalLabel', internalLabel)
        et_add_child(label, 'terminalLabel', terminalLabel)
        for num, text in factors:
            et_add_child(label, 'correctionFactor' + num, text)
        et_add_child(label, 'tmtLike', str(tmtLike))
19 93
20 94
class MQParam:
    """Represents a mqpar.xml and provides methods to modify
    some of its parameters.
    """

    def __init__(self, mqpar_in, exp_design=None, yaml=None, substitution_rx=r'[^\s\S]'):  # no sub by default
        """Initialize MQParam class. mqpar_in can either be a template
        or an already suitable mqpar file.

        Args:
            mqpar_in: a template parameter file
            exp_design: an experimental design template (see MaxQuant
                documentation), can be None
            yaml: path to a yaml config file; if given, it is applied
                right away
            substitution_rx: a regular expression for replacements in the
                file names. It is applied before comparing input file names
                (e.g. from the exp. design)
        """
        self.orig_mqpar = mqpar_in
        self.exp_design = exp_design
        self._root = ET.parse(mqpar_in).getroot()
        self.version = self._root.find('maxQuantVersion').text
        # regex for substitution of certain file name characters
        self.substitution_rx = substitution_rx
        # the first parameter group / fasta entry of the input file serve as
        # templates for all groups / fasta entries created later
        self.pg_node = copy.deepcopy(self._root.find('parameterGroups')[0])
        self._paramGroups = []
        self.fasta_file_node = copy.deepcopy(self._root.find('fastaFiles')[0])
        if yaml:
            self._from_yaml(yaml)

    def __getitem__(self, index):
        """Return paramGroup if indexed with integer, else try to find
        matching Element in XML root and return its text or None.
        """
        try:
            return self._paramGroups[index]
        except TypeError:
            ret = self._root.find(index)
            return ret.text if ret is not None else None

    @staticmethod
    def _check_validity(design, len_infiles):
        """Perform some checks on the exp. design template.

        Args:
            design: dict with the keys 'Name', 'Experiment', 'PTM' and
                'Fraction', each mapping to a list with one entry per row
            len_infiles: number of input files
        Raises:
            Exception: if fewer rows have a matched name than there are
                input files
            ValueError: if a row has no experiment, an invalid PTM value or
                a fraction that is not an integer
        """
        design_len = len(design['Name'])
        # 'Name' can be None, we need at least len_infiles valid entries
        match = sum(1 for name in design['Name'] if name)
        if match < len_infiles:
            raise Exception(' '.join(["Error parsing experimental design template:",
                                      "Found only {} matching entries".format(match),
                                      "for {} input files".format(len_infiles)]))
        for i in range(design_len):
            # +2: rows are counted from 1 and the header line comes first
            msg = "(in line " + str(i + 2) + " of experimental design) "
            if not design['Experiment'][i]:
                raise ValueError(msg + " Experiment is empty.")
            if design['PTM'][i].lower() not in ('true', 'false'):
                raise ValueError(msg + "Defines invalid PTM value, should be 'True' or 'False'.")
            try:
                int(design['Fraction'][i])
            except ValueError as e:
                raise ValueError(msg + str(e)) from e

    def _make_exp_design(self, groups, files):
        """Create a dict representing an experimental design from an
        experimental design template and a list of input files.
        If the experimental design template is None, create a default
        design with one experiment for each input file and no fractions
        for all files.

        Args:
            groups: list of parameter group indices, one per entry of files
            files: list of input file paths
        Returns:
            dict: The (complete) experimental design template
        Raises:
            Exception: on an unknown column in the template header
        """
        design = {s: [] for s in ("Name", "PTM", "Fraction", "Experiment", "paramGroup")}
        if not self.exp_design:
            design["Name"] = files
            design["Fraction"] = ('32767',) * len(files)
            design["Experiment"] = [os.path.split(f)[1] for f in files]
            design["PTM"] = ('False',) * len(files)
            design["paramGroup"] = groups
        else:
            with open(self.exp_design) as design_file:
                index_line = design_file.readline().strip()
                index = []
                for column in index_line.split('\t'):
                    if column in design:
                        index.append(column)
                    else:
                        raise Exception("Invalid column index in experimental design template: {}".format(column))
                for line in design_file:
                    row = line.strip().split('\t')
                    # zip_longest pads short rows with None
                    for e, i in zip_longest(row, index):
                        if i == "Fraction" and not e:
                            e = '32767'
                        elif i == "PTM" and not e:
                            e = 'False'
                        design[i].append(e)
            # map files to names in exp. design template
            names = []
            names_to_paths = {}
            # strip path and extension
            for f in files:
                b = os.path.basename(f)
                basename = b[:-11] if b.lower().endswith('.thermo.raw') else b.rsplit('.', maxsplit=1)[0]
                names_to_paths[basename] = f
            for name in design['Name']:
                # same substitution as in maxquant.xml,
                # when passing the element identifiers
                fname = re.sub(self.substitution_rx, '_', name)
                names.append(names_to_paths.get(fname))
            # replace orig. file names with matching links to galaxy datasets
            design['Name'] = names
            # The rows follow the order of the template, not the order of
            # files, so the group of each row is looked up by its path.
            paths_to_groups = dict(zip(files, groups))
            design['paramGroup'] = [paths_to_groups.get(path) for path in names]
            MQParam._check_validity(design, len(files))
        return design

    def add_infiles(self, infiles):
        """Add a list of raw/mzxml files to the mqpar.xml.
        If experimental design template was specified,
        modify other parameters accordingly.
        The files must be specified as absolute paths
        for maxquant to find them.
        Also add parameter Groups.

        Args:
            infiles: a list of infile lists. first dimension denotes the
                parameter group.
        Returns:
            None
        Raises:
            ValueError: if a required element is missing in the parameter file
        """
        pairs = [(num, f) for num, group_files in enumerate(infiles) for f in group_files]
        # zip(*[]) yields nothing to unpack, so handle "no files" explicitly
        groups, files = zip(*pairs) if pairs else ((), ())
        self._paramGroups = [ParamGroup(self.pg_node) for _ in infiles]
        nodenames = ('filePaths', 'experiments', 'fractions',
                     'ptms', 'paramGroupIndices', 'referenceChannel')
        design = self._make_exp_design(groups, files)
        # Get parent nodes from document; check all of them before clearing
        # any, so a missing node does not leave the document half-emptied
        nodes = dict()
        for nodename in nodenames:
            node = self._root.find(nodename)
            if node is None:
                raise ValueError('Element {} not found in parameter file'
                                 .format(nodename))
            nodes[nodename] = node
        for node in nodes.values():
            node.clear()
        # Append sub-elements to nodes (one per file)
        for i, name in enumerate(design['Name']):
            if name:
                et_add_child(nodes['filePaths'], 'string', name)
                et_add_child(nodes['experiments'], 'string',
                             design['Experiment'][i])
                et_add_child(nodes['fractions'], 'short',
                             design['Fraction'][i])
                et_add_child(nodes['ptms'], 'boolean',
                             design['PTM'][i])
                et_add_child(nodes['paramGroupIndices'], 'int',
                             design['paramGroup'][i])
                et_add_child(nodes['referenceChannel'], 'string', '')

    def translate(self, infiles):
        """Map a list of given infiles to the files specified in the parameter file.
        Needed for the mqpar upload in galaxy. Removes the path and then tries
        to match the files.

        Args:
            infiles: list or tuple of the input
        Returns:
            None
        Raises:
            ValueError: if a file from the parameter file has no matching
                infile
        """
        # kind of a BUG: fails if filename starts with '.'
        names_to_infiles = {}
        for f in infiles:
            # the first infile with a given name wins
            names_to_infiles.setdefault(os.path.basename(f).split('.')[0], f)
        filesNode = self._root.find('filePaths')
        translated = []
        for f in [e.text for e in filesNode]:
            # either windows or posix path
            win = ntpath.basename(f)
            posix = os.path.basename(f)
            basename = win if len(win) < len(posix) else posix
            basename_with_sub = re.sub(self.substitution_rx, '_',
                                       basename.split('.')[0])
            # match infiles to their names in mqpar.xml,
            # ignore files missing in mqpar.xml
            if basename_with_sub not in names_to_infiles:
                raise ValueError("no matching infile found for " + f)
            translated.append(names_to_infiles[basename_with_sub])
        # only rewrite the node once every entry could be matched
        filesNode.clear()
        for path in translated:
            et_add_child(filesNode, 'string', path)

    def add_fasta_files(self, files, parse_rules=None):
        """Add fasta file groups.

        Args:
            files: (list) of fasta file paths
            parse_rules: (dict) the parse rules as (tag, text)-pairs
        Returns:
            None
        Raises:
            ValueError: if a parse rule names an element that the fasta
                file template does not contain
        """
        rules = parse_rules if parse_rules is not None else {}
        fasta_node = self._root.find('fastaFiles')
        fasta_node.clear()
        for f in files:
            entry = copy.deepcopy(self.fasta_file_node)
            entry.find('fastaFilePath').text = f
            for rule, text in rules.items():
                rule_node = entry.find(rule)
                if rule_node is None:
                    raise ValueError('Element {} not found in parameter file'
                                     .format(rule))
                rule_node.text = text
            fasta_node.append(entry)

    def set_simple_param(self, key, value):
        """Set a simple parameter.

        Args:
            key: (string) XML tag of the parameter
            value: the text of the parameter XML node
        Returns:
            None
        Raises:
            ValueError: if the element does not exist
        """
        node = self._root.find(key)
        if node is None:
            raise ValueError('Element {} not found in parameter file'
                             .format(key))
        node.text = str(value)

    def _from_yaml(self, conf):
        """Read a yaml config file.

        Args:
            conf: (string) path to the yaml conf file
        Returns:
            None
        Raises:
            Exception: if the config names no fasta files
        """
        with open(conf) as f:
            conf_dict = yaml.safe_load(f.read())
        paramGroups = conf_dict.pop('paramGroups')
        self.add_infiles([pg.pop('files') for pg in paramGroups])
        for i, pg in enumerate(paramGroups):
            silac = pg.pop('labelMods', False)
            if silac:
                self[i].set_silac(*silac)
            isobaricLabels = pg.pop('isobaricLabels', False)
            if isobaricLabels:
                for label in isobaricLabels:
                    self[i].set_isobaric_label(*label)
            for el in ['fixedModifications', 'variableModifications', 'enzymes']:
                lst = pg.pop(el, None)
                if lst is not None:
                    self[i].set_list_param(el, lst)
            # whatever is left in the group is treated as a simple parameter
            for key in pg:
                self[i].set_simple_param(key, pg[key])
        fastafiles = conf_dict.pop('fastaFiles', False)
        if fastafiles:
            self.add_fasta_files(fastafiles, parse_rules=conf_dict.pop('parseRules', {}))
        else:
            raise Exception('No fasta files provided.')
        # remaining top-level keys are global simple parameters
        for key in conf_dict:
            self.set_simple_param(key, conf_dict[key])

    def write(self, mqpar_out):
        """Write pretty formatted xml parameter file.
        Compose it from global parameters and parameter Groups.

        Args:
            mqpar_out: path of the file to write
        Returns:
            None
        """
        if self._paramGroups:
            pg_node = self._root.find('parameterGroups')
            # drop every group currently in the tree, so that writing twice
            # does not duplicate the parameter groups
            for old_group in list(pg_node):
                pg_node.remove(old_group)
            for group in self._paramGroups:
                pg_node.append(group._root)
        rough_string = ET.tostring(self._root, 'utf-8', short_empty_elements=False)
        reparsed = minidom.parseString(rough_string)
        pretty = reparsed.toprettyxml(indent="\t")
        even_prettier = re.sub(r"\n\s+\n", r"\n", pretty)
        # the XML declaration names no encoding, i.e. readers assume UTF-8
        with open(mqpar_out, 'w', encoding='utf-8') as f:
            print(even_prettier, file=f)