Mercurial > repos > galaxyp > maxquant
comparison mqparam.py @ 4:dcd39bcc7481 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/maxquant commit da342a782ccc391b87fb4fead956b7b3cbd21258"
| author | galaxyp |
|---|---|
| date | Sat, 11 Apr 2020 11:49:19 -0400 |
| parents | 175e062b6a17 |
| children | 2133b0be850a |
comparison
equal
deleted
inserted
replaced
| 3:175e062b6a17 | 4:dcd39bcc7481 |
|---|---|
| 1 """ | 1 """ |
| 2 Create a project-specific MaxQuant parameter file. | 2 Create a project-specific MaxQuant parameter file. |
| 3 | |
| 4 TODO: check validity of parsed experimental design template | |
| 5 add support for parameter groups | |
| 6 add reporter ion MS2 | |
| 7 add label free quantification | |
| 8 don't hardcode parse rules for fasta files | |
| 9 | |
| 10 Author: Damian Glaetzer <d.glaetzer@mailbox.org> | |
| 11 """ | 3 """ |
| 12 | 4 |
| 5 import copy | |
| 13 import ntpath | 6 import ntpath |
| 14 import os | 7 import os |
| 15 import re | 8 import re |
| 9 import yaml | |
| 16 import xml.etree.ElementTree as ET | 10 import xml.etree.ElementTree as ET |
| 17 from itertools import zip_longest | 11 from itertools import zip_longest |
| 18 from xml.dom import minidom | 12 from xml.dom import minidom |
| 13 | |
| 14 | |
| 15 def et_add_child(el, name, text, attrib=None): | |
| 16 "Add a child element to an xml.etree.ElementTree.Element" | |
| 17 child = ET.SubElement(el, name, attrib=attrib if attrib else {}) | |
| 18 child.text = str(text) | |
| 19 return child | |
| 20 | |
| 21 | |
| 22 class ParamGroup: | |
| 23 """Represents one parameter Group | |
| 24 """ | |
| 25 | |
| 26 def __init__(self, root): | |
| 27 """Initialize with its xml.etree.ElementTree root Element. | |
| 28 """ | |
| 29 self._root = copy.deepcopy(root) | |
| 30 | |
| 31 def set_list_param(self, key, vals): | |
| 32 """Set a list parameter. | |
| 33 """ | |
| 34 node = self._root.find(key) | |
| 35 if node is None: | |
| 36 raise ValueError('Element {} not found in parameter file' | |
| 37 .format(key)) | |
| 38 node.clear() | |
| 39 node.tag = key | |
| 40 for e in vals: | |
| 41 et_add_child(node, name='string', text=e) | |
| 42 | |
| 43 def set_simple_param(self, key, value): | |
| 44 """Set a simple parameter. | |
| 45 """ | |
| 46 node = self._root.find(key) | |
| 47 if node is None: | |
| 48 raise ValueError('Element {} not found in parameter file' | |
| 49 .format(key)) | |
| 50 node.text = str(value) | |
| 51 | |
| 52 def set_silac(self, light_labels, medium_labels, heavy_labels): | |
| 53 """Set label modifications. | |
| 54 """ | |
| 55 if medium_labels and not (heavy_labels or light_labels): # medium only with heavy and light | |
| 56 raise Exception("Incorrect SILAC specification. Use medium only together with light and heavy labels.") | |
| 57 multiplicity = 3 if medium_labels else 2 if heavy_labels else 1 | |
| 58 max_label = str(max(len(light_labels) if light_labels else 0, | |
| 59 len(medium_labels) if medium_labels else 0, | |
| 60 len(heavy_labels) if heavy_labels else 0)) | |
| 61 self._root.find('multiplicity').text = str(multiplicity) | |
| 62 self._root.find('maxLabeledAa').text = max_label | |
| 63 node = self._root.find('labelMods') | |
| 64 node[0].text = ';'.join(light_labels) if light_labels else '' | |
| 65 if multiplicity == 3: | |
| 66 et_add_child(node, name='string', text=';'.join(medium_labels)) | |
| 67 if multiplicity > 1: | |
| 68 et_add_child(node, name='string', | |
| 69 text=';'.join(heavy_labels) if heavy_labels else '') | |
| 70 | |
| 71 def set_isobaric_label(self, internalLabel, terminalLabel, | |
| 72 cm2, cm1, cp1, cp2, tmtLike): | |
| 73 """Add isobaric label info. | |
| 74 Args: | |
| 75 internalLabel: string | |
| 76 terminalLabel: string | |
| 77 cm2: (float) correction factor | |
| 78 cm1: (float) correction factor | |
| 79 cp1: (float) correction factor | |
| 80 cp2: (float) correction factor | |
| 81 tmtLike: bool or string | |
| 82 Returns: | |
| 83 None | |
| 84 """ | |
| 85 iso_labels_node = self._root.find('isobaricLabels') | |
| 86 label = et_add_child(iso_labels_node, 'IsobaricLabelInfo', '') | |
| 87 et_add_child(label, 'internalLabel', internalLabel) | |
| 88 et_add_child(label, 'terminalLabel', terminalLabel) | |
| 89 for num, factor in (('M2', cm2), ('M1', cm1), ('P1', cp1), ('P2', cp2)): | |
| 90 et_add_child(label, 'correctionFactor' + num, | |
| 91 str(float(factor) if factor % 1 else int(factor))) | |
| 92 et_add_child(label, 'tmtLike', str(tmtLike)) | |
| 19 | 93 |
| 20 | 94 |
| 21 class MQParam: | 95 class MQParam: |
| 22 """Represents a mqpar.xml and provides methods to modify | 96 """Represents a mqpar.xml and provides methods to modify |
| 23 some of its parameters. | 97 some of its parameters. |
| 24 """ | 98 """ |
| 25 | 99 |
| 26 fasta_template = """<FastaFileInfo> | 100 def __init__(self, mqpar_in, exp_design=None, yaml=None, substitution_rx=r'[^\s\S]'): # no sub by default |
| 27 <fastaFilePath></fastaFilePath> | |
| 28 <identifierParseRule></identifierParseRule> | |
| 29 <descriptionParseRule></descriptionParseRule> | |
| 30 <taxonomyParseRule></taxonomyParseRule> | |
| 31 <variationParseRule></variationParseRule> | |
| 32 <modificationParseRule></modificationParseRule> | |
| 33 <taxonomyId></taxonomyId> | |
| 34 </FastaFileInfo>""" | |
| 35 | |
| 36 def __init__(self, mqpar_out, mqpar_in, exp_design, | |
| 37 substitution_rx=r'[^\s\S]'): # no sub by default | |
| 38 """Initialize MQParam class. mqpar_in can either be a template | 101 """Initialize MQParam class. mqpar_in can either be a template |
| 39 or an already suitable mqpar file. | 102 or an already suitable mqpar file. |
| 40 >>> t = MQParam("test", './test-data/template.xml', None) | 103 Args: |
| 41 >>> t.root.tag | 104 mqpar_in: a template parameter file |
| 42 'MaxQuantParams' | 105 exp_design: an experimental design template (see MaxQuant documentation), |
| 43 >>> (t.root.find('maxQuantVersion')).text | 106 can be None |
| 44 '1.6.3.4' | 107 substitution_rx: a regular expression for replacements in the file names. |
| 45 """ | 108 It is applied before comparing input file names (e.g. from the exp. design) |
| 46 | 109 """ |
| 47 self.orig_mqpar = mqpar_in | 110 self.orig_mqpar = mqpar_in |
| 48 self.exp_design = exp_design | 111 self.exp_design = exp_design |
| 49 self.mqpar_out = mqpar_out | 112 self._root = ET.parse(mqpar_in).getroot() |
| 50 self.root = ET.parse(mqpar_in).getroot() | 113 self.version = self._root.find('maxQuantVersion').text |
| 51 self.version = self.root.find('maxQuantVersion').text | |
| 52 # regex for substitution of certain file name characters | 114 # regex for substitution of certain file name characters |
| 53 self.substitution_rx = substitution_rx | 115 self.substitution_rx = substitution_rx |
| 54 | 116 self.pg_node = copy.deepcopy(self._root.find('parameterGroups')[0]) |
| 55 @staticmethod | 117 self._paramGroups = [] |
| 56 def _add_child(el, name, text, attrib=None): | 118 self.fasta_file_node = copy.deepcopy(self._root.find('fastaFiles')[0]) |
| 57 """Add a child element to an element. | 119 if yaml: |
| 58 | 120 self._from_yaml(yaml) |
| 59 >>> t = MQParam("test", './test-data/template.xml', None) | 121 |
| 60 >>> MQParam._add_child(t.root, "test", "test") | 122 def __getitem__(self, index): |
| 61 >>> t.root.find('test').text == "test" | 123 """Return paramGroup if indexed with integer, else try to find |
| 62 True | 124 matching Element in XML root and return its text or None. |
| 63 """ | 125 """ |
| 64 | 126 try: |
| 65 child = ET.SubElement(el, name, attrib=attrib if attrib else {}) | 127 return self._paramGroups[index] |
| 66 child.text = str(text) | 128 except TypeError: |
| 129 ret = self._root.find(index) | |
| 130 return ret.text if ret is not None else None | |
| 67 | 131 |
| 68 @staticmethod | 132 @staticmethod |
| 69 def _check_validity(design, len_infiles): | 133 def _check_validity(design, len_infiles): |
| 70 "Perform some checks on the exp. design template" | 134 """Perform some checks on the exp. design template""" |
| 71 design_len = len(design['Name']) | 135 design_len = len(design['Name']) |
| 136 # 'Name' can be None, we need at least len_infiles valid entries | |
| 72 match = len(list(filter(lambda x: bool(x), design['Name']))) | 137 match = len(list(filter(lambda x: bool(x), design['Name']))) |
| 73 if match < len_infiles: | 138 if match < len_infiles: |
| 74 raise Exception("Error parsing experimental design template: " + | 139 raise Exception(' '.join(["Error parsing experimental design template:", |
| 75 "Found only {} matching entries ".format(design_len) + | 140 "Found only {} matching entries".format(match), |
| 76 "for {} input files".format(len_infiles)) | 141 "for {} input files".format(len_infiles)])) |
| 77 for i in range(0, design_len): | 142 for i in range(0, design_len): |
| 78 msg = "Error in line " + str(i + 2) + " of experimental design: " | 143 msg = "(in line " + str(i + 2) + " of experimental design) " |
| 79 if not (design['Name'][i] and design['Experiment'][i]): | 144 if not design['Experiment'][i]: |
| 80 raise Exception(msg + " Name or Experiment is empty.") | 145 raise ValueError(msg + " Experiment is empty.") |
| 81 if design['PTM'][i].lower() not in ('true', 'false'): | 146 if design['PTM'][i].lower() not in ('true', 'false'): |
| 82 raise Exception(msg + "Defines invalid PTM value, " + | 147 raise ValueError(msg + "Defines invalid PTM value, should be 'True' or 'False'.") |
| 83 "should be 'True' or 'False'.") | |
| 84 try: | 148 try: |
| 85 int(design['Fraction'][i]) | 149 int(design['Fraction'][i]) |
| 86 except ValueError as e: | 150 except ValueError as e: |
| 87 raise Exception(msg + str(e)) | 151 raise ValueError(msg + str(e)) |
| 88 | 152 |
| 89 def _make_exp_design(self, infiles): | 153 def _make_exp_design(self, groups, files): |
| 90 """Create a dict representing an experimental design from | 154 """Create a dict representing an experimental design from an |
| 91 an experimental design template and a list of input files. | 155 experimental design template and a list of input files. |
| 92 If the experimental design template is None, create a default | 156 If the experimental design template is None, create a default |
| 93 design with one experiment for each input file, no fractions and | 157 design with one experiment for each input file and no fractions |
| 94 parameter group 0 for all files. | 158 for all files. |
| 95 >>> t2 = MQParam("test", './test-data/template.xml', \ | 159 Args: |
| 96 './test-data/two/exp_design_template.txt') | 160 files: list of input file paths |
| 97 >>> design = t2._make_exp_design(['./test-data/BSA_min_21.mzXML', \ | 161 groups: list of parameter group indices |
| 98 './test-data/BSA_min_22.mzXML']) | 162 Returns: |
| 99 >>> design['Name'] | 163 dict: The (complete) experimental design template |
| 100 ['./test-data/BSA_min_21.mzXML', './test-data/BSA_min_22.mzXML'] | 164 """ |
| 101 >>> design['Fraction'] | 165 design = {s: [] for s in ("Name", "PTM", "Fraction", "Experiment", "paramGroup")} |
| 102 ['1', '2'] | |
| 103 """ | |
| 104 | |
| 105 design = {s: [] for s in ("Name", "PTM", "Fraction", "Experiment")} | |
| 106 if not self.exp_design: | 166 if not self.exp_design: |
| 107 design["Name"] = infiles | 167 design["Name"] = files |
| 108 design["Fraction"] = ('32767',) * len(infiles) | 168 design["Fraction"] = ('32767',) * len(files) |
| 109 design["Experiment"] = [os.path.split(f)[1] for f in infiles] | 169 design["Experiment"] = [os.path.split(f)[1] for f in files] |
| 110 design["PTM"] = ('False',) * len(infiles) | 170 design["PTM"] = ('False',) * len(files) |
| 171 design["paramGroup"] = groups | |
| 111 else: | 172 else: |
| 112 with open(self.exp_design) as design_file: | 173 with open(self.exp_design) as design_file: |
| 113 index_line = design_file.readline().strip() | 174 index_line = design_file.readline().strip() |
| 114 index = [] | 175 index = [] |
| 115 for i in index_line.split('\t'): | 176 for i in index_line.split('\t'): |
| 116 if i in design: | 177 if i in design: |
| 117 index.append(i) | 178 index.append(i) |
| 118 else: | 179 else: |
| 119 raise Exception("Invalid column index in experimental" | 180 raise Exception("Invalid column index in experimental design template: {}".format(i)) |
| 120 + " design template: {}".format(i)) | |
| 121 | |
| 122 for line in design_file: | 181 for line in design_file: |
| 123 row = line.strip().split('\t') | 182 row = line.strip().split('\t') |
| 124 for e, i in zip_longest(row, index): | 183 for e, i in zip_longest(row, index): |
| 125 if i == "Fraction" and e == '': | 184 if i == "Fraction" and not e: |
| 126 e = 32767 | 185 e = '32767' |
| 127 elif i == "PTM" and not e: | 186 elif i == "PTM" and not e: |
| 128 e = 'False' | 187 e = 'False' |
| 129 design[i].append(e) | 188 design[i].append(e) |
| 130 | 189 # map files to names in exp. design template |
| 131 # map infiles to names in exp. design template | |
| 132 names = [] | 190 names = [] |
| 133 names_to_paths = {} | 191 names_to_paths = {} |
| 134 # strip path and extension | 192 # strip path and extension |
| 135 for f in infiles: | 193 for f in files: |
| 136 b = os.path.basename(f) | 194 b = os.path.basename(f) |
| 137 basename = b[:-6] if b.endswith('.mzXML') else b[:-11] | 195 basename = b[:-11] if b.lower().endswith('.thermo.raw') else b.rsplit('.', maxsplit=1)[0] |
| 138 names_to_paths[basename] = f | 196 names_to_paths[basename] = f |
| 139 for name in design['Name']: | 197 for name in design['Name']: |
| 140 # same substitution as in maxquant.xml, | 198 # same substitution as in maxquant.xml, |
| 141 # when passing the element identifiers | 199 # when passing the element identifiers |
| 142 fname = re.sub(self.substitution_rx, '_', name) | 200 fname = re.sub(self.substitution_rx, '_', name) |
| 143 names.append(names_to_paths[fname] if fname in names_to_paths | 201 names.append(names_to_paths[fname] if fname in names_to_paths |
| 144 else None) | 202 else None) |
| 145 # replace orig. file names with matching links to galaxy datasets | 203 # replace orig. file names with matching links to galaxy datasets |
| 146 design['Name'] = names | 204 design['Name'] = names |
| 147 MQParam._check_validity(design, len(infiles)) | 205 design['paramGroup'] = groups |
| 148 | 206 MQParam._check_validity(design, len(files)) |
| 149 return design | 207 return design |
| 150 | 208 |
| 151 def add_infiles(self, infiles, interactive): | 209 def add_infiles(self, infiles): |
| 152 """Add a list of raw/mzxml files to the mqpar.xml. | 210 """Add a list of raw/mzxml files to the mqpar.xml. |
| 153 If experimental design template was specified, | 211 If experimental design template was specified, |
| 154 modify other parameters accordingly. | 212 modify other parameters accordingly. |
| 155 The files must be specified as absolute paths | 213 The files must be specified as absolute paths |
| 156 for maxquant to find them. | 214 for maxquant to find them. |
| 157 >>> t1 = MQParam("test", './test-data/template.xml', None) | 215 Also add parameter Groups. |
| 158 >>> t1.add_infiles(('test1', ), True) | 216 Args: |
| 159 >>> t1.root.find("filePaths")[0].text | 217 infiles: a list of infile lists. first dimension denotes the |
| 160 'test1' | 218 parameter group. |
| 161 >>> t1.root.find("fractions")[0].text | 219 Returns: |
| 162 '32767' | 220 None |
| 163 >>> len(t1.root.find("fractions")) | 221 """ |
| 164 1 | 222 groups, files = zip(*[(num, f) for num, l in enumerate(infiles) for f in l]) |
| 165 >>> t2 = MQParam("test", './test-data/template.xml', \ | 223 self._paramGroups = [ParamGroup(self.pg_node) for i in range(len(infiles))] |
| 166 './test-data/exp_design_test.txt') | 224 nodenames = ('filePaths', 'experiments', 'fractions', |
| 167 >>> t2.add_infiles(('test-data/QEplus021874.thermo.raw', \ | 225 'ptms', 'paramGroupIndices', 'referenceChannel') |
| 168 'test-data/QEplus021876.thermo.raw'), True) | 226 design = self._make_exp_design(groups, files) |
| 169 >>> len(t2.root.find("filePaths")) | |
| 170 2 | |
| 171 >>> t2.root.find("filePaths")[1].text | |
| 172 'test-data/QEplus021876.thermo.raw' | |
| 173 >>> t2.root.find("experiments")[1].text | |
| 174 '2' | |
| 175 >>> t2.root.find("fractions")[0].text | |
| 176 '3' | |
| 177 """ | |
| 178 | |
| 179 # Create experimental design for interactive mode. | |
| 180 # In non-interactive mode only filepaths are modified, but | |
| 181 # their order from the original mqpar must be kept. | |
| 182 if interactive: | |
| 183 index = range(len(infiles)) | |
| 184 nodenames = ('filePaths', 'experiments', 'fractions', | |
| 185 'ptms', 'paramGroupIndices', 'referenceChannel') | |
| 186 design = self._make_exp_design(infiles) | |
| 187 else: | |
| 188 index = [-1] * len(infiles) | |
| 189 # kind of a BUG: fails if filename starts with '.' | |
| 190 infilenames = [os.path.basename(f).split('.')[0] for f in infiles] | |
| 191 i = 0 | |
| 192 for child in self.root.find('filePaths'): | |
| 193 # either windows or posix path | |
| 194 win = ntpath.basename(child.text) | |
| 195 posix = os.path.basename(child.text) | |
| 196 basename = win if len(win) < len(posix) else posix | |
| 197 basename_with_sub = re.sub(self.substitution_rx, '_', | |
| 198 basename.split('.')[0]) | |
| 199 # match infiles to their names in mqpar.xml, | |
| 200 # ignore files missing in mqpar.xml | |
| 201 if basename_with_sub in infilenames: | |
| 202 index[i] = infilenames.index(basename_with_sub) | |
| 203 i += 1 | |
| 204 else: | |
| 205 raise ValueError("no matching infile found for " | |
| 206 + child.text) | |
| 207 | |
| 208 nodenames = ('filePaths', ) | |
| 209 design = {'Name': infiles} | |
| 210 | |
| 211 # Get parent nodes from document | 227 # Get parent nodes from document |
| 212 nodes = dict() | 228 nodes = dict() |
| 213 for nodename in nodenames: | 229 for nodename in nodenames: |
| 214 node = self.root.find(nodename) | 230 node = self._root.find(nodename) |
| 215 if node is None: | 231 if node is None: |
| 216 raise ValueError('Element {} not found in parameter file' | 232 raise ValueError('Element {} not found in parameter file' |
| 217 .format(nodename)) | 233 .format(nodename)) |
| 218 nodes[nodename] = node | 234 nodes[nodename] = node |
| 219 node.clear() | 235 node.clear() |
| 220 node.tag = nodename | 236 node.tag = nodename |
| 221 | |
| 222 # Append sub-elements to nodes (one per file) | 237 # Append sub-elements to nodes (one per file) |
| 223 for i in index: | 238 for i, name in enumerate(design['Name']): |
| 224 if i > -1 and design['Name'][i]: | 239 if name: |
| 225 MQParam._add_child(nodes['filePaths'], 'string', | 240 et_add_child(nodes['filePaths'], 'string', name) |
| 226 design['Name'][i]) | 241 et_add_child(nodes['experiments'], 'string', |
| 227 if interactive: | 242 design['Experiment'][i]) |
| 228 MQParam._add_child(nodes['experiments'], 'string', | 243 et_add_child(nodes['fractions'], 'short', |
| 229 design['Experiment'][i]) | 244 design['Fraction'][i]) |
| 230 MQParam._add_child(nodes['fractions'], 'short', | 245 et_add_child(nodes['ptms'], 'boolean', |
| 231 design['Fraction'][i]) | 246 design['PTM'][i]) |
| 232 MQParam._add_child(nodes['ptms'], 'boolean', | 247 et_add_child(nodes['paramGroupIndices'], 'int', |
| 233 design['PTM'][i]) | 248 design['paramGroup'][i]) |
| 234 MQParam._add_child(nodes['paramGroupIndices'], 'int', 0) | 249 et_add_child(nodes['referenceChannel'], 'string', '') |
| 235 MQParam._add_child(nodes['referenceChannel'], 'string', '') | 250 |
| 236 | 251 def translate(self, infiles): |
| 237 def add_fasta_files(self, files, | 252 """Map a list of given infiles to the files specified in the parameter file. |
| 238 identifier=r'>([^\s]*)', | 253 Needed for the mqpar upload in galaxy. Removes the path and then tries |
| 239 description=r'>(.*)'): | 254 to match the files. |
| 255 Args: | |
| 256 infiles: list or tuple of the input files | |
| 257 Returns: | |
| 258 None | |
| 259 """ | |
| 260 # kind of a BUG: fails if filename starts with '.' | |
| 261 infilenames = [os.path.basename(f).split('.')[0] for f in infiles] | |
| 262 filesNode = self._root.find('filePaths') | |
| 263 files_from_mqpar = [e.text for e in filesNode] | |
| 264 filesNode.clear() | |
| 265 filesNode.tag = 'filePaths' | |
| 266 for f in files_from_mqpar: | |
| 267 # either windows or posix path | |
| 268 win = ntpath.basename(f) | |
| 269 posix = os.path.basename(f) | |
| 270 basename = win if len(win) < len(posix) else posix | |
| 271 basename_with_sub = re.sub(self.substitution_rx, '_', | |
| 272 basename.split('.')[0]) | |
| 273 # match infiles to their names in mqpar.xml, | |
| 274 # ignore files missing in mqpar.xml | |
| 275 if basename_with_sub in infilenames: | |
| 276 i = infilenames.index(basename_with_sub) | |
| 277 et_add_child(filesNode, 'string', infiles[i]) | |
| 278 else: | |
| 279 raise ValueError("no matching infile found for " + f) | |
| 280 | |
| 281 def add_fasta_files(self, files, parse_rules={}): | |
| 240 """Add fasta file groups. | 282 """Add fasta file groups. |
| 241 >>> t = MQParam('test', './test-data/template.xml', None) | 283 Args: |
| 242 >>> t.add_fasta_files(('test1', 'test2')) | 284 files: (list) of fasta file paths |
| 243 >>> len(t.root.find('fastaFiles')) | 285 parse_rules: (dict) the parse rules as (tag, text)-pairs |
| 244 2 | 286 Returns: |
| 245 >>> t.root.find('fastaFiles')[0].find("fastaFilePath").text | 287 None |
| 246 'test1' | 288 """ |
| 247 """ | 289 fasta_node = self._root.find('fastaFiles') |
| 248 fasta_node = self.root.find("fastaFiles") | |
| 249 fasta_node.clear() | 290 fasta_node.clear() |
| 250 fasta_node.tag = "fastaFiles" | 291 for f in files: |
| 251 | 292 fasta_node.append(copy.deepcopy(self.fasta_file_node)) |
| 252 for index in range(len(files)): | 293 fasta_node[-1].find('fastaFilePath').text = f |
| 253 filepath = '<fastaFilePath>' + files[index] | 294 for rule in parse_rules: |
| 254 identifier = identifier.replace('<', '<') | 295 fasta_node[-1].find(rule).text = parse_rules[rule] |
| 255 description = description.replace('<', '<') | |
| 256 fasta = self.fasta_template.replace('<fastaFilePath>', filepath) | |
| 257 fasta = fasta.replace('<identifierParseRule>', | |
| 258 '<identifierParseRule>' + identifier) | |
| 259 fasta = fasta.replace('<descriptionParseRule>', | |
| 260 '<descriptionParseRule>' + description) | |
| 261 ff_node = self.root.find('.fastaFiles') | |
| 262 fastaentry = ET.fromstring(fasta) | |
| 263 ff_node.append(fastaentry) | |
| 264 | 296 |
| 265 def set_simple_param(self, key, value): | 297 def set_simple_param(self, key, value): |
| 266 """Set a simple parameter. | 298 """Set a simple parameter. |
| 267 >>> t = MQParam(None, './test-data/template.xml', None) | 299 Args: |
| 268 >>> t.set_simple_param('min_unique_pep', 4) | 300 key: (string) XML tag of the parameter |
| 269 >>> t.root.find('.minUniquePeptides').text | 301 value: the text of the parameter XML node |
| 270 '4' | 302 Returns: |
| 271 """ | 303 None |
| 272 # map simple params to their node in the xml tree | 304 """ |
| 273 simple_params = {'missed_cleavages': | 305 node = self._root.find(key) |
| 274 '.parameterGroups/parameterGroup/maxMissedCleavages', | 306 if node is None: |
| 275 'min_unique_pep': '.minUniquePeptides', | 307 raise ValueError('Element {} not found in parameter file' |
| 276 'num_threads': 'numThreads', | 308 .format(key)) |
| 277 'calc_peak_properties': '.calcPeakProperties', | 309 node.text = str(value) |
| 278 'write_mztab': 'writeMzTab', | 310 |
| 279 'min_peptide_len': 'minPepLen', | 311 def _from_yaml(self, conf): |
| 280 'max_peptide_mass': 'maxPeptideMass', | 312 """Read a yaml config file. |
| 281 'match_between_runs': 'matchBetweenRuns', | 313 Args: |
| 282 'ibaq': 'ibaq', # lfq global options | 314 conf: (string) path to the yaml conf file |
| 283 'ibaq_log_fit': 'ibaqLogFit', | 315 Returns: |
| 284 'separate_lfq': 'separateLfq', | 316 None |
| 285 'lfq_stabilize_large_ratios': | 317 """ |
| 286 'lfqStabilizeLargeRatios', | 318 with open(conf) as f: |
| 287 'lfq_require_msms': 'lfqRequireMsms', | 319 conf_dict = yaml.safe_load(f.read()) |
| 288 'advanced_site_intensities': | 320 paramGroups = conf_dict.pop('paramGroups') |
| 289 'advancedSiteIntensities', | 321 self.add_infiles([pg.pop('files') for pg in paramGroups]) |
| 290 'lfq_mode': # lfq param group options | 322 for i, pg in enumerate(paramGroups): |
| 291 '.parameterGroups/parameterGroup/lfqMode', | 323 silac = pg.pop('labelMods', False) |
| 292 'lfq_skip_norm': | 324 if silac: |
| 293 '.parameterGroups/parameterGroup/lfqSkipNorm', | 325 self[i].set_silac(*silac) |
| 294 'lfq_min_edges_per_node': | 326 isobaricLabels = pg.pop('isobaricLabels', False) |
| 295 '.parameterGroups/parameterGroup/lfqMinEdgesPerNode', | 327 if isobaricLabels: |
| 296 'lfq_avg_edges_per_node': | 328 for l in isobaricLabels: |
| 297 '.parameterGroups/parameterGroup/lfqAvEdgesPerNode', | 329 self[i].set_isobaric_label(*l) |
| 298 'lfq_min_ratio_count': | 330 for el in ['fixedModifications', 'variableModifications', 'enzymes']: |
| 299 '.parameterGroups/parameterGroup/lfqMinRatioCount'} | 331 lst = pg.pop(el, None) |
| 300 | 332 if lst is not None: |
| 301 if key in simple_params: | 333 self[i].set_list_param(el, lst) |
| 302 node = self.root.find(simple_params[key]) | 334 for key in pg: |
| 303 if node is None: | 335 self[i].set_simple_param(key, pg[key]) |
| 304 raise ValueError('Element {} not found in parameter file' | 336 fastafiles = conf_dict.pop('fastaFiles', False) |
| 305 .format(simple_params[key])) | 337 if fastafiles: |
| 306 node.text = str(value) | 338 self.add_fasta_files(fastafiles, parse_rules=conf_dict.pop('parseRules', {})) |
| 307 else: | 339 else: |
| 308 raise ValueError("Parameter not found.") | 340 raise Exception('No fasta files provided.') |
| 309 | 341 for key in conf_dict: |
| 310 def set_silac(self, light_mods, medium_mods, heavy_mods): | 342 self.set_simple_param(key, conf_dict[key]) |
| 311 """Set label modifications. | 343 |
| 312 >>> t1 = MQParam('test', './test-data/template.xml', None) | 344 def write(self, mqpar_out): |
| 313 >>> t1.set_silac(None, ('test1', 'test2'), None) | 345 """Write pretty formatted xml parameter file. |
| 314 >>> t1.root.find('.parameterGroups/parameterGroup/maxLabeledAa').text | 346 Compose it from global parameters and parameter Groups. |
| 315 '2' | 347 """ |
| 316 >>> t1.root.find('.parameterGroups/parameterGroup/multiplicity').text | 348 if self._paramGroups: |
| 317 '3' | 349 pg_node = self._root.find('parameterGroups') |
| 318 >>> t1.root.find('.parameterGroups/parameterGroup/labelMods')[1].text | 350 pg_node.remove(pg_node[0]) |
| 319 'test1;test2' | 351 for group in self._paramGroups: |
| 320 >>> t1.root.find('.parameterGroups/parameterGroup/labelMods')[2].text | 352 pg_node.append(group._root) |
| 321 '' | 353 rough_string = ET.tostring(self._root, 'utf-8', short_empty_elements=False) |
| 322 """ | |
| 323 multiplicity = 3 if medium_mods else 2 if heavy_mods else 1 | |
| 324 max_label = str(max(len(light_mods) if light_mods else 0, | |
| 325 len(medium_mods) if medium_mods else 0, | |
| 326 len(heavy_mods) if heavy_mods else 0)) | |
| 327 multiplicity_node = self.root.find('.parameterGroups/parameterGroup/' | |
| 328 + 'multiplicity') | |
| 329 multiplicity_node.text = str(multiplicity) | |
| 330 max_label_node = self.root.find('.parameterGroups/parameterGroup/' | |
| 331 + 'maxLabeledAa') | |
| 332 max_label_node.text = max_label | |
| 333 | |
| 334 node = self.root.find('.parameterGroups/parameterGroup/labelMods') | |
| 335 node[0].text = ';'.join(light_mods) if light_mods else '' | |
| 336 if multiplicity == 3: | |
| 337 MQParam._add_child(node, name='string', text=';'.join(medium_mods)) | |
| 338 if multiplicity > 1: | |
| 339 MQParam._add_child(node, name='string', | |
| 340 text=';'.join(heavy_mods) if heavy_mods else '') | |
| 341 | |
| 342 def set_list_params(self, key, vals): | |
| 343 """Set a list parameter. | |
| 344 >>> t = MQParam(None, './test-data/template.xml', None) | |
| 345 >>> t.set_list_params('proteases', ('test 1', 'test 2')) | |
| 346 >>> len(t.root.find('.parameterGroups/parameterGroup/enzymes')) | |
| 347 2 | |
| 348 >>> t.set_list_params('var_mods', ('Oxidation (M)', )) | |
| 349 >>> var_mods = '.parameterGroups/parameterGroup/variableModifications' | |
| 350 >>> t.root.find(var_mods)[0].text | |
| 351 'Oxidation (M)' | |
| 352 """ | |
| 353 | |
| 354 params = {'var_mods': | |
| 355 '.parameterGroups/parameterGroup/variableModifications', | |
| 356 'fixed_mods': | |
| 357 '.parameterGroups/parameterGroup/fixedModifications', | |
| 358 'proteases': | |
| 359 '.parameterGroups/parameterGroup/enzymes'} | |
| 360 | |
| 361 if key in params: | |
| 362 node = self.root.find(params[key]) | |
| 363 if node is None: | |
| 364 raise ValueError('Element {} not found in parameter file' | |
| 365 .format(params[key])) | |
| 366 node.clear() | |
| 367 node.tag = params[key].split('/')[-1] | |
| 368 for e in vals: | |
| 369 MQParam._add_child(node, name='string', text=e) | |
| 370 else: | |
| 371 raise ValueError("Parameter {} not found.".format(key)) | |
| 372 | |
| 373 def write(self): | |
| 374 rough_string = ET.tostring(self.root, 'utf-8', short_empty_elements=False) | |
| 375 reparsed = minidom.parseString(rough_string) | 354 reparsed = minidom.parseString(rough_string) |
| 376 pretty = reparsed.toprettyxml(indent="\t") | 355 pretty = reparsed.toprettyxml(indent="\t") |
| 377 even_prettier = re.sub(r"\n\s+\n", r"\n", pretty) | 356 even_prettier = re.sub(r"\n\s+\n", r"\n", pretty) |
| 378 with open(self.mqpar_out, 'w') as f: | 357 with open(mqpar_out, 'w') as f: |
| 379 print(even_prettier, file=f) | 358 print(even_prettier, file=f) |
