Mercurial > repos > galaxyp > maxquant
comparison maxquant_wrapper.py @ 0:d4b6c9eae635 draft
Initial commit.
| author | galaxyp |
|---|---|
| date | Fri, 10 May 2013 17:22:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d4b6c9eae635 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import optparse | |
| 3 import os | |
| 4 import shutil | |
| 5 import sys | |
| 6 import tempfile | |
| 7 import subprocess | |
| 8 import logging | |
| 9 from string import Template | |
| 10 from xml.sax.saxutils import escape | |
| 11 import xml.etree.ElementTree as ET | |
| 12 | |
log = logging.getLogger(__name__)

DEBUG = True


def _reserve_temp_path(directory, suffix):
    """Create an empty, uniquely named file in ``directory`` and return its path.

    The file is created with ``delete=False`` and closed immediately, so the
    name stays reserved on disk until the wrapper reopens it for writing.
    Taking ``.name`` from a throw-away ``NamedTemporaryFile`` instead removes
    the file as soon as the object is collected, which leaves the name free
    for any other process to claim in the meantime.
    """
    handle = tempfile.NamedTemporaryFile(dir=directory, suffix=suffix, delete=False)
    handle.close()
    return handle.name


# Files that capture the child process' stderr/stdout; see execute().
working_directory = os.getcwd()
tmp_stderr_name = _reserve_temp_path(working_directory, '.stderr')
tmp_stdout_name = _reserve_temp_path(working_directory, '.stdout')
| 20 | |
| 21 | |
def stop_err(msg):
    """Write ``msg`` to standard error and terminate the process.

    The process exits with status 1 so that callers inspecting the exit code
    see the failure; a bare ``sys.exit()`` would report success (status 0).
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
| 25 | |
| 26 | |
def read_stderr(path=None):
    """Return the text captured in the stderr log file.

    ``path`` defaults to the module-level ``tmp_stderr_name``.  An empty
    string is returned when the file does not exist.

    The file is read in 1 MiB chunks until a read returns no data.  Stopping
    on the empty chunk (rather than on the accumulated length not being a
    multiple of the buffer size) is what guarantees termination for files
    whose size is an exact multiple of the buffer size.
    """
    if path is None:
        path = tmp_stderr_name
    if not os.path.exists(path):
        return ''
    buffsize = 1048576
    chunks = []
    with open(path, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(buffsize)
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # Best effort: return whatever was read before the failure.
            pass
    data = b''.join(chunks)
    # On Python 3 the binary read yields bytes; hand text back to callers
    # that interpolate the result into messages.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data
| 40 | |
| 41 | |
def execute(command, stdin=None):
    """Run ``command`` through the shell, capturing its output in log files.

    stderr and stdout of the child are redirected to ``tmp_stderr_name`` and
    ``tmp_stdout_name``.  Whatever happens, up to 64000 characters of each
    log are echoed to this process' stdout afterwards.

    Raises:
        Exception: if the command exits with a non-zero status; the message
            includes the captured stderr.

    NOTE(review): ``shell=True`` means ``command`` is interpreted by the
    shell, so callers must not build it from untrusted text.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
                returncode = proc.wait()
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        # Echo the logs without leaking handles, and without masking the
        # original error if a log file could never be created.
        for log_name in (tmp_stderr_name, tmp_stdout_name):
            if os.path.exists(log_name):
                with open(log_name, "r") as log_file:
                    print(log_file.read(64000))
| 53 | |
| 54 | |
def delete_file(path):
    """Remove the file at ``path``, ignoring filesystem errors.

    A missing file or a failed removal is silently tolerated (best effort).
    Only ``OSError`` is suppressed, so ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.
    """
    try:
        os.remove(path)
    except OSError:
        pass
| 61 | |
| 62 | |
def delete_directory(directory):
    """Recursively remove ``directory`` if it exists, ignoring removal errors.

    ``ignore_errors=True`` keeps the best-effort behaviour without a bare
    ``except`` that would also swallow ``KeyboardInterrupt``/``SystemExit``.
    """
    if os.path.exists(directory):
        shutil.rmtree(directory, ignore_errors=True)
| 69 | |
| 70 | |
def symlink(source, link_name):
    """Make ``link_name`` refer to ``source``.

    On Windows a symbolic link is attempted through pywin32 and, if that
    fails for any reason (module missing, insufficient privilege, ...), the
    file is copied instead.  Elsewhere ``os.symlink`` is used directly.
    """
    import platform
    if platform.system() == 'Windows':
        try:
            import win32file
            # CreateSymbolicLink takes the link name first, then the target.
            # The directory flag (1) must only be set for directory targets.
            flags = 1 if os.path.isdir(source) else 0
            win32file.CreateSymbolicLink(link_name, source, flags)
        except Exception:
            shutil.copy(source, link_name)
    else:
        os.symlink(source, link_name)
| 81 | |
| 82 | |
def copy_to_working_directory(data_file, relative_path):
    """Copy ``data_file`` to ``relative_path`` and return ``relative_path``.

    The copy is skipped when both arguments resolve to the same absolute
    path.
    """
    already_in_place = os.path.abspath(data_file) == os.path.abspath(relative_path)
    if not already_in_place:
        shutil.copy(data_file, relative_path)
    return relative_path
| 87 | |
| 88 | |
def __main__():
    """Script entry point: delegate to run_script()."""
    run_script()
| 91 | |
| 92 | |
| 93 ## Lock File Stuff | |
| 94 ## http://www.evanfosmark.com/2009/01/cross-platform-file-locking-support-in-python/ | |
| 95 import os | |
| 96 import time | |
| 97 import errno | |
| 98 | |
| 99 | |
class FileLockException(Exception):
    """Raised when a FileLock cannot be acquired before its timeout."""
    pass


class FileLock(object):
    """Lock-file based mutual exclusion usable as a context manager.

    The lock is represented by a file created with ``O_CREAT | O_EXCL``, so
    neither msvcrt nor fcntl is needed.
    """

    def __init__(self, file_name, timeout=10, delay=.05):
        """Remember what to lock and how long to keep trying.

        ``file_name`` names the resource; the lock file is ``<file_name>.lock``
        joined onto the current working directory.  ``timeout`` is the maximum
        number of seconds to wait and ``delay`` the pause between attempts.
        """
        self.is_locked = False
        self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name)
        self.file_name = file_name
        self.timeout = timeout
        self.delay = delay

    def acquire(self):
        """Take the lock, retrying every ``delay`` seconds.

        Raises FileLockException once ``timeout`` seconds have elapsed, and
        re-raises any OSError other than "file exists".
        """
        began = time.time()
        open_flags = os.O_CREAT | os.O_EXCL | os.O_RDWR
        acquired = False
        while not acquired:
            try:
                self.fd = os.open(self.lockfile, open_flags)
                acquired = True
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise
                if (time.time() - began) >= self.timeout:
                    raise FileLockException("Timeout occured.")
                time.sleep(self.delay)
        self.is_locked = True

    def release(self):
        """Drop the lock by closing and deleting the lock file.

        Does nothing when the lock is not currently held.
        """
        if not self.is_locked:
            return
        os.close(self.fd)
        os.unlink(self.lockfile)
        self.is_locked = False

    def __enter__(self):
        """Acquire the lock on entering a ``with`` block (if not yet held)."""
        if not self.is_locked:
            self.acquire()
        return self

    def __exit__(self, type, value, traceback):
        """Release the lock on leaving a ``with`` block (if it is held)."""
        if self.is_locked:
            self.release()

    def __del__(self):
        """Release on garbage collection so no lock file is left behind."""
        self.release()
| 169 | |
# string.Template source for the MaxQuant parameter file (mqpar.xml).
# Every $placeholder is filled in by run_script() from the property dict built
# by get_properties()/set_group_params().  <topxWindow> is driven by the
# --topx_window option (default "100") instead of being hard-coded.
TEMPLATE = """<?xml version="1.0" encoding="utf-8"?>
<MaxQuantParams xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" runOnCluster="false" processFolder="$process_folder">
$raw_file_info
<experimentalDesignFilename/>
<slicePeaks>$slice_peaks</slicePeaks>
<tempFolder/>
<ncores>$num_cores</ncores>
<ionCountIntensities>false</ionCountIntensities>
<maxFeatureDetectionCores>1</maxFeatureDetectionCores>
<verboseColumnHeaders>false</verboseColumnHeaders>
<minTime>NaN</minTime>
<maxTime>NaN</maxTime>
<calcPeakProperties>$calc_peak_properties</calcPeakProperties>
<useOriginalPrecursorMz>$use_original_precursor_mz</useOriginalPrecursorMz>
$fixed_mods
<multiModificationSearch>$multi_modification_search</multiModificationSearch>
<fastaFiles>$database</fastaFiles>
<fastaFilesFirstSearch/>
<fixedSearchFolder/>
<advancedRatios>$advanced_ratios</advancedRatios>
<rtShift>$rt_shift</rtShift>
<fastLfq>$fast_lfq</fastLfq>
<randomize>$randomize</randomize>
<specialAas>$special_aas</specialAas>
<includeContamiants>$include_contamiants</includeContamiants>
<equalIl>$equal_il</equalIl>
<topxWindow>$topx_window</topxWindow>
<maxPeptideMass>$max_peptide_mass</maxPeptideMass>
<reporterPif>$reporter_pif</reporterPif>
<reporterFraction>$reporter_fraction</reporterFraction>
<reporterBasePeakRatio>$reporter_base_peak_ratio</reporterBasePeakRatio>
<scoreThreshold>$score_threshold</scoreThreshold>
<filterAacounts>$filter_aacounts</filterAacounts>
<secondPeptide>$second_peptide</secondPeptide>
<matchBetweenRuns>$match_between_runs</matchBetweenRuns>
<matchBetweenRunsFdr>$match_between_runs_fdr</matchBetweenRunsFdr>
<reQuantify>$re_quantify</reQuantify>
<dependentPeptides>$dependent_peptides</dependentPeptides>
<dependentPeptideFdr>$dependent_peptide_fdr</dependentPeptideFdr>
<dependentPeptideMassBin>$dependent_peptide_mass_bin</dependentPeptideMassBin>
<labelFree>$label_free</labelFree>
<lfqMinEdgesPerNode>$lfq_min_edges_per_node</lfqMinEdgesPerNode>
<lfqAvEdgesPerNode>$lfq_av_edges_per_node</lfqAvEdgesPerNode>
<hybridQuantification>$hybrid_quantification</hybridQuantification>
<msmsConnection>$msms_connection</msmsConnection>
<ibaq>$ibaq</ibaq>
<msmsRecalibration>$msms_recalibration</msmsRecalibration>
<ibaqLogFit>$ibaq_log_fit</ibaqLogFit>
<razorProteinFdr>$razor_protein_fdr</razorProteinFdr>
<calcSequenceTags>$calc_sequence_tags</calcSequenceTags>
<deNovoVarMods>$de_novo_var_mods</deNovoVarMods>
<massDifferenceSearch>$mass_difference_search</massDifferenceSearch>
<minPepLen>$min_pep_len</minPepLen>
<peptideFdr>$peptide_fdr</peptideFdr>
<peptidePep>$peptide_pep</peptidePep>
<proteinFdr>$protein_fdr</proteinFdr>
<siteFdr>$site_fdr</siteFdr>
<minPeptideLengthForUnspecificSearch>$min_peptide_length_for_unspecific_search</minPeptideLengthForUnspecificSearch>
<maxPeptideLengthForUnspecificSearch>$max_peptide_length_for_unspecific_search</maxPeptideLengthForUnspecificSearch>
<useNormRatiosForOccupancy>$use_norm_ratios_for_occupancy</useNormRatiosForOccupancy>
<minPeptides>$min_peptides</minPeptides>
<minRazorPeptides>$min_razor_peptides</minRazorPeptides>
<minUniquePeptides>$min_unique_peptides</minUniquePeptides>
<useCounterparts>$use_counterparts</useCounterparts>
<minRatioCount>$min_ratio_count</minRatioCount>
<lfqMinRatioCount>$lfq_min_ratio_count</lfqMinRatioCount>
<restrictProteinQuantification>$restrict_protein_quantification</restrictProteinQuantification>
$restrict_mods
<matchingTimeWindow>$matching_time_window</matchingTimeWindow>
<numberOfCandidatesMultiplexedMsms>$number_of_candidates_multiplexed_msms</numberOfCandidatesMultiplexedMsms>
<numberOfCandidatesMsms>$number_of_candidates_msms</numberOfCandidatesMsms>
<separateAasForSiteFdr>$separate_aas_for_site_fdr</separateAasForSiteFdr>
<massDifferenceMods />
<aifParams aifSilWeight="$aif_sil_weight"
aifIsoWeight="$aif_iso_weight"
aifTopx="$aif_topx"
aifCorrelation="$aif_correlation"
aifCorrelationFirstPass="$aif_correlation_first_pass"
aifMinMass="$aif_min_mass"
aifMsmsTol="$aif_msms_tol"
aifSecondPass="$aif_second_pass"
aifIterative="$aif_iterative"
aifThresholdFdr="$aif_threhold_fdr" />
<groups>
<ParameterGroups>
$group_params
</ParameterGroups>
</groups>
<qcSettings>
<qcSetting xsi:nil="true" />
</qcSettings>
<msmsParams>
$ftms_fragment_settings
$itms_fragment_settings
$tof_fragment_settings
$unknown_fragment_settings
</msmsParams>
<keepLowScoresMode>$keep_low_scores_mode</keepLowScoresMode>
<msmsCentroidMode>$msms_centroid_mode</msmsCentroidMode>
<quantMode>$quant_mode</quantMode>
<siteQuantMode>$site_quant_mode</siteQuantMode>
<groupParams>
<groupParam>
$group_params
</groupParam>
</groupParams>
</MaxQuantParams>
"""
| 278 | |
# string.Template source for the per-group parameters.  set_group_params()
# renders it and stores the result under the "group_params" property, which
# TEMPLATE then embeds.
GROUP_TEMPLATE = """
<maxCharge>$max_charge</maxCharge>
<lcmsRunType>$lcms_run_type</lcmsRunType>
<msInstrument>$ms_instrument</msInstrument>
<groupIndex>$group_index</groupIndex>
<maxLabeledAa>$max_labeled_aa</maxLabeledAa>
<maxNmods>$max_n_mods</maxNmods>
<maxMissedCleavages>$max_missed_cleavages</maxMissedCleavages>
<multiplicity>$multiplicity</multiplicity>
<protease>$protease</protease>
<proteaseFirstSearch>$protease</proteaseFirstSearch>
<useProteaseFirstSearch>false</useProteaseFirstSearch>
<useVariableModificationsFirstSearch>false</useVariableModificationsFirstSearch>
$variable_mods
$isobaric_labels
<variableModificationsFirstSearch>
<string>Oxidation (M)</string>
<string>Acetyl (Protein N-term)</string>
</variableModificationsFirstSearch>
<hasAdditionalVariableModifications>false</hasAdditionalVariableModifications>
<additionalVariableModifications>
<ArrayOfString />
</additionalVariableModifications>
<additionalVariableModificationProteins>
<ArrayOfString />
</additionalVariableModificationProteins>
<doMassFiltering>$do_mass_filtering</doMassFiltering>
<firstSearchTol>$first_search_tol</firstSearchTol>
<mainSearchTol>$main_search_tol</mainSearchTol>
$labels
"""

# Example of a rendered $labels element (one <string> per label entry):
# <labels>
# <string />
# <string>Arg10; Lys8</string>
# </labels>
| 315 | |
def _fragment_defaults(name, in_ppm, deisotope, topx, tolerance_value, tolerance_unit):
    """Build the default settings dict for one fragment-spectrum analyzer.

    The four loss/charge flags are "true" for every analyzer; only the
    values passed in differ between them.
    """
    return {
        "InPpm": in_ppm,
        "Deisotope": deisotope,
        "Topx": topx,
        "HigherCharges": "true",
        "IncludeWater": "true",
        "IncludeAmmonia": "true",
        "DependentLosses": "true",
        "tolerance_value": tolerance_value,
        "tolerance_unit": tolerance_unit,
        "name": name,
    }


# Default MS/MS fragment settings keyed by analyzer name.  The inner dicts
# are overwritten in place by update_fragment_settings().
fragment_settings = {
    "FTMS": _fragment_defaults("FTMS", "true", "true", "10", "20", "Ppm"),
    "ITMS": _fragment_defaults("ITMS", "false", "false", "6", "0.5", "Dalton"),
    "TOF": _fragment_defaults("TOF", "false", "true", "10", "0.1", "Dalton"),
    "Unknown": _fragment_defaults("Unknown", "false", "false", "6", "0.5", "Dalton"),
}
| 330 | |
| 331 | |
def build_isobaric_labels(reporter_type):
    """Return the ``isobaricLabels`` XML element for ``reporter_type``.

    A falsy ``reporter_type`` yields an empty element.  Otherwise one label
    is emitted per terminus ("Nter", then "Lys") and reporter mass, named
    ``<prefix>-<terminus><mass>``.  An unrecognised type raises Exception.
    """
    if not reporter_type:
        return "<isobaricLabels />"
    reporters = {
        "itraq_4plex": ("iTRAQ4plex", [114, 115, 116, 117]),
        "itraq_8plex": ("iTRAQ8plex", [113, 114, 115, 116, 117, 118, 119, 121]),
        "tmt_2plex": ("TMT2plex", [126, 127]),
        "tmt_6plex": ("TMT6plex", [126, 127, 128, 129, 130, 131]),
    }
    if reporter_type not in reporters:
        raise Exception("Unknown reporter type - %s" % reporter_type)
    prefix, mzs = reporters[reporter_type]
    labels = []
    for term in ["Nter", "Lys"]:
        for mz in mzs:
            labels.append("%s-%s%d" % (prefix, term, mz))
    return wrap(map(xml_string, labels), "isobaricLabels")
| 351 | |
| 352 | |
def parse_groups(inputs_file, group_parts=("num",), input_parts=("name", "path")):
    """Parse the input-groups description file.

    Blank lines and lines starting with ``#`` are ignored.  A line starting
    with the first group prefix (``"num:"`` by default) opens a group and is
    followed by one line per remaining group part.  A line starting with the
    first input prefix (``"name:"``) opens an input record made of one line
    per input part.  Any other line is skipped.

    Returns:
        dict mapping the group identifier (text after the first group prefix)
        to ``{"group_data": {part: value}, "inputs": [[value, ...], ...]}``.
        Groups without any input do not appear in the result.

    Raises:
        ValueError: if an input record appears before any group header, or a
            record is cut short by the end of the file.
    """
    with open(inputs_file, "r") as handle:
        stripped = [line.strip() for line in handle]
    lines = [line for line in stripped if line and not line.startswith("#")]
    group_prefixes = ["%s:" % group_part for group_part in group_parts]
    input_prefixes = ["%s:" % input_part for input_part in input_parts]

    def read_record(start, prefixes):
        # One line per prefix; each value is whatever follows the prefix length.
        record_lines = lines[start:start + len(prefixes)]
        if len(record_lines) != len(prefixes):
            raise ValueError("Truncated record starting at %r in %s" % (lines[start], inputs_file))
        return [text[len(prefix):] for text, prefix in zip(record_lines, prefixes)]

    groups = {}
    cur_group = None
    group_data = None
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.startswith(group_prefixes[0]):
            # Start new group
            values = read_record(i, group_prefixes)
            cur_group = values[0]
            group_data = dict(zip(group_parts, values))
            i += len(group_prefixes)
        elif line.startswith(input_prefixes[0]):
            if group_data is None:
                raise ValueError("Input %r appears before any group header in %s" % (line, inputs_file))
            entry = read_record(i, input_prefixes)
            if cur_group not in groups:
                groups[cur_group] = {"group_data": group_data, "inputs": []}
            groups[cur_group]["inputs"].append(entry)
            i += len(input_prefixes)
        else:
            # Skip unrecognised line
            i += 1
    return groups
| 385 | |
| 386 | |
def add_fragment_options(parser):
    """Register one command-line option per fragment setting.

    Options are named ``--<analyzer>_<setting>`` in lower case (for example
    ``--ftms_inppm``) and default to the value in ``fragment_settings``.
    ``dict.items()`` is used so the loop runs on Python 2 and 3 alike.
    """
    for name, options in fragment_settings.items():
        for key, value in options.items():
            option_key = ("%s_%s" % (name, key)).lower()
            parser.add_option("--%s" % option_key, default=value)
| 392 | |
| 393 | |
def update_fragment_settings(arg_options):
    """Overwrite ``fragment_settings`` in place with parsed option values.

    For every analyzer/setting pair the attribute ``<analyzer>_<setting>``
    (lower case) is read from ``arg_options``, mirroring the option names
    registered by add_fragment_options().
    """
    for name, options in fragment_settings.items():
        # Snapshot the keys: the dict is assigned to inside the loop.
        for key in list(options):
            arg_option_key = ("%s_%s" % (name, key)).lower()
            options[key] = getattr(arg_options, arg_option_key)
| 399 | |
| 400 | |
def to_fragment_settings(name, values):
    """Render one ``FragmentSpectrumSettings`` XML element.

    Args:
        name: analyzer name, used for the ``Name`` attribute when ``values``
            has no ``"name"`` entry of its own.
        values: mapping of string settings (InPpm, Deisotope, Topx,
            HigherCharges, IncludeWater, IncludeAmmonia, DependentLosses,
            tolerance_value, tolerance_unit and optionally name).  It is not
            modified.

    Returns:
        The XML fragment as a string.  Values are escaped, including double
        quotes, because most of them land inside attribute values.
    """

    fragment_settings_template = """
<FragmentSpectrumSettings Name="$name" InPpm="$InPpm" Deisotope="$Deisotope"
Topx="$Topx" HigherCharges="$HigherCharges" IncludeWater="$IncludeWater" IncludeAmmonia="$IncludeAmmonia"
DependentLosses="$DependentLosses">
<Tolerance>
<Value>$tolerance_value</Value>
<Unit>$tolerance_unit</Unit>
</Tolerance>
</FragmentSpectrumSettings>
"""
    # escape() alone leaves '"' untouched, which would break attribute values.
    quote_entities = {'"': "&quot;"}
    safe_values = dict((key, escape(value, quote_entities)) for key, value in values.items())
    safe_values.setdefault("name", escape(name, quote_entities))
    return Template(fragment_settings_template).substitute(safe_values)
| 419 | |
| 420 | |
def get_file_paths(files):
    """Return a ``filePaths`` element with one ``string`` child per path."""
    path_nodes = map(xml_string, files)
    return wrap(path_nodes, "filePaths")
| 423 | |
| 424 | |
def get_file_names(file_names):
    """Return a ``fileNames`` element with one ``string`` child per name."""
    name_nodes = map(xml_string, file_names)
    return wrap(name_nodes, "fileNames")
| 427 | |
| 428 | |
def get_file_groups(file_groups):
    """Return a ``paramGroups`` element with one ``int`` child per group."""
    group_nodes = map(xml_int, file_groups)
    return wrap(group_nodes, "paramGroups")
| 431 | |
| 432 | |
def wrap(values, tag):
    """Concatenate ``values`` and enclose them in a ``<tag>...</tag>`` pair."""
    opening = "<%s>" % tag
    closing = "</%s>" % tag
    return opening + "".join(values) + closing
| 435 | |
| 436 | |
def xml_string(str):
    """Return ``str`` as an escaped ``<string>`` element.

    A falsy value (empty string, None) gives the empty element
    ``<string />``.
    """
    if not str:
        return "<string />"
    return "<string>%s</string>" % escape(str)
| 442 | |
| 443 | |
def xml_int(value):
    """Return ``value`` converted with ``int()`` as an ``<int>`` element."""
    number = int(value)
    return "<int>%d</int>" % number
| 446 | |
| 447 | |
def get_properties(options):
    """Build the substitution dict for the mqpar.xml templates.

    The result holds: fixed entries (slice_peaks, num_cores, database,
    process_folder), one string entry per name in ``direct_properties``
    copied from ``options``, one ``<analyzer>_fragment_settings`` entry per
    analyzer in ``fragment_settings``, and the restrict/fixed/variable
    modification lists rendered as XML.

    Calling this also prepares the FASTA database through setup_database().
    Scalar values are XML-escaped (including double quotes) because they are
    substituted into element text and attribute values.
    """
    direct_properties = ["lcms_run_type",
                         "max_missed_cleavages",
                         "protease",
                         "first_search_tol",
                         "main_search_tol",
                         "max_n_mods",
                         "max_charge",
                         "max_labeled_aa",
                         "do_mass_filtering",
                         "calc_peak_properties",
                         "use_original_precursor_mz",
                         "multi_modification_search",
                         "keep_low_scores_mode",
                         "msms_centroid_mode",
                         "quant_mode",
                         "site_quant_mode",
                         "advanced_ratios",
                         "rt_shift",
                         "fast_lfq",
                         "randomize",
                         "aif_sil_weight",
                         "aif_iso_weight",
                         "aif_topx",
                         "aif_correlation",
                         "aif_correlation_first_pass",
                         "aif_min_mass",
                         "aif_msms_tol",
                         "aif_second_pass",
                         "aif_iterative",
                         "aif_threhold_fdr",
                         "restrict_protein_quantification",
                         "matching_time_window",
                         "number_of_candidates_multiplexed_msms",
                         "number_of_candidates_msms",
                         "separate_aas_for_site_fdr",
                         "special_aas",
                         "include_contamiants",
                         "equal_il",
                         "topx_window",
                         "max_peptide_mass",
                         "reporter_pif",
                         "reporter_fraction",
                         "reporter_base_peak_ratio",
                         "score_threshold",
                         "filter_aacounts",
                         "second_peptide",
                         "match_between_runs",
                         "match_between_runs_fdr",
                         "re_quantify",
                         "dependent_peptides",
                         "dependent_peptide_fdr",
                         "dependent_peptide_mass_bin",
                         "label_free",
                         "lfq_min_edges_per_node",
                         "lfq_av_edges_per_node",
                         "hybrid_quantification",
                         "msms_connection",
                         "ibaq",
                         "msms_recalibration",
                         "ibaq_log_fit",
                         "razor_protein_fdr",
                         "calc_sequence_tags",
                         "de_novo_var_mods",
                         "mass_difference_search",
                         "min_pep_len",
                         "peptide_fdr",
                         "peptide_pep",
                         "protein_fdr",
                         "site_fdr",
                         "min_peptide_length_for_unspecific_search",
                         "max_peptide_length_for_unspecific_search",
                         "use_norm_ratios_for_occupancy",
                         "min_peptides",
                         "min_razor_peptides",
                         "min_unique_peptides",
                         "use_counterparts",
                         "min_ratio_count",
                         "lfq_min_ratio_count",
                         ]

    # escape() alone does not touch '"', which matters inside attributes.
    quote_entities = {'"': "&quot;"}

    props = {
        "slice_peaks": "true",
        "num_cores": str(options.num_cores),
        "database": xml_string(setup_database(options)),
        "process_folder": escape(os.path.join(os.getcwd(), "process"), quote_entities),
    }
    for prop in direct_properties:
        props[prop] = escape(str(getattr(options, prop)), quote_entities)

    for name, fragment_options in fragment_settings.items():
        key = "%s_fragment_settings" % name.lower()
        props[key] = to_fragment_settings(name, fragment_options)

    props["restrict_mods"] = wrap([xml_string(mod) for mod in options.restrict_mods], "restrictMods")
    props["fixed_mods"] = wrap([xml_string(mod) for mod in options.fixed_mods], "fixedModifications")
    props["variable_mods"] = wrap([xml_string(mod) for mod in options.variable_mods], "variableModifications")
    return props
| 549 | |
| 550 | |
| 551 # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python | |
def which(program):
    """Locate an executable, returning its path or None.

    If ``program`` contains a directory component it is checked as given.
    Otherwise each entry of the ``PATH`` environment variable (with
    surrounding double quotes stripped) is searched in order.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        return program if is_exe(program) else None

    for entry in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(entry.strip('"'), program)
        if is_exe(candidate):
            return candidate
    return None
| 570 | |
| 571 | |
def get_unique_path(base, extension):
    """Return ``<base>_<current time in milliseconds><extension>``."""
    stamp = int(time.time() * 1000)
    return "%s_%d%s" % (base, stamp, extension)
| 576 | |
| 577 | |
def get_env_property(name, default):
    """Return environment variable ``name``, or ``default`` when it is unset."""
    return os.environ.get(name, default)
| 583 | |
| 584 | |
def setup_database(options):
    """Link the FASTA database into the working directory and register it.

    A uniquely named ``.fasta`` link to ``options.database`` is created in
    the current directory (spaces in ``options.database_name`` become
    underscores and its extension is dropped).  An entry for it is then
    appended to MaxQuant's ``databases.xml``, located through the
    ``MAXQUANT_DATABASE_CONF`` environment variable or, failing that, in the
    ``conf`` directory next to ``MaxQuantCmd.exe`` on the PATH.  The
    configuration file is rewritten while holding a FileLock.

    Returns:
        Absolute path of the linked database file.

    Raises:
        ValueError: if the database name contains a directory component.
        Exception: if MaxQuantCmd.exe cannot be found, or the configuration
            file has no ``Databases`` element.
    """
    database_path = options.database
    database_name = options.database_name.replace(" ", "_")
    database_basename = os.path.splitext(database_name)[0]
    database_destination = get_unique_path(database_basename, ".fasta")
    # Explicit check instead of assert, which is stripped under ``python -O``.
    if database_destination != os.path.basename(database_destination):
        raise ValueError("Database name must not contain a directory component: %s" % database_name)
    symlink(database_path, database_destination)

    database_conf = get_env_property("MAXQUANT_DATABASE_CONF", None)
    if not database_conf:
        exe_path = which("MaxQuantCmd.exe")
        if exe_path is None:
            raise Exception("MaxQuantCmd.exe not found on PATH and MAXQUANT_DATABASE_CONF is not set.")
        database_conf = os.path.join(os.path.dirname(exe_path), "conf", "databases.xml")
    with FileLock(database_conf + ".galaxy_lock"):
        tree = ET.parse(database_conf)
        root = tree.getroot()
        databases_node = root.find("Databases")
        if databases_node is None:
            raise Exception("No Databases element found in %s" % database_conf)
        database_node = ET.SubElement(databases_node, 'databases')
        database_node.attrib["search_expression"] = ">([^ ]*)"
        database_node.attrib["replacement_expression"] = "%1"
        database_node.attrib["filename"] = database_destination
        tree.write(database_conf)
    return os.path.abspath(database_destination)
| 608 | |
| 609 | |
def setup_inputs(input_groups_path):
    """Link every raw input into the working directory and describe them.

    The groups file is parsed with parse_groups().  Each input is linked
    under its base name, with ``.RAW`` appended when the name does not
    already end in ``.raw`` (case-insensitive).

    Returns:
        The ``rawFileInfo`` XML element listing the absolute paths, the names
        without extension, and the group number of every input.
    """
    parsed_groups = parse_groups(input_groups_path)
    paths = []
    names = []
    group_nums = []
    for group_info in parsed_groups.values():
        files = group_info["inputs"]
        group_num = group_info["group_data"]["num"]
        for (name, path) in files:
            name = os.path.basename(name)
            if not name.lower().endswith(".raw"):
                # Append a single ".RAW"; formatting "%s.%s" with ".RAW"
                # produced "name..RAW" and a name ending in a stray dot.
                name = "%s.RAW" % name
            symlink(path, name)
            paths.append(os.path.abspath(name))
            names.append(os.path.splitext(name)[0])
            group_nums.append(group_num)
    file_data = (get_file_paths(paths), get_file_names(names), get_file_groups(group_nums))
    return "<rawFileInfo>%s%s%s<Fractions/><Values/></rawFileInfo> " % file_data
| 628 | |
| 629 | |
def set_group_params(properties, options):
    """Render GROUP_TEMPLATE and store it as ``properties["group_params"]``.

    ``options.labels`` (or a single empty label when none were given)
    provides the ``labels`` element, with "," replaced by "; " in each
    entry, and the multiplicity.  Group index and instrument are fixed to
    "1" and "0".  ``properties`` itself only gains the "group_params" key.
    """
    labels = options.labels if options.labels else [""]
    label_nodes = [xml_string(label.replace(",", "; ")) for label in labels]
    group_properties = dict(properties)
    group_properties.update({
        "labels": wrap(label_nodes, "labels"),
        "multiplicity": len(labels),
        "group_index": "1",
        "ms_instrument": "0",
    })
    properties["group_params"] = Template(GROUP_TEMPLATE).substitute(group_properties)
| 642 | |
| 643 | |
def split_mods(mods_string):
    """Split a comma-separated modification list, dropping empty entries.

    A falsy argument (empty string, None) gives an empty list.
    """
    if not mods_string:
        return []
    pieces = mods_string.split(",")
    return [piece for piece in pieces if piece]
| 646 | |
| 647 | |
def run_script():
    """Parse the command line, write mqpar.xml, run MaxQuant, collect outputs.

    Steps: build the option parser (including the per-analyzer fragment
    options), link the raw inputs and database into the working directory,
    render ``mqpar.xml`` from TEMPLATE, run ``MaxQuantCmd.exe`` on it, then
    copy each requested ``combined/txt/<name>.txt`` result (and optionally
    mqpar.xml itself) to the destination given on the command line.

    ``--input_groups``, ``--database`` and ``--database_name`` are required;
    a usage error is reported when one of them is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option("--input_groups")
    parser.add_option("--database")
    parser.add_option("--database_name")
    parser.add_option("--num_cores", type="int", default=4)
    parser.add_option("--max_missed_cleavages", type="int", default=2)
    parser.add_option("--protease", default="Trypsin/P")
    parser.add_option("--first_search_tol", default="20")
    parser.add_option("--main_search_tol", default="6")
    parser.add_option("--max_n_mods", type="int", default=5)
    parser.add_option("--max_charge", type="int", default=7)
    parser.add_option("--do_mass_filtering", default="true")
    parser.add_option("--labels", action="append", default=[])
    parser.add_option("--max_labeled_aa", type="int", default=3)
    parser.add_option("--keep_low_scores_mode", type="int", default=0)
    parser.add_option("--msms_centroid_mode", type="int", default=1)
    # 0 = all peptides, 1 = Use razor and unique peptides, 2 = use unique peptides
    parser.add_option("--quant_mode", type="int", default=1)
    parser.add_option("--site_quant_mode", type="int", default=0)
    parser.add_option("--aif_sil_weight", type="int", default=4)
    parser.add_option("--aif_iso_weight", type="int", default=2)
    parser.add_option("--aif_topx", type="int", default=50)
    parser.add_option("--aif_correlation", type="float", default=0.8)
    parser.add_option("--aif_correlation_first_pass", type="float", default=0.8)
    parser.add_option("--aif_min_mass", type="float", default=0)
    parser.add_option("--aif_msms_tol", type="float", default=10)
    parser.add_option("--aif_second_pass", default="false")
    parser.add_option("--aif_iterative", default="false")
    parser.add_option("--aif_threhold_fdr", default="0.01")
    parser.add_option("--restrict_protein_quantification", default="true")
    parser.add_option("--matching_time_window", default="2")
    parser.add_option("--number_of_candidates_multiplexed_msms", default="50")
    parser.add_option("--number_of_candidates_msms", default="15")
    parser.add_option("--separate_aas_for_site_fdr", default="true")
    parser.add_option("--advanced_ratios", default="false")
    parser.add_option("--rt_shift", default="false")
    parser.add_option("--fast_lfq", default="true")
    parser.add_option("--randomize", default="false")
    parser.add_option("--special_aas", default="KR")
    parser.add_option("--include_contamiants", default="false")
    parser.add_option("--equal_il", default="false")
    parser.add_option("--topx_window", default="100")
    parser.add_option("--max_peptide_mass", default="5000")
    parser.add_option("--reporter_pif", default="0.75")
    parser.add_option("--reporter_fraction", default="0")
    parser.add_option("--reporter_base_peak_ratio", default="0")
    parser.add_option("--score_threshold", default="0")
    parser.add_option("--filter_aacounts", default="true")
    parser.add_option("--second_peptide", default="true")
    parser.add_option("--match_between_runs", default="false")
    parser.add_option("--match_between_runs_fdr", default="false")
    parser.add_option("--re_quantify", default="true")
    parser.add_option("--dependent_peptides", default="false")
    parser.add_option("--dependent_peptide_fdr", default="0.01")
    parser.add_option("--dependent_peptide_mass_bin", default="0.0055")
    parser.add_option("--label_free", default="false")
    parser.add_option("--lfq_min_edges_per_node", default="3")
    parser.add_option("--lfq_av_edges_per_node", default="6")
    parser.add_option("--hybrid_quantification", default="false")
    parser.add_option("--msms_connection", default="false")
    parser.add_option("--ibaq", default="false")
    parser.add_option("--msms_recalibration", default="false")
    parser.add_option("--ibaq_log_fit", default="true")
    parser.add_option("--razor_protein_fdr", default="true")
    parser.add_option("--calc_sequence_tags", default="false")
    parser.add_option("--de_novo_var_mods", default="true")
    parser.add_option("--mass_difference_search", default="false")
    parser.add_option("--min_pep_len", default="7")
    parser.add_option("--peptide_fdr", default="0.01")
    parser.add_option("--peptide_pep", default="1")
    parser.add_option("--protein_fdr", default="0.01")
    parser.add_option("--site_fdr", default="0.01")
    parser.add_option("--min_peptide_length_for_unspecific_search", default="8")
    parser.add_option("--max_peptide_length_for_unspecific_search", default="25")
    parser.add_option("--use_norm_ratios_for_occupancy", default="true")
    parser.add_option("--min_peptides", default="1")
    parser.add_option("--min_razor_peptides", default="1")
    parser.add_option("--min_unique_peptides", default="0")
    parser.add_option("--use_counterparts", default="false")
    parser.add_option("--min_ratio_count", default="2")
    parser.add_option("--lfq_min_ratio_count", default="2")
    parser.add_option("--calc_peak_properties", default="false")
    parser.add_option("--use_original_precursor_mz", default="false")
    parser.add_option("--multi_modification_search", default="false")
    parser.add_option("--lcms_run_type", default="0")
    parser.add_option("--reporter_type", default=None)
    parser.add_option("--output_mqpar", default=None)
    # Option suffix -> base name of the file under combined/txt.
    text_outputs = {
        "aif_msms": "aifMsms",
        "all_peptides": "allPeptides",
        "evidence": "evidence",
        "modification_specific_peptides": "modificationSpecificPeptides",
        "msms": "msms",
        "msms_scans": "msmsScans",
        "mz_range": "mzRange",
        "parameters": "parameters",
        "peptides": "peptides",
        "protein_groups": "proteinGroups",
        "sim_peptides": "simPeptides",
        "sim_scans": "simScans",
        "summary": "summary"
    }
    for output in text_outputs:
        parser.add_option("--output_%s" % output, default=None)

    parser.add_option("--variable_mods", default="Oxidation (M),Acetyl (Protein N-term)")
    parser.add_option("--restrict_mods", default="Oxidation (M),Acetyl (Protein N-term)")
    parser.add_option("--fixed_mods", default="Carbamidomethyl (C)")

    add_fragment_options(parser)

    (options, args) = parser.parse_args()
    # Fail with a usage message rather than a traceback further down.
    for required in ("input_groups", "database", "database_name"):
        if getattr(options, required) is None:
            parser.error("--%s is required" % required)
    options.restrict_mods = split_mods(options.restrict_mods)
    options.fixed_mods = split_mods(options.fixed_mods)
    options.variable_mods = split_mods(options.variable_mods)

    update_fragment_settings(options)

    raw_file_info = setup_inputs(options.input_groups)
    properties = get_properties(options)
    properties["raw_file_info"] = raw_file_info
    properties["isobaric_labels"] = build_isobaric_labels(options.reporter_type)
    set_group_params(properties, options)
    driver_contents = Template(TEMPLATE).substitute(properties)
    # Close the file before MaxQuant reads it.
    with open("mqpar.xml", "w") as mqpar_file:
        mqpar_file.write(driver_contents)
    print(driver_contents)
    execute("MaxQuantCmd.exe mqpar.xml %d" % options.num_cores)
    for key, basename in text_outputs.items():
        destination = getattr(options, "output_%s" % key, None)
        if destination:
            source = os.path.join("combined", "txt", "%s.txt" % basename)
            shutil.copy(source, destination)
    output_mqpar = options.output_mqpar
    if output_mqpar:
        shutil.copy("mqpar.xml", output_mqpar)
| 785 | |
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
