annotate metfrag.py @ 0:938c818cf097 draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
author tomnl
date Fri, 04 Oct 2019 07:16:18 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
1 from __future__ import absolute_import, print_function
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
2
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
3 import ConfigParser
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
4 import argparse
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
5 import csv
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
6 import glob
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
7 import multiprocessing
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
8 import os
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
9 import re
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
10 import shutil
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
11 import sys
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
12 import tempfile
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
13 from collections import defaultdict
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
14
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
15 import six
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
16
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
17 print(sys.version)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
18
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
19 parser = argparse.ArgumentParser()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
20 parser.add_argument('--input_pth')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
21 parser.add_argument('--result_pth', default='metfrag_result.csv')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
22
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
23 parser.add_argument('--temp_dir')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
24 parser.add_argument('--polarity', default='pos')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
25 parser.add_argument('--minMSMSpeaks', default=1)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
26
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
27 parser.add_argument('--MetFragDatabaseType', default='PubChem')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
28 parser.add_argument('--LocalDatabasePath', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
29 parser.add_argument('--LocalMetChemDatabaseServerIp', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
30
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
31 parser.add_argument('--DatabaseSearchRelativeMassDeviation', default=5)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
32 parser.add_argument('--FragmentPeakMatchRelativeMassDeviation', default=10)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
33 parser.add_argument('--FragmentPeakMatchAbsoluteMassDeviation', default=0.001)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
34 parser.add_argument('--NumberThreads', default=1)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
35 parser.add_argument('--UnconnectedCompoundFilter', action='store_true')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
36 parser.add_argument('--IsotopeFilter', action='store_true')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
37
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
38 parser.add_argument('--FilterMinimumElements', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
39 parser.add_argument('--FilterMaximumElements', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
40 parser.add_argument('--FilterSmartsInclusionList', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
41 parser.add_argument('--FilterSmartsExclusionList', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
42 parser.add_argument('--FilterIncludedElements', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
43 parser.add_argument('--FilterExcludedElements', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
44 parser.add_argument('--FilterIncludedExclusiveElements', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
45
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
46 parser.add_argument('--score_thrshld', default=0)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
47 parser.add_argument('--pctexplpeak_thrshld', default=0)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
48 parser.add_argument('--schema')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
49 parser.add_argument('--cores_top_level', default=1)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
50 parser.add_argument('--chunks', default=1)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
51 parser.add_argument('--meta_select_col', default='name')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
52 parser.add_argument('--skip_invalid_adducts', action='store_true')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
53
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
54 parser.add_argument('--ScoreSuspectLists', default='')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
55 parser.add_argument('--MetFragScoreTypes',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
56 default="FragmenterScore,OfflineMetFusionScore")
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
57 parser.add_argument('--MetFragScoreWeights', default="1.0,1.0")
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
58
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
59 args = parser.parse_args()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
60 print(args)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
61
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
62 config = ConfigParser.ConfigParser()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
63 config.read(
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
64 os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.ini'))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
65
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
66 if os.stat(args.input_pth).st_size == 0:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
67 print('Input file empty')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
68 exit()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
69
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
70 # Create temporary working directory
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
71 if args.temp_dir:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
72 wd = args.temp_dir
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
73 else:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
74 wd = tempfile.mkdtemp()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
75
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
76 if os.path.exists(wd):
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
77 shutil.rmtree(wd)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
78 os.makedirs(wd)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
79 else:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
80 os.makedirs(wd)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
81
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
82 ######################################################################
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
83 # Setup regular expressions for MSP parsing dictionary
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
84 ######################################################################
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
85 regex_msp = {}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
86 regex_msp['name'] = [r'^Name(?:=|:)(.*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
87 regex_msp['polarity'] = [r'^ion.*mode(?:=|:)(.*)$',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
88 r'^ionization.*mode(?:=|:)(.*)$',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
89 r'^polarity(?:=|:)(.*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
90 regex_msp['precursor_mz'] = [r'^precursor.*m/z(?:=|:)\s*(\d*[.,]?\d*)$',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
91 r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
92 regex_msp['precursor_type'] = [r'^precursor.*type(?:=|:)(.*)$',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
93 r'^adduct(?:=|:)(.*)$',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
94 r'^ADDUCTIONNAME(?:=|:)(.*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
95 regex_msp['num_peaks'] = [r'^Num.*Peaks(?:=|:)\s*(\d*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
96 regex_msp['msp'] = [r'^Name(?:=|:)(.*)$'] # Flag for standard MSP format
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
97
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
98 regex_massbank = {}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
99 regex_massbank['name'] = [r'^RECORD_TITLE:(.*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
100 regex_massbank['polarity'] = [r'^AC\$MASS_SPECTROMETRY:\s+ION_MODE\s+(.*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
101 regex_massbank['precursor_mz'] = [
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
102 r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
103 regex_massbank['precursor_type'] = [
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
104 r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
105 regex_massbank['num_peaks'] = [r'^PK\$NUM_PEAK:\s+(\d*)']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
106 regex_massbank['cols'] = [r'^PK\$PEAK:\s+(.*)']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
107 regex_massbank['massbank'] = [
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
108 r'^RECORD_TITLE:(.*)$'] # Flag for massbank format
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
109
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
110 if args.schema == 'msp':
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
111 meta_regex = regex_msp
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
112 elif args.schema == 'massbank':
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
113 meta_regex = regex_massbank
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
114 elif args.schema == 'auto':
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
115 # If auto we just check for all the available parameter names and then
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
116 # determine if Massbank or MSP based on the name parameter
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
117 meta_regex = {}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
118 meta_regex.update(regex_massbank)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
119 meta_regex['name'].extend(regex_msp['name'])
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
120 meta_regex['polarity'].extend(regex_msp['polarity'])
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
121 meta_regex['precursor_mz'].extend(regex_msp['precursor_mz'])
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
122 meta_regex['precursor_type'].extend(regex_msp['precursor_type'])
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
123 meta_regex['num_peaks'].extend(regex_msp['num_peaks'])
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
124 meta_regex['msp'] = regex_msp['msp']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
125 else:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
126 sys.exit("No schema selected")
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
127
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
128 adduct_types = {
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
129 '[M+H]+': 1.007276,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
130 '[M+NH4]+': 18.034374,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
131 '[M+Na]+': 22.989218,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
132 '[M+K]+': 38.963158,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
133 '[M+CH3OH+H]+': 33.033489,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
134 '[M+ACN+H]+': 42.033823,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
135 '[M+ACN+Na]+': 64.015765,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
136 '[M+2ACN+H]+': 83.06037,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
137 '[M-H]-': -1.007276,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
138 '[M+Cl]-': 34.969402,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
139 '[M+HCOO]-': 44.99819,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
140 '[M-H+HCOOH]-': 44.99819,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
141 # same as above but different style of writing adduct
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
142 '[M+CH3COO]-': 59.01385,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
143 '[M-H+CH3COOH]-': 59.01385
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
144 # same as above but different style of writing adduct
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
145 }
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
# Reverse lookup: adduct mass shift rounded to the nearest integer -> adduct
# label. The mapping is lossy: adducts whose shifts round to the same integer
# (e.g. '[M+HCOO]-' and '[M-H+HCOOH]-', both 44.99819) collapse into a single
# entry and whichever label is iterated last wins.
# dict.items() is used rather than the Python 2-only dict.iteritems() so the
# statement behaves the same on Python 2 and Python 3.
inv_adduct_types = {
    int(round(v, 0)): k for k, v in adduct_types.items()
}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
147
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
148
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
149 # function to extract the meta data using the regular expressions
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
def parse_meta(meta_regex, meta_info=None, line=None):
    """Extract metadata fields from one line of text using regexes.

    Every pattern registered for a key is searched case-insensitively in
    ``line``. On a match, the captured groups are joined with "-", stripped
    of surrounding whitespace and stored under that key. If several patterns
    of the same key match, the last matching pattern wins.

    :param meta_regex: mapping of field name -> iterable of regex patterns.
    :param meta_info: optional dict that is updated in place; a new dict is
        created when omitted.
    :param line: text to search. When omitted, the module-level variable
        ``line`` is used, which is what callers of the original
        two-argument form rely on.
    :return: the updated ``meta_info`` dict.
    :raises NameError: if ``line`` is omitted and no module-level ``line``
        is defined.
    """
    if meta_info is None:
        meta_info = {}

    if line is None:
        # Backward compatibility: the function used to read a global loop
        # variable called ``line``; keep honouring it for existing callers.
        try:
            line = globals()['line']
        except KeyError:
            raise NameError(
                "parse_meta: no 'line' argument was given and no "
                "module-level 'line' is defined")

    for k, regexes in meta_regex.items():
        for reg in regexes:
            m = re.search(reg, line, re.IGNORECASE)
            if m:
                # Optional groups that did not take part in the match are
                # None and would make str.join raise TypeError; skip them.
                meta_info[k] = '-'.join(
                    g for g in m.groups() if g is not None).strip()
    return meta_info
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
159
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
160
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
161 ######################################################################
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
162 # Setup parameter dictionary
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
163 ######################################################################
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
def init_paramd(args):
    """Assemble the base MetFrag parameter dictionary from ``args``.

    Besides ``args`` this reads two module-level names: ``config`` (queried
    for the 'MetChem' section when the MetChem database type is selected)
    and ``wd`` (stored as the results path).

    :param args: parsed argument namespace providing the attributes read
        below (database type, mass deviations, polarity, score settings and
        the optional candidate filters).
    :return: ``defaultdict`` (without a default factory) holding the
        parameters.
    """
    params = defaultdict()

    # ---- candidate database ------------------------------------------
    db_type = args.MetFragDatabaseType
    params["MetFragDatabaseType"] = db_type

    if db_type == "LocalCSV":
        params["LocalDatabasePath"] = args.LocalDatabasePath
    elif db_type == "MetChem":
        # Connection details come from the config file, except for the
        # server address which is supplied on the command line.
        for option in ('LocalMetChemDatabase',
                       'LocalMetChemDatabasePortNumber'):
            params[option] = config.get('MetChem', option)
        params["LocalMetChemDatabaseServerIp"] = \
            args.LocalMetChemDatabaseServerIp
        for option in ('LocalMetChemDatabaseUser',
                       'LocalMetChemDatabasePassword'):
            params[option] = config.get('MetChem', option)

    # ---- mass tolerances and output location -------------------------
    for name in ("FragmentPeakMatchAbsoluteMassDeviation",
                 "FragmentPeakMatchRelativeMassDeviation",
                 "DatabaseSearchRelativeMassDeviation"):
        params[name] = getattr(args, name)
    params["SampleName"] = ''
    params["ResultsPath"] = os.path.join(wd)

    # ---- ionisation mode ---------------------------------------------
    # Any polarity other than "pos" is handled as negative mode.
    positive = args.polarity == "pos"
    ion_mode = "1" if positive else "-1"
    params["IsPositiveIonMode"] = positive
    params["PrecursorIonModeDefault"] = ion_mode
    params["PrecursorIonMode"] = ion_mode
    params["nm_mass_diff_default"] = 1.007276 if positive else -1.007276

    # ---- writer, threading and scoring -------------------------------
    params["MetFragCandidateWriter"] = "CSV"
    params["NumberThreads"] = args.NumberThreads

    if args.ScoreSuspectLists:
        params["ScoreSuspectLists"] = args.ScoreSuspectLists

    params["MetFragScoreTypes"] = args.MetFragScoreTypes
    params["MetFragScoreWeights"] = args.MetFragScoreWeights

    # ---- pre-processing candidate filters ----------------------------
    # Each row: (attribute on args, filter name, parameter key or None).
    # The exclusive and the plain element-inclusion filters both write the
    # 'FilterIncludedElements' key, so when both are set the plain
    # inclusion value is the one that is kept.
    filter_table = (
        ('UnconnectedCompoundFilter', 'UnconnectedCompoundFilter', None),
        ('IsotopeFilter', 'IsotopeFilter', None),
        ('FilterMinimumElements', 'MinimumElementsFilter',
         'FilterMinimumElements'),
        ('FilterMaximumElements', 'MaximumElementsFilter',
         'FilterMaximumElements'),
        ('FilterSmartsInclusionList', 'SmartsSubstructureInclusionFilter',
         'FilterSmartsInclusionList'),
        ('FilterSmartsExclusionList', 'SmartsSubstructureExclusionFilter',
         'FilterSmartsExclusionList'),
        ('FilterIncludedExclusiveElements',
         'ElementInclusionExclusiveFilter', 'FilterIncludedElements'),
        ('FilterIncludedElements', 'ElementInclusionFilter',
         'FilterIncludedElements'),
        ('FilterExcludedElements', 'ElementExclusionFilter',
         'FilterExcludedElements'),
    )

    filter_settings = defaultdict()
    active_filters = []
    for attr, filter_name, setting_key in filter_table:
        value = getattr(args, attr)
        if not value:
            continue
        active_filters.append(filter_name)
        if setting_key is not None:
            filter_settings[setting_key] = value

    if active_filters:
        # Comma-separated filter names followed by space-separated
        # "key=value" settings, each chunk ending with a single space.
        pieces = [','.join(active_filters) + ' ']
        pieces.extend(
            "{0}={1} ".format(str(key), str(val))
            for key, val in six.iteritems(filter_settings))
        params["MetFragPreProcessingCandidateFilter"] = ''.join(pieces)

    return params
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
262
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
263
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
264 ######################################################################
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
265 # Function to run metfrag when all metainfo and peaks have been parsed
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
266 ######################################################################
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
267 def run_metfrag(meta_info, peaklist, args, wd, spectrac, adduct_types):
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
268 # Get sample details (if possible to extract) e.g. if created as part of
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
269 # the msPurity pipeline) choose between getting additional details to add
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
270 # as columns as either all meta data from msp, just details from the
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
271 # record name (i.e. when using msPurity and we have the columns coded into
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
272 # the name) or just the spectra index (spectrac)].
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
273 # Returns the parameters used and the command line call
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
274
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
275 paramd = init_paramd(args)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
276 if args.meta_select_col == 'name':
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
277 # have additional column of just the name
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
278 paramd['additional_details'] = {'name': meta_info['name']}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
279 elif args.meta_select_col == 'name_split':
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
280 # have additional columns split by "|" and
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
281 # then on ":" e.g. MZ:100.2 | RT:20 | xcms_grp_id:1
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
282 paramd['additional_details'] = {
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
283 sm.split(":")[0].strip(): sm.split(":")[1].strip() for sm in
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
284 meta_info['name'].split("|")}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
285 elif args.meta_select_col == 'all':
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
286 # have additional columns based on all the meta information
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
287 # extracted from the MSP
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
288 paramd['additional_details'] = meta_info
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
289 else:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
290 # Just have and index of the spectra in the MSP file
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
291 paramd['additional_details'] = {'spectra_idx': spectrac}
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
292
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
293 paramd["SampleName"] = "{}_metfrag_result".format(spectrac)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
294
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
295 # =============== Output peaks to txt file ==============================
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
296 paramd["PeakListPath"] = os.path.join(wd,
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
297 "{}_tmpspec.txt".format(spectrac))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
298
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
299 # write spec file
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
300 with open(paramd["PeakListPath"], 'w') as outfile:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
301 for p in peaklist:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
302 outfile.write(p[0] + "\t" + p[1] + "\n")
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
303
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
304 # =============== Update param based on MSP metadata ======================
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
305 # Replace param details with details from MSP if required
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
306 if 'precursor_type' in meta_info and \
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
307 meta_info['precursor_type'] in adduct_types:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
308 adduct = meta_info['precursor_type']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
309 nm = float(meta_info['precursor_mz']) - adduct_types[
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
310 meta_info['precursor_type']]
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
311 paramd["PrecursorIonMode"] = \
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
312 int(round(adduct_types[meta_info['precursor_type']], 0))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
313 elif not args.skip_invalid_adducts:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
314 adduct = inv_adduct_types[int(paramd['PrecursorIonModeDefault'])]
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
315 paramd["PrecursorIonMode"] = paramd['PrecursorIonModeDefault']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
316 nm = float(meta_info['precursor_mz']) - paramd['nm_mass_diff_default']
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
317 else:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
318 print('Skipping {}'.format(paramd["SampleName"]))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
319 return '', ''
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
320
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
321 paramd['additional_details']['adduct'] = adduct
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
322 paramd["NeutralPrecursorMass"] = nm
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
323
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
324 # ============== Create CLI cmd for metfrag ===============================
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
325 cmd = "metfrag"
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
326 for k, v in six.iteritems(paramd):
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
327 if k not in ['PrecursorIonModeDefault', 'nm_mass_diff_default',
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
328 'additional_details']:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
329 cmd += " {}={}".format(str(k), str(v))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
330
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
331 # ============== Run metfrag ==============================================
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
332 # print(cmd)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
333 # Filter before process with a minimum number of MS/MS peaks
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
334 if plinesread >= float(args.minMSMSpeaks):
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
335
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
336 if int(args.cores_top_level) == 1:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
337 os.system(cmd)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
338
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
339 return paramd, cmd
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
340
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
341
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
def work(cmds):
    """Run a chunk of shell commands one after another.

    This is the worker handed to ``multiprocessing.Pool.map`` when the
    metfrag calls are parallelised at the command-line level.

    :param cmds: iterable of shell command strings
    :return: list with the ``os.system`` return value of each command, in
             the same order as ``cmds``
    """
    return [os.system(cmd) for cmd in cmds]
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
344
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
345
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
######################################################################
# Parse MSP file and run metfrag CLI
######################################################################
# The input file is read line by line as a small state machine driven by
# two counters:
#   pnumlines == 0           -> collecting metadata for the next spectrum
#   plinesread < pnumlines   -> collecting the peak lines of that spectrum
#   plinesread == pnumlines  -> spectrum complete, hand it to run_metfrag
#
# keep list of commands (needed when running the CLI calls in parallel)
cmds = []
# keep a dictionary of all params, keyed by the SampleName of each spectrum
paramds = {}
# keep count of spectra (for uid)
spectrac = 0
# this dictionary will store the metadata results from the MSP file
meta_info = {}

with open(args.input_pth, "r") as infile:
    # number of peak lines announced for the current spectrum
    pnumlines = 0
    # number of peak lines read so far for the current spectrum
    plinesread = 0
    # NOTE(review): keep the loop variable named ``line``. parse_meta is
    # called without the text, so it presumably reads the module-level
    # ``line`` - confirm against parse_meta before renaming.
    for line in infile:
        line = line.strip()

        if pnumlines == 0:
            # ============== Extract metadata from MSP ====================
            meta_info = parse_meta(meta_regex, meta_info)

            if ('massbank' in meta_info and 'cols' in meta_info) or (
                    'msp' in meta_info and 'num_peaks' in meta_info):
                # the peak table starts on the next line
                pnumlines = int(meta_info['num_peaks'])
                plinesread = 0
                peaklist = []

        elif plinesread < pnumlines:
            # ============== Extract peaks from MSP =======================
            # .split() will split on any empty space (i.e. tab and space)
            fields = line.split()
            # Keep only m/z and intensity, not relative intensity
            peaklist.append((fields[0], fields[1]))
            plinesread += 1

        elif plinesread and plinesread == pnumlines:
            # ===== Get sample name and additional details for output =====
            # The line that lands here is only used as the end-of-record
            # marker; it is not parsed for metadata.
            spectrac += 1
            paramd, cmd = run_metfrag(meta_info, peaklist, args, wd,
                                      spectrac, adduct_types)

            # run_metfrag returns empty values for skipped spectra
            if paramd:
                paramds[paramd["SampleName"]] = paramd
                cmds.append(cmd)

            # reset the state for the next spectrum
            meta_info = {}
            pnumlines = 0
            plinesread = 0

    # end of file. Check if there is a MSP spectra to run metfrag on still
    # (happens when the last record is not followed by another line)
    if plinesread and plinesread == pnumlines:
        paramd, cmd = run_metfrag(meta_info, peaklist, args, wd,
                                  spectrac + 1, adduct_types)

        if paramd:
            paramds[paramd["SampleName"]] = paramd
            cmds.append(cmd)

# Perform multiprocessing on command line call level
if int(args.cores_top_level) > 1:
    chunk_size = int(args.chunks)
    cmds_chunks = [cmds[start:start + chunk_size]
                   for start in range(0, len(cmds), chunk_size)]
    pool = multiprocessing.Pool(processes=int(args.cores_top_level))
    try:
        pool.map(work, cmds_chunks)
    finally:
        # always release the worker processes, even if a chunk failed
        pool.close()
        pool.join()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
417
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
######################################################################
# Concatenate and filter the output
######################################################################
# The individual result files can have different headers, so the complete
# list of headers has to be collected before the files are merged.
result_pattern = os.path.join(wd, "*_metfrag_result.csv")
outfiles = glob.glob(result_pattern)

# Nothing was written to the working directory: report it and stop.
if not outfiles:
    print('No results')
    sys.exit()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
430
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
431 headers = []
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
432 c = 0
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
433 for fn in outfiles:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
434 with open(fn, 'r') as infile:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
435 reader = csv.reader(infile)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
436 if sys.version_info >= (3, 0):
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
437 headers.extend(next(reader))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
438 else:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
439 headers.extend(reader.next())
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
440 # check if file has any data rows
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
441 for i, row in enumerate(reader):
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
442 c += 1
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
443 if i == 1:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
444 break
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
445
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
446 # if no data rows (e.g. matches) then do not save an
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
447 # output and leave the program
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
448 if c == 0:
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
449 print('No results')
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
450 sys.exit()
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
451
# Columns taken from the per-spectrum parameter dictionaries rather than
# from the MetFrag result files. 'sample_name' always comes first and the
# remaining keys are sorted, so the column order is identical on every run
# (a plain list(set(...)) order depends on string hashing).
extra_detail_keys = set()
for paramd in paramds.values():
    extra_detail_keys.update(paramd['additional_details'])
extra_detail_keys.discard('sample_name')
additional_detail_headers = ['sample_name'] + sorted(extra_detail_keys)

# add inchikey if not already present (missing in metchem output)
if 'InChIKey' not in headers:
    headers.append('InChIKey')

# De-duplicate the result-file columns and leave out any name that is
# already an additional-detail column, so no column is written twice.
headers = additional_detail_headers + sorted(
    set(headers) - set(additional_detail_headers))
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
462
def _result_file_order(path):
    """Return the sort key for a MetFrag result file path.

    Files named ``<number>_metfrag_result.csv`` are ordered by their number.
    Only the base name is inspected, so the path separator of the platform
    does not matter. Any other name sorts after all numbered files instead
    of aborting the sort.
    """
    matched = re.match(r'(\d+)_metfrag_result\.csv$',
                       os.path.basename(path))
    if matched is None:
        return (1, 0)
    return (0, int(matched.group(1)))


# Sort files nicely
outfiles.sort(key=_result_file_order)

print(outfiles)
938c818cf097 "planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 6bc059c100f1f968d99c1d5ad5e7259e83d386b6"
tomnl
parents:
diff changeset
468
def _passes_filters(row, score_thrshld, pctexplpeak_thrshld):
    """Tell whether one MetFrag result row should be written to the output.

    :param row: dict for one result row, as produced by csv.DictReader
    :param score_thrshld: rows whose 'Score' is less than or equal to this
        value are rejected
    :param pctexplpeak_thrshld: minimum percentage of used peaks that must
        be explained ('NoExplPeaks' / 'NumberPeaksUsed' * 100)
    :return: True when the row passes every filter, False otherwise
    """
    # Filter when no MS/MS peak matched
    expl_peaks = row.get('ExplPeaks')
    if expl_peaks is not None and pctexplpeak_thrshld > 0 \
            and 'NA' in expl_peaks:
        return False

    # Filter with a score threshold
    score = row.get('Score')
    if score is not None and float(score) <= score_thrshld:
        return False

    # Filter with a relative number of peak matched. The counts are read
    # from this row only, so a row lacking them can never reuse the values
    # of an earlier row; such a row is rejected like one with zero peaks.
    nbfindpeak = row.get('NoExplPeaks')
    totpeaks = row.get('NumberPeaksUsed')
    if nbfindpeak is None or totpeaks is None:
        return False
    try:
        pctexplpeak = float(nbfindpeak) / float(totpeaks) * 100
    except ZeroDivisionError:
        return False
    if pctexplpeak < pctexplpeak_thrshld:
        return False
    return True


# The thresholds do not change between rows, so parse them only once.
score_thrshld = float(args.score_thrshld)
pctexplpeak_thrshld = float(args.pctexplpeak_thrshld)

# merge outputs
with open(args.result_pth, 'a') as merged_outfile:
    dwriter = csv.DictWriter(merged_outfile, fieldnames=headers,
                             delimiter='\t', quotechar='"')
    dwriter.writeheader()

    for fn in outfiles:
        # paramds is looked up by the result file name without extension
        bfn = os.path.splitext(os.path.basename(fn))[0]

        with open(fn) as infile:
            reader = csv.DictReader(infile, delimiter=',', quotechar='"')
            for line in reader:
                # Write the line only if it passes all filters
                if not _passes_filters(line, score_thrshld,
                                       pctexplpeak_thrshld):
                    continue

                line['sample_name'] = paramds[bfn]['SampleName']

                if args.MetFragDatabaseType == "MetChem":
                    # for some reason the metchem database option does
                    # not report the full inchikey (at least in the Bham
                    # setup). This ensures we always get the full inchikey
                    line['InChIKey'] = '{}-{}-{}'.format(
                        line['InChIKey1'],
                        line['InChIKey2'],
                        line['InChIKey3'])

                line.update(paramds[bfn]['additional_details'])
                dwriter.writerow(line)