annotate sirius_csifingerid.py @ 5:57c4e7421085 draft

"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 99b92ad520378cfaceb98cb0c9956825033cf334"
author computational-metabolomics
date Fri, 04 Feb 2022 10:15:28 +0000
parents 8fb51147d15e
children 96b077221201
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
1 import argparse
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
2 import csv
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
3 import glob
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
4 import multiprocessing
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
5 import os
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
6 import re
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
7 import sys
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
8 import tempfile
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
9 import uuid
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
10 from collections import defaultdict
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
11
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
12
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
# Command-line interface of the wrapper script.
parser = argparse.ArgumentParser()

# Options that take one value and have no default (None when omitted).
for _flag in (
    '--input_pth',
    '--canopus_result_pth',
    '--annotations_result_pth',
    '--database',
    '--profile',
    '--candidates',
    '--ppm_max',
    '--polarity',
    '--results_name',
    '--out_dir',
    '--tool_directory',
    '--temp_dir',
):
    parser.add_argument(_flag)

# Options that take one value and fall back to a default.  No ``type`` is
# given, so a value supplied on the command line arrives as a string while
# the defaults below keep their Python type.
for _flag, _default in (
    ('--meta_select_col', 'all'),
    ('--cores_top_level', 1),
    ('--chunks', 1),
    ('--min_MSMS_peaks', 1),
    ('--rank_filter', 0),
    ('--confidence_filter', 0),
):
    parser.add_argument(_flag, default=_default)

# Boolean switch: present -> True, absent -> False.
parser.add_argument(
    '--backwards_compatible',
    action='store_true',
    default=False,
)
parser.add_argument('--schema', default='msp')
# Repeatable option; with ``nargs=1`` every occurrence is stored as a
# one-element list, so the parsed value is a list of lists.
parser.add_argument(
    '-a',
    '--adducts',
    action='append',
    nargs=1,
    required=False,
    default=[],
    help='Adducts used',
)
856b3761277d "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 3e3dee9a853b6133cf089b3c063f53c52b39463d"
computational-metabolomics
parents: 0
diff changeset
37
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
# Parse the command line and echo the parsed options to stdout.
args = parser.parse_args()
print(args)

# An empty input file leaves nothing to process: report it and stop with
# the default (success) exit status.
if os.stat(args.input_pth).st_size == 0:
    print('Input file empty')
    # sys.exit() instead of the bare exit() helper: exit() is injected by
    # the ``site`` module for interactive use and is not guaranteed to
    # exist (e.g. under ``python -S``).
    sys.exit()
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
43
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
# Select the working directory ``wd``.
if args.temp_dir:
    # A user-supplied temp dir gets a fixed 'temp' sub-directory.  Create it
    # only when it is missing: the previous unconditional mkdir raised
    # FileExistsError when the directory was already there, which made the
    # existence check that followed it unreachable.
    wd = os.path.join(args.temp_dir, 'temp')
    if not os.path.exists(wd):
        os.mkdir(wd)
else:
    # Otherwise create a fresh system temp directory and a uniquely named
    # sub-directory inside it.
    td = tempfile.mkdtemp()
    wd = os.path.join(td, str(uuid.uuid4()))
    os.mkdir(wd)
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
54
2
856b3761277d "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 3e3dee9a853b6133cf089b3c063f53c52b39463d"
computational-metabolomics
parents: 0
diff changeset
print(args.adducts)
# Each -a/--adducts occurrence is a one-element list.  Take its value and
# turn the '__ob__' / '__cb__' placeholders back into '[' and ']'.  An
# empty or missing option yields an empty list.
adducts_from_cli = [
    raw[0].replace('__ob__', '[').replace('__cb__', ']')
    for raw in (args.adducts or [])
]
856b3761277d "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 3e3dee9a853b6133cf089b3c063f53c52b39463d"
computational-metabolomics
parents: 0
diff changeset
63
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
######################################################################
# Regular expressions used to pull metadata out of the input file.
# Each key maps to a list of alternative patterns for that field.
######################################################################
regex_msp = {
    'name': [r'^Name(?:=|:)(.*)$'],
    'polarity': [
        r'^ion.*mode(?:=|:)(.*)$',
        r'^ionization.*mode(?:=|:)(.*)$',
        r'^polarity(?:=|:)(.*)$',
    ],
    'precursor_mz': [
        r'^precursor.*m/z(?:=|:)\s*(\d*[.,]?\d*)$',
        r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$',
    ],
    'precursor_type': [
        r'^precursor.*type(?:=|:)(.*)$',
        r'^adduct(?:=|:)(.*)$',
        r'^ADDUCTIONNAME(?:=|:)(.*)$',
    ],
    'num_peaks': [r'^Num.*Peaks(?:=|:)\s*(\d*)$'],
    'retention_time': [
        r'^RETENTION.*TIME(?:=|:)\s*(.*)$',
        r'^rt(?:=|:)\s*(.*)$',
        r'^time(?:=|:)\s*(.*)$',
    ],
    # From example winter_pos.mspy from kristian
    'AlignmentID': [r'^AlignmentID(?:=|:)\s*(.*)$'],
    # Flag for standard MSP format
    'msp': [r'^Name(?:=|:)(.*)$'],
}

regex_massbank = {
    'name': [r'^RECORD_TITLE:(.*)$'],
    'polarity': [r'^AC\$MASS_SPECTROMETRY:\s+ION_MODE\s+(.*)$'],
    'precursor_mz': [
        r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$',
    ],
    'precursor_type': [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$'],
    'num_peaks': [r'^PK\$NUM_PEAK:\s+(\d*)'],
    'retention_time': [
        r'^AC\$CHROMATOGRAPHY:\s+RETENTION_TIME\s*(\d*\.?\d+).*',
    ],
    'cols': [r'^PK\$PEAK:\s+(.*)'],
    # Flag for massbank
    'massbank': [r'^RECORD_TITLE:(.*)$'],
}
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
99
2
856b3761277d "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 3e3dee9a853b6133cf089b3c063f53c52b39463d"
computational-metabolomics
parents: 0
diff changeset
100
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
# Pick the metadata regex table that matches the requested input schema.
if args.schema == 'msp':
    meta_regex = regex_msp
elif args.schema == 'massbank':
    meta_regex = regex_massbank
elif args.schema == 'auto':
    # If auto we just check for all the available parameter names
    # and then determine if MassBank or MSP based on
    # the name parameter.
    #
    # Copy every pattern list first: a plain dict.update() would share the
    # list objects, so the extend() calls below would silently append the
    # MSP patterns to regex_massbank as well.
    meta_regex = {
        field: list(patterns) for field, patterns in regex_massbank.items()
    }
    for _field in ('name', 'polarity', 'precursor_mz', 'precursor_type',
                   'num_peaks'):
        meta_regex[_field].extend(regex_msp[_field])
    meta_regex['msp'] = regex_msp['msp']
else:
    # Fail with a clear message instead of the NameError that an undefined
    # meta_regex would otherwise trigger just below.
    raise ValueError(
        "Unknown schema %r (expected 'msp', 'massbank' or 'auto')"
        % (args.schema,)
    )

print(meta_regex)

# this dictionary will store the meta data results from the MSP file
meta_info = {}
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
122
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
123
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
124 # function to extract the meta data using the regular expressions
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
def parse_meta(meta_regex, meta_info=None, text=None):
    """Extract metadata fields from one line of text.

    Every pattern in ``meta_regex`` is tried (case-insensitively) against
    the text.  For each match, the captured groups are joined with '-',
    stripped of surrounding whitespace and stored under the pattern's key.
    When several patterns of the same key match, the last one wins.

    :param meta_regex: dict mapping a field name to a list of regex strings
    :param meta_info: dict to update in place; a new dict is created when
        omitted
    :param text: the text to search.  When omitted, the module-level
        variable ``line`` is used, which keeps existing two-argument calls
        working unchanged.
    :return: ``meta_info`` (the same object that was passed in, if any)
    """
    if meta_info is None:
        meta_info = {}
    if text is None:
        # Backward-compatible fallback: read the module-level ``line``
        # (no local of that name exists in this function).
        text = line
    for k, regexes in meta_regex.items():
        for reg in regexes:
            m = re.search(reg, text, re.IGNORECASE)
            if m:
                meta_info[k] = '-'.join(m.groups()).strip()
    return meta_info
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
134
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
135
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
136 ######################################################################
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
137 # Setup parameter dictionary
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
138 ######################################################################
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
def init_paramd(args):
    """Build the initial parameter dictionary from the parsed arguments.

    The result holds a "cli" sub-dictionary with the database, profile,
    candidates and ppm-max options, plus a "default_ion" entry derived
    from ``args.polarity`` ("[M+H]+" for 'positive', "[M-H]-" for
    'negative', otherwise an empty string).

    :param args: object exposing ``database``, ``profile``, ``candidates``,
        ``ppm_max`` and ``polarity`` attributes
    :return: the parameter dictionary
    """
    cli_options = {
        "--database": args.database,
        "--profile": args.profile,
        "--candidates": args.candidates,
        "--ppm-max": args.ppm_max,
    }

    # First polarity that compares equal decides the ion; '' when none does.
    polarity_ions = (('positive', "[M+H]+"), ('negative', "[M-H]-"))
    default_ion = next(
        (ion for polarity, ion in polarity_ions if args.polarity == polarity),
        '')

    paramd = defaultdict()
    paramd["cli"] = cli_options
    paramd["default_ion"] = default_ion
    return paramd
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
154
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
155
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
156 ######################################################################
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
157 # Function to run sirius once all meta data and spectra have been obtained
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
158 ######################################################################
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
def run_sirius(meta_info, peaklist, args, wd, spectrac):
    """Prepare the SIRIUS input and command for one spectrum.

    Writes the peak list to ``<wd>/<spectrac>_tmpspec.txt``, builds the
    SIRIUS command line, records the parameters in the module-level
    ``paramds`` dictionary, and runs the command immediately when
    ``args.cores_top_level`` is 1 and enough MS/MS peaks were read.

    NOTE: this function reads ``plinesread`` and writes ``paramds``;
    neither is defined here, so both are expected to exist at module
    level before it is called.

    :param meta_info: meta data extracted for the spectrum; the 'msp',
        'massbank' and 'cols' entries are ignored
    :param peaklist: iterable of (mz, intensity) pairs of strings
    :param args: parsed arguments (uses ``meta_select_col``,
        ``min_MSMS_peaks``, ``cores_top_level`` and everything
        ``init_paramd`` needs)
    :param wd: working directory for the temporary input and the output
    :param spectrac: index of the spectrum, used as a unique id
    :return: tuple ``(paramd, cmd)`` of the parameter dictionary and the
        shell command string
    :raises KeyError: if ``args.meta_select_col`` is 'name' or
        'name_split' and ``meta_info`` has no 'name' entry
    """
    # Local import so this function brings its own quoting helper.
    import shlex

    # Get sample details (if possible to extract) e.g. if created as part of
    # the msPurity pipeline) choose between getting additional details to
    # add as columns as either all meta data from msp, just details from the
    # record name (i.e. when using msPurity and we have the columns
    # coded into the name) or just the spectra index (spectrac)
    paramd = init_paramd(args)
    meta_info = {k: v for k, v in meta_info.items()
                 if k not in ('msp', 'massbank', 'cols')}

    if args.meta_select_col == 'name':
        # have additional column of just the name
        paramd['additional_details'] = {'name': meta_info['name']}
    elif args.meta_select_col == 'name_split':
        # have additional columns split by "|" and
        # then on ":" e.g. MZ:100.2 | RT:20 | xcms_grp_id:1
        details = {}
        for segment in meta_info['name'].split("|"):
            fields = segment.split(":")
            if len(fields) < 2:
                # No "key:value" pair in this segment; skip it instead of
                # failing with an IndexError.
                continue
            details[fields[0].strip()] = fields[1].strip()
        paramd['additional_details'] = details
    elif args.meta_select_col == 'all':
        # have additional columns based on all
        # the meta information extracted from the MSP
        paramd['additional_details'] = meta_info
    else:
        # Just have an index of the spectra in the MSP file
        paramd['additional_details'] = {'spectra_idx': spectrac}

    sample_name = "{}_sirius_result".format(spectrac)
    paramd["SampleName"] = sample_name
    paramd["cli"]["--output"] = os.path.join(wd, sample_name)

    # =============== Output peaks to txt file ==============================
    paramd["cli"]["--ms2"] = os.path.join(wd,
                                          "{}_tmpspec.txt".format(spectrac))

    # write spec file
    with open(paramd["cli"]["--ms2"], 'w') as outfile:
        outfile.writelines(p[0] + "\t" + p[1] + "\n" for p in peaklist)

    # =============== Update param based on MSP metadata ======================
    # Replace param details with details from MSP if required.
    # ``adduct`` starts empty so it is always bound, even when neither the
    # record nor the polarity provides an ion.
    adduct = ''
    if meta_info.get('precursor_type'):
        adduct = meta_info['precursor_type']
        paramd["cli"]["--ion"] = adduct
    elif paramd["default_ion"]:
        adduct = paramd["default_ion"]
        paramd["cli"]["--adduct"] = adduct
    else:
        paramd["cli"]["--auto-charge"] = ''

    precursor = meta_info.get('precursor_mz')
    if precursor:
        paramd["cli"]["--precursor"] = precursor

    details = paramd['additional_details']
    if 'precursor_type' not in details and 'adduct' not in details:
        # If possible always good to have the adduct in output as a column
        # (empty when no adduct could be determined)
        details['adduct'] = adduct

    # ============== Create CLI cmd for sirius ================================
    cli = paramd["cli"]
    cmd_parts = ["sirius", "--no-citations", "--ms2", cli["--ms2"]]
    # Only pass the options we actually have a value for.
    if adduct:
        cmd_parts += ["--adduct", adduct]
    if precursor:
        cmd_parts += ["--precursor", precursor]
    cmd_parts += [
        "-o", cli["--output"],
        "formula", "-c", cli["--candidates"],
        "--ppm-max", cli["--ppm-max"],
        "--profile", cli["--profile"],
        "structure", "--database", cli["--database"],
        "canopus",
    ]
    # The command is executed through a shell (os.system), so every token is
    # quoted: adducts such as "[M+H]+" are glob patterns, and values parsed
    # from the input file must not be interpreted by the shell.
    cmd = " ".join(shlex.quote(str(part)) for part in cmd_parts)
    print(cmd)
    paramds[paramd["SampleName"]] = paramd

    # =============== Run sirius =============================================
    # Filter before process with a minimum number of MS/MS peaks
    if plinesread >= float(args.min_MSMS_peaks):
        if int(args.cores_top_level) == 1:
            os.system(cmd)

    return paramd, cmd
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
244
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
245
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
def work(cmds):
    """Run each shell command in turn and collect the results.

    :param cmds: iterable of shell command strings
    :return: list with the ``os.system`` return value of every command,
        in the same order as ``cmds``
    """
    return list(map(os.system, cmds))
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
248
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
249
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
######################################################################
# Parse MSP file and run SIRIUS CLI
######################################################################
def _queue_sirius_runs(meta_info, peaklist, spectrac):
    """Prepare the SIRIUS run(s) for one completely read spectrum.

    When adducts were supplied on the command line, one run is prepared per
    adduct and ``meta_info['precursor_type']`` is overwritten with that
    adduct before each call; otherwise a single run is prepared using the
    metadata as parsed. Every run is registered in the module-level
    ``paramds`` (keyed by its "SampleName") and its command is appended to
    the module-level ``cmds``.

    :param meta_info: metadata dictionary of the spectrum (mutated when
        command line adducts are used)
    :param peaklist: list of (m/z, intensity) tuples of the spectrum
    :param spectrac: running spectra counter before this spectrum
    :return: tuple of the updated spectra counter and the parameter
        dictionary of the last run that was prepared
    """
    adduct_overrides = adducts_from_cli if adducts_from_cli else [None]
    for adduct in adduct_overrides:
        spectrac += 1
        if adducts_from_cli:
            print(adduct)
            meta_info['precursor_type'] = adduct
        paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac)
        paramds[paramd["SampleName"]] = paramd
        cmds.append(cmd)
    return spectrac, paramd


# keep list of commands if performing in CLI in parallel
cmds = []
# keep a dictionary of all params
paramds = {}
# keep count of spectra (for uid)
spectrac = 0

with open(args.input_pth, "r") as infile:
    # number of lines for the peaks
    pnumlines = 0
    # number of lines read for the peaks
    plinesread = 0
    for line in infile:

        line = line.strip()

        if pnumlines == 0:
            # ============== Extract metadata from MSP ========================
            # NOTE(review): parse_meta is not handed the current line, so it
            # presumably reads the module-level `line`; keep that name bound
            # to the stripped text at this point.
            meta_info = parse_meta(meta_regex, meta_info)

            if ('massbank' in meta_info and 'cols' in meta_info) or \
                    ('msp' in meta_info and 'num_peaks' in meta_info):
                pnumlines = int(meta_info['num_peaks'])
                peaklist = []
                plinesread = 0

        elif plinesread < pnumlines:
            # =============== Extract peaks from MSP ==========================
            # .split() will split on any empty space (i.e. tab and space)
            fields = line.split()
            # Keep only m/z and intensity, not relative intensity
            peaklist.append((fields[0], fields[1]))
            plinesread += 1

        elif plinesread and plinesread == pnumlines:
            # ======= Get sample name and additional details for output =======
            # All peaks of the spectrum were read on the previous lines; the
            # current line is consumed as the record separator.
            spectrac, paramd = _queue_sirius_runs(meta_info, peaklist,
                                                  spectrac)

            meta_info = {}
            pnumlines = 0
            plinesread = 0

# end of file. Check if there is a MSP spectra to
# run SIRIUS on still
if plinesread and plinesread == pnumlines:
    spectrac, paramd = _queue_sirius_runs(meta_info, peaklist, spectrac)

# Perform multiprocessing on command line call level
n_cores = int(args.cores_top_level)
if n_cores > 1:
    chunk_size = int(args.chunks)
    cmds_chunks = [cmds[x:x + chunk_size]
                   for x in range(0, len(cmds), chunk_size)]
    pool = multiprocessing.Pool(processes=n_cores)
    try:
        pool.map(work, cmds_chunks)
    finally:
        # always release the worker processes, even if a chunk raised
        pool.close()
        pool.join()
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
344
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
345
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
346 ######################################################################
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
347 # Concatenate and filter the output
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
348 ######################################################################
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
# Outputs might have different headers. Need to get a list of all the headers
# before we start merging the files.
# NOTE(review): the commented-out statement below looks like a leftover
# (it globs for "*_metfrag_result.csv") - confirm whether it can be removed.
# outfiles = [os.path.join(wd, f) for f in
#             glob.glob(os.path.join(wd, "*_metfrag_result.csv"))]
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
352 def concat_output(filename, result_pth,
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
353 rank_filter, confidence_filter, backwards_compatible):
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
354 outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename)))
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
355
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
356 # sort files nicely
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
357 outfiles.sort(key=lambda s: int(re.match(r'^.*/('
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
358 r'\d+).*{}'.format(filename),
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
359 s).group(1)))
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
360 print(outfiles)
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
361
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
362 if len(outfiles) == 0:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
363 print('No results')
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
364 sys.exit()
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
365
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
366 headers = []
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
367
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
368 for fn in outfiles:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
369 with open(fn, 'r') as infile:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
370 reader = csv.reader(infile, delimiter='\t')
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
371 if sys.version_info >= (3, 0):
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
372 headers.extend(next(reader))
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
373 else:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
374 headers.extend(reader.next())
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
375 break
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
376
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
377 headers = list(paramd['additional_details'].keys()) + headers
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
378
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
379 with open(result_pth, 'a') as merged_outfile:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
380 dwriter = csv.DictWriter(merged_outfile,
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
381 fieldnames=headers, delimiter='\t')
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
382 dwriter.writeheader()
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
383
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
384 for fn in sorted(outfiles):
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
385 print(fn)
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
386
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
387 with open(fn) as infile:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
388 reader = csv.DictReader(infile, delimiter='\t')
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
389
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
390 ad = paramds[fn.split(os.sep)[-2]]['additional_details']
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
391
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
392 for line in reader:
4
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
393 if 'rank' in line \
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
394 and 0 < int(rank_filter) < int(line['rank']):
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
395 # filter out annotations whose rank is greater than rank_filter
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
396 # If rank_filter is zero, no rank filtering is applied
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
397 continue
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
398
4
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
399 if ('ConfidenceScore' in line
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
400 and 0 < float(confidence_filter)
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
401 and float(line['ConfidenceScore']) <
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
402 float(confidence_filter)):
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
403 # filter out those annotations that are less than
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
404 # the confidence filter value
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
405 continue
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
406 line.update(ad)
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
407
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
408 dwriter.writerow(line)
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
409
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
410 if backwards_compatible:
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
411 # Headers required in this format for tools that used
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
412 # v4.9.3 of SIRIUS-CSI:FingerID
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
413 s1 = "sed 's/InChIkey2D/inchikey2d/g' {r} > {r}".format(r=result_pth)
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
414 os.system(s1)
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
415 s2 = "sed 's/CSI:FingerIDScore/Score/' {r} > {r}".format(r=result_pth)
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
416 os.system(s2)
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
417
0
9e6bf7278257 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
computational-metabolomics
parents:
diff changeset
418
4
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
# Concatenate the 'compound_identifications.tsv' results into
# args.annotations_result_pth, forwarding the user-supplied rank filter,
# confidence filter and backwards-compatibility flag.
# NOTE(review): assumes the positional order matches the names used inside
# concat_output (result_pth, rank_filter, confidence_filter,
# backwards_compatible) -- confirm against its signature earlier in the file.
419 concat_output('compound_identifications.tsv',
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
420 args.annotations_result_pth,
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
421 args.rank_filter,
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
422 args.confidence_filter,
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
423 args.backwards_compatible)
4
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
# Same concatenation for 'canopus_summary.tsv' into args.canopus_result_pth.
# The two zeros leave the rank and confidence filters inactive (both checks
# in concat_output only apply when the filter value is > 0), and False skips
# the backwards-compatible header renaming -- same positional-order
# assumption as for the call above.
424 concat_output('canopus_summary.tsv',
8fb51147d15e "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
computational-metabolomics
parents: 3
diff changeset
425 args.canopus_result_pth,
3
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
426 0,
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
427 0,
4cbfd3d0a4c4 "planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
computational-metabolomics
parents: 2
diff changeset
428 False)