comparison sirius_csifingerid.py @ 9:8dac4cef0a22 draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit c2fc3de462ef4bd1a225c41624b094c3b5874736"
author computational-metabolomics
date Wed, 16 Feb 2022 16:11:55 +0000
parents e29e64ff50bb
children
comparison
equal deleted inserted replaced
8:7b9c17738db8 9:8dac4cef0a22
12 12
13 parser = argparse.ArgumentParser() 13 parser = argparse.ArgumentParser()
14 parser.add_argument('--input_pth') 14 parser.add_argument('--input_pth')
15 parser.add_argument('--canopus_result_pth') 15 parser.add_argument('--canopus_result_pth')
16 parser.add_argument('--annotations_result_pth') 16 parser.add_argument('--annotations_result_pth')
17 parser.add_argument('--all_structures_result_pth')
17 parser.add_argument('--database') 18 parser.add_argument('--database')
18 parser.add_argument('--profile') 19 parser.add_argument('--profile')
19 parser.add_argument('--candidates') 20 parser.add_argument('--candidates')
20 parser.add_argument('--ppm_max') 21 parser.add_argument('--ppm_max')
21 parser.add_argument('--polarity') 22 parser.add_argument('--polarity')
23 parser.add_argument('--out_dir') 24 parser.add_argument('--out_dir')
24 parser.add_argument('--tool_directory') 25 parser.add_argument('--tool_directory')
25 parser.add_argument('--temp_dir') 26 parser.add_argument('--temp_dir')
26 parser.add_argument('--meta_select_col', default='all') 27 parser.add_argument('--meta_select_col', default='all')
27 parser.add_argument('--cores_top_level', default=1) 28 parser.add_argument('--cores_top_level', default=1)
29 parser.add_argument('--cores_sirius', default=4)
28 parser.add_argument('--chunks', default=1) 30 parser.add_argument('--chunks', default=1)
29 parser.add_argument('--min_MSMS_peaks', default=1) 31 parser.add_argument('--min_MSMS_peaks', default=1)
30 parser.add_argument('--rank_filter', default=0)
31 parser.add_argument('--confidence_filter', default=0)
32 parser.add_argument('--schema', default='msp') 32 parser.add_argument('--schema', default='msp')
33 parser.add_argument('-a', '--adducts', action='append', nargs=1, 33 parser.add_argument('-a', '--adducts', action='append', nargs=1,
34 required=False, default=[], help='Adducts used') 34 required=False, default=[], help='Adducts used')
35 35
36 args = parser.parse_args() 36 args = parser.parse_args()
135 # Setup parameter dictionary 135 # Setup parameter dictionary
136 ###################################################################### 136 ######################################################################
137 def init_paramd(args): 137 def init_paramd(args):
138 paramd = defaultdict() 138 paramd = defaultdict()
139 paramd["cli"] = {} 139 paramd["cli"] = {}
140 paramd["cli"]["--cores"] = args.cores_sirius
140 paramd["cli"]["--database"] = args.database 141 paramd["cli"]["--database"] = args.database
141 paramd["cli"]["--profile"] = args.profile 142 paramd["cli"]["--profile"] = args.profile
142 paramd["cli"]["--candidates"] = args.candidates 143 paramd["cli"]["--candidates"] = args.candidates
143 paramd["cli"]["--ppm-max"] = args.ppm_max 144 paramd["cli"]["--ppm-max"] = args.ppm_max
144 if args.polarity == 'positive': 145 if args.polarity == 'positive':
214 in paramd['additional_details']): 215 in paramd['additional_details']):
215 # If possible always good to have the adduct in output as a column 216 # If possible always good to have the adduct in output as a column
216 paramd['additional_details']['adduct'] = adduct 217 paramd['additional_details']['adduct'] = adduct
217 218
218 # ============== Create CLI cmd for metfrag =============================== 219 # ============== Create CLI cmd for metfrag ===============================
219 cmd = "sirius --no-citations --ms2 {} --adduct {} --precursor {} -o {} " \ 220 cmd = "sirius --cores {} --no-citations --ms2 {} --adduct {} " \
221 "--precursor {} -o {} " \
220 "formula -c {} --ppm-max {} --profile {} " \ 222 "formula -c {} --ppm-max {} --profile {} " \
221 "structure --database {} canopus".format( 223 "structure --database {} canopus".format(
224 paramd["cli"]["--cores"],
222 paramd["cli"]["--ms2"], 225 paramd["cli"]["--ms2"],
223 adduct, 226 adduct,
224 paramd["cli"]["--precursor"], 227 paramd["cli"]["--precursor"],
225 paramd["cli"]["--output"], 228 paramd["cli"]["--output"],
226 paramd["cli"]["--candidates"], 229 paramd["cli"]["--candidates"],
345 # Concatenate and filter the output 348 # Concatenate and filter the output
346 ###################################################################### 349 ######################################################################
347 # outputs might have different headers. Need to get a list of all the headers 350 # outputs might have different headers. Need to get a list of all the headers
348 # before we start merging the files outfiles = [os.path.join(wd, f) for f in 351 # before we start merging the files outfiles = [os.path.join(wd, f) for f in
349 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] 352 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))]
350 def concat_output(filename, result_pth, 353 def concat_output(wd, filename, result_pth, level=2):
351 rank_filter, confidence_filter): 354
352 outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename))) 355 if level == 2:
353 356 outfiles = glob.glob(os.path.join(wd, '*', filename))
354 # sort files nicely 357 else:
355 outfiles.sort(key=lambda s: int(re.match(r'^.*/(' 358 outfiles = glob.glob(os.path.join(wd, '*', '*', filename))
356 r'\d+).*{}'.format(filename), 359
357 s).group(1))) 360 outfiles.sort(
361 key=lambda s: int(
362 re.match(r'.*/([0-9]+).*/{}$'.format(filename),
363 s).group(1)))
358 print(outfiles) 364 print(outfiles)
359
360 if len(outfiles) == 0: 365 if len(outfiles) == 0:
361 print('No results') 366 print('No results')
362 sys.exit() 367 sys.exit()
363 368
364 headers = [] 369 headers = []
383 print(fn) 388 print(fn)
384 389
385 with open(fn) as infile: 390 with open(fn) as infile:
386 reader = csv.DictReader(infile, delimiter='\t') 391 reader = csv.DictReader(infile, delimiter='\t')
387 392
388 ad = paramds[fn.split(os.sep)[-2]]['additional_details'] 393 ad = paramds[fn.split(os.sep)[-level]]['additional_details']
389 394
390 for line in reader: 395 for line in reader:
391 if 'rank' in line \ 396
392 and 0 < int(rank_filter) < int(line['rank']):
393 # filter out those annotations greater than rank filter
394 # If rank_filter is zero then skip
395 continue
396
397 if 'ConfidenceScore' in line:
398 if isinstance(line['ConfidenceScore'], str):
399 # Value is NA or N/A
400 continue
401
402 if (0 < float(confidence_filter)
403 and float(line['ConfidenceScore'])
404 < float(confidence_filter)):
405 # filter out those annotations that are less than
406 # the confidence filter value
407 continue
408 line.update(ad) 397 line.update(ad)
409 398
410 dwriter.writerow(line) 399 dwriter.writerow(line)
411 400
412 401
413 concat_output('compound_identifications.tsv', 402 concat_output(wd,
414 args.annotations_result_pth, 403 'compound_identifications.tsv',
415 args.rank_filter, 404 args.annotations_result_pth)
416 args.confidence_filter) 405
417 concat_output('canopus_summary.tsv', 406 concat_output(wd,
418 args.canopus_result_pth, 407 'canopus_summary.tsv',
419 0, 408 args.canopus_result_pth)
420 0) 409
410 concat_output(wd,
411 'structure_candidates.tsv',
412 args.all_structures_result_pth,
413 level=3)