Mercurial > repos > computational-metabolomics > sirius_csifingerid
comparison sirius_csifingerid.py @ 9:8dac4cef0a22 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit c2fc3de462ef4bd1a225c41624b094c3b5874736"
author | computational-metabolomics |
---|---|
date | Wed, 16 Feb 2022 16:11:55 +0000 |
parents | e29e64ff50bb |
children |
comparison
equal
deleted
inserted
replaced
8:7b9c17738db8 | 9:8dac4cef0a22 |
---|---|
12 | 12 |
13 parser = argparse.ArgumentParser() | 13 parser = argparse.ArgumentParser() |
14 parser.add_argument('--input_pth') | 14 parser.add_argument('--input_pth') |
15 parser.add_argument('--canopus_result_pth') | 15 parser.add_argument('--canopus_result_pth') |
16 parser.add_argument('--annotations_result_pth') | 16 parser.add_argument('--annotations_result_pth') |
| | 17 parser.add_argument('--all_structures_result_pth') |
17 parser.add_argument('--database') | 18 parser.add_argument('--database') |
18 parser.add_argument('--profile') | 19 parser.add_argument('--profile') |
19 parser.add_argument('--candidates') | 20 parser.add_argument('--candidates') |
20 parser.add_argument('--ppm_max') | 21 parser.add_argument('--ppm_max') |
21 parser.add_argument('--polarity') | 22 parser.add_argument('--polarity') |
23 parser.add_argument('--out_dir') | 24 parser.add_argument('--out_dir') |
24 parser.add_argument('--tool_directory') | 25 parser.add_argument('--tool_directory') |
25 parser.add_argument('--temp_dir') | 26 parser.add_argument('--temp_dir') |
26 parser.add_argument('--meta_select_col', default='all') | 27 parser.add_argument('--meta_select_col', default='all') |
27 parser.add_argument('--cores_top_level', default=1) | 28 parser.add_argument('--cores_top_level', default=1) |
| | 29 parser.add_argument('--cores_sirius', default=4) |
28 parser.add_argument('--chunks', default=1) | 30 parser.add_argument('--chunks', default=1) |
29 parser.add_argument('--min_MSMS_peaks', default=1) | 31 parser.add_argument('--min_MSMS_peaks', default=1) |
30 parser.add_argument('--rank_filter', default=0) | |
31 parser.add_argument('--confidence_filter', default=0) | |
32 parser.add_argument('--schema', default='msp') | 32 parser.add_argument('--schema', default='msp') |
33 parser.add_argument('-a', '--adducts', action='append', nargs=1, | 33 parser.add_argument('-a', '--adducts', action='append', nargs=1, |
34 required=False, default=[], help='Adducts used') | 34 required=False, default=[], help='Adducts used') |
35 | 35 |
36 args = parser.parse_args() | 36 args = parser.parse_args() |
135 # Setup parameter dictionary | 135 # Setup parameter dictionary |
136 ###################################################################### | 136 ###################################################################### |
137 def init_paramd(args): | 137 def init_paramd(args): |
138 paramd = defaultdict() | 138 paramd = defaultdict() |
139 paramd["cli"] = {} | 139 paramd["cli"] = {} |
| | 140 paramd["cli"]["--cores"] = args.cores_sirius |
140 paramd["cli"]["--database"] = args.database | 141 paramd["cli"]["--database"] = args.database |
141 paramd["cli"]["--profile"] = args.profile | 142 paramd["cli"]["--profile"] = args.profile |
142 paramd["cli"]["--candidates"] = args.candidates | 143 paramd["cli"]["--candidates"] = args.candidates |
143 paramd["cli"]["--ppm-max"] = args.ppm_max | 144 paramd["cli"]["--ppm-max"] = args.ppm_max |
144 if args.polarity == 'positive': | 145 if args.polarity == 'positive': |
214 in paramd['additional_details']): | 215 in paramd['additional_details']): |
215 # If possible always good to have the adduct in output as a column | 216 # If possible always good to have the adduct in output as a column |
216 paramd['additional_details']['adduct'] = adduct | 217 paramd['additional_details']['adduct'] = adduct |
217 | 218 |
218 # ============== Create CLI cmd for metfrag =============================== | 219 # ============== Create CLI cmd for metfrag =============================== |
219 cmd = "sirius --no-citations --ms2 {} --adduct {} --precursor {} -o {} " \ | 220 cmd = "sirius --cores {} --no-citations --ms2 {} --adduct {} " \ |
| | 221 "--precursor {} -o {} " \ |
220 "formula -c {} --ppm-max {} --profile {} " \ | 222 "formula -c {} --ppm-max {} --profile {} " \ |
221 "structure --database {} canopus".format( | 223 "structure --database {} canopus".format( |
| | 224 paramd["cli"]["--cores"], |
222 paramd["cli"]["--ms2"], | 225 paramd["cli"]["--ms2"], |
223 adduct, | 226 adduct, |
224 paramd["cli"]["--precursor"], | 227 paramd["cli"]["--precursor"], |
225 paramd["cli"]["--output"], | 228 paramd["cli"]["--output"], |
226 paramd["cli"]["--candidates"], | 229 paramd["cli"]["--candidates"], |
345 # Concatenate and filter the output | 348 # Concatenate and filter the output |
346 ###################################################################### | 349 ###################################################################### |
347 # outputs might have different headers. Need to get a list of all the headers | 350 # outputs might have different headers. Need to get a list of all the headers |
348 # before we start merging the files outfiles = [os.path.join(wd, f) for f in | 351 # before we start merging the files outfiles = [os.path.join(wd, f) for f in |
349 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] | 352 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] |
350 def concat_output(filename, result_pth, | 353 def concat_output(wd, filename, result_pth, level=2): |
351 rank_filter, confidence_filter): | 354 |
352 outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename))) | 355 if level == 2: |
353 | 356 outfiles = glob.glob(os.path.join(wd, '*', filename)) |
354 # sort files nicely | 357 else: |
355 outfiles.sort(key=lambda s: int(re.match(r'^.*/(' | 358 outfiles = glob.glob(os.path.join(wd, '*', '*', filename)) |
356 r'\d+).*{}'.format(filename), | 359 |
357 s).group(1))) | 360 outfiles.sort( |
| | 361 key=lambda s: int( |
| | 362 re.match(r'.*/([0-9]+).*/{}$'.format(filename), |
| | 363 s).group(1))) |
358 print(outfiles) | 364 print(outfiles) |
359 | |
360 if len(outfiles) == 0: | 365 if len(outfiles) == 0: |
361 print('No results') | 366 print('No results') |
362 sys.exit() | 367 sys.exit() |
363 | 368 |
364 headers = [] | 369 headers = [] |
383 print(fn) | 388 print(fn) |
384 | 389 |
385 with open(fn) as infile: | 390 with open(fn) as infile: |
386 reader = csv.DictReader(infile, delimiter='\t') | 391 reader = csv.DictReader(infile, delimiter='\t') |
387 | 392 |
388 ad = paramds[fn.split(os.sep)[-2]]['additional_details'] | 393 ad = paramds[fn.split(os.sep)[-level]]['additional_details'] |
389 | 394 |
390 for line in reader: | 395 for line in reader: |
391 if 'rank' in line \ | 396 |
392 and 0 < int(rank_filter) < int(line['rank']): | |
393 # filter out those annotations greater than rank filter | |
394 # If rank_filter is zero then skip | |
395 continue | |
396 | |
397 if 'ConfidenceScore' in line: | |
398 if isinstance(line['ConfidenceScore'], str): | |
399 # Value is NA or N/A | |
400 continue | |
401 | |
402 if (0 < float(confidence_filter) | |
403 and float(line['ConfidenceScore']) | |
404 < float(confidence_filter)): | |
405 # filter out those annotations that are less than | |
406 # the confidence filter value | |
407 continue | |
408 line.update(ad) | 397 line.update(ad) |
409 | 398 |
410 dwriter.writerow(line) | 399 dwriter.writerow(line) |
411 | 400 |
412 | 401 |
413 concat_output('compound_identifications.tsv', | 402 concat_output(wd, |
414 args.annotations_result_pth, | 403 'compound_identifications.tsv', |
415 args.rank_filter, | 404 args.annotations_result_pth) |
416 args.confidence_filter) | 405 |
417 concat_output('canopus_summary.tsv', | 406 concat_output(wd, |
418 args.canopus_result_pth, | 407 'canopus_summary.tsv', |
419 0, | 408 args.canopus_result_pth) |
420 0) | 409 |
| | 410 concat_output(wd, |
| | 411 'structure_candidates.tsv', |
| | 412 args.all_structures_result_pth, |
| | 413 level=3) |