Mercurial > repos > computational-metabolomics > sirius_csifingerid
comparison sirius_csifingerid.py @ 9:8dac4cef0a22 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit c2fc3de462ef4bd1a225c41624b094c3b5874736"
| author | computational-metabolomics |
|---|---|
| date | Wed, 16 Feb 2022 16:11:55 +0000 |
| parents | e29e64ff50bb |
| children |
comparison
equal
deleted
inserted
replaced
| 8:7b9c17738db8 | 9:8dac4cef0a22 |
|---|---|
| 12 | 12 |
| 13 parser = argparse.ArgumentParser() | 13 parser = argparse.ArgumentParser() |
| 14 parser.add_argument('--input_pth') | 14 parser.add_argument('--input_pth') |
| 15 parser.add_argument('--canopus_result_pth') | 15 parser.add_argument('--canopus_result_pth') |
| 16 parser.add_argument('--annotations_result_pth') | 16 parser.add_argument('--annotations_result_pth') |
| 17 parser.add_argument('--all_structures_result_pth') | |
| 17 parser.add_argument('--database') | 18 parser.add_argument('--database') |
| 18 parser.add_argument('--profile') | 19 parser.add_argument('--profile') |
| 19 parser.add_argument('--candidates') | 20 parser.add_argument('--candidates') |
| 20 parser.add_argument('--ppm_max') | 21 parser.add_argument('--ppm_max') |
| 21 parser.add_argument('--polarity') | 22 parser.add_argument('--polarity') |
| 23 parser.add_argument('--out_dir') | 24 parser.add_argument('--out_dir') |
| 24 parser.add_argument('--tool_directory') | 25 parser.add_argument('--tool_directory') |
| 25 parser.add_argument('--temp_dir') | 26 parser.add_argument('--temp_dir') |
| 26 parser.add_argument('--meta_select_col', default='all') | 27 parser.add_argument('--meta_select_col', default='all') |
| 27 parser.add_argument('--cores_top_level', default=1) | 28 parser.add_argument('--cores_top_level', default=1) |
| 29 parser.add_argument('--cores_sirius', default=4) | |
| 28 parser.add_argument('--chunks', default=1) | 30 parser.add_argument('--chunks', default=1) |
| 29 parser.add_argument('--min_MSMS_peaks', default=1) | 31 parser.add_argument('--min_MSMS_peaks', default=1) |
| 30 parser.add_argument('--rank_filter', default=0) | |
| 31 parser.add_argument('--confidence_filter', default=0) | |
| 32 parser.add_argument('--schema', default='msp') | 32 parser.add_argument('--schema', default='msp') |
| 33 parser.add_argument('-a', '--adducts', action='append', nargs=1, | 33 parser.add_argument('-a', '--adducts', action='append', nargs=1, |
| 34 required=False, default=[], help='Adducts used') | 34 required=False, default=[], help='Adducts used') |
| 35 | 35 |
| 36 args = parser.parse_args() | 36 args = parser.parse_args() |
| 135 # Setup parameter dictionary | 135 # Setup parameter dictionary |
| 136 ###################################################################### | 136 ###################################################################### |
| 137 def init_paramd(args): | 137 def init_paramd(args): |
| 138 paramd = defaultdict() | 138 paramd = defaultdict() |
| 139 paramd["cli"] = {} | 139 paramd["cli"] = {} |
| 140 paramd["cli"]["--cores"] = args.cores_sirius | |
| 140 paramd["cli"]["--database"] = args.database | 141 paramd["cli"]["--database"] = args.database |
| 141 paramd["cli"]["--profile"] = args.profile | 142 paramd["cli"]["--profile"] = args.profile |
| 142 paramd["cli"]["--candidates"] = args.candidates | 143 paramd["cli"]["--candidates"] = args.candidates |
| 143 paramd["cli"]["--ppm-max"] = args.ppm_max | 144 paramd["cli"]["--ppm-max"] = args.ppm_max |
| 144 if args.polarity == 'positive': | 145 if args.polarity == 'positive': |
| 214 in paramd['additional_details']): | 215 in paramd['additional_details']): |
| 215 # If possible always good to have the adduct in output as a column | 216 # If possible always good to have the adduct in output as a column |
| 216 paramd['additional_details']['adduct'] = adduct | 217 paramd['additional_details']['adduct'] = adduct |
| 217 | 218 |
| 218 # ============== Create CLI cmd for metfrag =============================== | 219 # ============== Create CLI cmd for metfrag =============================== |
| 219 cmd = "sirius --no-citations --ms2 {} --adduct {} --precursor {} -o {} " \ | 220 cmd = "sirius --cores {} --no-citations --ms2 {} --adduct {} " \ |
| 221 "--precursor {} -o {} " \ | |
| 220 "formula -c {} --ppm-max {} --profile {} " \ | 222 "formula -c {} --ppm-max {} --profile {} " \ |
| 221 "structure --database {} canopus".format( | 223 "structure --database {} canopus".format( |
| 224 paramd["cli"]["--cores"], | |
| 222 paramd["cli"]["--ms2"], | 225 paramd["cli"]["--ms2"], |
| 223 adduct, | 226 adduct, |
| 224 paramd["cli"]["--precursor"], | 227 paramd["cli"]["--precursor"], |
| 225 paramd["cli"]["--output"], | 228 paramd["cli"]["--output"], |
| 226 paramd["cli"]["--candidates"], | 229 paramd["cli"]["--candidates"], |
| 345 # Concatenate and filter the output | 348 # Concatenate and filter the output |
| 346 ###################################################################### | 349 ###################################################################### |
| 347 # outputs might have different headers. Need to get a list of all the headers | 350 # outputs might have different headers. Need to get a list of all the headers |
| 348 # before we start merging the files outfiles = [os.path.join(wd, f) for f in | 351 # before we start merging the files outfiles = [os.path.join(wd, f) for f in |
| 349 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] | 352 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] |
| 350 def concat_output(filename, result_pth, | 353 def concat_output(wd, filename, result_pth, level=2): |
| 351 rank_filter, confidence_filter): | 354 |
| 352 outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename))) | 355 if level == 2: |
| 353 | 356 outfiles = glob.glob(os.path.join(wd, '*', filename)) |
| 354 # sort files nicely | 357 else: |
| 355 outfiles.sort(key=lambda s: int(re.match(r'^.*/(' | 358 outfiles = glob.glob(os.path.join(wd, '*', '*', filename)) |
| 356 r'\d+).*{}'.format(filename), | 359 |
| 357 s).group(1))) | 360 outfiles.sort( |
| 361 key=lambda s: int( | |
| 362 re.match(r'.*/([0-9]+).*/{}$'.format(filename), | |
| 363 s).group(1))) | |
| 358 print(outfiles) | 364 print(outfiles) |
| 359 | |
| 360 if len(outfiles) == 0: | 365 if len(outfiles) == 0: |
| 361 print('No results') | 366 print('No results') |
| 362 sys.exit() | 367 sys.exit() |
| 363 | 368 |
| 364 headers = [] | 369 headers = [] |
| 383 print(fn) | 388 print(fn) |
| 384 | 389 |
| 385 with open(fn) as infile: | 390 with open(fn) as infile: |
| 386 reader = csv.DictReader(infile, delimiter='\t') | 391 reader = csv.DictReader(infile, delimiter='\t') |
| 387 | 392 |
| 388 ad = paramds[fn.split(os.sep)[-2]]['additional_details'] | 393 ad = paramds[fn.split(os.sep)[-level]]['additional_details'] |
| 389 | 394 |
| 390 for line in reader: | 395 for line in reader: |
| 391 if 'rank' in line \ | 396 |
| 392 and 0 < int(rank_filter) < int(line['rank']): | |
| 393 # filter out those annotations greater than rank filter | |
| 394 # If rank_filter is zero then skip | |
| 395 continue | |
| 396 | |
| 397 if 'ConfidenceScore' in line: | |
| 398 if isinstance(line['ConfidenceScore'], str): | |
| 399 # Value is NA or N/A | |
| 400 continue | |
| 401 | |
| 402 if (0 < float(confidence_filter) | |
| 403 and float(line['ConfidenceScore']) | |
| 404 < float(confidence_filter)): | |
| 405 # filter out those annotations that are less than | |
| 406 # the confidence filter value | |
| 407 continue | |
| 408 line.update(ad) | 397 line.update(ad) |
| 409 | 398 |
| 410 dwriter.writerow(line) | 399 dwriter.writerow(line) |
| 411 | 400 |
| 412 | 401 |
| 413 concat_output('compound_identifications.tsv', | 402 concat_output(wd, |
| 414 args.annotations_result_pth, | 403 'compound_identifications.tsv', |
| 415 args.rank_filter, | 404 args.annotations_result_pth) |
| 416 args.confidence_filter) | 405 |
| 417 concat_output('canopus_summary.tsv', | 406 concat_output(wd, |
| 418 args.canopus_result_pth, | 407 'canopus_summary.tsv', |
| 419 0, | 408 args.canopus_result_pth) |
| 420 0) | 409 |
| 410 concat_output(wd, | |
| 411 'structure_candidates.tsv', | |
| 412 args.all_structures_result_pth, | |
| 413 level=3) |
