Mercurial > repos > computational-metabolomics > sirius_csifingerid
diff sirius_csifingerid.py @ 9:8dac4cef0a22 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit c2fc3de462ef4bd1a225c41624b094c3b5874736"
author | computational-metabolomics |
---|---|
date | Wed, 16 Feb 2022 16:11:55 +0000 |
parents | e29e64ff50bb |
children |
line wrap: on
line diff
--- a/sirius_csifingerid.py Fri Feb 11 10:05:21 2022 +0000 +++ b/sirius_csifingerid.py Wed Feb 16 16:11:55 2022 +0000 @@ -14,6 +14,7 @@ parser.add_argument('--input_pth') parser.add_argument('--canopus_result_pth') parser.add_argument('--annotations_result_pth') +parser.add_argument('--all_structures_result_pth') parser.add_argument('--database') parser.add_argument('--profile') parser.add_argument('--candidates') @@ -25,10 +26,9 @@ parser.add_argument('--temp_dir') parser.add_argument('--meta_select_col', default='all') parser.add_argument('--cores_top_level', default=1) +parser.add_argument('--cores_sirius', default=4) parser.add_argument('--chunks', default=1) parser.add_argument('--min_MSMS_peaks', default=1) -parser.add_argument('--rank_filter', default=0) -parser.add_argument('--confidence_filter', default=0) parser.add_argument('--schema', default='msp') parser.add_argument('-a', '--adducts', action='append', nargs=1, required=False, default=[], help='Adducts used') @@ -137,6 +137,7 @@ def init_paramd(args): paramd = defaultdict() paramd["cli"] = {} + paramd["cli"]["--cores"] = args.cores_sirius paramd["cli"]["--database"] = args.database paramd["cli"]["--profile"] = args.profile paramd["cli"]["--candidates"] = args.candidates @@ -216,9 +217,11 @@ paramd['additional_details']['adduct'] = adduct # ============== Create CLI cmd for metfrag =============================== - cmd = "sirius --no-citations --ms2 {} --adduct {} --precursor {} -o {} " \ + cmd = "sirius --cores {} --no-citations --ms2 {} --adduct {} " \ + "--precursor {} -o {} " \ "formula -c {} --ppm-max {} --profile {} " \ "structure --database {} canopus".format( + paramd["cli"]["--cores"], paramd["cli"]["--ms2"], adduct, paramd["cli"]["--precursor"], @@ -347,16 +350,18 @@ # outputs might have different headers. Need to get a list of all the headers # before we start merging the files outfiles = [os.path.join(wd, f) for f in # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] -def concat_output(filename, result_pth, - rank_filter, confidence_filter): - outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename))) +def concat_output(wd, filename, result_pth, level=2): - # sort files nicely - outfiles.sort(key=lambda s: int(re.match(r'^.*/(' - r'\d+).*{}'.format(filename), - s).group(1))) + if level == 2: + outfiles = glob.glob(os.path.join(wd, '*', filename)) + else: + outfiles = glob.glob(os.path.join(wd, '*', '*', filename)) + + outfiles.sort( + key=lambda s: int( + re.match(r'.*/([0-9]+).*/{}$'.format(filename), + s).group(1))) print(outfiles) - if len(outfiles) == 0: print('No results') sys.exit() @@ -385,36 +390,24 @@ with open(fn) as infile: reader = csv.DictReader(infile, delimiter='\t') - ad = paramds[fn.split(os.sep)[-2]]['additional_details'] + ad = paramds[fn.split(os.sep)[-level]]['additional_details'] for line in reader: - if 'rank' in line \ - and 0 < int(rank_filter) < int(line['rank']): - # filter out those annotations greater than rank filter - # If rank_filter is zero then skip - continue - if 'ConfidenceScore' in line: - if isinstance(line['ConfidenceScore'], str): - # Value is NA or N/A - continue - - if (0 < float(confidence_filter) - and float(line['ConfidenceScore']) - < float(confidence_filter)): - # filter out those annotations that are less than - # the confidence filter value - continue line.update(ad) dwriter.writerow(line) -concat_output('compound_identifications.tsv', - args.annotations_result_pth, - args.rank_filter, - args.confidence_filter) -concat_output('canopus_summary.tsv', - args.canopus_result_pth, - 0, - 0) +concat_output(wd, + 'compound_identifications.tsv', + args.annotations_result_pth) + +concat_output(wd, + 'canopus_summary.tsv', + args.canopus_result_pth) + +concat_output(wd, + 'structure_candidates.tsv', + args.all_structures_result_pth, + level=3)