view test-data/test-db/metaphlan-db/customizemapping.py @ 0:db4f6b239f5e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author thanhlv
date Mon, 13 Feb 2023 16:18:40 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Script to extract a custom (reduced) mapping file from an input mapping file.
# Mostly used for a reduced-size demo data generation.


import argparse
from pathlib import Path


def _read_first_column(path):
    """Return the set of non-empty first-column values of a tabular file.

    :param path: path to a tab-separated text file
    :return: set of the values found in the first column, one per line
    """
    values = set()
    with open(path, 'r') as handle:
        for line in handle:
            # Strip the line terminator before splitting: on a single-column
            # line the first field would otherwise keep its trailing "\n" and
            # never match the identifiers read from the mapping file.
            key = line.rstrip("\n").split("\t")[0]
            # Blank lines carry no identifier; do not register "" as a key.
            if key:
                values.add(key)
    return values


def _reduce_mapping(in_path, out_path, features, elements):
    """Write the mapping restricted to the given features and elements.

    Each input line is expected to be ``feature<TAB>element<TAB>element...``.
    A line is kept only if its feature is in ``features``; on kept lines only
    the elements present in ``elements`` are written, in their original order.
    A kept feature with no surviving element is still written on its own line.

    :param in_path: path to the mapping file to reduce
    :param out_path: path of the reduced mapping file to create or overwrite
    :param features: set of feature identifiers (first column) to keep
    :param elements: set of element identifiers (other columns) to keep
    """
    with open(in_path, 'r') as in_mapping_f, open(out_path, 'w') as out_mapping_f:
        for line in in_mapping_f:
            # Without stripping, the last element of every line would end
            # with "\n" and could never be matched against ``elements``.
            l_split = line.rstrip("\n").split("\t")
            feat = l_split[0]
            if feat not in features:
                continue
            to_write = [feat] + [e for e in l_split[1:] if e in elements]
            out_mapping_f.write("%s\n" % '\t'.join(to_write))


def main(argv=None):
    """Parse the command line and generate the reduced mapping file.

    :param argv: list of command-line arguments; ``None`` means ``sys.argv[1:]``
    """
    # Read command line
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    # All four paths are mandatory: declaring them as required makes argparse
    # print a usage error instead of failing later on ``Path(None)``.
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args(argv)

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep
    features = _read_first_column(feature_fp)
    print(features)

    # extract elements to keep
    elements = _read_first_column(element_fp)
    print(elements)

    # write mapping for features to keep while keeping only elements
    _reduce_mapping(in_mapping_fp, out_mapping_fp, features, elements)


if __name__ == '__main__':
    main()