view ipapy2_gibbs_sampler.py @ 0:7f84a8a5edde draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2 commit 64b61ff2823b4f54868c0ab7a4c0dc49eaf2979a
author recetox
date Fri, 16 May 2025 08:00:41 +0000
parents
children
line wrap: on
line source

from ipaPy2 import ipa
from utils import flattern_annotations, GibbsArgumentParser, group_by_peak_id


def main(
    input_dataset_mapped_isotope_patterns,
    input_dataset_annotations,
    integrating_mode,
    input_dataset_bio,
    noits,
    burn,
    delta_bio,
    delta_add,
    all_out,
    zs,
    zs_out,
    output_dataset,
):
    annotations_df = input_dataset_annotations
    annotations_df["post"] = annotations_df["post"].replace("", 0)
    annotations_df = annotations_df.replace("", None)
    annotations = group_by_peak_id(annotations_df)

    if not zs:
        zs = None

    if integrating_mode == "adducts":
        zs = ipa.Gibbs_sampler_add(
            input_dataset_mapped_isotope_patterns,
            annotations,
            noits=noits,
            burn=burn,
            delta_add=delta_add,
            all_out=all_out,
            zs=zs,
        )
    else:
        if args.integrating_mode == "biochemical":
            zs = ipa.Gibbs_sampler_bio(
                input_dataset_mapped_isotope_patterns,
                annotations,
                Bio=input_dataset_bio,
                noits=noits,
                burn=burn,
                delta_bio=delta_bio,
                all_out=all_out,
                zs=zs,
            )
        else:
            zs = ipa.Gibbs_sampler_bio_add(
                input_dataset_mapped_isotope_patterns,
                annotations,
                Bio=input_dataset_bio,
                noits=noits,
                burn=burn,
                delta_bio=delta_bio,
                delta_add=delta_add,
                all_out=all_out,
                zs=zs,
            )

    annotations_flat = flattern_annotations(annotations)
    write_func, file_path = output_dataset
    write_func(annotations_flat, file_path)

    if args.all_out:
        write_func, file_path = zs_out
        write_func(zs, file_path)


if __name__ == "__main__":
    parser = GibbsArgumentParser(
        description="""Gibbs sampler considering both biochemical and adducts connections. The
    function computes the posterior probabilities of the annotations
    considering the possible biochemical connections reported in Bio and the
    possible adducts connection.""",
    )
    parser.add_argument(
        "--input_dataset_mapped_isotope_patterns",
        nargs=2,
        action="load_data",
        required=True,
        help="a dataframe containing the measured intensities across several samples.",
    )
    parser.add_argument(
        "--input_dataset_annotations",
        nargs=2,
        action="load_data",
        required=True,
        help="a datset containing the annotations of the features.",
    )
    parser.add_argument(
        "--integrating_mode",
        type=str,
        required=True,
        choices=["adducts", "biochemical", "biochemical_adducts"],
        help=(
            "The mode of integration. Options are 'adducts', 'biochemical', or"
            " 'biochemical_adducts'."
        ),
    )
    parser.add_argument(
        "--input_dataset_bio",
        nargs=2,
        action="load_data",
        type=str,
        help="""dataframe (2 columns), reporting all the possible connections between
         compounds. It uses the unique ids from the database. It could be the
         output of Compute_Bio() or Compute_Bio_Parallel()""",
    )
    parser.add_argument(
        "--delta_bio",
        type=float,
        help="""parameter used when computing the conditional priors. The
               parameter must be positive. The smaller the parameter the more
               weight the adducts connections have on the posterior
               probabilities. Default 1.""",
    )
    args = parser.parse_args()
    main(
        args.input_dataset_mapped_isotope_patterns,
        args.input_dataset_annotations,
        args.integrating_mode,
        args.input_dataset_bio,
        args.noits,
        args.burn,
        args.delta_bio,
        args.delta_add,
        args.all_out,
        args.zs,
        args.zs_out,
        args.output_dataset,
    )