changeset 186:6f3a092f0a0e draft
Uploaded
author   | luca_milaz
date     | Thu, 04 Jul 2024 20:08:09 +0000
parents  | f9bb6887b684
children | 50246292ba35
files    | marea_2_0/flux_sampling.py marea_2_0/flux_sampling.xml marea_2_0/statistics.py marea_2_0/statistics.xml
diffstat | 4 files changed, 15 insertions(+), 197 deletions(-)
--- a/marea_2_0/flux_sampling.py	Thu Jul 04 14:27:49 2024 +0000
+++ b/marea_2_0/flux_sampling.py	Thu Jul 04 20:08:09 2024 +0000
@@ -32,11 +32,6 @@
                         required = True,
                         help = 'your tool directory')
 
-    parser.add_argument(
-        "-of", "--output_format",
-        type = utils.FileFormat.fromExt,
-        choices = [utils.FileFormat.CSV, utils.FileFormat.PICKLE],
-        required = True, help = "Extension of all output files")
 
     parser.add_argument('-in', '--input',
                         required = True,
@@ -95,12 +90,7 @@
 
 def write_to_file(dataset: pd.DataFrame, name: str)->None:
-
-    if ARGS.output_format is utils.FileFormat.PICKLE:
-        save_path = utils.FilePath(name, ARGS.output_format, prefix = ARGS.output_folder)
-        utils.writePickle(save_path, dataset)
-    elif ARGS.output_format is utils.FileFormat.CSV:
-        dataset.to_csv(ARGS.output_folder + name + ".csv", sep = '\t', index = False)
+    dataset.to_csv(ARGS.output_folder + name + ".csv", sep = '\t', index = False)
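Note for consumers of this tool's output: the simplified write_to_file always writes tab-separated text under a ".csv" extension. A minimal round-trip sketch (the "flux_sampling/mean.csv" path is a hypothetical example, not taken from this changeset):

    import pandas as pd

    # Outputs of flux_sampling.py are tab-separated despite the .csv extension,
    # so read them back with sep='\t' rather than the pandas default comma.
    df = pd.read_csv("flux_sampling/mean.csv", sep="\t")
    print(df.head())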
--- a/marea_2_0/flux_sampling.xml	Thu Jul 04 14:27:49 2024 +0000
+++ b/marea_2_0/flux_sampling.xml	Thu Jul 04 20:08:09 2024 +0000
@@ -29,7 +29,8 @@
         --n_batches $n_batches
         --n_samples $n_samples
         --seed $seed
-        --output_format $output_format
+        --output_type "${",".join(map(str, $output_types))}"
+        --toMerge toMerge
         --out_log $log
         ]]>
     </command>
@@ -51,15 +52,21 @@
 
         <param name="n_samples" argument="--n_samples" type="integer" label="Samples:" value="1000"/>
 
-        <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="1" help="The output file will contain n_batches*n_samples samples.
-        This is useful for computational perfomances."/>
+        <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="10" help="This is useful for computational performance."/>
 
         <param name="seed" argument="--seed" type="integer" label="Seed:" value="0" help="Random seed."/>
+
+        <param type="select" argument="--output_types" multiple="true" name="output_types" label="Desired outputs">
+            <option value="mean" selected="true">Mean</option>
+            <option value="median" selected="true">Median</option>
+            <option value="quantiles" selected="true">Quantiles</option>
+            <option value="fluxes" help="Not recommended when dealing with many models because of disk space." selected="false">All fluxes</option>
+        </param>
+
+        <param name="toMerge" argument="--toMerge" type="boolean" checked="true" truevalue="Yes" falsevalue="No"
+            label="Merge datasets" help="If true, generate for each output type a single dataset having as index all the input model names. Not valid for 'All fluxes'."/>
 
-        <param name="output_format" argument="--output_format" type="select" label="Output files format:">
-            <option value="pickle" selected="true">pickle extension (binary, non-editable, pre-parsed)</option>
-            <option value="csv">Comma Separated Values (csv text file, editable, raw)</option>
-        </param>
+
     </inputs>
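The Cheetah expression "${",".join(map(str, $output_types))}" collapses the multi-select into one comma-separated token, so the Python side must split it back into a list. A minimal sketch of how such an argument could be parsed (this changeset does not show the corresponding flux_sampling.py argument, so the code below is an assumption for illustration):

    import argparse

    parser = argparse.ArgumentParser()
    # Galaxy passes e.g. --output_type "mean,median,quantiles" as a single string.
    parser.add_argument('-ot', '--output_type', type=str, required=True,
                        help='comma-separated list of desired outputs')

    args = parser.parse_args(['--output_type', 'mean,median,quantiles'])
    output_types = args.output_type.split(',')  # ['mean', 'median', 'quantiles']
    print(output_types)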
--- a/marea_2_0/statistics.py	Thu Jul 04 14:27:49 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-import argparse
-import utils.general_utils as utils
-from typing import Optional, List
-import os
-import numpy as np
-import pandas as pd
-import cobra
-import utils.flux_analysis as flux_analysis
-from joblib import Parallel, delayed, cpu_count
-import sys
-
-################################# process args ###############################
-def process_args(args :List[str]) -> argparse.Namespace:
-    """
-    Processes command-line arguments.
-
-    Args:
-        args (list): List of command-line arguments.
-
-    Returns:
-        Namespace: An object containing parsed arguments.
-    """
-    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
-                                     description = 'process some values')
-
-    parser.add_argument('-ol', '--out_log',
-                        help = "Output log")
-
-    parser.add_argument('-td', '--tool_dir',
-                        type = str,
-                        required = True,
-                        help = 'your tool directory')
-
-    parser.add_argument('-in', '--input',
-                        required = True,
-                        type=str,
-                        help = 'inputs data')
-
-    parser.add_argument('-nm', '--name',
-                        required = True,
-                        type=str,
-                        help = 'inputs data ids')
-
-    parser.add_argument('-st', '--statistics',
-                        required = True,
-                        type=str,
-                        help = 'statistics')
-
-    parser.add_argument('-tm', '--toMerge',
-                        required = True,
-                        type=bool,
-                        help = 'Merge output datasets')
-
-
-    ARGS = parser.parse_args()
-    return ARGS
-
-########################### warning ###########################################
-def warning(s :str) -> None:
-    """
-    Log a warning message to an output log file and print it to the console.
-
-    Args:
-        s (str): The warning message to be logged and printed.
-
-    Returns:
-        None
-    """
-    with open(ARGS.out_log, 'a') as log:
-        log.write(s + "\n\n")
-    print(s)
-
-
-
-############################# main ###########################################
-def main() -> None:
-    """
-    Initializes everything and sets the program in motion based on the frontend input arguments.
-
-    Returns:
-        None
-    """
-    if not os.path.exists('statistics'):
-        os.makedirs('statistics')
-
-    num_processors = cpu_count()
-
-    global ARGS
-    ARGS = process_args(sys.argv)
-
-    ARGS.output_folder = 'statistics/'
-
-    data_inputs = ARGS.input.split(",")
-    data_names = ARGS.name.split(",")
-
-    utils.logWarning(
-        ARGS.statistics,
-        ARGS.out_log)
-
-    pass
-
-##############################################################################
-if __name__ == "__main__":
-    main()
\ No newline at end of file
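For intuition, the "Merge datasets" behaviour controlled by --toMerge (one combined dataset per statistic, indexed by the input dataset names) can be pictured with a short pandas sketch; the names and values below are invented for illustration:

    import pandas as pd

    # Hypothetical per-dataset mean fluxes for two input models.
    mean_A = pd.Series({'R1': 1.0, 'R2': 2.0}, name='model_A')
    mean_B = pd.Series({'R1': 0.5, 'R2': 1.5}, name='model_B')

    # Merged output: one row per input dataset, one column per reaction.
    merged = pd.DataFrame([mean_A, mean_B])
    print(merged)
    #          R1   R2
    # model_A  1.0  2.0
    # model_B  0.5  1.5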
--- a/marea_2_0/statistics.xml	Thu Jul 04 14:27:49 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="statistics" name="Statistics" version="2.0.0">
-
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-
-    <requirements>
-        <requirement type="package" version="0.29.0">cobra</requirement>
-        <requirement type="package" version="4.9.3">lxml</requirement>
-        <requirement type="package" version="1.4.2">joblib</requirement>
-
-    </requirements>
-
-    <command detect_errors="exit_code">
-        <![CDATA[
-        python $__tool_directory__/statistics.py
-        --tool_dir $__tool_directory__
-        --input "${",".join(map(str, $inputs))}"
-        #set $names = ""
-        #for $input_temp in $inputs:
-            #set $names = $names + $input_temp.element_identifier + ","
-        #end for/
-        --name $names
-        --statistics "${",".join(map(str, $statistics))}"
-        --toMerge toMerge
-        --out_log $log
-        ]]>
-    </command>
-    <inputs>
-
-        <param name="inputs" argument="--inputs" multiple="true" type="data" format="tabular, csv, tsv, pickle, p, PICKLE" label="Dataset(s):" />
-
-        <param type="select" argument="--statistics" multiple="true" name="statistics" label="Statistics">
-            <option value="mean" selected="true">Mean</option>
-            <option value="median">Median</option>
-            <option value="quantiles">Quantiles</option>
-            <option value="correlation">Biomass correlation</option>
-        </param>
-        <param name="toMerge" argument="--toMerge" type="boolean" checked="true" truevalue="Yes" falsevalue="No"
-            label="Merge datasets" help="If true, generate for each statistic a single dataset having as index all the input datasets."/>
-    </inputs>
-
-
-    <outputs>
-        <data format="txt" name="log" label="statistics - Log" />
-        <collection name="results" type="list" label="${tool.name} - Statistics">
-            <discover_datasets pattern="__name_and_ext__" directory="statistics"/>
-        </collection>
-    </outputs>
-
-
-    <help>
-    <![CDATA[
-What it does
--------------
-
-This tool generates flux samples starting from a model in JSON or XML format by using the CBS (corner-based sampling) and OPTGP (improved Artificial Centering Hit-and-Run sampler) sampling algorithms.
-
-Accepted files:
-    - A model: JSON or XML file reporting reactions and rules contained in the model. It can be a single model, multiple models or a collection of models.
-
-Output:
--------------
-
-The tool generates:
-    - Samples: reporting the sampled fluxes for each reaction. Format: csv or pickle.
-    - a log file (.txt).
-
-**TIP**: The Batches parameter is useful to keep in memory just one batch of samples at a time. For example, if you wish to sample 10,000 points, then it is suggested to select n_samples = 1000 and n_batches = 10.
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
\ No newline at end of file
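The batching tip in the help text above (n_samples = 1000 with n_batches = 10 rather than 10,000 samples in one go) maps onto COBRApy's sampling API. A minimal sketch using the bundled "textbook" model as a stand-in for a user-supplied JSON/XML model (illustration only, not code from this repository):

    import pandas as pd
    import cobra
    from cobra.sampling import sample

    model = cobra.io.load_model("textbook")  # small E. coli demo model

    # Draw 10 batches of 1,000 OPTGP samples, keeping only one batch in memory
    # while accumulating per-reaction means.
    batch_means = []
    for _ in range(10):
        fluxes = sample(model, 1000, method="optgp")  # DataFrame: samples x reactions
        batch_means.append(fluxes.mean())
    mean_fluxes = pd.concat(batch_means, axis=1).mean(axis=1)
    print(mean_fluxes.head())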