changeset 186:6f3a092f0a0e draft

Uploaded
author luca_milaz
date Thu, 04 Jul 2024 20:08:09 +0000
parents f9bb6887b684
children 50246292ba35
files marea_2_0/flux_sampling.py marea_2_0/flux_sampling.xml marea_2_0/statistics.py marea_2_0/statistics.xml
diffstat 4 files changed, 15 insertions(+), 197 deletions(-) [+]
line wrap: on
line diff
--- a/marea_2_0/flux_sampling.py	Thu Jul 04 14:27:49 2024 +0000
+++ b/marea_2_0/flux_sampling.py	Thu Jul 04 20:08:09 2024 +0000
@@ -32,11 +32,6 @@
                         required = True,
                         help = 'your tool directory')
     
-    parser.add_argument(
-        "-of", "--output_format",
-        type = utils.FileFormat.fromExt,
-        choices = [utils.FileFormat.CSV, utils.FileFormat.PICKLE],
-        required = True, help = "Extension of all output files")
     
     parser.add_argument('-in', '--input',
                         required = True,
@@ -95,12 +90,7 @@
 
 
 def write_to_file(dataset: pd.DataFrame, name: str)->None:
-
-    if ARGS.output_format is utils.FileFormat.PICKLE:
-        save_path = utils.FilePath(name, ARGS.output_format, prefix = ARGS.output_folder)
-        utils.writePickle(save_path, dataset)
-    elif ARGS.output_format is utils.FileFormat.CSV:
-        dataset.to_csv(ARGS.output_folder + name + ".csv", sep = '\t', index = False)
+    dataset.to_csv(ARGS.output_folder + name + ".csv", sep = '\t', index = False)
 
 
 
--- a/marea_2_0/flux_sampling.xml	Thu Jul 04 14:27:49 2024 +0000
+++ b/marea_2_0/flux_sampling.xml	Thu Jul 04 20:08:09 2024 +0000
@@ -29,7 +29,8 @@
         --n_batches $n_batches
         --n_samples $n_samples
         --seed $seed
-        --output_format $output_format
+        --output_type "${",".join(map(str, $output_types))}"
+        --toMerge $toMerge
         --out_log $log
         ]]>
     </command>
@@ -51,15 +52,21 @@
 
         <param name="n_samples" argument="--n_samples" type="integer" label="Samples:" value="1000"/>
 
-        <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="1" help="The output file will contain n_batches*n_samples samples. 
-        This is useful for computational perfomances."/>
+        <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="10" help="This is useful for computational performance."/>
 
         <param name="seed" argument="--seed" type="integer" label="Seed:" value="0" helph="Random seed."/>
+
+        <param type="select" argument="--output_types" multiple="true" name="output_types" label="Desired outputs">
+            <option value="mean" selected="true">Mean</option>
+            <option value="median" selected="true">Median</option>
+            <option value="quantiles" selected="true">Quantiles</option>
+            <option value="fluxes" help="Not recommended when dealing with many models because of disk space." selected="false">All fluxes</option>
+        </param>
+
+        <param name="toMerge" argument="--toMerge" type="boolean" checked="true" truevalue="Yes" falsevalue="No"
+         label="Merge datasets" help="If true, generate for each output type a single dataset having as index all the input model names. Not valid for 'All Fluxes'"/>
         
-        <param name="output_format" argument="--output_format" type="select" label="Output files format:">
-            <option value="pickle" selected="true">pickle extension (binary, non-editable, pre-parsed)</option>
-            <option value="csv">Comma Separated Values (csv text file, editable, raw)</option>
-        </param>
+        
     </inputs>
 
         		
--- a/marea_2_0/statistics.py	Thu Jul 04 14:27:49 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-import argparse
-import utils.general_utils as utils
-from typing import Optional, List
-import os
-import numpy as np
-import pandas as pd
-import cobra
-import utils.flux_analysis as flux_analysis
-from joblib import Parallel, delayed, cpu_count
-import sys
-
-################################# process args ###############################
-def process_args(args :List[str]) -> argparse.Namespace:
-    """
-    Processes command-line arguments.
-
-    Args:
-        args (list): List of command-line arguments.
-
-    Returns:
-        Namespace: An object containing parsed arguments.
-    """
-    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
-                                     description = 'process some value\'s')
-
-    parser.add_argument('-ol', '--out_log', 
-                        help = "Output log")
-    
-    parser.add_argument('-td', '--tool_dir',
-                        type = str,
-                        required = True,
-                        help = 'your tool directory')
-    
-    parser.add_argument('-in', '--input',
-                        required = True,
-                        type=str,
-                        help = 'inputs data')
-    
-    parser.add_argument('-nm', '--name',
-                        required = True,
-                        type=str,
-                        help = 'inputs data ids')
-    
-    parser.add_argument('-st', '--statistics',
-                        required = True,
-                        type=str,
-                        help = 'statistics')
-    
-    parser.add_argument('-tm', '--toMerge',
-                        required = True,
-                        type=bool,
-                        help = 'Merge output datasets')
-    
-    
-    ARGS = parser.parse_args()
-    return ARGS
-
-########################### warning ###########################################
-def warning(s :str) -> None:
-    """
-    Log a warning message to an output log file and print it to the console.
-
-    Args:
-        s (str): The warning message to be logged and printed.
-    
-    Returns:
-      None
-    """
-    with open(ARGS.out_log, 'a') as log:
-        log.write(s + "\n\n")
-    print(s)
-
-
-
-############################# main ###########################################
-def main() -> None:
-    """
-    Initializes everything and sets the program in motion based on the fronted input arguments.
-
-    Returns:
-        None
-    """
-    if not os.path.exists('statistics'):
-        os.makedirs('statistics')
-
-    num_processors = cpu_count()
-
-    global ARGS
-    ARGS = process_args(sys.argv)
-
-    ARGS.output_folder = 'statistics/'
-
-    data_inputs = ARGS.input.split(",")
-    data_names = ARGS.name.split(",")
-
-    utils.logWarning(
-        ARGS.statistics,
-        ARGS.out_log)
-    
-    pass
-        
-##############################################################################
-if __name__ == "__main__":
-    main()
\ No newline at end of file
--- a/marea_2_0/statistics.xml	Thu Jul 04 14:27:49 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-<tool id="statistics" name="Statistics" version="2.0.0">
-    
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-
-	<requirements>
-        <requirement type="package" version="0.29.0">cobra</requirement>
-        <requirement type="package" version="4.9.3">lxml</requirement>
-        <requirement type="package" version="1.4.2">joblib</requirement>
-        
-	</requirements>
-
-    <command detect_errors="exit_code">
-        <![CDATA[
-        python $__tool_directory__/statistics.py
-        --tool_dir $__tool_directory__
-        --input "${",".join(map(str, $inputs))}"
-        #set $names = ""
-        #for $input_temp in $inputs:
-            #set $names = $names + $input_temp.element_identifier + ","
-        #end for/
-        --name $names
-        --statistics "${",".join(map(str, $statistics))}"
-        --toMerge toMerge
-        --out_log $log
-        ]]>
-    </command>
-    <inputs>
-
-        <param name="inputs" argument="--inputs" multiple="true" type="data" format="tabular, csv, tsv, pickle, p, PICKLE" label="Dataset(s):" />
-
-        <param type="select" argument="--statistics" multiple="true" name="statistics" label="Statistics">
-            <option value="mean" selected="true">Mean</option>
-            <option value="median">Median</option>
-            <option value="quantiles">Quantiles</option>
-            <option value="correlation">Biomass correlation</option>
-        </param>
-        <param name="toMerge" argument="--toMerge" type="boolean" checked="true" truevalue="Yes" falsevalue="No"
-         label="Merge datasets" help="If true, generate for each statiscs a single dataset having as index all the input datasets."/>
-    </inputs>
-
-        		
-    <outputs>
-        <data format="txt" name="log" label="statistics - Log" />
-        <collection name="results" type="list" label="${tool.name} - Statistics">
-            <discover_datasets pattern="__name_and_ext__" directory="statistics"/>
-        </collection>
-    </outputs>
-       
-        
-    <help>
-    <![CDATA[
-What it does
--------------
-
-This tool generates flux samples starting from a model in JSON or XML format by using CBS (Corner-based sampling) and OPTGP (mproved Artificial Centering Hit-and-Run sampler) sampling algorithms.
-
-Accepted files:
-    - A model: JSON or XML file reporting reactions and rules contained in the model. It can be a single model, multiple models or a collection of models. 
-
-Output:
--------------
-
-The tool generates:
-    - Samples: reporting the sampled fluxes for each reaction. Format: csv or pickle.
-    - a log file (.txt).
-
-**TIP**: The Batches parameter is useful to mantain in memory just a batch of samples at time. For example, if you wish to sample 10.000 points, than it is suggested to select n_samples = 1.000 and n_batches=10.
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
\ No newline at end of file