changeset 2:c122403ac78a draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tools/tox_tools/baseline_calculator commit 61a3d9a20a9a90d551dd5f7503be781dc28f4b75
author ufz
date Wed, 18 Dec 2024 09:11:40 +0000
parents 8a1b524ed9d8
children 2aa9da0a84a4
files dose_response.R dose_response.xml test-data/drc_EC_output.tsv test-data/drc_input.tsv test-data/image_output.jpg
diffstat 5 files changed, 109 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/dose_response.R	Tue Oct 08 12:41:07 2024 +0000
+++ b/dose_response.R	Wed Dec 18 09:11:40 2024 +0000
@@ -4,11 +4,10 @@
 fit_models <- function(data, concentration_col, response_col) {
     models <- list(
         LL.2 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.2(), type = "binomial"),
-        LL.3 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.3(), type = "binomial"),
         LL.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.4(), type = "binomial"),
-        LL.5 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.5(), type = "binomial"),
         W1.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = W1.4(), type = "binomial"),
-        W2.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = W2.4(), type = "binomial")
+        W2.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = W2.4(), type = "binomial"),
+        BC.5 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = BC.5(), type = "binomial")
     )
     return(models)
 }
@@ -27,45 +26,81 @@
     return(list(EC50 = ec50, EC25 = ec25, EC10 = ec10))
 }
 
-plot_dose_response <- function(model, data, ec_values, concentration_col, response_col, plot_file) {
+plot_dose_response <- function(model, data, ec_values, concentration_col, response_col, plot_file, compound_name, concentration_unit) {
+    # Generate a grid of concentration values for predictions
     concentration_grid <- seq(min(data[[concentration_col]]), max(data[[concentration_col]]), length.out = 100)
     prediction_data <- data.frame(concentration_grid)
     colnames(prediction_data) <- concentration_col
-    predicted_values <- predict(model, newdata = prediction_data, type = "response")
-    prediction_data$response <- predicted_values
+
+    # Compute predictions with confidence intervals
+    predictions <- predict(model, newdata = prediction_data, type = "response", interval = "confidence")
+    prediction_data$resp <- predictions[, 1]
+    prediction_data$lower <- predictions[, 2]
+    prediction_data$upper <- predictions[, 3]
+
+    print(prediction_data)
+
+    data$rep <- factor(data$rep)
+
+    # Create the plot
     p <- ggplot(data, aes_string(x = concentration_col, y = response_col)) +
-        geom_point(color = "red") +
-        geom_line(data = prediction_data, aes_string(x = concentration_col, y = "response"), color = "blue") +
+        geom_point(aes(colour = rep)) + # Original data points
+        geom_line(data = prediction_data, aes_string(x = "conc", y = "resp"), color = "blue") + # Predicted curve
+        geom_ribbon(data = prediction_data, aes_string(x = "conc", ymin = "lower", ymax = "upper"), alpha = 0.2, fill = "blue") + # Confidence intervals
         geom_vline(xintercept = ec_values$EC10[1], color = "green", linetype = "dashed") +
-        geom_vline(xintercept = ec_values$EC50[1], color = "purple", linetype = "dashed") +
-        labs(title = "Dose-Response Curve", x = "Concentration", y = "Effect") +
+        geom_vline(xintercept = ec_values$EC50[1], color = "red", linetype = "dashed") +
+        labs(
+            title = paste(compound_name, "- Dose-Response Curve"),
+            x = paste("Dose [", concentration_unit, "]"),
+            y = "Response %"
+        ) +
         theme_minimal() +
         theme(
             panel.background = element_rect(fill = "white", color = NA),
             plot.background = element_rect(fill = "white", color = NA)
         )
 
-    jpeg(filename = plot_file)
+    # Save the plot to a file
+    jpeg(filename = plot_file, width = 480, height = 480, res = 72)
     print(p)
     dev.off()
 }
 
-dose_response_analysis <- function(data, concentration_col, response_col, plot_file, ec_file) {
+dose_response_analysis <- function(data, concentration_col, response_col, plot_file, ec_file, compound_name, concentration_unit) {
+    # Ensure column names are correctly selected
     concentration_col <- colnames(data)[as.integer(concentration_col)]
     response_col <- colnames(data)[as.integer(response_col)]
+
+    # Fit models and select the best one
     models <- fit_models(data, concentration_col, response_col)
     best_model_info <- select_best_model(models)
-    ec_values <- calculate_ec_values(best_model_info$model)
-    plot_dose_response(best_model_info$model, data, ec_values, concentration_col, response_col, plot_file)
+    best_model <- best_model_info$model
+    best_model_name <- best_model_info$name
+
+    # Calculate EC values
+    ec_values <- calculate_ec_values(best_model)
 
+    # Plot the dose-response curve
+    plot_dose_response(best_model, data, ec_values, concentration_col, response_col, plot_file, compound_name, concentration_unit)
+
+    # Get model summary and AIC value
+    model_summary <- summary(best_model)
+    model_aic <- AIC(best_model)
+
+    # Prepare EC values data frame with additional information
     ec_data <- data.frame(
-        EC10 = ec_values$EC10[1],
-        EC25 = ec_values$EC25[1],
-        EC50 = ec_values$EC50[1]
+        Metric = c("chemical_name", "EC10", "EC25", "EC50", "AIC"),
+        Value = c(compound_name, ec_values$EC10[1], ec_values$EC25[1], ec_values$EC50[1], model_aic)
     )
+
+    # Write EC values, AIC, and model summary to the output file
     write.table(ec_data, ec_file, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
 
-    return(list(best_model = best_model_info$name, ec_values = ec_values))
+    # Append the model summary to the file
+    cat("\nModel Summary:\n", file = ec_file, append = TRUE)
+    capture.output(model_summary, file = ec_file, append = TRUE)
+
+    return(list(best_model = best_model_name, ec_values = ec_values))
 }
 
 args <- commandArgs(trailingOnly = TRUE)
@@ -75,6 +110,9 @@
 response_col <- args[3]
 plot_file <- args[4]
 ec_file <- args[5]
+compound_name <- args[6]
+concentration_unit <- args[7]
 
 data <- read.csv(data_file, header = TRUE, sep = "\t")
-dose_response_analysis(data, concentration_col, response_col, plot_file, ec_file)
+print(data)
+dose_response_analysis(data, concentration_col, response_col, plot_file, ec_file, compound_name, concentration_unit)
--- a/dose_response.xml	Tue Oct 08 12:41:07 2024 +0000
+++ b/dose_response.xml	Wed Dec 18 09:11:40 2024 +0000
@@ -2,7 +2,7 @@
     <description>for Toxicological Risk Assessment</description>
     <macros>
         <token name="@TOOL_VERSION@">3.0.1</token>
-        <token name="@VERSION_SUFFIX@">1</token>
+        <token name="@VERSION_SUFFIX@">3</token>
     </macros>
     <creator>
         <organization name="Helmholtz Centre for Environmental Research - UFZ, Department of Ecotoxicology"
@@ -21,23 +21,33 @@
             '$response_column'
             '$plot_output'
             '$ec_output'
+            '$compound_name'
+            '$concentration_unit'
         ]]>
     </command>
     <inputs>
-        <param name="input_csv" type="data" format="tabular" label="Concentration - Response Tabular Input"/>
-        <param name="concentration_column" type="data_column" data_ref="input_csv" label="Concentration Column" help="Name of the column for concentration values"/>
-        <param name="response_column" type="data_column" data_ref="input_csv" label="Response Column" help="Name of the column for response values"/>
+        <param name="input_csv" type="data" format="tabular" label="Dose-Response Tabular Input"/>
+        <param name="concentration_column" type="data_column" data_ref="input_csv" label="Dose/Concentration Column Index" help="Index of the column for concentration values"/>
+        <param name="response_column" type="data_column" data_ref="input_csv" label="Response Column Index" help="Index of the column for response values"/>
+        <param name="compound_name" type="text" label="Compound Name" help="Name of the compound to analyze">
+            <validator type="regex" message="Enter a valid compound name">^^[a-zA-Z0-9\[\]()_-]+$</validator>
+        </param>
+        <param name="concentration_unit" type="text" label="Concentration Unit (i.e. mg/L, µM)">
+            <validator type="regex" message="Enter a valid concentration unit">^(\S+/\S+)</validator>
+        </param>
     </inputs>
     <outputs>
-        <data name="plot_output" format="jpg" label="Dose Response Plot"/>
-        <data name="ec_output" format="tabular" label="${tool.name} on ${on_string}: EC Values"/>
+        <data name="plot_output" format="jpg" label="${tool.name} on ${on_string}: ${compound_name} - Dose Response Plot"/>
+        <data name="ec_output" format="tabular" label="${tool.name} on ${on_string}: ${compound_name} - EC Values"/>
     </outputs>
         <tests>
         <test>
             <param name="input_csv" value="drc_input.tsv"/>
-            <param name="concentration_column" value="1"/>
-            <param name="response_column" value="2"/>
-            <output name="plot_output" ftype="jpg">
+            <param name="concentration_column" value="2"/>
+            <param name="response_column" value="3"/>
+            <param name="compound_name" value="test-chemical"/>
+            <param name="concentration_unit" value="mg/L"/>
+            <output name="plot_output" value="image_output.jpg" ftype="jpg">
                 <assert_contents>
                     <has_image_width width="480"/>
                     <has_image_height height="480"/>
--- a/test-data/drc_EC_output.tsv	Tue Oct 08 12:41:07 2024 +0000
+++ b/test-data/drc_EC_output.tsv	Wed Dec 18 09:11:40 2024 +0000
@@ -1,2 +1,18 @@
-EC10	EC25	EC50
-5.08340659604631	10.6579575265526	22.3456566952853
+Metric	Value
+chemical_name	test-chemical
+EC10	4.33426721357793
+EC25	9.27203754541479
+EC50	19.8351130669244
+AIC	15.2903874500298
+
+Model Summary:
+
+Model fitted: Log-logistic (ED50 as parameter) with lower limit at 0 and upper limit at 1 (2 parms)
+
+Parameter estimates:
+
+              Estimate Std. Error t-value p-value  
+b:(Intercept) -1.44469    0.78675 -1.8363 0.06632 .
+e:(Intercept) 19.83511   10.50871  1.8875 0.05909 .
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
--- a/test-data/drc_input.tsv	Tue Oct 08 12:41:07 2024 +0000
+++ b/test-data/drc_input.tsv	Wed Dec 18 09:11:40 2024 +0000
@@ -1,8 +1,15 @@
-conc	resp
-0	0
-5	0.1
-10	0.3
-25	0.5
-50	0.6
-75	0.9
-100	1
+rep	conc	resp
+1	0	0
+1	5	0.1
+1	10	0.3
+1	25	0.5
+1	50	0.6
+1	75	0.9
+1	100	1
+2	0	0
+2	5	0.1
+2	10	0.4
+2	25	0.7
+2	50	0.5
+2	75	1
+2	100	1
Binary file test-data/image_output.jpg has changed