Mercurial > repos > ufz > dose_response_analysis_tool
changeset 2:c122403ac78a draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tools/tox_tools/baseline_calculator commit 61a3d9a20a9a90d551dd5f7503be781dc28f4b75
author | ufz |
---|---|
date | Wed, 18 Dec 2024 09:11:40 +0000 |
parents | 8a1b524ed9d8 |
children | 2aa9da0a84a4 |
files | dose_response.R dose_response.xml test-data/drc_EC_output.tsv test-data/drc_input.tsv test-data/image_output.jpg |
diffstat | 5 files changed, 109 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/dose_response.R Tue Oct 08 12:41:07 2024 +0000 +++ b/dose_response.R Wed Dec 18 09:11:40 2024 +0000 @@ -4,11 +4,10 @@ fit_models <- function(data, concentration_col, response_col) { models <- list( LL.2 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.2(), type = "binomial"), - LL.3 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.3(), type = "binomial"), LL.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.4(), type = "binomial"), - LL.5 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = LL.5(), type = "binomial"), W1.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = W1.4(), type = "binomial"), - W2.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = W2.4(), type = "binomial") + W2.4 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = W2.4(), type = "binomial"), + BC.5 = drm(data[[response_col]] ~ data[[concentration_col]], data = data, fct = BC.5(), type = "binomial") ) return(models) } @@ -27,45 +26,81 @@ return(list(EC50 = ec50, EC25 = ec25, EC10 = ec10)) } -plot_dose_response <- function(model, data, ec_values, concentration_col, response_col, plot_file) { +plot_dose_response <- function(model, data, ec_values, concentration_col, response_col, plot_file, compound_name, concentration_unit) { + # Generate a grid of concentration values for predictions concentration_grid <- seq(min(data[[concentration_col]]), max(data[[concentration_col]]), length.out = 100) prediction_data <- data.frame(concentration_grid) colnames(prediction_data) <- concentration_col - predicted_values <- predict(model, newdata = prediction_data, type = "response") - prediction_data$response <- predicted_values + + # Compute predictions with confidence intervals + predictions <- predict(model, newdata = prediction_data, type = "response", interval = "confidence") + prediction_data$resp <- predictions[, 1] + prediction_data$lower <- predictions[, 2] + prediction_data$upper <- predictions[, 3] + + print(prediction_data) + + data$rep <- factor(data$rep) + + # Create the plot p <- ggplot(data, aes_string(x = concentration_col, y = response_col)) + - geom_point(color = "red") + - geom_line(data = prediction_data, aes_string(x = concentration_col, y = "response"), color = "blue") + + geom_point(aes(colour = rep)) + # Original data points + geom_line(data = prediction_data, aes_string(x = "conc", y = "resp"), color = "blue") + # Predicted curve + geom_ribbon(data = prediction_data, aes_string(x = "conc", ymin = "lower", ymax = "upper"), alpha = 0.2, fill = "blue") + # Confidence intervals geom_vline(xintercept = ec_values$EC10[1], color = "green", linetype = "dashed") + - geom_vline(xintercept = ec_values$EC50[1], color = "purple", linetype = "dashed") + - labs(title = "Dose-Response Curve", x = "Concentration", y = "Effect") + + geom_vline(xintercept = ec_values$EC50[1], color = "red", linetype = "dashed") + + labs( + title = paste(compound_name, "- Dose-Response Curve"), + x = paste("Dose [", concentration_unit, "]"), + y = "Response %" + ) + theme_minimal() + theme( panel.background = element_rect(fill = "white", color = NA), plot.background = element_rect(fill = "white", color = NA) ) - jpeg(filename = plot_file) + # Save the plot to a file + jpeg(filename = plot_file, width = 480, height = 480, res = 72) print(p) dev.off() } -dose_response_analysis <- function(data, concentration_col, response_col, plot_file, ec_file) { +dose_response_analysis <- function(data, concentration_col, response_col, plot_file, ec_file, compound_name, concentration_unit) { + # Ensure column names are correctly selected concentration_col <- colnames(data)[as.integer(concentration_col)] response_col <- colnames(data)[as.integer(response_col)] + + # Fit models and select the best one models <- fit_models(data, concentration_col, response_col) best_model_info <- select_best_model(models) - ec_values <- calculate_ec_values(best_model_info$model) - plot_dose_response(best_model_info$model, data, ec_values, concentration_col, response_col, plot_file) + best_model <- best_model_info$model + best_model_name <- best_model_info$name + + # Calculate EC values + ec_values <- calculate_ec_values(best_model) + # Plot the dose-response curve + plot_dose_response(best_model, data, ec_values, concentration_col, response_col, plot_file, compound_name, concentration_unit) + + # Get model summary and AIC value + model_summary <- summary(best_model) + model_aic <- AIC(best_model) + + # Prepare EC values data frame with additional information ec_data <- data.frame( - EC10 = ec_values$EC10[1], - EC25 = ec_values$EC25[1], - EC50 = ec_values$EC50[1] + Metric = c("chemical_name", "EC10", "EC25", "EC50", "AIC"), + Value = c(compound_name, ec_values$EC10[1], ec_values$EC25[1], ec_values$EC50[1], model_aic) ) + + # Write EC values, AIC, and model summary to the output file write.table(ec_data, ec_file, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE) - return(list(best_model = best_model_info$name, ec_values = ec_values)) + # Append the model summary to the file + cat("\nModel Summary:\n", file = ec_file, append = TRUE) + capture.output(model_summary, file = ec_file, append = TRUE) + + return(list(best_model = best_model_name, ec_values = ec_values)) } args <- commandArgs(trailingOnly = TRUE) @@ -75,6 +110,9 @@ response_col <- args[3] plot_file <- args[4] ec_file <- args[5] +compound_name <- args[6] +concentration_unit <- args[7] data <- read.csv(data_file, header = TRUE, sep = "\t") -dose_response_analysis(data, concentration_col, response_col, plot_file, ec_file) +print(data) +dose_response_analysis(data, concentration_col, response_col, plot_file, ec_file, compound_name, concentration_unit)
--- a/dose_response.xml Tue Oct 08 12:41:07 2024 +0000 +++ b/dose_response.xml Wed Dec 18 09:11:40 2024 +0000 @@ -2,7 +2,7 @@ <description>for Toxicological Risk Assessment</description> <macros> <token name="@TOOL_VERSION@">3.0.1</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@VERSION_SUFFIX@">3</token> </macros> <creator> <organization name="Helmholtz Centre for Environmental Research - UFZ, Department of Ecotoxicology" @@ -21,23 +21,33 @@ '$response_column' '$plot_output' '$ec_output' + '$compound_name' + '$concentration_unit' ]]> </command> <inputs> - <param name="input_csv" type="data" format="tabular" label="Concentration - Response Tabular Input"/> - <param name="concentration_column" type="data_column" data_ref="input_csv" label="Concentration Column" help="Name of the column for concentration values"/> - <param name="response_column" type="data_column" data_ref="input_csv" label="Response Column" help="Name of the column for response values"/> + <param name="input_csv" type="data" format="tabular" label="Dose-Response Tabular Input"/> + <param name="concentration_column" type="data_column" data_ref="input_csv" label="Dose/Concentration Column Index" help="Index of the column for concentration values"/> + <param name="response_column" type="data_column" data_ref="input_csv" label="Response Column Index" help="Index of the column for response values"/> + <param name="compound_name" type="text" label="Compound Name" help="Name of the compound to analyze"> + <validator type="regex" message="Enter a valid compound name">^^[a-zA-Z0-9\[\]()_-]+$</validator> + </param> + <param name="concentration_unit" type="text" label="Concentration Unit (i.e. mg/L, µM)"> + <validator type="regex" message="Enter a valid concentration unit">^(\S+/\S+)</validator> + </param> </inputs> <outputs> - <data name="plot_output" format="jpg" label="Dose Response Plot"/> - <data name="ec_output" format="tabular" label="${tool.name} on ${on_string}: EC Values"/> + <data name="plot_output" format="jpg" label="${tool.name} on ${on_string}: ${compound_name} - Dose Response Plot"/> + <data name="ec_output" format="tabular" label="${tool.name} on ${on_string}: ${compound_name} - EC Values"/> </outputs> <tests> <test> <param name="input_csv" value="drc_input.tsv"/> - <param name="concentration_column" value="1"/> - <param name="response_column" value="2"/> - <output name="plot_output" ftype="jpg"> + <param name="concentration_column" value="2"/> + <param name="response_column" value="3"/> + <param name="compound_name" value="test-chemical"/> + <param name="concentration_unit" value="mg/L"/> + <output name="plot_output" value="image_output.jpg" ftype="jpg"> <assert_contents> <has_image_width width="480"/> <has_image_height height="480"/>
--- a/test-data/drc_EC_output.tsv Tue Oct 08 12:41:07 2024 +0000 +++ b/test-data/drc_EC_output.tsv Wed Dec 18 09:11:40 2024 +0000 @@ -1,2 +1,18 @@ -EC10 EC25 EC50 -5.08340659604631 10.6579575265526 22.3456566952853 +Metric Value +chemical_name test-chemical +EC10 4.33426721357793 +EC25 9.27203754541479 +EC50 19.8351130669244 +AIC 15.2903874500298 + +Model Summary: + +Model fitted: Log-logistic (ED50 as parameter) with lower limit at 0 and upper limit at 1 (2 parms) + +Parameter estimates: + + Estimate Std. Error t-value p-value +b:(Intercept) -1.44469 0.78675 -1.8363 0.06632 . +e:(Intercept) 19.83511 10.50871 1.8875 0.05909 . +--- +Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
--- a/test-data/drc_input.tsv Tue Oct 08 12:41:07 2024 +0000 +++ b/test-data/drc_input.tsv Wed Dec 18 09:11:40 2024 +0000 @@ -1,8 +1,15 @@ -conc resp -0 0 -5 0.1 -10 0.3 -25 0.5 -50 0.6 -75 0.9 -100 1 +rep conc resp +1 0 0 +1 5 0.1 +1 10 0.3 +1 25 0.5 +1 50 0.6 +1 75 0.9 +1 100 1 +2 0 0 +2 5 0.1 +2 10 0.4 +2 25 0.7 +2 50 0.5 +2 75 1 +2 100 1