Mercurial > repos > iuc > micro_decon
changeset 0:bd267e082f86 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/micro_decon/ commit bb37578aa61bf4a47af262e02baf0a1c1d9d02c6
author | iuc |
---|---|
date | Wed, 06 Aug 2025 08:38:24 +0000 |
parents | |
children | |
files | decon.R macros.xml micro_decon.xml test-data/decon_table.csv test-data/difference_mean.csv test-data/difference_sum.csv test-data/input.csv test-data/otus_removed.csv test-data/reads_removed.csv test-data/remove_cont_decon_table.csv test-data/remove_thresh_decon_table.csv |
diffstat | 11 files changed, 326 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# decon.R: command-line wrapper around the microDecon package.
#
# Reads an OTU table from a CSV file, runs the microDecon function selected
# with --mode and writes the resulting table(s) as CSV files into the current
# working directory.
suppressPackageStartupMessages(library(microDecon))
suppressPackageStartupMessages(library(optparse))

# Values accepted by --mode. They are spelled with underscores because that is
# what the dispatch at the bottom of this script compares against.
valid_modes <- c("decon", "remove_cont", "remove_thresh", "decon_diff")

# Define command-line options
option_list <- list(
  make_option(
    c("-m", "--mode"),
    type = "character",
    help = "Mode of operation: decon, remove_cont, remove_thresh, decon_diff",
    metavar = "MODE"
  ),
  make_option(
    c("-d", "--data_file"),
    type = "character",
    help = "Path to the data file (CSV format expected)",
    metavar = "FILE"
  ),
  make_option(
    c("-o", "--output"),
    type = "character",
    default = NULL,
    help = "Output file from remove.cont or remove.thresh (used in decon.diff)",
    metavar = "FILE"
  ),
  make_option(
    c("-b", "--numb_blanks"),
    type = "integer",
    default = NULL,
    help = "Number of blank samples"
  ),
  make_option(
    c("-n", "--numb_ind"),
    type = "character",
    default = NULL,
    help = "Individuals per group, comma-separated, e.g. 3,2 or c(3,2)"
  ),
  make_option(
    c("-t", "--taxa"),
    type = "logical",
    default = FALSE,
    help = "Taxa flag (TRUE or FALSE)"
  ),
  make_option(
    c("-r", "--runs"),
    type = "integer",
    default = NULL,
    help = "Number of runs"
  ),
  make_option(
    c("-T", "--thresh"),
    type = "double",
    default = NULL,
    help = "Threshold value"
  ),
  make_option(
    c("-p", "--prop_thresh"),
    type = "double",
    default = NULL,
    help = "Proportional threshold value"
  ),
  make_option(
    c("-g", "--regression"),
    type = "double",
    default = NULL,
    help = "Regression value"
  ),
  make_option(
    c("-l", "--low_threshold"),
    type = "double",
    default = 40,
    help = "Low threshold [default: %default]"
  ),
  make_option(
    c("-u", "--up_threshold"),
    type = "double",
    default = 400,
    help = "Upper threshold [default: %default]"
  )
)

#' Parse the --numb_ind value into a numeric vector.
#'
#' Accepts a plain comma-separated list ("3,2") as well as the "c(3,2)"
#' spelling. The text is validated and split; it is never evaluated as R code,
#' so arbitrary expressions on the command line are rejected.
#'
#' @param x Character scalar taken from the command line, or NULL when the
#'   option was not supplied.
#' @return Numeric vector with one entry per comma-separated value.
parse_numb_ind <- function(x) {
  if (is.null(x)) {
    stop("--numb_ind must be provided for this mode", call. = FALSE)
  }
  compact <- gsub("[[:space:]]", "", x)
  # Strip an optional surrounding c( ... ) so both spellings are accepted.
  inner <- sub("^c\\((.*)\\)$", "\\1", compact)
  if (!grepl("^[0-9]+(,[0-9]+)*$", inner)) {
    stop(
      "--numb_ind must be a comma-separated list of whole numbers, e.g. 3,2",
      call. = FALSE
    )
  }
  as.numeric(strsplit(inner, ",", fixed = TRUE)[[1]])
}

#' Write the five result tables of a summary-producing mode.
#'
#' @param result List whose elements decon.table, reads.removed,
#'   sum.per.group, mean.per.group and OTUs.removed are written to fixed file
#'   names in the working directory.
#' @return `result`, invisibly.
write_decon_outputs <- function(result) {
  write.csv(result$decon.table, "decon_table.csv", row.names = FALSE)
  write.csv(result$reads.removed, "reads_removed.csv", row.names = FALSE)
  write.csv(result$sum.per.group, "difference_sum.csv", row.names = FALSE)
  write.csv(result$mean.per.group, "difference_mean.csv", row.names = FALSE)
  write.csv(result$OTUs.removed, "OTUs_removed.csv", row.names = FALSE)
  invisible(result)
}

# Parse arguments
opt <- parse_args(OptionParser(option_list = option_list))

# Fail early with a clear message instead of silently producing no output
# (unknown mode) or hitting a zero-length condition (missing mode).
if (is.null(opt$mode) || !(opt$mode %in% valid_modes)) {
  stop(
    "--mode must be one of: ", paste(valid_modes, collapse = ", "),
    call. = FALSE
  )
}
if (is.null(opt$data_file)) {
  stop("--data_file must be provided", call. = FALSE)
}

# Read main input data
microbe_data <- read.csv(
  opt$data_file,
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)

if (opt$mode == "decon") {
  result <- decon(
    data = microbe_data,
    numb.blanks = opt$numb_blanks,
    numb.ind = parse_numb_ind(opt$numb_ind),
    taxa = opt$taxa,
    runs = opt$runs,
    thresh = opt$thresh,
    prop.thresh = opt$prop_thresh,
    regression = opt$regression,
    low.threshold = opt$low_threshold,
    up.threshold = opt$up_threshold
  )
  write_decon_outputs(result)
} else if (opt$mode == "remove_cont") {
  result <- remove.cont(
    data = microbe_data,
    numb.blanks = opt$numb_blanks,
    taxa = opt$taxa,
    runs = opt$runs,
    regression = opt$regression,
    low.threshold = opt$low_threshold,
    up.threshold = opt$up_threshold
  )
  write.csv(result, "decon_table.csv", row.names = FALSE)
} else if (opt$mode == "remove_thresh") {
  result <- remove.thresh(
    data = microbe_data,
    numb.ind = parse_numb_ind(opt$numb_ind),
    taxa = opt$taxa,
    thresh = opt$thresh,
    prop.thresh = opt$prop_thresh
  )
  write.csv(result, "decon_table.csv", row.names = FALSE)
} else if (opt$mode == "decon_diff") {
  if (is.null(opt$output)) {
    stop("--output must be provided for decon_diff mode", call. = FALSE)
  }
  output_data <- read.csv(
    opt$output,
    header = TRUE,
    stringsAsFactors = FALSE,
    check.names = FALSE
  )
  result <- decon.diff(
    data = microbe_data,
    output = output_data,
    numb.blanks = opt$numb_blanks,
    numb.ind = parse_numb_ind(opt$numb_ind),
    taxa = opt$taxa
  )
  write_decon_outputs(result)
}
<macros>
    <!-- macros.xml: tokens and reusable parameter groups for the micro_decon tool wrapper. -->

    <!-- Version and license tokens substituted into the tool definition. -->
    <token name="@TOOL_VERSION@">1.0.2</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@LICENSE@">GPL-2.0-or-later</token>

    <!-- Runtime environment: the tool runs inside this Docker container. -->
    <xml name="requirements">
        <requirements>
            <container type="docker">sebimer/samba-v4-qiime2:2024.2</container>
        </requirements>
    </xml>

    <!-- Number of blank samples in the input table. -->
    <xml name="numb_blanks_arg">
        <param
            argument="--numb_blanks"
            type="integer"
            label="Number of blank samples"
            value="1"
            min="0"
            help="Specifies the number of blanks included in the data set"/>
    </xml>

    <!-- Group sizes; the validator only lets digits separated by commas through. -->
    <xml name="numb_ind_arg">
        <param
            argument="--numb_ind"
            type="text"
            label="Number of individuals per group"
            optional="false"
            help="A comma-separated vector (e.g. 3,2) indicating the number of individual samples in each group">
            <validator type="regex" message="Enter a comma-separated list of positive integers (e.g., 3,2)">^\d+(,\d+)*$</validator>
        </param>
    </xml>

    <!-- Settings of the decontamination step: number of runs and regression choice. -->
    <xml name="runs_and_regression_args">
        <param
            argument="--runs"
            type="integer"
            label="Number of runs"
            value="2"
            min="1"
            max="10"
            help="Specifies the number of times that the function should run the decontamination procedure on the data."/>
        <param
            argument="--regression"
            type="integer"
            label="Regression"
            value="0"
            min="0"
            max="2"
            help="Specifies the regression equation used to calculate the constant. 0 = it chooses between regression 1 and regression 2 based on the lower threshold and upper threshold arguments. 1 = it always uses regression 1. 2 = it always uses regression 2"/>
        <param
            argument="--low_threshold"
            type="integer"
            label="Lower threshold"
            value="40"
            min="0"
            help="Selects the lower point for switching between regression 1 and regression 2. It is usually best not to change this value"/>
        <param
            argument="--up_threshold"
            type="integer"
            label="Upper threshold"
            value="400"
            min="0"
            help="Selects the higher point for switching between regression 1 and regression 2. It is usually best not to change this value"/>
    </xml>

    <!-- Thresholds of the residual-contamination step; both are proportions in [0, 1]. -->
    <xml name="thresh_args">
        <param
            argument="--thresh"
            type="float"
            label="Zero proportion threshold"
            value="0.7"
            min="0"
            max="1"
            help="This is the threshold at which if that proportion of 0s are present for an OTU within a group, all samples will be set to 0 for that OTU for that group"/>
        <param
            argument="--prop_thresh"
            type="float"
            label="Proportional threshold"
            value="0.00005"
            min="0"
            max="1"
            help="This is the threshold at which if the number of reads for a particular OTU are below this proportion, the OTU will be set to zero for all individuals in that group"/>
    </xml>

    <!-- Publication to cite. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1002/edn3.11</citation>
        </citations>
    </xml>

    <!-- Wrapper author metadata. -->
    <xml name="creators">
        <creator>
            <person
                givenName="Rand"
                familyName="Zoabi"
                url="https://github.com/RZ9082"
                identifier="https://orcid.org/0009-0000-2501-8053"/>
        </creator>
    </xml>
</macros>
<tool name="micro_decon" id="micro_decon" version="@TOOL_VERSION@+@VERSION_SUFFIX@" profile="24.2" license="@LICENSE@">
    <!-- micro_decon.xml: Galaxy wrapper that runs decon.R with one of four microDecon functions. -->
    <description>removing contamination from metabarcoding</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="creators"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Only the options used by the selected microDecon function are passed on.
        #set mode = str($function_cond.function_selector)
        Rscript '$__tool_directory__/decon.R'
            --mode '$mode'
            --data_file '$input_table'
        #if $mode != 'remove_thresh':
            --numb_blanks '$function_cond.numb_blanks'
        #end if

        #if $mode != 'remove_cont':
            --numb_ind 'c($function_cond.numb_ind)'
        #end if

        #if $mode in ['decon', 'remove_cont']:
            --runs '$function_cond.runs'
            --regression '$function_cond.regression'
            --low_threshold '$function_cond.low_threshold'
            --up_threshold '$function_cond.up_threshold'
        #end if

        #if $mode in ['decon', 'remove_thresh']:
            --thresh '$function_cond.thresh'
            --prop_thresh '$function_cond.prop_thresh'
        #end if

        #if $mode == 'decon_diff':
            --output '$function_cond.output'
        #end if
            --taxa '$taxa'
    ]]></command>
    <inputs>
        <!-- decon.R reads this file with read.csv, so only comma-separated input is accepted. -->
        <param name="input_table" type="data" format="csv" label="Input table"/>
        <param argument="--taxa" type="boolean" label="Does the input table contain a taxonomy column?" value="true" truevalue="T" falsevalue="F"/>
        <conditional name="function_cond">
            <param name="function_selector" type="select" label="Select the microDecon function to apply">
                <option value="decon">decon</option>
                <option value="remove_cont">remove.cont</option>
                <option value="remove_thresh">remove.thresh</option>
                <option value="decon_diff">decon.diff</option>
            </param>
            <when value="decon">
                <expand macro="numb_blanks_arg"/>
                <expand macro="numb_ind_arg"/>
                <expand macro="runs_and_regression_args"/>
                <expand macro="thresh_args"/>
            </when>
            <when value="remove_cont">
                <expand macro="numb_blanks_arg"/>
                <expand macro="runs_and_regression_args"/>
            </when>
            <when value="remove_thresh">
                <expand macro="numb_ind_arg"/>
                <expand macro="thresh_args"/>
            </when>
            <when value="decon_diff">
                <param argument="--output" type="data" label="The output table of remove.cont or remove.thresh" format="csv"/>
                <expand macro="numb_blanks_arg"/>
                <expand macro="numb_ind_arg"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Every function writes decon_table.csv; the four summary tables are collected for decon and decon.diff only. -->
        <data name="decon_table" format="csv" label="${function_cond.function_selector} on ${on_string}: Decontaminated table" from_work_dir="decon_table.csv"/>
        <data name="reads_removed" format="csv" label="${function_cond.function_selector} on ${on_string}: Reads removed" from_work_dir="reads_removed.csv">
            <filter>function_cond['function_selector'] != "remove_cont" and function_cond['function_selector'] != "remove_thresh"</filter>
        </data>
        <data name="difference_sum" format="csv" label="${function_cond.function_selector} on ${on_string}: Difference sum" from_work_dir="difference_sum.csv">
            <filter>function_cond['function_selector'] != "remove_cont" and function_cond['function_selector'] != "remove_thresh"</filter>
        </data>
        <data name="difference_mean" format="csv" label="${function_cond.function_selector} on ${on_string}: Difference mean" from_work_dir="difference_mean.csv">
            <filter>function_cond['function_selector'] != "remove_cont" and function_cond['function_selector'] != "remove_thresh"</filter>
        </data>
        <data name="otus_removed" format="csv" label="${function_cond.function_selector} on ${on_string}: OTUs removed" from_work_dir="OTUs_removed.csv">
            <filter>function_cond['function_selector'] != "remove_cont" and function_cond['function_selector'] != "remove_thresh"</filter>
        </data>
    </outputs>
    <tests>
        <!-- decon: only numb_ind is set, all other parameters keep their defaults -->
        <test expect_num_outputs="5">
            <param name="input_table" value="input.csv"/>
            <param name="taxa" value="true"/>
            <conditional name="function_cond">
                <param name="function_selector" value="decon"/>
                <param name="numb_ind" value="3,2"/>
            </conditional>
            <output name="decon_table" file="decon_table.csv"/>
            <output name="reads_removed" file="reads_removed.csv"/>
            <output name="difference_sum" file="difference_sum.csv"/>
            <output name="difference_mean" file="difference_mean.csv"/>
            <output name="otus_removed" file="otus_removed.csv"/>
        </test>
        <!-- remove.cont with default parameters -->
        <test expect_num_outputs="1">
            <param name="input_table" value="input.csv"/>
            <param name="taxa" value="true"/>
            <conditional name="function_cond">
                <param name="function_selector" value="remove_cont"/>
            </conditional>
            <output name="decon_table" file="remove_cont_decon_table.csv"/>
        </test>
        <!-- remove.thresh applied to the expected remove.cont output -->
        <test expect_num_outputs="1">
            <param name="input_table" value="remove_cont_decon_table.csv"/>
            <param name="taxa" value="true"/>
            <conditional name="function_cond">
                <param name="function_selector" value="remove_thresh"/>
                <param name="numb_ind" value="3,2"/>
            </conditional>
            <output name="decon_table" file="remove_thresh_decon_table.csv"/>
        </test>
        <!-- decon.diff comparing the raw input with the expected remove.thresh output -->
        <test expect_num_outputs="5">
            <param name="input_table" value="input.csv"/>
            <param name="taxa" value="true"/>
            <conditional name="function_cond">
                <param name="function_selector" value="decon_diff"/>
                <param name="numb_ind" value="3,2"/>
                <param name="output" value="remove_thresh_decon_table.csv"/>
            </conditional>
            <output name="decon_table" file="decon_table.csv"/>
            <output name="reads_removed" file="reads_removed.csv"/>
            <output name="difference_sum" file="difference_sum.csv"/>
            <output name="difference_mean" file="difference_mean.csv"/>
            <output name="otus_removed" file="otus_removed.csv"/>
        </test>
    </tests>
    <help><![CDATA[
microDecon
==========
The microDecon package is designed to remove contaminant reads from
metabarcoding studies (e.g., from bacterial contamination in reagents during a microbiome
study).

Input
------
A table of metabarcoding data structured as a data
frame where each row is an OTU (or ASV or other metabarcoding output), each column is an
individual sample, and each cell contains the number of reads for a given OTU for a given
individual. Additionally, the first column should contain OTU IDs (these can be numeric or
characters), and the last column should (optionally) contain taxonomic information.

Functions
---------
* **decon**: This is a wrapper function for all other microDecon functions. It first decontaminates the data using remove.cont(), then it runs remove.thresh(), then it runs decon.diff().
* **remove.cont**: This is the primary function for removing contamination. It outputs a single OTU table of decontaminated results (OTUs that were entirely contamination are still included as rows of 0s).
* **remove.thresh**: This function removes residual contamination in the output from remove.cont().
* **decon.diff**: This function takes the output of either remove.cont() or remove.thresh() as well as your original, contaminated data, and it returns summary statistics of how many OTUs were removed.

Outputs
--------
* **Decontaminated table**: A table of decontaminated OTU data.
* **Reads removed**: An OTU table showing the number of removed reads from each OTU.
* **Difference sum**: The total number of removed reads from each OTU that amplified in the blank.
* **Difference mean**: The average number of removed reads from each OTU that amplified in the blank.
* **OTUs removed**: A table including the OTU identifiers which were completely removed from either particular groups or the entire data set.

For further information visit the complete `docs <https://github.com/donaldtmcknight/microDecon/blob/master/microDecon%20user's%20guide%201.0.2%20updated.pdf>`_

    ]]></help>
    <expand macro="citations"/>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/decon_table.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,6 @@ +"OTU_ID","Blank1","Blank2","Blank3","Pop1_Sample1","Pop1_Sample2","Pop2_Sample3","Taxa" +"OTU1",0,0,0,60,64,40,"K_Bacteria; P_Actinobacteria" +"OTU2",200,100,0,380,328,340,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,200,80,40,40,0,"K_Bacteria; P_Proteobacteria" +"OTU4",50,0,9,0,0,0,"K_Bacteria; P_Bacteroidetes" +"OTU5",0,0,0,2400,1900,2100,"K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/difference_mean.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,5 @@ +"OTU_ID","Blank1","All.groups","Group1","Group2","Taxa" +"OTU2",200,182.4,193.333333333333,166,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,976,1073.33333333333,830,"K_Bacteria; P_Proteobacteria" +"OTU4",50,48.8,53.6666666666667,41.5,"K_Bacteria; P_Bacteroidetes" +"OTU6",25,21,23.3333333333333,17.5,"K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/difference_sum.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,5 @@ +"OTU_ID","Blank1","All.groups","Group1","Group2","Taxa" +"OTU2",200,912,580,332,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,4880,3220,1660,"K_Bacteria; P_Proteobacteria" +"OTU4",50,244,161,83,"K_Bacteria; P_Bacteroidetes" +"OTU6",25,105,70,35,"K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,7 @@ +"OTU_ID","Blank1","Blank2","Blank3","Pop1_Sample1","Pop1_Sample2","Pop2_Sample3","Taxa" +"OTU1",0,0,0,60,64,40,"K_Bacteria; P_Actinobacteria" +"OTU2",200,220,180,660,520,480,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,800,1300,1440,1000,700,"K_Bacteria; P_Proteobacteria" +"OTU4",50,30,70,70,48,35,"K_Bacteria; P_Bacteroidetes" +"OTU5",0,0,0,2400,1900,2100,"K_Bacteria" +"OTU6",25,10,30,30,20,15,"K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/otus_removed.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,3 @@ +"OTU_ID","Blank1","All.groups","Group1","Group2","Taxa" +"OTU4",50,"-","-","Totally.removed","K_Bacteria; P_Bacteroidetes" +"OTU6",25,"Totally.removed","Totally.removed","Totally.removed","K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reads_removed.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,5 @@ +"OTU_ID","Blank1","Blank2","Blank3","Pop1_Sample1","Pop1_Sample2","Pop2_Sample3","Taxa" +"OTU2",200,120,180,280,192,140,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,600,1220,1400,960,700,"K_Bacteria; P_Proteobacteria" +"OTU4",50,30,61,70,48,35,"K_Bacteria; P_Bacteroidetes" +"OTU6",25,10,30,30,20,15,"K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_cont_decon_table.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,7 @@ +"OTU_ID","Blank1","Blank2","Blank3","Pop1_Sample1","Pop1_Sample2","Pop2_Sample3","Taxa" +"OTU1",0,0,0,60,64,40,"K_Bacteria; P_Actinobacteria" +"OTU2",200,100,0,380,328,340,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,200,80,40,40,0,"K_Bacteria; P_Proteobacteria" +"OTU4",50,0,9,0,0,0,"K_Bacteria; P_Bacteroidetes" +"OTU5",0,0,0,2400,1900,2100,"K_Bacteria" +"OTU6",25,0,0,0,0,0,"K_Bacteria"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_thresh_decon_table.csv Wed Aug 06 08:38:24 2025 +0000 @@ -0,0 +1,7 @@ +"OTU_ID","Blank1","Blank2","Blank3","Pop1_Sample1","Pop1_Sample2","Pop2_Sample3","Taxa" +"OTU1",0,0,0,60,64,40,"K_Bacteria; P_Actinobacteria" +"OTU2",200,100,0,380,328,340,"K_Bacteria; P_Proteobacteria" +"OTU3",1000,200,80,40,40,0,"K_Bacteria; P_Proteobacteria" +"OTU4",50,0,9,0,0,0,"K_Bacteria; P_Bacteroidetes" +"OTU5",0,0,0,2400,1900,2100,"K_Bacteria" +"OTU6",25,0,0,0,0,0,"K_Bacteria"