comparison aldex2.R @ 0:f4d0bd4b4d6d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/aldex2 commit b99f09cf03f075a6881d192b0f1233233289fa60
author iuc
date Wed, 29 Jun 2022 07:36:45 +0000
parents
children 75214276e2b7
comparison
equal deleted inserted replaced
-1:000000000000 0:f4d0bd4b4d6d
1 #!/usr/bin/env Rscript
2
3 suppressPackageStartupMessages(library("ALDEx2"))
4 suppressPackageStartupMessages(library("data.table"))
5 suppressPackageStartupMessages(library("qgraph"))
6 suppressPackageStartupMessages(library("optparse"))
7
8 option_list <- list(
9 make_option(c("--aldex_test"), action = "store", dest = "aldex_test", default = NULL, help = "Indicates which analysis to perform"),
10 make_option(c("--analysis_type"), action = "store", dest = "analysis_type", help = "Indicates which analysis to perform"),
11 make_option(c("--cutoff_effect"), action = "store", dest = "cutoff_effect", type = "integer", default = NULL, help = "Effect size cutoff for plotting"),
12 make_option(c("--cutoff_pval"), action = "store", dest = "cutoff_pval", type = "double", default = NULL, help = "Benjamini-Hochberg fdr cutoff"),
13 make_option(c("--denom"), action = "store", dest = "denom", help = "Indicates which features to retain as the denominator for the Geometric Mean calculation"),
14 make_option(c("--effect"), action = "store", dest = "effect", default = "false", help = "Calculate abundances and effect sizes"),
15 make_option(c("--feature_name"), action = "store", dest = "feature_name", default = NULL, help = "Name of the feature from the input data to be plotted"),
16 make_option(c("--group_names"), action = "store", dest = "group_names", help = "Group names vector"),
17 make_option(c("--group_nums"), action = "store", dest = "group_nums", default = NULL, help = "Group number for continuous numeric vector"),
18 make_option(c("--hist_plot"), action = "store", dest = "hist_plot", default = "false", help = "Indicates whether to plot a histogram of p-values for the first Dirichlet Monte Carlo instance"),
19 make_option(c("--include_sample_summary"), action = "store", dest = "include_sample_summary", default = "false", help = "Include median clr values for each sample"),
20 make_option(c("--iterate"), action = "store", dest = "iterate", default = "false", help = "Indicates whether to iteratively perform a test"),
21 make_option(c("--num_cols"), action = "store", dest = "num_cols", help = "Number of columns in group vector"),
22 make_option(c("--num_cols_in_groups"), action = "store", dest = "num_cols_in_groups", default = NULL, help = "Number of columns in each group dewfining the continuous numeric vector"),
23 make_option(c("--num_mc_samples"), action = "store", dest = "num_mc_samples", type = "integer", help = "Number of Monte Carlo samples"),
24 make_option(c("--output"), action = "store", dest = "output", help = "output file"),
25 make_option(c("--paired_test"), action = "store", dest = "paired_test", default = "false", help = "Indicates whether to do paired-sample tests"),
26 make_option(c("--plot_test"), action = "store", dest = "plot_test", default = NULL, help = "The method of calculating significance"),
27 make_option(c("--plot_type"), action = "store", dest = "plot_type", default = NULL, help = "The type of plot to be produced"),
28 make_option(c("--reads"), action = "store", dest = "reads", help = "Input reads table"),
29 make_option(c("--xlab"), action = "store", dest = "xlab", default = NULL, help = "x lable for the plot"),
30 make_option(c("--ylab"), action = "store", dest = "ylab", default = NULL, help = "y lable for the plot")
31 )
32
33 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
34 args <- parse_args(parser, positional_arguments = TRUE)
35 opt <- args$options
36
37 get_boolean_value <- function(val) {
38 if (val == "true") {
39 return(TRUE)
40 } else {
41 return(FALSE)
42 }
43 }
44
45 # Read the input reads file into a data frame.
46 reads_df <- read.table(file = opt$reads, header = TRUE, sep = "\t", row.names = 1, dec = ".", as.is = FALSE)
47
48 # Split the group_names and num_cols into lists of strings.
49 group_names_str <- as.character(opt$group_names)
50 group_names_list <- strsplit(group_names_str, ",")[[1]]
51 num_cols_str <- as.character(opt$num_cols)
52 num_cols_list <- strsplit(num_cols_str, ",")[[1]]
53 # Construct conditions vector.
54 conditions_vector <- c()
55 for (i in seq_along(num_cols_list)) {
56 num_cols <- as.integer(num_cols_list[i])
57 group_name <- group_names_list[i]
58 for (j in 1:num_cols) {
59 conditions_vector <- cbind(conditions_vector, group_name)
60 }
61 }
62 # The conditions_vector is now a matrix,
63 # so coerce it back to a vector.
64 conditions_vector <- as.vector(conditions_vector)
65
66 # Convert boolean values to boolean.
67 effect <- get_boolean_value(opt$effect)
68 include_sample_summary <- get_boolean_value(opt$include_sample_summary)
69 iterate <- get_boolean_value(opt$iterate)
70
71 if (opt$analysis_type == "aldex") {
72 aldex_obj <- aldex(reads = reads_df,
73 conditions_vector,
74 mc.samples = opt$num_mc_samples,
75 test = opt$aldex_test,
76 effect = effect,
77 include.sample.summary = include_sample_summary,
78 denom = opt$denom,
79 iterate = iterate)
80 } else {
81 # Generate Monte Carlo samples of the Dirichlet distribution for each sample. Convert each
82 # instance using a log-ratio transform. This is the input for all further analyses.
83 aldex_clr_obj <- aldex.clr(reads_df, conditions_vector, mc.samples = opt$num_mc_samples, denom = opt$denom)
84
85 if (opt$analysis_type == "aldex_corr") {
86 if (!is.null(opt$cont_var)) {
87 # Read the input cont_var vector.
88 cont_var <- as.numeric(read.table(file = opt$cont_var, header = FALSE, sep = "\t"))
89 }
90
91 # Split the group_names and num_cols into lists of strings.
92 group_nums_str <- as.character(opt$group_nums)
93 group_nums_list <- strsplit(group_nums_str, ",")[[1]]
94 num_cols_in_groups_str <- as.character(opt$num_cols_in_groups)
95 num_cols_in_groups_list <- strsplit(num_cols_in_groups_str, ",")[[1]]
96 # Construct continuous numeric vector.
97 cont_var_vector <- c()
98 for (i in seq_along(num_cols_in_groups_list)) {
99 num_cols_in_group <- as.integer(num_cols_in_groups_list[i])
100 group_num <- group_nums_list[i]
101 for (j in 1:num_cols_in_group) {
102 cont_var_vector <- cbind(cont_var_vector, group_num)
103 }
104 }
105 # The cont_var_vector is now a matrix,
106 # so coerce it back to a vector.
107 cont_var_vector <- as.numeric(as.vector(cont_var_vector))
108
109 aldex_obj <- aldex.corr(aldex_clr_obj, cont.var = cont_var_vector)
110 } else if (opt$analysis_type == "aldex_effect") {
111 aldex_obj <- aldex.effect(aldex_clr_obj, include_sample_summary)
112 } else if (opt$analysis_type == "aldex_expected_distance") {
113 dist <- aldex.expectedDistance(aldex_clr_obj)
114 png(filename = opt$output)
115 qgraph(dist, layout = "spring", vsize = 1)
116 dev.off()
117 } else if (opt$analysis_type == "aldex_kw") {
118 aldex_obj <- aldex.kw(aldex_clr_obj)
119 } else if (opt$analysis_type == "aldex_plot") {
120 aldex_obj <- aldex(reads = reads_df,
121 conditions_vector,
122 mc.samples = opt$num_mc_samples,
123 test = opt$aldex_test,
124 effect = effect,
125 include.sample.summary = include_sample_summary,
126 denom = opt$denom,
127 iterate = iterate)
128 png(filename = opt$output)
129 aldex.plot(x = aldex_obj,
130 type = opt$plot_type,
131 test = opt$plot_test,
132 cutoff.pval = opt$cutoff_pval,
133 cutoff.effect = opt$cutoff_effect,
134 xlab = opt$xlab,
135 ylab = opt$ylab)
136 dev.off()
137 } else if (opt$analysis_type == "aldex_plot_feature") {
138 png(filename = opt$output)
139 aldex.plotFeature(aldex_clr_obj, opt$feature_name)
140 dev.off()
141 } else if (opt$analysis_type == "aldex_ttest") {
142 paired_test <- get_boolean_value(opt$paired_test)
143 hist_plot <- get_boolean_value(opt$hist_plot)
144 aldex_obj <- aldex.ttest(aldex_clr_obj, paired.test = paired_test, hist.plot = hist_plot)
145 }
146 }
147 if ((opt$analysis_type != "aldex_expected_distance") && (opt$analysis_type != "aldex_plot") && (opt$analysis_type != "aldex_plot_feature")) {
148 # Output the ALDEx object.
149 write.table(aldex_obj, file = opt$output, append = FALSE, sep = "\t", dec = ".", row.names = FALSE, col.names = TRUE)
150 }