Mercurial > repos > eschen42 > w4mcorcov
changeset 7:066b1f409e9f draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e73fabe1b3c871abbcb2e89914c181149c8e2066
author | eschen42 |
---|---|
date | Thu, 26 Jul 2018 10:23:34 -0400 |
parents | 7bd523ca1f9a |
children | 342570ad880c |
files | w4mcorcov.xml w4mcorcov_calc.R w4mcorcov_input.R |
diffstat | 3 files changed, 83 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/w4mcorcov.xml Wed Jul 18 12:35:55 2018 -0400 +++ b/w4mcorcov.xml Thu Jul 26 10:23:34 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.10"> +<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.11"> <description>OPLS-DA Contrasts of Univariate Results</description> @@ -202,22 +202,16 @@ <has_text text="level1Level2Sig" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.04051509" /> - <has_text text="-0.001964912" /> - <has_text text="0.02106343" /> - <has_text text="0.2446366813" /> + <has_text text="-0.3704185" /> + <has_text text="-36.6668927" /> + <has_text text="0.4914638" /> + <has_text text="0.01302117" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="0.504885262" /> - <has_text text="0.020749097" /> + <has_text text="0.3235022" /> + <has_text text="5.97529097" /> <has_text text="0.207196379" /> <has_text text="0.04438632" /> - <!-- third matched line --> - <has_text text="M211.0607T263" /> - <has_text text="0.0680900" /> - <has_text text="0.0020163" /> - <has_text text="0.0201345" /> - <has_text text="0.0690773" /> </assert_contents> </output> <output name="contrast_salience"> @@ -266,8 +260,8 @@ <has_text text="level1Level2Sig" /> <!-- first matched line --> <has_text text="M200.005T296" /> - <has_text text="-0.2803571" /> - <has_text text="-0.0115899" /> + <has_text text="-0.24533821" /> + <has_text text="-3.3573953" /> <has_text text="0.1157346" /> <has_text text="0.0647860" /> </assert_contents> @@ -316,14 +310,14 @@ <has_text text="vip4o" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.4732226" /> - <has_text text="-0.0506172" /> + <has_text text="-0.37867079" /> + <has_text text="-37.71066" /> <has_text text="0.5246766" /> <has_text text="0.0103341" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="0.4927151" /> - <has_text text="0.0203715" /> + <has_text text="0.31570433" /> + <has_text text="5.86655640" /> <has_text text="0.2111623" /> <has_text text="0.0488654" /> </assert_contents> @@ -374,12 +368,12 @@ <has_text text="NM516T251" /> <has_text text="flower_yes" /> <has_text text="other" /> - <has_text text="0.3490559" /> - <has_text text="0.0260147" /> - <has_text text="0.4377872" /> - <has_text text="0.5916089" /> - <has_text text="0.0260147" /> - <has_text text="0.0438942" /> + <has_text text="0.03402807" /> + <has_text text="0.03526926" /> + <has_text text="0.43664386" /> + <has_text text="0.587701897" /> + <has_text text="0.026082688" /> + <has_text text="0.0437742145" /> <has_text text="516.0845" /> <has_text text="250.8762" /> </assert_contents> @@ -734,6 +728,10 @@ Release notes ------------- +0.98.11 + +- bug fix: Readdress issue 2 - features now are pareto-scaled *and centered* per Wikland *op cit.*. + 0.98.10 - new feature: C-plots of VIP versus correlation or relative covariance.
--- a/w4mcorcov_calc.R Wed Jul 18 12:35:55 2018 -0400 +++ b/w4mcorcov_calc.R Thu Jul 26 10:23:34 2018 -0400 @@ -33,9 +33,15 @@ , printL = FALSE , plotL = FALSE , crossvalI = x_crossval_i - , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2. + , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. ) + # strip out variables having negligible variance + x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) + # x_progress(strF(x_dataMatrix)) + # x_progress(strF(my_oplsda)) + #x_progress(head(my_oplsda_suppLs_y_levels)) + #x_progress(unique(my_oplsda_suppLs_y_levels)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] do_s_plot <- function( @@ -57,13 +63,14 @@ matrix_x = x_dataMatrix , ropls_x = my_oplsda , predictor_projection_x = predictor_projection_x + , x_progress ) } else { my_cor_vs_cov <- cor_vs_cov_x } # print("str(my_cor_vs_cov)") # str(my_cor_vs_cov) - if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { + if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { x_progress("No cor_vs_cov data produced") plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="too few covariance data") @@ -483,12 +490,9 @@ } # transpose matrix because ropls matrix is the transpose of XCMS matrix + tdm <- t(data_matrix) # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot - # center - cdm <- center_colmeans(t(data_matrix)) - # pareto-scale - my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE)) - scdm <- sweep(cdm, 2, my_scale, "/") + # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2. # pattern to match column names like k10_kruskal_k4.k3_sig col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) @@ -553,7 +557,7 @@ chosen_samples <- smpl_metadata_facC %in% level_union chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) col_selector <- vrbl_metadata_names[ overall_significant ] - my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] + my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] plot_action <- function(fctr_lvl_1, fctr_lvl_2) { progress_action( sprintf("calculating/plotting contrast of %s vs. %s" @@ -632,7 +636,7 @@ col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] - my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] + my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] my_cor_cov <- do_detail_plot( x_dataMatrix = my_matrix , x_predictor = predictor @@ -697,7 +701,7 @@ if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 } ) - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + my_matrix <- tdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) my_cor_cov <- do_detail_plot( @@ -742,7 +746,7 @@ } else { chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) predictor <- chosen_facC - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + my_matrix <- tdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) my_cor_cov <- do_detail_plot( @@ -789,8 +793,31 @@ # Galindo_Prieto_2014 doi:10.1002/cem.2627 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R cor_vs_cov <- function( - matrix_x, ropls_x + matrix_x +, ropls_x , predictor_projection_x = TRUE +, x_progress = print +) { + tryCatch({ + return( + cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) + ) + }, error = function(e) { + x_progress( + sprintf( + "cor_vs_cov fatal error - %s" + , as.character(e) # e$message + ) + ) + return ( NULL ) + }) +} + +cor_vs_cov_try <- function( + matrix_x +, ropls_x +, predictor_projection_x = TRUE +, x_progress = print ) { x_class <- class(ropls_x) if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) @@ -865,7 +892,23 @@ if (length(result$vip4o) == 0) result$vip4o <- NA greaterLevel <- sapply( X = result$correlation - , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 + , FUN = function(my_corr) + tryCatch({ + if ( is.nan( my_corr ) ) { + print("my_corr is NaN") + NA + } else { + if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 + } + }, error = function(e) { + x_progress( + sprintf( + "cor_vs_cov -> sapply: error - substituting NA - %s" + , as.character(e) + ) + ) + NA + }) ) # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
--- a/w4mcorcov_input.R Wed Jul 18 12:35:55 2018 -0400 +++ b/w4mcorcov_input.R Thu Jul 26 10:23:34 2018 -0400 @@ -165,6 +165,10 @@ data_matrix <- as.matrix(data_matrix) } + # Omit any feature not found in variableMetadata and any sample not found in sampleMetadata + # For something more elaborate, see https://github.com/HegemanLab/w4mclassfilter + data_matrix <- data_matrix[rownames(data_matrix) %in% rownames(vrbl_metadata),colnames(data_matrix) %in% rownames(smpl_metadata)] + input_env$data_matrix <- data_matrix # ... } else {