comparison w4mcorcov_calc.R @ 7:066b1f409e9f draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e73fabe1b3c871abbcb2e89914c181149c8e2066
author eschen42
date Thu, 26 Jul 2018 10:23:34 -0400
parents 7bd523ca1f9a
children ddcc33ff3205
comparison
equal deleted inserted replaced
6:7bd523ca1f9a 7:066b1f409e9f
31 , predI = 1 31 , predI = 1
32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
33 , printL = FALSE 33 , printL = FALSE
34 , plotL = FALSE 34 , plotL = FALSE
35 , crossvalI = x_crossval_i 35 , crossvalI = x_crossval_i
36 , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2. 36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
37 ) 37 )
38 # strip out variables having negligible variance
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
38 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) 40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
41 # x_progress(strF(x_dataMatrix))
42 # x_progress(strF(my_oplsda))
43 #x_progress(head(my_oplsda_suppLs_y_levels))
44 #x_progress(unique(my_oplsda_suppLs_y_levels))
39 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] 45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
40 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] 46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
41 do_s_plot <- function( 47 do_s_plot <- function(
42 x_env 48 x_env
43 , predictor_projection_x = TRUE 49 , predictor_projection_x = TRUE
55 if (is.null(cor_vs_cov_x)) { 61 if (is.null(cor_vs_cov_x)) {
56 my_cor_vs_cov <- cor_vs_cov( 62 my_cor_vs_cov <- cor_vs_cov(
57 matrix_x = x_dataMatrix 63 matrix_x = x_dataMatrix
58 , ropls_x = my_oplsda 64 , ropls_x = my_oplsda
59 , predictor_projection_x = predictor_projection_x 65 , predictor_projection_x = predictor_projection_x
66 , x_progress
60 ) 67 )
61 } else { 68 } else {
62 my_cor_vs_cov <- cor_vs_cov_x 69 my_cor_vs_cov <- cor_vs_cov_x
63 } 70 }
64 # print("str(my_cor_vs_cov)") 71 # print("str(my_cor_vs_cov)")
65 # str(my_cor_vs_cov) 72 # str(my_cor_vs_cov)
66 if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { 73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
67 x_progress("No cor_vs_cov data produced") 74 x_progress("No cor_vs_cov data produced")
68 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") 75 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
69 text(x=1, y=1, labels="too few covariance data") 76 text(x=1, y=1, labels="too few covariance data")
70 return(my_cor_vs_cov) 77 return(my_cor_vs_cov)
71 } 78 }
481 ) 488 )
482 ) 489 )
483 } 490 }
484 491
485 # transpose matrix because ropls matrix is the transpose of XCMS matrix 492 # transpose matrix because ropls matrix is the transpose of XCMS matrix
493 tdm <- t(data_matrix)
486 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot 494 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot
487 # center 495 # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2.
488 cdm <- center_colmeans(t(data_matrix))
489 # pareto-scale
490 my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE))
491 scdm <- sweep(cdm, 2, my_scale, "/")
492 496
493 # pattern to match column names like k10_kruskal_k4.k3_sig 497 # pattern to match column names like k10_kruskal_k4.k3_sig
494 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) 498 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC)
495 # column name like k10_kruskal_sig 499 # column name like k10_kruskal_sig
496 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC) 500 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC)
551 ) 555 )
552 if ( length(level_union) > 2 ) { 556 if ( length(level_union) > 2 ) {
553 chosen_samples <- smpl_metadata_facC %in% level_union 557 chosen_samples <- smpl_metadata_facC %in% level_union
554 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 558 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
555 col_selector <- vrbl_metadata_names[ overall_significant ] 559 col_selector <- vrbl_metadata_names[ overall_significant ]
556 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 560 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
557 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { 561 plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
558 progress_action( 562 progress_action(
559 sprintf("calculating/plotting contrast of %s vs. %s" 563 sprintf("calculating/plotting contrast of %s vs. %s"
560 , fctr_lvl_1, fctr_lvl_2)) 564 , fctr_lvl_1, fctr_lvl_2))
561 predictor <- sapply( 565 predictor <- sapply(
630 } 634 }
631 ) 635 )
632 col_selector <- vrbl_metadata_names[ 636 col_selector <- vrbl_metadata_names[
633 if ( pairSigFeatOnly ) fully_significant else overall_significant 637 if ( pairSigFeatOnly ) fully_significant else overall_significant
634 ] 638 ]
635 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 639 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
636 my_cor_cov <- do_detail_plot( 640 my_cor_cov <- do_detail_plot(
637 x_dataMatrix = my_matrix 641 x_dataMatrix = my_matrix
638 , x_predictor = predictor 642 , x_predictor = predictor
639 , x_is_match = is_match 643 , x_is_match = is_match
640 , x_algorithm = algoC 644 , x_algorithm = algoC
695 X = chosen_facC 699 X = chosen_facC
696 , FUN = function(fac) { 700 , FUN = function(fac) {
697 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 701 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
698 } 702 }
699 ) 703 )
700 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] 704 my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
701 # only process this column if both factors are members of lvlCSV 705 # only process this column if both factors are members of lvlCSV
702 is_match <- isLevelSelected(fctr_lvl_1) 706 is_match <- isLevelSelected(fctr_lvl_1)
703 my_cor_cov <- do_detail_plot( 707 my_cor_cov <- do_detail_plot(
704 x_dataMatrix = my_matrix 708 x_dataMatrix = my_matrix
705 , x_predictor = predictor 709 , x_predictor = predictor
740 if (length(unique(chosen_samples)) < 1) { 744 if (length(unique(chosen_samples)) < 1) {
741 progress_action("NOTHING TO PLOT...") 745 progress_action("NOTHING TO PLOT...")
742 } else { 746 } else {
743 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 747 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
744 predictor <- chosen_facC 748 predictor <- chosen_facC
745 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] 749 my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
746 # only process this column if both factors are members of lvlCSV 750 # only process this column if both factors are members of lvlCSV
747 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) 751 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
748 my_cor_cov <- do_detail_plot( 752 my_cor_cov <- do_detail_plot(
749 x_dataMatrix = my_matrix 753 x_dataMatrix = my_matrix
750 , x_predictor = predictor 754 , x_predictor = predictor
787 # Adapted from: 791 # Adapted from:
788 # Wiklund_2008 doi:10.1021/ac0713510 792 # Wiklund_2008 doi:10.1021/ac0713510
789 # Galindo_Prieto_2014 doi:10.1002/cem.2627 793 # Galindo_Prieto_2014 doi:10.1002/cem.2627
790 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R 794 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
791 cor_vs_cov <- function( 795 cor_vs_cov <- function(
792 matrix_x, ropls_x 796 matrix_x
797 , ropls_x
793 , predictor_projection_x = TRUE 798 , predictor_projection_x = TRUE
799 , x_progress = print
800 ) {
801 tryCatch({
802 return(
803 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress)
804 )
805 }, error = function(e) {
806 x_progress(
807 sprintf(
808 "cor_vs_cov fatal error - %s"
809 , as.character(e) # e$message
810 )
811 )
812 return ( NULL )
813 })
814 }
815
816 cor_vs_cov_try <- function(
817 matrix_x
818 , ropls_x
819 , predictor_projection_x = TRUE
820 , x_progress = print
794 ) { 821 ) {
795 x_class <- class(ropls_x) 822 x_class <- class(ropls_x)
796 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 823 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
797 stop( 824 stop(
798 paste( 825 paste(
863 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) 890 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count)
864 superresult <- list() 891 superresult <- list()
865 if (length(result$vip4o) == 0) result$vip4o <- NA 892 if (length(result$vip4o) == 0) result$vip4o <- NA
866 greaterLevel <- sapply( 893 greaterLevel <- sapply(
867 X = result$correlation 894 X = result$correlation
868 , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 895 , FUN = function(my_corr)
896 tryCatch({
897 if ( is.nan( my_corr ) ) {
898 print("my_corr is NaN")
899 NA
900 } else {
901 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
902 }
903 }, error = function(e) {
904 x_progress(
905 sprintf(
906 "cor_vs_cov -> sapply: error - substituting NA - %s"
907 , as.character(e)
908 )
909 )
910 NA
911 })
869 ) 912 )
870 913
871 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 914 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
872 featureID <- names(ropls_x@vipVn) 915 featureID <- names(ropls_x@vipVn)
873 greaterLevel <- greaterLevel[featureID] 916 greaterLevel <- greaterLevel[featureID]