Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 7:066b1f409e9f draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e73fabe1b3c871abbcb2e89914c181149c8e2066
author | eschen42 |
---|---|
date | Thu, 26 Jul 2018 10:23:34 -0400 |
parents | 7bd523ca1f9a |
children | ddcc33ff3205 |
comparison
equal
deleted
inserted
replaced
6:7bd523ca1f9a | 7:066b1f409e9f |
---|---|
31 , predI = 1 | 31 , predI = 1 |
32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 | 32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 |
33 , printL = FALSE | 33 , printL = FALSE |
34 , plotL = FALSE | 34 , plotL = FALSE |
35 , crossvalI = x_crossval_i | 35 , crossvalI = x_crossval_i |
36 , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2. | 36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. |
37 ) | 37 ) |
38 # strip out variables having negligible variance | |
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] | |
38 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) |
41 # x_progress(strF(x_dataMatrix)) | |
42 # x_progress(strF(my_oplsda)) | |
43 #x_progress(head(my_oplsda_suppLs_y_levels)) | |
44 #x_progress(unique(my_oplsda_suppLs_y_levels)) | |
39 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] |
40 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] |
41 do_s_plot <- function( | 47 do_s_plot <- function( |
42 x_env | 48 x_env |
43 , predictor_projection_x = TRUE | 49 , predictor_projection_x = TRUE |
55 if (is.null(cor_vs_cov_x)) { | 61 if (is.null(cor_vs_cov_x)) { |
56 my_cor_vs_cov <- cor_vs_cov( | 62 my_cor_vs_cov <- cor_vs_cov( |
57 matrix_x = x_dataMatrix | 63 matrix_x = x_dataMatrix |
58 , ropls_x = my_oplsda | 64 , ropls_x = my_oplsda |
59 , predictor_projection_x = predictor_projection_x | 65 , predictor_projection_x = predictor_projection_x |
66 , x_progress | |
60 ) | 67 ) |
61 } else { | 68 } else { |
62 my_cor_vs_cov <- cor_vs_cov_x | 69 my_cor_vs_cov <- cor_vs_cov_x |
63 } | 70 } |
64 # print("str(my_cor_vs_cov)") | 71 # print("str(my_cor_vs_cov)") |
65 # str(my_cor_vs_cov) | 72 # str(my_cor_vs_cov) |
66 if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { | 73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { |
67 x_progress("No cor_vs_cov data produced") | 74 x_progress("No cor_vs_cov data produced") |
68 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 75 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
69 text(x=1, y=1, labels="too few covariance data") | 76 text(x=1, y=1, labels="too few covariance data") |
70 return(my_cor_vs_cov) | 77 return(my_cor_vs_cov) |
71 } | 78 } |
481 ) | 488 ) |
482 ) | 489 ) |
483 } | 490 } |
484 | 491 |
485 # transpose matrix because ropls matrix is the transpose of XCMS matrix | 492 # transpose matrix because ropls matrix is the transpose of XCMS matrix |
493 tdm <- t(data_matrix) | |
486 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot | 494 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot |
487 # center | 495 # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2. |
488 cdm <- center_colmeans(t(data_matrix)) | |
489 # pareto-scale | |
490 my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE)) | |
491 scdm <- sweep(cdm, 2, my_scale, "/") | |
492 | 496 |
493 # pattern to match column names like k10_kruskal_k4.k3_sig | 497 # pattern to match column names like k10_kruskal_k4.k3_sig |
494 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) | 498 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) |
495 # column name like k10_kruskal_sig | 499 # column name like k10_kruskal_sig |
496 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC) | 500 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC) |
551 ) | 555 ) |
552 if ( length(level_union) > 2 ) { | 556 if ( length(level_union) > 2 ) { |
553 chosen_samples <- smpl_metadata_facC %in% level_union | 557 chosen_samples <- smpl_metadata_facC %in% level_union |
554 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | 558 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
555 col_selector <- vrbl_metadata_names[ overall_significant ] | 559 col_selector <- vrbl_metadata_names[ overall_significant ] |
556 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] | 560 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] |
557 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { | 561 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { |
558 progress_action( | 562 progress_action( |
559 sprintf("calculating/plotting contrast of %s vs. %s" | 563 sprintf("calculating/plotting contrast of %s vs. %s" |
560 , fctr_lvl_1, fctr_lvl_2)) | 564 , fctr_lvl_1, fctr_lvl_2)) |
561 predictor <- sapply( | 565 predictor <- sapply( |
630 } | 634 } |
631 ) | 635 ) |
632 col_selector <- vrbl_metadata_names[ | 636 col_selector <- vrbl_metadata_names[ |
633 if ( pairSigFeatOnly ) fully_significant else overall_significant | 637 if ( pairSigFeatOnly ) fully_significant else overall_significant |
634 ] | 638 ] |
635 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] | 639 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] |
636 my_cor_cov <- do_detail_plot( | 640 my_cor_cov <- do_detail_plot( |
637 x_dataMatrix = my_matrix | 641 x_dataMatrix = my_matrix |
638 , x_predictor = predictor | 642 , x_predictor = predictor |
639 , x_is_match = is_match | 643 , x_is_match = is_match |
640 , x_algorithm = algoC | 644 , x_algorithm = algoC |
695 X = chosen_facC | 699 X = chosen_facC |
696 , FUN = function(fac) { | 700 , FUN = function(fac) { |
697 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 | 701 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 |
698 } | 702 } |
699 ) | 703 ) |
700 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] | 704 my_matrix <- tdm[ chosen_samples, , drop = FALSE ] |
701 # only process this column if both factors are members of lvlCSV | 705 # only process this column if both factors are members of lvlCSV |
702 is_match <- isLevelSelected(fctr_lvl_1) | 706 is_match <- isLevelSelected(fctr_lvl_1) |
703 my_cor_cov <- do_detail_plot( | 707 my_cor_cov <- do_detail_plot( |
704 x_dataMatrix = my_matrix | 708 x_dataMatrix = my_matrix |
705 , x_predictor = predictor | 709 , x_predictor = predictor |
740 if (length(unique(chosen_samples)) < 1) { | 744 if (length(unique(chosen_samples)) < 1) { |
741 progress_action("NOTHING TO PLOT...") | 745 progress_action("NOTHING TO PLOT...") |
742 } else { | 746 } else { |
743 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | 747 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
744 predictor <- chosen_facC | 748 predictor <- chosen_facC |
745 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] | 749 my_matrix <- tdm[ chosen_samples, , drop = FALSE ] |
746 # only process this column if both factors are members of lvlCSV | 750 # only process this column if both factors are members of lvlCSV |
747 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | 751 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) |
748 my_cor_cov <- do_detail_plot( | 752 my_cor_cov <- do_detail_plot( |
749 x_dataMatrix = my_matrix | 753 x_dataMatrix = my_matrix |
750 , x_predictor = predictor | 754 , x_predictor = predictor |
787 # Adapted from: | 791 # Adapted from: |
788 # Wiklund_2008 doi:10.1021/ac0713510 | 792 # Wiklund_2008 doi:10.1021/ac0713510 |
789 # Galindo_Prieto_2014 doi:10.1002/cem.2627 | 793 # Galindo_Prieto_2014 doi:10.1002/cem.2627 |
790 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R | 794 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R |
791 cor_vs_cov <- function( | 795 cor_vs_cov <- function( |
792 matrix_x, ropls_x | 796 matrix_x |
797 , ropls_x | |
793 , predictor_projection_x = TRUE | 798 , predictor_projection_x = TRUE |
799 , x_progress = print | |
800 ) { | |
801 tryCatch({ | |
802 return( | |
803 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) | |
804 ) | |
805 }, error = function(e) { | |
806 x_progress( | |
807 sprintf( | |
808 "cor_vs_cov fatal error - %s" | |
809 , as.character(e) # e$message | |
810 ) | |
811 ) | |
812 return ( NULL ) | |
813 }) | |
814 } | |
815 | |
816 cor_vs_cov_try <- function( | |
817 matrix_x | |
818 , ropls_x | |
819 , predictor_projection_x = TRUE | |
820 , x_progress = print | |
794 ) { | 821 ) { |
795 x_class <- class(ropls_x) | 822 x_class <- class(ropls_x) |
796 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) | 823 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) |
797 stop( | 824 stop( |
798 paste( | 825 paste( |
863 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) | 890 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) |
864 superresult <- list() | 891 superresult <- list() |
865 if (length(result$vip4o) == 0) result$vip4o <- NA | 892 if (length(result$vip4o) == 0) result$vip4o <- NA |
866 greaterLevel <- sapply( | 893 greaterLevel <- sapply( |
867 X = result$correlation | 894 X = result$correlation |
868 , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | 895 , FUN = function(my_corr) |
896 tryCatch({ | |
897 if ( is.nan( my_corr ) ) { | |
898 print("my_corr is NaN") | |
899 NA | |
900 } else { | |
901 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | |
902 } | |
903 }, error = function(e) { | |
904 x_progress( | |
905 sprintf( | |
906 "cor_vs_cov -> sapply: error - substituting NA - %s" | |
907 , as.character(e) | |
908 ) | |
909 ) | |
910 NA | |
911 }) | |
869 ) | 912 ) |
870 | 913 |
871 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | 914 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 |
872 featureID <- names(ropls_x@vipVn) | 915 featureID <- names(ropls_x@vipVn) |
873 greaterLevel <- greaterLevel[featureID] | 916 greaterLevel <- greaterLevel[featureID] |