comparison XSeekerPreparator.R @ 14:00079fadc240 draft

" master branch Updating"
author lain
date Tue, 01 Jun 2021 09:47:15 +0000
parents 26f01380145d
children 2c7e7fd1f740
comparison
equal deleted inserted replaced
13:26f01380145d 14:00079fadc240
1 1
2 2
3 TOOL_NAME <- "XSeekerPreparator" 3 TOOL_NAME <- "XSeekerPreparator"
4 VERSION <- "1.2.1" 4 VERSION <- "1.2.2"
5
6 DEBUG_FAST <- FALSE
7 DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
8 PROCESS_SMOL_BATCH <- DEBUG_FAST
9 FAST_FEATURE_RATIO <- 10
5 10
6 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" 11 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
7 12
8 ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-") 13 ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-")
9 ENRICHED_RDATA_DOC <- sprintf(" 14 ENRICHED_RDATA_DOC <- sprintf("
10 Welcome to the enriched <Version %s> of the output of CAMERA/xcms. 15 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
11 This doc was generated by the tool: %s - Version %s 16 This doc was generated by the tool: %s - Version %s
12 To show the different variables contained in this rdata, type: 17 To show the different variables contained in this rdata, type:
13 - `load('this_rdata.rdata', rdata_env <- new.env())` 18 - `load('this_rdata.rdata', rdata_env <- new.env())`
552 || !(sample_name %in% sample_names) 557 || !(sample_name %in% sample_names)
553 ) { 558 ) {
554 next 559 next
555 } 560 }
556 env <- new.env() 561 env <- new.env()
557 ms_file <- xcms::xcmsRaw(sample_path) 562
558 env$tic <- ms_file@tic 563 if (!DEBUG_FAST_IGNORE_SLOW_OP) {
559 env$mz <- ms_file@env$mz 564 ms_file <- xcms::xcmsRaw(sample_path)
560 env$scanindex <- ms_file@scanindex 565 env$tic <- ms_file@tic
561 env$scantime <- ms_file@scantime * 60 566 env$mz <- ms_file@env$mz
562 env$intensity <- ms_file@env$intensity 567 env$scanindex <- ms_file@scanindex
563 env$polarity <- as.character(ms_file@polarity[[1]]) 568 env$scantime <- ms_file@scantime * 60
564 569 env$intensity <- ms_file@env$intensity
565 ## Again, ms file is huge, so we get rid of it quickly. 570 env$polarity <- as.character(ms_file@polarity[[1]])
566 rm(ms_file) 571
567 572 ## Again, ms file is huge, so we get rid of it quickly.
573 rm(ms_file)
574
575 }
568 env$sample_name <- sample_name 576 env$sample_name <- sample_name
569 env$dataset_path <- sample_path 577 env$dataset_path <- sample_path
570 env$process_params <- process_params 578 env$process_params <- process_params
571 env$enriched_rdata <- TRUE 579 env$enriched_rdata <- TRUE
572 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION 580 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
573 env$tool_name <- TOOL_NAME 581 env$tool_name <- TOOL_NAME
574 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC 582 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
583
575 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) 584 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
576 rm (env) 585 rm (env)
577 context$samples[no] <- sample$get_id() 586 context$samples[no] <- sample$get_id()
578 rm (sample) 587 rm (sample)
579 } 588 }
674 683
675 if (show_percent <- context$show_percent) { 684 if (show_percent <- context$show_percent) {
676 percent <- -1 685 percent <- -1
677 total <- nrow(var_meta) 686 total <- nrow(var_meta)
678 } 687 }
679 for (row in seq_len(nrow(var_meta))) { 688 rows <- seq_len(nrow(var_meta))
689 if (PROCESS_SMOL_BATCH) {
690
691 rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
692 }
693 cluster_row <- list()
694 for (row in rows) {
680 if (show_percent && (row / total) * 100 > percent) { 695 if (show_percent && (row / total) * 100 > percent) {
681 percent <- percent + 1 696 percent <- percent + 1
682 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) 697 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
683 } 698 }
684 699
685 curent_var_meta <- var_meta[row, ]
686
687
688 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
689
690 dummy_feature$set_featureID(next_feature_id) 700 dummy_feature$set_featureID(next_feature_id)
691 next_feature_id <- next_feature_id + 1 701 next_feature_id <- next_feature_id + 1
702
703 curent_var_meta <- var_meta[row, ]
704 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
692 fake_iso <- dummy_feature$get_iso() 705 fake_iso <- dummy_feature$get_iso()
693 iso <- extract_iso(fake_iso) 706 iso <- extract_iso(fake_iso)
694 clusterID <- extract_clusterID(fake_iso, next_cluster_id) 707 clusterID <- extract_clusterID(fake_iso, next_cluster_id)
695 context$clusterID <- clusterID 708 context$clusterID <- clusterID
696 dummy_feature$set_iso(iso) 709 dummy_feature$set_iso(iso)
697
698 710
699 peak_list <- context$peaks[context$groupidx[[row]], ] 711 peak_list <- context$peaks[context$groupidx[[row]], ]
700 if (! ("matrix" %in% class(peak_list))) { 712 if (! ("matrix" %in% class(peak_list))) {
701 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) 713 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
702 } 714 }
707 context$central_feature[[clusterID]] <- ( 719 context$central_feature[[clusterID]] <- (
708 peak_list[peak_list[, "into"] == int_o,]["sample"] 720 peak_list[peak_list[, "into"] == int_o,]["sample"]
709 ) 721 )
710 } 722 }
711 723
712 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] 724 if (!DEBUG_FAST_IGNORE_SLOW_OP) {
713 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { 725 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
714 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { 726 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
715 dummy_feature$set_int_o(int_o) 727 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
728 dummy_feature$set_int_o(int_o)
729 }
730 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
731 dummy_feature$set_int_b(int_b)
732 }
733 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
734 dummy_feature$set_max_o(max_o)
735 }
716 } 736 }
717 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { 737 }
718 dummy_feature$set_int_b(int_b) 738
719 } 739 cluster_row[[row]] <- create_associated_cluster(
720 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
721 dummy_feature$set_max_o(max_o)
722 }
723 }
724 create_associated_cluster(
725 orm, 740 orm,
726 context$central_feature[[clusterID]], 741 context$samples[context$central_feature[[clusterID]]][[1]],
727 dummy_feature, clusterID, 742 dummy_feature, clusterID,
728 context, curent_var_meta, next_pc_group, 743 context, curent_var_meta, next_pc_group,
729 next_align_group 744 next_align_group
730 ) 745 )
731 next_align_group <- next_align_group + 1 746 next_align_group <- next_align_group + 1
732 features[[length(features)+1]] <- as.list(dummy_feature, field_names) 747 features[[length(features)+1]] <- as.list(dummy_feature, field_names)
733 dummy_feature$clear() 748 dummy_feature$clear()
734 } 749 }
735 message("")## +\n for previous message 750 rm(var_meta)
751 message("")
736 message("Saving features") 752 message("Saving features")
737 rm(var_meta)
738 invisible(dummy_feature$save(bulk=features)) 753 invisible(dummy_feature$save(bulk=features))
754
755 ## We link manually clusters to the sample they're in.
756 link_cache <- list()
757 for (row in rows) {
758 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
759 for (sample_id in context$samples[sample_nos]) {
760 cluster_id <- cluster_row[[row]]$get_id()
761 if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
762 link_cache[[id]] <- 1
763 orm$cluster_sample(
764 sample_id=sample_id,
765 cluster_id=cluster_id
766 )$save()
767 }
768 }
769 }
770
739 message("Saved.") 771 message("Saved.")
740 return (context$clusters) 772 return (context$clusters)
741 } 773 }
742 774
743 extract_peak_var <- function(peak_list, var_name, selector=max) { 775 extract_peak_var <- function(peak_list, var_name, selector=max) {
788 return (clusterID + next_cluster_id) 820 return (clusterID + next_cluster_id)
789 } 821 }
790 822
791 create_associated_cluster <- function( 823 create_associated_cluster <- function(
792 orm, 824 orm,
793 sample_no, feature, clusterID, 825 main_sample_id, feature, clusterID,
794 context, curent_var_meta, next_pc_group, next_align_group 826 context, curent_var_meta, next_pc_group, next_align_group
795 ) { 827 ) {
796 clusterID <- as.character(clusterID) 828 clusterID <- as.character(clusterID)
797 if (is.null(cluster <- context$clusters[[clusterID]])) { 829 if (is.null(cluster <- context$clusters[[clusterID]])) {
798 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) 830 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
815 } 847 }
816 cluster$set_adduct(adduct) 848 cluster$set_adduct(adduct)
817 ## Crappy hack to assign sample id to cluster without loading the sample. 849 ## Crappy hack to assign sample id to cluster without loading the sample.
818 ## Samples are too big (their sample$env) and slows the process, and eat all the menory 850 ## Samples are too big (their sample$env) and slows the process, and eat all the menory
819 ## so we dont't want to load them. 851 ## so we dont't want to load them.
820 cluster[["sample_id"]] <- context$samples[sample_no][[1]] 852 cluster[["sample_id"]] <- main_sample_id
821 cluster$modified__[["sample_id"]] <- cluster[["sample_id"]] 853 cluster$modified__[["sample_id"]] <- main_sample_id
822 } else { 854 } else {
823 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { 855 if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
824 cluster$set_clusterID(context$clusterID) 856 cluster$set_clusterID(context$clusterID)
825 } 857 }
826 } 858 }
827 cluster$save() 859 cluster$save()
828 feature$set_cluster(cluster) 860 feature$set_cluster(cluster)
829 return (feature) 861 return (cluster)
830 } 862 }
831 863
832 complete_features <- function(orm, clusters, show_percent) { 864 complete_features <- function(orm, clusters, show_percent) {
833 total <- length(clusters) 865 total <- length(clusters)
834 percent <- -1 866 percent <- -1