diff XSeekerPreparator.R @ 14:00079fadc240 draft

" master branch Updating"
author lain
date Tue, 01 Jun 2021 09:47:15 +0000
parents 26f01380145d
children 2c7e7fd1f740
line wrap: on
line diff
--- a/XSeekerPreparator.R	Wed Apr 07 13:28:09 2021 +0000
+++ b/XSeekerPreparator.R	Tue Jun 01 09:47:15 2021 +0000
@@ -1,11 +1,16 @@
 
 
 TOOL_NAME <- "XSeekerPreparator"
-VERSION <- "1.2.1"
+VERSION <- "1.2.2"
+
+DEBUG_FAST <- FALSE
+DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
+PROCESS_SMOL_BATCH <- DEBUG_FAST
+FAST_FEATURE_RATIO <- 10
 
 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
 
-ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
+ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-")
 ENRICHED_RDATA_DOC <- sprintf("
 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
 This doc was generated by the tool: %s - Version %s
@@ -554,17 +559,20 @@
             next
         }
         env <- new.env()
-        ms_file <- xcms::xcmsRaw(sample_path)
-        env$tic <- ms_file@tic
-        env$mz <- ms_file@env$mz
-        env$scanindex <- ms_file@scanindex
-        env$scantime <- ms_file@scantime * 60
-        env$intensity <- ms_file@env$intensity
-        env$polarity <- as.character(ms_file@polarity[[1]])
 
-        ## Again, ms file is huge, so we get rid of it quickly.
-        rm(ms_file)
+        if (!DEBUG_FAST_IGNORE_SLOW_OP) {
+            ms_file <- xcms::xcmsRaw(sample_path)
+            env$tic <- ms_file@tic
+            env$mz <- ms_file@env$mz
+            env$scanindex <- ms_file@scanindex
+            env$scantime <- ms_file@scantime * 60
+            env$intensity <- ms_file@env$intensity
+            env$polarity <- as.character(ms_file@polarity[[1]])
 
+            ## Again, ms file is huge, so we get rid of it quickly.
+            rm(ms_file)
+
+        }
         env$sample_name <- sample_name
         env$dataset_path <- sample_path
         env$process_params <- process_params
@@ -572,6 +580,7 @@
         env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
         env$tool_name <- TOOL_NAME
         env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
+
         sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
         rm (env)
         context$samples[no] <- sample$get_id()
@@ -676,26 +685,29 @@
         percent <- -1
         total <- nrow(var_meta)
     }
-    for (row in seq_len(nrow(var_meta))) {
+    rows <- seq_len(nrow(var_meta))
+    if (PROCESS_SMOL_BATCH) {
+
+        rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
+    }
+    cluster_row <- list()
+    for (row in rows) {
         if (show_percent && (row / total) * 100 > percent) {
             percent <- percent + 1
             message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
         }
 
-        curent_var_meta <- var_meta[row, ]
-
-
-        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
-
         dummy_feature$set_featureID(next_feature_id)
         next_feature_id <- next_feature_id + 1
+
+        curent_var_meta <- var_meta[row, ]
+        set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
         fake_iso <- dummy_feature$get_iso()
         iso <- extract_iso(fake_iso)
         clusterID <- extract_clusterID(fake_iso, next_cluster_id)
         context$clusterID <- clusterID
         dummy_feature$set_iso(iso)
 
-
         peak_list <- context$peaks[context$groupidx[[row]], ]
         if (! ("matrix" %in% class(peak_list))) {
             peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
@@ -709,21 +721,24 @@
             )
         }
 
-        sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
-        if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
-            if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
-                dummy_feature$set_int_o(int_o)
-            }
-            if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
-                dummy_feature$set_int_b(int_b)
-            }
-            if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
-                dummy_feature$set_max_o(max_o)
+        if (!DEBUG_FAST_IGNORE_SLOW_OP) {
+            sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
+            if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
+                if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+                    dummy_feature$set_int_o(int_o)
+                }
+                if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+                    dummy_feature$set_int_b(int_b)
+                }
+                if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+                    dummy_feature$set_max_o(max_o)
+                }
             }
         }
-        create_associated_cluster(
+
+        cluster_row[[row]] <- create_associated_cluster(
             orm,
-            context$central_feature[[clusterID]],
+            context$samples[context$central_feature[[clusterID]]][[1]],
             dummy_feature, clusterID,
             context, curent_var_meta, next_pc_group,
             next_align_group
@@ -732,10 +747,27 @@
         features[[length(features)+1]] <- as.list(dummy_feature, field_names)
         dummy_feature$clear()
     }
-    message("")## +\n for previous message 
+    rm(var_meta)
+    message("")
     message("Saving features")
-    rm(var_meta)
     invisible(dummy_feature$save(bulk=features))
+
+    ## We link manually clusters to the sample they're in.
+    link_cache <- list()
+    for (row in rows) {
+        sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
+        for (sample_id in context$samples[sample_nos]) {
+            cluster_id <- cluster_row[[row]]$get_id()
+            if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
+                link_cache[[id]] <- 1
+                orm$cluster_sample(
+                    sample_id=sample_id,
+                    cluster_id=cluster_id
+                )$save()
+            }
+        }
+    }
+
     message("Saved.")
     return (context$clusters)
 }
@@ -790,7 +822,7 @@
 
 create_associated_cluster <- function(
     orm,
-    sample_no, feature, clusterID,
+    main_sample_id, feature, clusterID,
     context, curent_var_meta, next_pc_group, next_align_group
 ) {
     clusterID <- as.character(clusterID)
@@ -817,8 +849,8 @@
         ## Crappy hack to assign sample id to cluster without loading the sample.
         ## Samples are too big (their sample$env) and slows the process, and eat all the menory
         ## so we dont't want to load them.
-        cluster[["sample_id"]] <- context$samples[sample_no][[1]]
-        cluster$modified__[["sample_id"]] <- cluster[["sample_id"]]
+        cluster[["sample_id"]] <- main_sample_id
+        cluster$modified__[["sample_id"]] <- main_sample_id
     } else {
         if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
             cluster$set_clusterID(context$clusterID)
@@ -826,7 +858,7 @@
     }
     cluster$save()
     feature$set_cluster(cluster)
-    return (feature)
+    return (cluster)
 }
 
 complete_features <- function(orm, clusters, show_percent) {