xseekerpreparator: XSeekerPreparator.R comparison

comparison XSeekerPreparator.R @ 14:00079fadc240 draft

" master branch Updating"

author	lain
date	Tue, 01 Jun 2021 09:47:15 +0000
parents	26f01380145d
children	2c7e7fd1f740

comparison

equal deleted inserted replaced

-:26f01380145d
+:00079fadc240
 TOOL_NAME <- "XSeekerPreparator"
-VERSION <- "1.2.1"
+VERSION <- "1.2.2"
+DEBUG_FAST <- FALSE
+DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST
+PROCESS_SMOL_BATCH <- DEBUG_FAST
+FAST_FEATURE_RATIO <- 10
 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
-ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
+ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-")
 ENRICHED_RDATA_DOC <- sprintf("
 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
 This doc was generated by the tool: %s - Version %s
 To show the different variables contained in this rdata, type:
 - `load('this_rdata.rdata', rdata_env <- new.env())`
 || !(sample_name %in% sample_names)
 ) {
 next
 }
 env <- new.env()
-ms_file <- xcms::xcmsRaw(sample_path)
-env$tic <- ms_file@tic
+if (!DEBUG_FAST_IGNORE_SLOW_OP) {
-env$mz <- ms_file@env$mz
+ms_file <- xcms::xcmsRaw(sample_path)
-env$scanindex <- ms_file@scanindex
+env$tic <- ms_file@tic
-env$scantime <- ms_file@scantime * 60
+env$mz <- ms_file@env$mz
-env$intensity <- ms_file@env$intensity
+env$scanindex <- ms_file@scanindex
-env$polarity <- as.character(ms_file@polarity[[1]])
+env$scantime <- ms_file@scantime * 60
+env$intensity <- ms_file@env$intensity
-## Again, ms file is huge, so we get rid of it quickly.
+env$polarity <- as.character(ms_file@polarity[[1]])
-rm(ms_file)
+## Again, ms file is huge, so we get rid of it quickly.
+rm(ms_file)
+}
 env$sample_name <- sample_name
 env$dataset_path <- sample_path
 env$process_params <- process_params
 env$enriched_rdata <- TRUE
 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
 env$tool_name <- TOOL_NAME
 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
 rm (env)
 context$samples[no] <- sample$get_id()
 rm (sample)
 }
 if (show_percent <- context$show_percent) {
 percent <- -1
 total <- nrow(var_meta)
 }
-for (row in seq_len(nrow(var_meta))) {
+rows <- seq_len(nrow(var_meta))
+if (PROCESS_SMOL_BATCH) {
+rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))]
+}
+cluster_row <- list()
+for (row in rows) {
 if (show_percent && (row / total) * 100 > percent) {
 percent <- percent + 1
 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
 }
-curent_var_meta <- var_meta[row, ]
-set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
 dummy_feature$set_featureID(next_feature_id)
 next_feature_id <- next_feature_id + 1
+curent_var_meta <- var_meta[row, ]
+set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
 fake_iso <- dummy_feature$get_iso()
 iso <- extract_iso(fake_iso)
 clusterID <- extract_clusterID(fake_iso, next_cluster_id)
 context$clusterID <- clusterID
 dummy_feature$set_iso(iso)
 peak_list <- context$peaks[context$groupidx[[row]], ]
 if (! ("matrix" %in% class(peak_list))) {
 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
 }
 context$central_feature[[clusterID]] <- (
 peak_list[peak_list[, "into"] == int_o,]["sample"]
 )
 }
-sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
+if (!DEBUG_FAST_IGNORE_SLOW_OP) {
-if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
+sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
-if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
-dummy_feature$set_int_o(int_o)
+if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
+dummy_feature$set_int_o(int_o)
+}
+if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+dummy_feature$set_int_b(int_b)
+}
+if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
+dummy_feature$set_max_o(max_o)
+}
 }
-if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
+}
-dummy_feature$set_int_b(int_b)
-}
+cluster_row[[row]] <- create_associated_cluster(
-if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
-dummy_feature$set_max_o(max_o)
-}
-}
-create_associated_cluster(
 orm,
-context$central_feature[[clusterID]],
+context$samples[context$central_feature[[clusterID]]][[1]],
 dummy_feature, clusterID,
 context, curent_var_meta, next_pc_group,
 next_align_group
 )
 next_align_group <- next_align_group + 1
 features[[length(features)+1]] <- as.list(dummy_feature, field_names)
 dummy_feature$clear()
 }
-message("")## +\n for previous message
+rm(var_meta)
+message("")
 message("Saving features")
-rm(var_meta)
 invisible(dummy_feature$save(bulk=features))
+## We link manually clusters to the sample they're in.
+link_cache <- list()
+for (row in rows) {
+sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
+for (sample_id in context$samples[sample_nos]) {
+cluster_id <- cluster_row[[row]]$get_id()
+if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) {
+link_cache[[id]] <- 1
+orm$cluster_sample(
+sample_id=sample_id,
+cluster_id=cluster_id
+)$save()
+}
+}
+}
 message("Saved.")
 return (context$clusters)
 }
 extract_peak_var <- function(peak_list, var_name, selector=max) {
 return (clusterID + next_cluster_id)
 }
 create_associated_cluster <- function(
 orm,
-sample_no, feature, clusterID,
+main_sample_id, feature, clusterID,
 context, curent_var_meta, next_pc_group, next_align_group
 ) {
 clusterID <- as.character(clusterID)
 if (is.null(cluster <- context$clusters[[clusterID]])) {
 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
 }
 cluster$set_adduct(adduct)
 ## Crappy hack to assign sample id to cluster without loading the sample.
 ## Samples are too big (their sample$env) and slows the process, and eat all the menory
 ## so we dont't want to load them.
-cluster[["sample_id"]] <- context$samples[sample_no][[1]]
+cluster[["sample_id"]] <- main_sample_id
-cluster$modified__[["sample_id"]] <- cluster[["sample_id"]]
+cluster$modified__[["sample_id"]] <- main_sample_id
 } else {
 if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
 cluster$set_clusterID(context$clusterID)
 }
 }
 cluster$save()
 feature$set_cluster(cluster)
-return (feature)
+return (cluster)
 }
 complete_features <- function(orm, clusters, show_percent) {
 total <- length(clusters)
 percent <- -1

Mercurial > repos > lain > xseekerpreparator

comparison XSeekerPreparator.R @ 14:00079fadc240 draft