Mercurial > repos > lain > xseekerpreparator
comparison XSeekerPreparator.R @ 20:ce94e7a141bb draft default tip
" master branch Updating"
author | lain |
---|---|
date | Tue, 06 Dec 2022 10:18:10 +0000 |
parents | 2937e72e5891 |
children |
comparison
equal
deleted
inserted
replaced
19:2937e72e5891 | 20:ce94e7a141bb |
---|---|
196 } | 196 } |
197 | 197 |
198 search_tree <- function(path, target) { | 198 search_tree <- function(path, target) { |
199 target <- tolower(target) | 199 target <- tolower(target) |
200 for (file in list.files(path)) { | 200 for (file in list.files(path)) { |
201 if (is.dir(file)) { | 201 if (fs::is.dir(file)) { |
202 result <- search_tree(file.path(path, file), target) | 202 result <- search_tree(file.path(path, file), target) |
203 if (!is.null(result)) { | 203 if (!is.null(result)) { |
204 return(result) | 204 return(result) |
205 } | 205 } |
206 } else if (tolower(file) == target) { | 206 } else if (tolower(file) == target) { |
398 return(csv_header_translator(translator, compounds)) | 398 return(csv_header_translator(translator, compounds)) |
399 } | 399 } |
400 | 400 |
401 guess_translator <- function(header) { | 401 guess_translator <- function(header) { |
402 result <- list( | 402 result <- list( |
403 # HMDB_ID = NULL, | |
404 mz = NULL, | 403 mz = NULL, |
405 name = NULL, | 404 name = NULL, |
406 common_name = NULL, | 405 common_name = NULL, |
407 formula = NULL, | 406 formula = NULL |
408 # inchi_key = NULL | |
409 ) | 407 ) |
410 asked_cols <- names(result) | 408 asked_cols <- names(result) |
411 for (asked_col in asked_cols) { | 409 for (asked_col in asked_cols) { |
412 for (col in header) { | 410 for (col in header) { |
413 if ((twisted <- tolower(col)) == asked_col | 411 if ((twisted <- tolower(col)) == asked_col |
469 ) | 467 ) |
470 error <- tryCatch({ | 468 error <- tryCatch({ |
471 process_sample_list( | 469 process_sample_list( |
472 orm, rdata, samples, | 470 orm, rdata, samples, |
473 show_percent = show_percent, | 471 show_percent = show_percent, |
474 file_grouping_var = options$class | 472 file_grouping_var = options$class, |
473 options = options | |
475 ) | 474 ) |
476 NULL | 475 NULL |
477 }, error = function(e) { | 476 }, error = function(e) { |
478 message(e) | 477 return(e) |
479 e | |
480 }) | 478 }) |
481 if (!is.null(mzml_tmp_dir)) { | 479 if (!is.null(mzml_tmp_dir)) { |
482 unlink(mzml_tmp_dir, recursive = TRUE) | 480 unlink(mzml_tmp_dir, recursive = TRUE) |
483 } | 481 } |
484 if (!is.null(error)) { | 482 if (!is.null(error)) { |
485 stop(error) | 483 stop(error) |
486 } | 484 } |
485 return(!is.null(error)) | |
487 } | 486 } |
488 | 487 |
489 gather_mzml_files <- function(rdata) { | 488 gather_mzml_files <- function(rdata) { |
490 if (is.null(rdata$singlefile)) { | 489 if (is.null(rdata$singlefile)) { |
491 message("Extracting mxml files") | 490 message("Extracting mxml files") |
508 process_sample_list <- function( | 507 process_sample_list <- function( |
509 orm, | 508 orm, |
510 rdata, | 509 rdata, |
511 sample_names, | 510 sample_names, |
512 show_percent, | 511 show_percent, |
513 file_grouping_var = NULL | 512 file_grouping_var = NULL, |
513 options = list() | |
514 ) { | 514 ) { |
515 if (is.null(file_grouping_var)) { | 515 if (is.null(file_grouping_var)) { |
516 file_grouping_var <- find_grouping_var(rdata$variableMetadata) | 516 file_grouping_var <- find_grouping_var(rdata$variableMetadata) |
517 if (is.null(file_grouping_var)) { | 517 if (is.null(file_grouping_var)) { |
518 stop("Malformed variableMetada.") | 518 stop("Malformed variableMetada.") |
585 } | 585 } |
586 } | 586 } |
587 | 587 |
588 message("Parameters from previous processes extracted.") | 588 message("Parameters from previous processes extracted.") |
589 | 589 |
590 | |
591 indices <- as.numeric(unique(var_meta[, file_grouping_var])) | |
592 if (any(is.null(names(singlefile)[indices]))) { | |
593 stop(sprintf( | |
594 paste( | |
595 "Indices defined by grouping variable %s are not all present", | |
596 "in singlefile names (%s).\nCannot continue. Indices: %s" | |
597 ), | |
598 file_grouping_var, | |
599 paste(names(singlefile), collapse = ", "), | |
600 paste(indices, collapse = ", ") | |
601 )) | |
602 } | |
603 smol_xcms_set <- orm$smol_xcms_set() | 590 smol_xcms_set <- orm$smol_xcms_set() |
604 mz_tab_info <- new.env() | 591 mz_tab_info <- new.env() |
605 g <- xcms::groups(xcms_set) | 592 g <- xcms::groups(xcms_set) |
606 mz_tab_info$group_length <- nrow(g) | 593 mz_tab_info$group_length <- nrow(g) |
607 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) | 594 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) |
621 | 608 |
622 invisible(smol_xcms_set$set_raw(blogified)$save()) | 609 invisible(smol_xcms_set$set_raw(blogified)$save()) |
623 smol_xcms_set_id <- smol_xcms_set$get_id() | 610 smol_xcms_set_id <- smol_xcms_set$get_id() |
624 rm(smol_xcms_set) | 611 rm(smol_xcms_set) |
625 | 612 |
626 for (no in indices) { | 613 for (no in seq_along(names(singlefile))) { |
627 sample_name <- names(singlefile)[[no]] | 614 sample_name <- names(singlefile)[[no]] |
628 sample_path <- singlefile[[no]] | 615 sample_path <- singlefile[[no]] |
629 if ( | 616 if ( |
630 is.na(no) | 617 is.na(no) |
631 || is.null(sample_path) | 618 || is.null(sample_path) |
758 next_pc_group, next_align_group | 745 next_pc_group, next_align_group |
759 ) { | 746 ) { |
760 field_names <- as.list(names(orm$feature()$fields__)) | 747 field_names <- as.list(names(orm$feature()$fields__)) |
761 field_names[field_names == "id"] <- NULL | 748 field_names[field_names == "id"] <- NULL |
762 | 749 |
763 features <- list() | |
764 dummy_feature <- orm$feature() | 750 dummy_feature <- orm$feature() |
765 | 751 |
766 if (show_percent <- context$show_percent) { | 752 if (show_percent <- context$show_percent) { |
767 percent <- -1 | 753 percent <- -1 |
768 total <- nrow(var_meta) | 754 total <- nrow(var_meta) |
770 rows <- seq_len(nrow(var_meta)) | 756 rows <- seq_len(nrow(var_meta)) |
771 if (PROCESS_SMOL_BATCH) { | 757 if (PROCESS_SMOL_BATCH) { |
772 | 758 |
773 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] | 759 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] |
774 } | 760 } |
775 cluster_row <- list() | 761 # features <- list() |
762 features <- as.list(rows) ## allocate all memory before processing | |
763 # cluster_row <- list() | |
764 cluster_row <- as.list(rows) ## allocate all memory before processing | |
776 for (row in rows) { | 765 for (row in rows) { |
777 if (show_percent && (row / total) * 100 > percent) { | 766 if (show_percent && (row / total) * 100 > percent) { |
778 percent <- percent + 1 | 767 percent <- percent + 1 |
779 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) | 768 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) |
780 } | 769 } |
841 dummy_feature, clusterID, | 830 dummy_feature, clusterID, |
842 context, curent_var_meta, next_pc_group, | 831 context, curent_var_meta, next_pc_group, |
843 next_align_group | 832 next_align_group |
844 ) | 833 ) |
845 next_align_group <- next_align_group + 1 | 834 next_align_group <- next_align_group + 1 |
846 features[[length(features) + 1]] <- as.list(dummy_feature, field_names) | 835 features[[row]] <- as.list(dummy_feature, field_names) |
836 # features[[length(features) + 1]] <- as.list(dummy_feature, field_names) | |
847 dummy_feature$clear() | 837 dummy_feature$clear() |
848 } | 838 } |
849 rm(var_meta) | 839 rm(var_meta) |
850 message("") | 840 message("") |
851 message("Saving features") | 841 message("Saving features") |
961 cluster$set_clusterID(context$clusterID) | 951 cluster$set_clusterID(context$clusterID) |
962 } | 952 } |
963 } | 953 } |
964 cluster$save() | 954 cluster$save() |
965 feature$set_cluster(cluster) | 955 feature$set_cluster(cluster) |
956 feature$save() | |
966 return(cluster) | 957 return(cluster) |
967 } | 958 } |
968 | 959 |
969 complete_features <- function(orm, clusters, show_percent) { | 960 complete_features <- function(orm, clusters, show_percent) { |
970 total <- length(clusters) | 961 total <- length(clusters) |
1060 option_list <- list( | 1051 option_list <- list( |
1061 optparse::make_option( | 1052 optparse::make_option( |
1062 c("-v", "--version"), | 1053 c("-v", "--version"), |
1063 action = "store_true", | 1054 action = "store_true", |
1064 help = "Display this tool's version and exits" | 1055 help = "Display this tool's version and exits" |
1056 ), | |
1057 optparse::make_option( | |
1058 c("-V", "--verbose"), | |
1059 action = "store_true", | |
1060 help = "Does more verbose outputs", | |
1061 default = FALSE | |
1065 ), | 1062 ), |
1066 optparse::make_option( | 1063 optparse::make_option( |
1067 c("-i", "--input"), | 1064 c("-i", "--input"), |
1068 type = "character", | 1065 type = "character", |
1069 help = "The rdata path to import in XSeeker" | 1066 help = "The rdata path to import in XSeeker" |
1159 # } | 1156 # } |
1160 | 1157 |
1161 | 1158 |
1162 load(args$options$input, rdata <- new.env()) | 1159 load(args$options$input, rdata <- new.env()) |
1163 | 1160 |
1164 process_rdata(orm, rdata, args$options) | 1161 args$options$verbose <- ( |
1162 if (args$options$verbose) { | |
1163 message("Verbose outputs.") | |
1164 \(...) { | |
1165 message(sprintf(...)) | |
1166 } | |
1167 } else { | |
1168 \(...) { | |
1169 } | |
1170 } | |
1171 ) | |
1172 | |
1173 err_code <- process_rdata(orm, rdata, args$options) | |
1165 | 1174 |
1166 quit(status = err_code) | 1175 quit(status = err_code) |