Mercurial > repos > lain > xseekerpreparator
comparison XSeekerPreparator.R @ 11:f4fc4a0f41e2 draft
" master branch Updating"
author | lain |
---|---|
date | Thu, 18 Mar 2021 10:46:01 +0000 |
parents | bb9ebd6365ea |
children | bdb2878ee189 |
comparison
equal
deleted
inserted
replaced
10:bb9ebd6365ea | 11:f4fc4a0f41e2 |
---|---|
1 | 1 |
2 | 2 |
3 TOOL_NAME <- "XSeekerPreparator" | 3 TOOL_NAME <- "XSeekerPreparator" |
4 VERSION <- "1.1.6" | 4 VERSION <- "1.2.0" |
5 | 5 |
6 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" | 6 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" |
7 | 7 |
8 ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-") | 8 ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-") |
9 ENRICHED_RDATA_DOC <- sprintf(" | 9 ENRICHED_RDATA_DOC <- sprintf(" |
307 dummy_adduct$set_oidscore(adduct[[i <- i+1]]) | 307 dummy_adduct$set_oidscore(adduct[[i <- i+1]]) |
308 dummy_adduct$set_quasi(adduct[[i <- i+1]]) | 308 dummy_adduct$set_quasi(adduct[[i <- i+1]]) |
309 dummy_adduct$set_ips(adduct[[i <- i+1]]) | 309 dummy_adduct$set_ips(adduct[[i <- i+1]]) |
310 dummy_adduct$set_formula_add(adduct[[i <- i+1]]) | 310 dummy_adduct$set_formula_add(adduct[[i <- i+1]]) |
311 dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) | 311 dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) |
312 dummy_adduct$save() | 312 invisible(dummy_adduct$save()) |
313 dummy_adduct$clear(unset_id=TRUE) | 313 dummy_adduct$clear(unset_id=TRUE) |
314 } | 314 } |
315 message("Adducts created") | 315 message("Adducts created") |
316 } | 316 } |
317 | 317 |
337 for (i in seq_len(nrow(compounds))) { | 337 for (i in seq_len(nrow(compounds))) { |
338 dummy_compound$set_mz(compounds[i, "mz"]) | 338 dummy_compound$set_mz(compounds[i, "mz"]) |
339 dummy_compound$set_name(compounds[i, "name"]) | 339 dummy_compound$set_name(compounds[i, "name"]) |
340 dummy_compound$set_common_name(compounds[i, "common_name"]) | 340 dummy_compound$set_common_name(compounds[i, "common_name"]) |
341 dummy_compound$set_formula(compounds[i, "formula"]) | 341 dummy_compound$set_formula(compounds[i, "formula"]) |
342 # dummy_compound$set_mz(compounds[i, "mz"]) | |
343 # dummy_compound$set_mz(compounds[i, "mz"]) | |
344 compound_list[[length(compound_list)+1]] <- as.list( | 342 compound_list[[length(compound_list)+1]] <- as.list( |
345 dummy_compound, | 343 dummy_compound, |
346 c("mz", "name", "common_name", "formula") | 344 c("mz", "name", "common_name", "formula") |
347 ) | 345 ) |
348 dummy_compound$clear(unset_id=TRUE) | 346 dummy_compound$clear(unset_id=TRUE) |
349 } | 347 } |
350 dummy_compound$save(bulk=compound_list) | 348 invisible(dummy_compound$save(bulk=compound_list)) |
351 } | 349 } |
352 | 350 |
353 translate_compounds <- function(compounds) { | 351 translate_compounds <- function(compounds) { |
354 recognized_headers <- list( | 352 recognized_headers <- list( |
355 c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") | 353 c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") |
369 return (csv_header_translator(translator, compounds)) | 367 return (csv_header_translator(translator, compounds)) |
370 } | 368 } |
371 | 369 |
372 guess_translator <- function(header) { | 370 guess_translator <- function(header) { |
373 result <- list( | 371 result <- list( |
374 # HMDB_ID=NULL,< | 372 # HMDB_ID=NULL, |
375 mz=NULL, | 373 mz=NULL, |
376 name=NULL, | 374 name=NULL, |
377 common_name=NULL, | 375 common_name=NULL, |
378 formula=NULL, | 376 formula=NULL, |
379 # inchi_key=NULL | 377 # inchi_key=NULL |
411 } | 409 } |
412 | 410 |
413 csv_header_translator <- function(translation_table, csv) { | 411 csv_header_translator <- function(translation_table, csv) { |
414 header_names <- names(translation_table) | 412 header_names <- names(translation_table) |
415 result <- data.frame(1:nrow(csv)) | 413 result <- data.frame(1:nrow(csv)) |
416 # colnames(result) <- header_names | |
417 for (i in seq_along(header_names)) { | 414 for (i in seq_along(header_names)) { |
418 result[, header_names[[i]]] <- csv[, translation_table[[i]]] | 415 result[, header_names[[i]]] <- csv[, translation_table[[i]]] |
419 } | 416 } |
420 print(result[, "mz"]) | |
421 result[, "mz"] <- as.numeric(result[, "mz"]) | 417 result[, "mz"] <- as.numeric(result[, "mz"]) |
422 print(result[, "mz"]) | |
423 return (result) | 418 return (result) |
424 } | 419 } |
425 | 420 |
426 set_database_version <- function(orm, version) { | 421 set_database_version <- function(orm, version) { |
427 orm$set_tag( | 422 orm$set_tag( |
479 message(sprintf("File grouping variable: %s", file_grouping_var)) | 474 message(sprintf("File grouping variable: %s", file_grouping_var)) |
480 if(is.null(file_grouping_var)) { | 475 if(is.null(file_grouping_var)) { |
481 stop("Malformed variableMetada.") | 476 stop("Malformed variableMetada.") |
482 } | 477 } |
483 | 478 |
479 context <- new.env() | |
480 context$samples <- list() | |
481 context$peaks <- rdata$xa@xcmsSet@peaks | |
482 context$groupidx <- rdata$xa@xcmsSet@groupidx | |
483 xcms_set <- rdata$xa@xcmsSet | |
484 singlefile <- rdata$singlefile | |
484 process_arg_list <- rdata$listOFlistArguments | 485 process_arg_list <- rdata$listOFlistArguments |
486 var_meta <- rdata$variableMetadata | |
487 ## We needed to get rid of the rdata, which is very big. | |
488 ## So we gathered all variable assignment from rdata here, and got rid of it. | |
489 rm(rdata) | |
490 | |
485 process_params <- list() | 491 process_params <- list() |
486 for (list_name in names(process_arg_list)) { | 492 for (list_name in names(process_arg_list)) { |
487 param_list <- list() | 493 param_list <- list() |
488 for (param_name in names(process_arg_list[[list_name]])) { | 494 for (param_name in names(process_arg_list[[list_name]])) { |
489 param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]] | 495 param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]] |
490 } | 496 } |
491 process_params[[length(process_params)+1]] <- param_list | 497 process_params[[length(process_params)+1]] <- param_list |
492 } | 498 } |
493 message("Parameters from previous processes extracted.") | 499 message("Parameters from previous processes extracted.") |
494 | 500 |
495 var_meta <- rdata$variableMetadata | |
496 align_group <- rep(0, nrow(var_meta)) | |
497 var_meta <- cbind(var_meta, align_group) | |
498 context <- new.env() | |
499 context$clusters <- list() | |
500 context$groupidx <- rdata$xa@xcmsSet@groupidx | |
501 context$peaks <- rdata$xa@xcmsSet@peaks | |
502 context$show_percent <- show_percent | |
503 | 501 |
504 indices <- as.numeric(unique(var_meta[, file_grouping_var])) | 502 indices <- as.numeric(unique(var_meta[, file_grouping_var])) |
505 smol_xcms_set <- orm$smol_xcms_set() | 503 smol_xcms_set <- orm$smol_xcms_set() |
506 mz_tab_info <- new.env() | 504 mz_tab_info <- new.env() |
507 xcms_set <- rdata$xa@xcmsSet | |
508 g <- xcms::groups(xcms_set) | 505 g <- xcms::groups(xcms_set) |
509 mz_tab_info$group_length <- nrow(g) | 506 mz_tab_info$group_length <- nrow(g) |
510 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) | 507 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) |
511 mz_tab_info$sampnames <- xcms::sampnames(xcms_set) | 508 mz_tab_info$sampnames <- xcms::sampnames(xcms_set) |
512 mz_tab_info$sampclass <- xcms::sampclass(xcms_set) | 509 mz_tab_info$sampclass <- xcms::sampclass(xcms_set) |
513 mz_tab_info$rtmed <- g[,"rtmed"] | 510 mz_tab_info$rtmed <- g[,"rtmed"] |
514 mz_tab_info$mzmed <- g[,"mzmed"] | 511 mz_tab_info$mzmed <- g[,"mzmed"] |
515 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into") | 512 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into") |
516 blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100)) | 513 blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100)) |
517 smol_xcms_set$set_raw(blogified)$save() | 514 rm(mz_tab_info) |
515 | |
516 invisible(smol_xcms_set$set_raw(blogified)$save()) | |
517 smol_xcms_set_id <- smol_xcms_set$get_id() | |
518 rm(smol_xcms_set) | |
519 | |
518 for (no in indices) { | 520 for (no in indices) { |
519 sample_name <- names(rdata$singlefile)[[no]] | 521 sample_name <- names(singlefile)[[no]] |
520 sample_path <- rdata$singlefile[[no]] | 522 sample_path <- singlefile[[no]] |
521 if ( | 523 if ( |
522 is.na(no) | 524 is.na(no) |
523 || is.null(sample_path) | 525 || is.null(sample_path) |
524 || !(sample_name %in% sample_names) | 526 || !(sample_name %in% sample_names) |
525 ) { | 527 ) { |
526 next | 528 next |
527 } | 529 } |
528 ms_file=xcms::xcmsRaw(sample_path) | |
529 env <- new.env() | 530 env <- new.env() |
530 env$variableMetadata <- var_meta[var_meta[, file_grouping_var]==no,] | 531 ms_file <- xcms::xcmsRaw(sample_path) |
531 env$tic <- ms_file@tic | 532 env$tic <- ms_file@tic |
532 env$mz <- ms_file@env$mz | 533 env$mz <- ms_file@env$mz |
533 env$scanindex <- ms_file@scanindex | 534 env$scanindex <- ms_file@scanindex |
534 env$scantime <- ms_file@scantime | 535 env$scantime <- ms_file@scantime |
535 env$intensity <- ms_file@env$intensity | 536 env$intensity <- ms_file@env$intensity |
536 env$polarity <- as.character(ms_file@polarity[[1]]) | 537 env$polarity <- as.character(ms_file@polarity[[1]]) |
538 | |
539 ## Again, ms file is huge, so we get rid of it quickly. | |
540 rm(ms_file) | |
541 | |
537 env$sample_name <- sample_name | 542 env$sample_name <- sample_name |
538 env$dataset_path <- sample_path | 543 env$dataset_path <- sample_path |
539 env$process_params <- process_params | 544 env$process_params <- process_params |
540 env$enriched_rdata <- TRUE | 545 env$enriched_rdata <- TRUE |
541 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION | 546 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION |
542 env$tool_name <- TOOL_NAME | 547 env$tool_name <- TOOL_NAME |
543 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC | 548 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC |
544 context$sample_no <- no | 549 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) |
545 add_sample_to_database(orm, env, context, smol_xcms_set) | 550 rm (env) |
546 } | 551 context$samples[no] <- sample$get_id() |
552 rm (sample) | |
553 } | |
554 context$clusters <- list() | |
555 context$show_percent <- show_percent | |
556 context$cluster_mean_rt_abundance <- list() | |
557 context$central_feature <- list() | |
558 load_variable_metadata(orm, var_meta, context) | |
559 clusters <- context$clusters | |
560 rm(context) | |
547 message("Features enrichment") | 561 message("Features enrichment") |
548 complete_features(orm, context) | 562 complete_features(orm, clusters, show_percent) |
549 message("Features enrichment done.") | 563 message("Features enrichment done.") |
550 return (NULL) | 564 return (NULL) |
551 } | 565 } |
552 | 566 |
553 find_grouping_var <- function(var_meta) { | 567 find_grouping_var <- function(var_meta) { |
569 stop("Could not find any class column in your variableMetadata.") | 583 stop("Could not find any class column in your variableMetadata.") |
570 } | 584 } |
571 return (classes[[1]]) | 585 return (classes[[1]]) |
572 } | 586 } |
573 | 587 |
574 add_sample_to_database <- function(orm, env, context, smol_xcms_set) { | 588 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) { |
575 message(sprintf("Processing sample %s", env$sample_name)) | 589 message(sprintf("Processing sample %s", env$sample_name)) |
576 sample <- ( | 590 sample <- ( |
577 orm$sample() | 591 orm$sample() |
578 $set_name(env$sample_name) | 592 $set_name(env$sample_name) |
579 $set_path(env$dataset_path) | 593 $set_path(env$dataset_path) |
580 $set_kind("enriched_rdata") | 594 $set_kind("enriched_rdata") |
581 $set_polarity( | 595 $set_polarity( |
582 if (is.null(env$polarity) || identical(env$polarity, character(0))) "" | 596 if (is.null(env$polarity) || identical(env$polarity, character(0))) "" |
583 else env$polarity | 597 else env$polarity |
584 ) | 598 ) |
585 $set_smol_xcms_set(smol_xcms_set) | |
586 $set_raw(blob::blob(fst::compress_fst( | 599 $set_raw(blob::blob(fst::compress_fst( |
587 serialize(env, NULL), | 600 serialize(env, NULL), |
588 compression=100 | 601 compression=100 |
589 ))) | 602 ))) |
590 $save() | |
591 ) | 603 ) |
592 load_variable_metadata(orm, sample, env$variableMetadata, context) | 604 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id |
605 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id | |
606 sample <- sample$save() | |
593 load_process_params(orm, sample, env$process_params) | 607 load_process_params(orm, sample, env$process_params) |
594 message(sprintf("Sample %s inserted.", env$sample_name)) | 608 message(sprintf("Sample %s inserted.", env$sample_name)) |
595 return (sample) | 609 return (sample) |
596 } | 610 } |
597 | 611 |
598 | 612 |
599 load_variable_metadata <- function(orm, sample, var_meta, context) { | 613 load_variable_metadata <- function(orm, var_meta, context) { |
600 all_clusters <- orm$cluster()$all() | 614 all_clusters <- orm$cluster()$all() |
601 | 615 |
602 next_feature_id <- get_next_id(orm$feature()$all(), "featureID") | 616 next_feature_id <- get_next_id(orm$feature()$all(), "featureID") + 1 |
603 next_cluster_id <- get_next_id(all_clusters, "clusterID") | 617 next_cluster_id <- 0 |
604 next_pc_group <- get_next_id(all_clusters, "pc_group") | 618 next_pc_group <- get_next_id(all_clusters, "pc_group") |
605 next_align_group <- get_next_id(all_clusters, "align_group") | 619 next_align_group <- get_next_id(all_clusters, "align_group") + 1 |
606 message("Extracting features") | 620 message("Extracting features") |
607 invisible(create_features( | 621 invisible(create_features( |
608 orm, sample, var_meta, context, | 622 orm, var_meta, context, |
609 next_feature_id, next_cluster_id, | 623 next_feature_id, next_cluster_id, |
610 next_pc_group, next_align_group | 624 next_pc_group, next_align_group |
611 )) | 625 )) |
612 message("Extracting features done.") | 626 message("Extracting features done.") |
613 return (NULL) | 627 return (NULL) |
614 } | 628 } |
615 | 629 |
616 get_next_id <- function(models, attribute) { | 630 get_next_id <- function(models, attribute) { |
617 if ((id <- models$max(attribute)) == Inf || id == -Inf) { | 631 if ((id <- models$max(attribute)) == Inf || id == -Inf) { |
618 return (1) | 632 return (0) |
619 } | 633 } |
620 return (id + 1) | 634 return (id) |
621 } | 635 } |
622 | 636 |
623 create_features <- function( | 637 create_features <- function( |
624 orm, sample, var_meta, context, | 638 orm, var_meta, context, |
625 next_feature_id, next_cluster_id, | 639 next_feature_id, next_cluster_id, |
626 next_pc_group, next_align_group | 640 next_pc_group, next_align_group |
627 ) { | 641 ) { |
628 field_names <- as.list(names(orm$feature()$fields__)) | 642 field_names <- as.list(names(orm$feature()$fields__)) |
629 field_names[field_names=="id"] <- NULL | 643 field_names[field_names=="id"] <- NULL |
641 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) | 655 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) |
642 } | 656 } |
643 | 657 |
644 curent_var_meta <- var_meta[row, ] | 658 curent_var_meta <- var_meta[row, ] |
645 | 659 |
660 | |
661 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) | |
662 | |
663 dummy_feature$set_featureID(next_feature_id) | |
664 next_feature_id <- next_feature_id + 1 | |
665 fake_iso <- dummy_feature$get_iso() | |
666 iso <- extract_iso(fake_iso) | |
667 clusterID <- extract_clusterID(fake_iso, next_cluster_id) | |
668 context$clusterID <- clusterID | |
669 dummy_feature$set_iso(iso) | |
670 | |
671 | |
646 peak_list <- context$peaks[context$groupidx[[row]], ] | 672 peak_list <- context$peaks[context$groupidx[[row]], ] |
647 if (! ("matrix" %in% class(peak_list))) { | 673 if (! ("matrix" %in% class(peak_list))) { |
648 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) | 674 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) |
649 } | 675 } |
650 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$sample_no, , drop=FALSE] | 676 |
677 clusterID <- as.character(clusterID) | |
678 if (is.null(context$central_feature[[clusterID]])) { | |
679 int_o <- extract_peak_var(peak_list, "into") | |
680 context$central_feature[[clusterID]] <- ( | |
681 peak_list[peak_list[, "into"] == int_o,]["sample"] | |
682 ) | |
683 } | |
684 | |
685 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] | |
651 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { | 686 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { |
652 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { | 687 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { |
653 dummy_feature$set_int_o(int_o) | 688 dummy_feature$set_int_o(int_o) |
654 } | 689 } |
655 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { | 690 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { |
657 } | 692 } |
658 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { | 693 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { |
659 dummy_feature$set_max_o(max_o) | 694 dummy_feature$set_max_o(max_o) |
660 } | 695 } |
661 } | 696 } |
662 | |
663 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) | |
664 | |
665 dummy_feature$set_featureID(next_feature_id) | |
666 next_feature_id <- next_feature_id + 1 | |
667 fake_iso <- dummy_feature$get_iso() | |
668 iso <- extract_iso(fake_iso) | |
669 clusterID <- extract_clusterID(fake_iso, next_cluster_id) | |
670 context$clusterID <- clusterID | |
671 dummy_feature$set_iso(iso) | |
672 create_associated_cluster( | 697 create_associated_cluster( |
673 sample, dummy_feature, clusterID, | 698 context$central_feature[[clusterID]], |
699 dummy_feature, clusterID, | |
674 context, curent_var_meta, next_pc_group, | 700 context, curent_var_meta, next_pc_group, |
675 next_align_group | 701 next_align_group |
676 ) | 702 ) |
677 next_align_group <- next_align_group + 1 | 703 next_align_group <- next_align_group + 1 |
678 features[[length(features)+1]] <- as.list(dummy_feature, field_names) | 704 features[[length(features)+1]] <- as.list(dummy_feature, field_names) |
679 dummy_feature$clear() | 705 dummy_feature$clear() |
680 } | 706 } |
681 message("")## +\n for previous message | 707 message("")## +\n for previous message |
682 message("Saving features") | 708 message("Saving features") |
683 dummy_feature$save(bulk=features) | 709 rm(var_meta) |
710 invisible(dummy_feature$save(bulk=features)) | |
684 message("Saved.") | 711 message("Saved.") |
685 return (context$clusters) | 712 return (context$clusters) |
686 } | 713 } |
687 | 714 |
688 extract_peak_var <- function(peak_list, var_name, selector=max) { | 715 extract_peak_var <- function(peak_list, var_name, selector=max) { |
732 } | 759 } |
733 return (clusterID + next_cluster_id) | 760 return (clusterID + next_cluster_id) |
734 } | 761 } |
735 | 762 |
736 create_associated_cluster <- function( | 763 create_associated_cluster <- function( |
737 sample, feature, grouping_variable, | 764 sample_no, feature, clusterID, |
738 context, curent_var_meta, next_pc_group, next_align_group | 765 context, curent_var_meta, next_pc_group, next_align_group |
739 ) { | 766 ) { |
740 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) | 767 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) |
741 adduct <- as.character(curent_var_meta[["adduct"]]) | 768 adduct <- as.character(curent_var_meta[["adduct"]]) |
742 annotation <- curent_var_meta[["isotopes"]] | 769 annotation <- curent_var_meta[["isotopes"]] |
743 grouping_variable <- as.character(grouping_variable) | 770 clusterID <- as.character(clusterID) |
744 if (is.null(cluster <- context$clusters[[grouping_variable]])) { | 771 if (is.null(cluster <- context$clusters[[clusterID]])) { |
745 cluster <- context$clusters[[grouping_variable]] <- orm$cluster( | 772 cluster <- context$clusters[[clusterID]] <- orm$cluster( |
746 pc_group=pcgroup + next_pc_group, | 773 pc_group=pcgroup + next_pc_group, |
747 adduct=adduct, | 774 adduct=adduct, |
748 align_group=next_align_group, | 775 align_group=next_align_group, |
749 # curent_group=curent_group, | 776 # curent_group=curent_group, |
750 clusterID=context$clusterID, | 777 clusterID=context$clusterID, |
751 annotation=annotation | 778 annotation=annotation |
752 )$set_sample(sample) | 779 ) |
780 ## Crappy hack to assign sample id to cluster without loading the sample. | |
781 ## Samples are too big (their sample$env) and slows the process, and eat all the menory | |
782 ## so we dont't want to load them. | |
783 cluster[["sample_id"]] <- context$samples[sample_no][[1]] | |
784 cluster$modified__[["sample_id"]] <- cluster[["sample_id"]] | |
753 } else { | 785 } else { |
754 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { | 786 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { |
755 cluster$set_clusterID(context$clusterID) | 787 cluster$set_clusterID(context$clusterID) |
756 } | 788 } |
757 } | 789 } |
758 cluster$save() | 790 cluster$save() |
759 feature$set_cluster(cluster) | 791 feature$set_cluster(cluster) |
760 return (feature) | 792 return (feature) |
761 } | 793 } |
762 | 794 |
763 complete_features <- function(orm, context) { | 795 complete_features <- function(orm, clusters, show_percent) { |
764 for (cluster in context$clusters) { | 796 total <- length(clusters) |
797 percent <- -1 | |
798 i <- 0 | |
799 for (cluster in clusters) { | |
800 i <- i+1 | |
801 if (show_percent && (i / total) * 100 > percent) { | |
802 percent <- percent + 1 | |
803 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) | |
804 } | |
765 features <- orm$feature()$load_by(cluster_id=cluster$get_id()) | 805 features <- orm$feature()$load_by(cluster_id=cluster$get_id()) |
766 if (features$any()) { | 806 if (features$any()) { |
767 if (!is.null(rt <- features$mean("rt"))) { | 807 if (!is.null(rt <- features$mean("rt"))) { |
768 cluster$set_mean_rt(rt)$save() | 808 cluster$set_mean_rt(rt)$save() |
769 } | 809 } |