Mercurial > repos > lain > xseekerpreparator
comparison XSeekerPreparator.R @ 14:00079fadc240 draft
" master branch Updating"
author | lain |
---|---|
date | Tue, 01 Jun 2021 09:47:15 +0000 |
parents | 26f01380145d |
children | 2c7e7fd1f740 |
comparison
equal
deleted
inserted
replaced
13:26f01380145d | 14:00079fadc240 |
---|---|
1 | 1 |
2 | 2 |
3 TOOL_NAME <- "XSeekerPreparator" | 3 TOOL_NAME <- "XSeekerPreparator" |
4 VERSION <- "1.2.1" | 4 VERSION <- "1.2.2" |
5 | |
6 DEBUG_FAST <- FALSE | |
7 DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST | |
8 PROCESS_SMOL_BATCH <- DEBUG_FAST | |
9 FAST_FEATURE_RATIO <- 10 | |
5 | 10 |
6 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" | 11 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" |
7 | 12 |
8 ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-") | 13 ENRICHED_RDATA_VERSION <- paste("1.2.2", OUTPUT_SPECIFIC_TOOL, sep="-") |
9 ENRICHED_RDATA_DOC <- sprintf(" | 14 ENRICHED_RDATA_DOC <- sprintf(" |
10 Welcome to the enriched <Version %s> of the output of CAMERA/xcms. | 15 Welcome to the enriched <Version %s> of the output of CAMERA/xcms. |
11 This doc was generated by the tool: %s - Version %s | 16 This doc was generated by the tool: %s - Version %s |
12 To show the different variables contained in this rdata, type: | 17 To show the different variables contained in this rdata, type: |
13 - `load('this_rdata.rdata', rdata_env <- new.env())` | 18 - `load('this_rdata.rdata', rdata_env <- new.env())` |
552 || !(sample_name %in% sample_names) | 557 || !(sample_name %in% sample_names) |
553 ) { | 558 ) { |
554 next | 559 next |
555 } | 560 } |
556 env <- new.env() | 561 env <- new.env() |
557 ms_file <- xcms::xcmsRaw(sample_path) | 562 |
558 env$tic <- ms_file@tic | 563 if (!DEBUG_FAST_IGNORE_SLOW_OP) { |
559 env$mz <- ms_file@env$mz | 564 ms_file <- xcms::xcmsRaw(sample_path) |
560 env$scanindex <- ms_file@scanindex | 565 env$tic <- ms_file@tic |
561 env$scantime <- ms_file@scantime * 60 | 566 env$mz <- ms_file@env$mz |
562 env$intensity <- ms_file@env$intensity | 567 env$scanindex <- ms_file@scanindex |
563 env$polarity <- as.character(ms_file@polarity[[1]]) | 568 env$scantime <- ms_file@scantime * 60 |
564 | 569 env$intensity <- ms_file@env$intensity |
565 ## Again, ms file is huge, so we get rid of it quickly. | 570 env$polarity <- as.character(ms_file@polarity[[1]]) |
566 rm(ms_file) | 571 |
567 | 572 ## Again, ms file is huge, so we get rid of it quickly. |
573 rm(ms_file) | |
574 | |
575 } | |
568 env$sample_name <- sample_name | 576 env$sample_name <- sample_name |
569 env$dataset_path <- sample_path | 577 env$dataset_path <- sample_path |
570 env$process_params <- process_params | 578 env$process_params <- process_params |
571 env$enriched_rdata <- TRUE | 579 env$enriched_rdata <- TRUE |
572 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION | 580 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION |
573 env$tool_name <- TOOL_NAME | 581 env$tool_name <- TOOL_NAME |
574 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC | 582 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC |
583 | |
575 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) | 584 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) |
576 rm (env) | 585 rm (env) |
577 context$samples[no] <- sample$get_id() | 586 context$samples[no] <- sample$get_id() |
578 rm (sample) | 587 rm (sample) |
579 } | 588 } |
674 | 683 |
675 if (show_percent <- context$show_percent) { | 684 if (show_percent <- context$show_percent) { |
676 percent <- -1 | 685 percent <- -1 |
677 total <- nrow(var_meta) | 686 total <- nrow(var_meta) |
678 } | 687 } |
679 for (row in seq_len(nrow(var_meta))) { | 688 rows <- seq_len(nrow(var_meta)) |
689 if (PROCESS_SMOL_BATCH) { | |
690 | |
691 rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] | |
692 } | |
693 cluster_row <- list() | |
694 for (row in rows) { | |
680 if (show_percent && (row / total) * 100 > percent) { | 695 if (show_percent && (row / total) * 100 > percent) { |
681 percent <- percent + 1 | 696 percent <- percent + 1 |
682 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) | 697 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) |
683 } | 698 } |
684 | 699 |
685 curent_var_meta <- var_meta[row, ] | |
686 | |
687 | |
688 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) | |
689 | |
690 dummy_feature$set_featureID(next_feature_id) | 700 dummy_feature$set_featureID(next_feature_id) |
691 next_feature_id <- next_feature_id + 1 | 701 next_feature_id <- next_feature_id + 1 |
702 | |
703 curent_var_meta <- var_meta[row, ] | |
704 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta) | |
692 fake_iso <- dummy_feature$get_iso() | 705 fake_iso <- dummy_feature$get_iso() |
693 iso <- extract_iso(fake_iso) | 706 iso <- extract_iso(fake_iso) |
694 clusterID <- extract_clusterID(fake_iso, next_cluster_id) | 707 clusterID <- extract_clusterID(fake_iso, next_cluster_id) |
695 context$clusterID <- clusterID | 708 context$clusterID <- clusterID |
696 dummy_feature$set_iso(iso) | 709 dummy_feature$set_iso(iso) |
697 | |
698 | 710 |
699 peak_list <- context$peaks[context$groupidx[[row]], ] | 711 peak_list <- context$peaks[context$groupidx[[row]], ] |
700 if (! ("matrix" %in% class(peak_list))) { | 712 if (! ("matrix" %in% class(peak_list))) { |
701 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) | 713 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) |
702 } | 714 } |
707 context$central_feature[[clusterID]] <- ( | 719 context$central_feature[[clusterID]] <- ( |
708 peak_list[peak_list[, "into"] == int_o,]["sample"] | 720 peak_list[peak_list[, "into"] == int_o,]["sample"] |
709 ) | 721 ) |
710 } | 722 } |
711 | 723 |
712 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] | 724 if (!DEBUG_FAST_IGNORE_SLOW_OP) { |
713 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { | 725 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] |
714 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { | 726 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { |
715 dummy_feature$set_int_o(int_o) | 727 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { |
728 dummy_feature$set_int_o(int_o) | |
729 } | |
730 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { | |
731 dummy_feature$set_int_b(int_b) | |
732 } | |
733 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { | |
734 dummy_feature$set_max_o(max_o) | |
735 } | |
716 } | 736 } |
717 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { | 737 } |
718 dummy_feature$set_int_b(int_b) | 738 |
719 } | 739 cluster_row[[row]] <- create_associated_cluster( |
720 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { | |
721 dummy_feature$set_max_o(max_o) | |
722 } | |
723 } | |
724 create_associated_cluster( | |
725 orm, | 740 orm, |
726 context$central_feature[[clusterID]], | 741 context$samples[context$central_feature[[clusterID]]][[1]], |
727 dummy_feature, clusterID, | 742 dummy_feature, clusterID, |
728 context, curent_var_meta, next_pc_group, | 743 context, curent_var_meta, next_pc_group, |
729 next_align_group | 744 next_align_group |
730 ) | 745 ) |
731 next_align_group <- next_align_group + 1 | 746 next_align_group <- next_align_group + 1 |
732 features[[length(features)+1]] <- as.list(dummy_feature, field_names) | 747 features[[length(features)+1]] <- as.list(dummy_feature, field_names) |
733 dummy_feature$clear() | 748 dummy_feature$clear() |
734 } | 749 } |
735 message("")## +\n for previous message | 750 rm(var_meta) |
751 message("") | |
736 message("Saving features") | 752 message("Saving features") |
737 rm(var_meta) | |
738 invisible(dummy_feature$save(bulk=features)) | 753 invisible(dummy_feature$save(bulk=features)) |
754 | |
755 ## We link manually clusters to the sample they're in. | |
756 link_cache <- list() | |
757 for (row in rows) { | |
758 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) | |
759 for (sample_id in context$samples[sample_nos]) { | |
760 cluster_id <- cluster_row[[row]]$get_id() | |
761 if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { | |
762 link_cache[[id]] <- 1 | |
763 orm$cluster_sample( | |
764 sample_id=sample_id, | |
765 cluster_id=cluster_id | |
766 )$save() | |
767 } | |
768 } | |
769 } | |
770 | |
739 message("Saved.") | 771 message("Saved.") |
740 return (context$clusters) | 772 return (context$clusters) |
741 } | 773 } |
742 | 774 |
743 extract_peak_var <- function(peak_list, var_name, selector=max) { | 775 extract_peak_var <- function(peak_list, var_name, selector=max) { |
788 return (clusterID + next_cluster_id) | 820 return (clusterID + next_cluster_id) |
789 } | 821 } |
790 | 822 |
791 create_associated_cluster <- function( | 823 create_associated_cluster <- function( |
792 orm, | 824 orm, |
793 sample_no, feature, clusterID, | 825 main_sample_id, feature, clusterID, |
794 context, curent_var_meta, next_pc_group, next_align_group | 826 context, curent_var_meta, next_pc_group, next_align_group |
795 ) { | 827 ) { |
796 clusterID <- as.character(clusterID) | 828 clusterID <- as.character(clusterID) |
797 if (is.null(cluster <- context$clusters[[clusterID]])) { | 829 if (is.null(cluster <- context$clusters[[clusterID]])) { |
798 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) | 830 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) |
815 } | 847 } |
816 cluster$set_adduct(adduct) | 848 cluster$set_adduct(adduct) |
817 ## Crappy hack to assign sample id to cluster without loading the sample. | 849 ## Crappy hack to assign sample id to cluster without loading the sample. |
818 ## Samples are too big (their sample$env) and slows the process, and eat all the menory | 850 ## Samples are too big (their sample$env) and slows the process, and eat all the menory |
819 ## so we dont't want to load them. | 851 ## so we dont't want to load them. |
820 cluster[["sample_id"]] <- context$samples[sample_no][[1]] | 852 cluster[["sample_id"]] <- main_sample_id |
821 cluster$modified__[["sample_id"]] <- cluster[["sample_id"]] | 853 cluster$modified__[["sample_id"]] <- main_sample_id |
822 } else { | 854 } else { |
823 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { | 855 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { |
824 cluster$set_clusterID(context$clusterID) | 856 cluster$set_clusterID(context$clusterID) |
825 } | 857 } |
826 } | 858 } |
827 cluster$save() | 859 cluster$save() |
828 feature$set_cluster(cluster) | 860 feature$set_cluster(cluster) |
829 return (feature) | 861 return (cluster) |
830 } | 862 } |
831 | 863 |
832 complete_features <- function(orm, clusters, show_percent) { | 864 complete_features <- function(orm, clusters, show_percent) { |
833 total <- length(clusters) | 865 total <- length(clusters) |
834 percent <- -1 | 866 percent <- -1 |