changeset 9:3c42de11ea1d draft default tip

Uploaded
author greg
date Wed, 24 Nov 2021 20:17:10 +0000
parents 33d759858625
children
files coral_multilocus_genotype.R
diffstat 1 files changed, 9 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/coral_multilocus_genotype.R	Thu Jul 15 20:06:14 2021 +0000
+++ b/coral_multilocus_genotype.R	Wed Nov 24 20:17:10 2021 +0000
@@ -175,7 +175,7 @@
 # Name the columns.
 smlg_data_frame <- as.data.frame(smlg);
 colnames(smlg_data_frame) <- c("user_specimen_id", "affy_id", "bcoral_genet_id", "genotype_id",
-		               "coral_mlg_clonal_id", "coral_mlg_rep_sample_id", "genetic_coral_species_call");
+                               "coral_mlg_clonal_id", "coral_mlg_rep_sample_id", "genetic_coral_species_call");
 log_data_frame("smlg_data_frame", smlg_data_frame);
 # Missing GT in samples submitted.
 start_time <- time_start("Discovering missing GT in samples");
@@ -776,7 +776,9 @@
         mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>%
     ungroup() %>%
     dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>%
-    distinct();
+    group_by(coral_mlg_clonal_id) %>%
+    arrange(coral_mlg_rep_sample_id) %>%
+    slice(1);
 
 # Confirm that the representative mlg is the same between runs.
 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]);
@@ -796,8 +798,7 @@
 representative_mlg_tibble <- prep_genotype_tibble %>%
     mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>%
     ungroup() %>%
-    select(-group)%>%
-    distinct();
+    select(-group);
 # prep_genotype_table_tibble looks like this:
 # affy_id       coral_mlg_clonal_id user_specimen_id db_match
 # a550962...CEL HG0120              1090             match
@@ -806,8 +807,8 @@
 prep_genotype_table_tibble <- stag_db_report %>%
     select("affy_id", "coral_mlg_clonal_id", "user_specimen_id", "db_match", "genetic_coral_species_call") %>%
     left_join(representative_mlg_tibble %>%
-        select("affy_id", "coral_mlg_rep_sample_id"),
-        by='affy_id');
+        select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"),
+        by='coral_mlg_clonal_id');
 # genotype_table_tibble looks like this:
 # affy_id         coral_mlg_clonal_id user_specimen_id db_match
 # a550962-436.CEL HG0120              1090             match
@@ -816,8 +817,7 @@
 genotype_table_tibble <- prep_genotype_table_tibble %>%
     left_join(affy_metadata_data_frame %>%
         select("user_specimen_id", "bcoral_genet_id"),
-        by='user_specimen_id') %>%
-    drop_na(coral_mlg_rep_sample_id);
+        by='user_specimen_id');
 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble);
 
 # Output the file needed for populating the person table.
@@ -902,7 +902,7 @@
     sample_table_data_frame$percent_heterozygous_coral[i] <- sample_prep_data_frame$percent_heterozygous_coral[i];
     sample_table_data_frame$percent_heterozygous_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE;
     sample_table_data_frame$field_call[i] <- sample_prep_data_frame$field_call[i];
-	sample_table_data_frame$bcoral_genet_id[i] <- sample_prep_data_frame$bcoral_genet_id[i];
+    sample_table_data_frame$bcoral_genet_id[i] <- sample_prep_data_frame$bcoral_genet_id[i];
 }
 write_data_frame(output_data_dir, "sample.tabular", sample_table_data_frame);