changeset 8:33d759858625 draft

Uploaded
author greg
date Thu, 15 Jul 2021 20:06:14 +0000
parents bcb28b49b0cc
children 3c42de11ea1d
files coral_multilocus_genotype.R
diffstat 1 files changed, 22 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/coral_multilocus_genotype.R	Tue Jan 12 15:19:58 2021 +0000
+++ b/coral_multilocus_genotype.R	Thu Jul 15 20:06:14 2021 +0000
@@ -129,9 +129,12 @@
 # Create list of MLGs.
 cat("\nCreating list of mlg_ids...\n\n");
 mlg_ids <- mlg.id(genind_clone);
+cat("\nCreated list of mlg_ids...\n\n");
 
 # Read user's Affymetrix 96 well plate tabular file.
+cat("\nCreating affy_metadata_data_frame...\n\n");
 affy_metadata_data_frame <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings=c("", "NA"), quote="");
+cat("\nCreated affy_metadata_data_frame...\n\n");
 colnames(affy_metadata_data_frame) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef",
                                         "region", "latitude", "longitude", "geographic_origin", "colony_location",
                                         "depth", "disease_resist", "bleach_resist", "mortality","tle",
@@ -319,7 +322,8 @@
     unnest (affy_id) %>%
     # Join with mlg table.
     left_join(smlg_data_frame %>%
-              select("affy_id","coral_mlg_clonal_id", "coral_mlg_rep_sample_id"),
+              select("affy_id","coral_mlg_clonal_id", "coral_mlg_rep_sample_id",
+                     "genetic_coral_species_call", "bcoral_genet_id"),
               by="affy_id");
 
 # If found in database, group members on previous mlg id.
@@ -762,10 +766,18 @@
     group_by(row_number()) %>%
     dplyr::rename(group='row_number()') %>%
     unnest(affy_id) %>%
-    left_join(smlg_data_frame %>%
-        select("affy_id", "coral_mlg_rep_sample_id", "coral_mlg_clonal_id", "user_specimen_id",
-               "genetic_coral_species_call", "bcoral_genet_id"),
-        by='affy_id');
+    left_join(sample_mlg_match_tibble %>%
+        select("affy_id", "coral_mlg_rep_sample_id", "coral_mlg_clonal_id",
+               "genetic_coral_species_call", "bcoral_genet_id", "db_match"),
+        by='affy_id') %>%
+    right_join(sample_mlg_match_tibble %>%
+        select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"),
+        by='coral_mlg_clonal_id') %>%
+        mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>%
+    ungroup() %>%
+    dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>%
+    distinct();
+
 # Confirm that the representative mlg is the same between runs.
 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]);
 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),];
@@ -782,9 +794,10 @@
 # <chr>                      <chr>
 # A.palmata                  C1651
 representative_mlg_tibble <- prep_genotype_tibble %>%
-    mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id), affy_id, coral_mlg_rep_sample_id)) %>%
+    mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>%
     ungroup() %>%
-    select(-group);
+    select(-group)%>%
+    distinct();
 # prep_genotype_table_tibble looks like this:
 # affy_id       coral_mlg_clonal_id user_specimen_id db_match
 # a550962...CEL HG0120              1090             match
@@ -803,7 +816,8 @@
 genotype_table_tibble <- prep_genotype_table_tibble %>%
     left_join(affy_metadata_data_frame %>%
         select("user_specimen_id", "bcoral_genet_id"),
-        by='user_specimen_id');
+        by='user_specimen_id') %>%
+    drop_na(coral_mlg_rep_sample_id);
 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble);
 
 # Output the file needed for populating the person table.