comparison coral_multilocus_genotype.R @ 8:33d759858625 draft

Uploaded
author greg
date Thu, 15 Jul 2021 20:06:14 +0000
parents bcb28b49b0cc
children 3c42de11ea1d
comparison
equal deleted inserted replaced
7:bcb28b49b0cc 8:33d759858625
127 mlg.filter(genind_clone, distance=bitwise_distance) <- 0.032; 127 mlg.filter(genind_clone, distance=bitwise_distance) <- 0.032;
128 128
129 # Create list of MLGs. 129 # Create list of MLGs.
130 cat("\nCreating list of mlg_ids...\n\n"); 130 cat("\nCreating list of mlg_ids...\n\n");
131 mlg_ids <- mlg.id(genind_clone); 131 mlg_ids <- mlg.id(genind_clone);
132 cat("\nCreated list of mlg_ids...\n\n");
132 133
133 # Read user's Affymetrix 96 well plate tabular file. 134 # Read user's Affymetrix 96 well plate tabular file.
135 cat("\nCreating affy_metadata_data_frame...\n\n");
134 affy_metadata_data_frame <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings=c("", "NA"), quote=""); 136 affy_metadata_data_frame <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings=c("", "NA"), quote="");
137 cat("\nCreated affy_metadata_data_frame...\n\n");
135 colnames(affy_metadata_data_frame) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef", 138 colnames(affy_metadata_data_frame) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef",
136 "region", "latitude", "longitude", "geographic_origin", "colony_location", 139 "region", "latitude", "longitude", "geographic_origin", "colony_location",
137 "depth", "disease_resist", "bleach_resist", "mortality","tle", 140 "depth", "disease_resist", "bleach_resist", "mortality","tle",
138 "spawning", "collector_last_name", "collector_first_name", "organization", "collection_date", 141 "spawning", "collector_last_name", "collector_first_name", "organization", "collection_date",
139 "email", "seq_facility", "array_version", "public", "public_after_date", 142 "email", "seq_facility", "array_version", "public", "public_after_date",
317 group_by(row_number()) %>% 320 group_by(row_number()) %>%
318 dplyr::rename(group="row_number()") %>% 321 dplyr::rename(group="row_number()") %>%
319 unnest (affy_id) %>% 322 unnest (affy_id) %>%
320 # Join with mlg table. 323 # Join with mlg table.
321 left_join(smlg_data_frame %>% 324 left_join(smlg_data_frame %>%
322 select("affy_id","coral_mlg_clonal_id", "coral_mlg_rep_sample_id"), 325 select("affy_id","coral_mlg_clonal_id", "coral_mlg_rep_sample_id",
326 "genetic_coral_species_call", "bcoral_genet_id"),
323 by="affy_id"); 327 by="affy_id");
324 328
325 # If found in database, group members on previous mlg id. 329 # If found in database, group members on previous mlg id.
326 uniques <- unique(sample_mlg_tibble[c("group", "coral_mlg_clonal_id")]); 330 uniques <- unique(sample_mlg_tibble[c("group", "coral_mlg_clonal_id")]);
327 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),]; 331 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),];
760 # A.palmata 1104 764 # A.palmata 1104
761 prep_genotype_tibble <- id_data_table %>% 765 prep_genotype_tibble <- id_data_table %>%
762 group_by(row_number()) %>% 766 group_by(row_number()) %>%
763 dplyr::rename(group='row_number()') %>% 767 dplyr::rename(group='row_number()') %>%
764 unnest(affy_id) %>% 768 unnest(affy_id) %>%
765 left_join(smlg_data_frame %>% 769 left_join(sample_mlg_match_tibble %>%
766 select("affy_id", "coral_mlg_rep_sample_id", "coral_mlg_clonal_id", "user_specimen_id", 770 select("affy_id", "coral_mlg_rep_sample_id", "coral_mlg_clonal_id",
767 "genetic_coral_species_call", "bcoral_genet_id"), 771 "genetic_coral_species_call", "bcoral_genet_id", "db_match"),
768 by='affy_id'); 772 by='affy_id') %>%
773 right_join(sample_mlg_match_tibble %>%
774 select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"),
775 by='coral_mlg_clonal_id') %>%
776 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>%
777 ungroup() %>%
778 dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>%
779 distinct();
780
769 # Confirm that the representative mlg is the same between runs. 781 # Confirm that the representative mlg is the same between runs.
770 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]); 782 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]);
771 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),]; 783 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),];
772 na.mlg3 <- which(is.na(prep_genotype_tibble$coral_mlg_rep_sample_id)); 784 na.mlg3 <- which(is.na(prep_genotype_tibble$coral_mlg_rep_sample_id));
773 na.group2 <- prep_genotype_tibble$group[na.mlg3]; 785 na.group2 <- prep_genotype_tibble$group[na.mlg3];
780 # a100000-… 13905 HG0048 13905 792 # a100000-… 13905 HG0048 13905
781 # genetic_coral_species_call bcoral_genet_id 793 # genetic_coral_species_call bcoral_genet_id
782 # <chr> <chr> 794 # <chr> <chr>
783 # A.palmata C1651 795 # A.palmata C1651
784 representative_mlg_tibble <- prep_genotype_tibble %>% 796 representative_mlg_tibble <- prep_genotype_tibble %>%
785 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id), affy_id, coral_mlg_rep_sample_id)) %>% 797 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>%
786 ungroup() %>% 798 ungroup() %>%
787 select(-group); 799 select(-group)%>%
800 distinct();
788 # prep_genotype_table_tibble looks like this: 801 # prep_genotype_table_tibble looks like this:
789 # affy_id coral_mlg_clonal_id user_specimen_id db_match 802 # affy_id coral_mlg_clonal_id user_specimen_id db_match
790 # a550962...CEL HG0120 1090 match 803 # a550962...CEL HG0120 1090 match
791 # genetic_coral_species_call coral_mlg_rep_sample_id 804 # genetic_coral_species_call coral_mlg_rep_sample_id
792 # A.palmata 1104 805 # A.palmata 1104
801 # genetic_coral_species_call coral_mlg_rep_sample_id bcoral_genet_id 814 # genetic_coral_species_call coral_mlg_rep_sample_id bcoral_genet_id
802 # A.palmata 1104 <NA> 815 # A.palmata 1104 <NA>
803 genotype_table_tibble <- prep_genotype_table_tibble %>% 816 genotype_table_tibble <- prep_genotype_table_tibble %>%
804 left_join(affy_metadata_data_frame %>% 817 left_join(affy_metadata_data_frame %>%
805 select("user_specimen_id", "bcoral_genet_id"), 818 select("user_specimen_id", "bcoral_genet_id"),
806 by='user_specimen_id'); 819 by='user_specimen_id') %>%
820 drop_na(coral_mlg_rep_sample_id);
807 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble); 821 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble);
808 822
809 # Output the file needed for populating the person table. 823 # Output the file needed for populating the person table.
810 person_table_data_frame <- data.frame(matrix(ncol=4, nrow=num_rows)); 824 person_table_data_frame <- data.frame(matrix(ncol=4, nrow=num_rows));
811 colnames(person_table_data_frame) <- c("last_name", "first_name", "organization", "email"); 825 colnames(person_table_data_frame) <- c("last_name", "first_name", "organization", "email");