Mercurial > repos > greg > coral_multilocus_genotype
comparison coral_multilocus_genotype.R @ 1:a690e0382ce4 draft
Uploaded
author | greg |
---|---|
date | Fri, 20 Sep 2019 09:17:28 -0400 |
parents | adaf89535d2e |
children | 9d0ddd9d62c5 |
comparison
equal
deleted
inserted
replaced
0:adaf89535d2e | 1:a690e0382ce4 |
---|---|
198 # a100000-4368120-060520-256_K07.CEL 11.45918 | 198 # a100000-4368120-060520-256_K07.CEL 11.45918 |
199 reference_alleles_data_table <- setDT(reference_alleles_data_frame, keep.rownames=TRUE)[]; | 199 reference_alleles_data_table <- setDT(reference_alleles_data_frame, keep.rownames=TRUE)[]; |
200 # Rename the rn column. | 200 # Rename the rn column. |
201 setnames(reference_alleles_data_table, c("rn"), c("affy_id")); | 201 setnames(reference_alleles_data_table, c("rn"), c("affy_id")); |
202 # Rename the reference_alleles column. | 202 # Rename the reference_alleles column. |
203 setnames(reference_alleles_data_table, c("reference_alleles"), c("percent_reference_coral")); | 203 setnames(reference_alleles_data_table, c("reference_alleles"), c("percent_acerv_coral")); |
204 # Round data to two digits. | 204 # Round data to two digits. |
205 reference_alleles_data_table$percent_reference_coral <- round(reference_alleles_data_table$percent_reference_coral, digits=2); | 205 reference_alleles_data_table$percent_acerv_coral <- round(reference_alleles_data_table$percent_acerv_coral, digits=2); |
206 time_elapsed(start_time); | 206 time_elapsed(start_time); |
207 | 207 |
208 # Alternative alleles | 208 # Alternative alleles |
209 start_time <- time_start("Discovering alternative alleles"); | 209 start_time <- time_start("Discovering alternative alleles"); |
210 alternative_alleles <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("1/1", x))))}); | 210 alternative_alleles <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("1/1", x))))}); |
216 # a100000-4368120-060520-256_K07.CEL 14.08916 | 216 # a100000-4368120-060520-256_K07.CEL 14.08916 |
217 alternative_alleles_data_table <- setDT(alternative_alleles_data_frame, keep.rownames=TRUE)[]; | 217 alternative_alleles_data_table <- setDT(alternative_alleles_data_frame, keep.rownames=TRUE)[]; |
218 # Rename the rn column. | 218 # Rename the rn column. |
219 setnames(alternative_alleles_data_table, c("rn"), c("affy_id")); | 219 setnames(alternative_alleles_data_table, c("rn"), c("affy_id")); |
220 # Rename the alternative_alleles column. | 220 # Rename the alternative_alleles column. |
221 setnames(alternative_alleles_data_table, c("alternative_alleles"), c("percent_alternative_coral")); | 221 setnames(alternative_alleles_data_table, c("alternative_alleles"), c("percent_apalm_coral")); |
222 # Round data to two digits. | 222 # Round data to two digits. |
223 alternative_alleles_data_table$percent_alternative_coral <- round(alternative_alleles_data_table$percent_alternative_coral, digits=2); | 223 alternative_alleles_data_table$percent_apalm_coral <- round(alternative_alleles_data_table$percent_apalm_coral, digits=2); |
224 time_elapsed(start_time); | 224 time_elapsed(start_time); |
225 | 225 |
226 # The mlg_ids_data_table looks like this: | 226 # The mlg_ids_data_table looks like this: |
227 # mlg_ids | 227 # mlg_ids |
228 # a550962-4368120-060520-500_M23.CEL | 228 # a550962-4368120-060520-500_M23.CEL |
303 by="affy_id") %>% | 303 by="affy_id") %>% |
304 left_join(heterozygous_alleles_data_table %>% | 304 left_join(heterozygous_alleles_data_table %>% |
305 select("affy_id", "percent_heterozygous_coral"), | 305 select("affy_id", "percent_heterozygous_coral"), |
306 by="affy_id") %>% | 306 by="affy_id") %>% |
307 left_join(reference_alleles_data_table %>% | 307 left_join(reference_alleles_data_table %>% |
308 select("affy_id", "percent_reference_coral"), | 308 select("affy_id", "percent_acerv_coral"), |
309 by="affy_id") %>% | 309 by="affy_id") %>% |
310 left_join(alternative_alleles_data_table %>% | 310 left_join(alternative_alleles_data_table %>% |
311 select("affy_id", "percent_alternative_coral"), | 311 select("affy_id", "percent_apalm_coral"), |
312 by="affy_id") %>% | 312 by="affy_id") %>% |
313 mutate(db_match = ifelse(is.na(db_match), "failed", db_match))%>% | 313 mutate(db_match = ifelse(is.na(db_match), "failed", db_match))%>% |
314 mutate(coral_mlg_clonal_id = ifelse(is.na(coral_mlg_clonal_id), "failed", coral_mlg_clonal_id)) %>% | 314 mutate(coral_mlg_clonal_id = ifelse(is.na(coral_mlg_clonal_id), "failed", coral_mlg_clonal_id)) %>% |
315 mutate(genetic_coral_species_call = ifelse(percent_alternative_coral >= 40 & percent_alternative_coral <= 44.99, "A.palmata","other")) %>% | 315 mutate(genetic_coral_species_call = ifelse(percent_apalm_coral >= 40 & percent_apalm_coral <= 44.99, "A.palmata","other")) %>% |
316 mutate(genetic_coral_species_call = ifelse(percent_alternative_coral >= 45 & percent_alternative_coral <= 51, "A.cervicornis", genetic_coral_species_call)) %>% | 316 mutate(genetic_coral_species_call = ifelse(percent_apalm_coral >= 45 & percent_apalm_coral <= 51, "A.cervicornis", genetic_coral_species_call)) %>% |
317 mutate(genetic_coral_species_call = ifelse(percent_heterozygous_coral > 40, "A.prolifera", genetic_coral_species_call)) %>% | 317 mutate(genetic_coral_species_call = ifelse(percent_heterozygous_coral > 40, "A.prolifera", genetic_coral_species_call)) %>% |
318 ungroup() %>% | 318 ungroup() %>% |
319 select(-group,-db_record); | 319 select(-group,-db_record); |
320 time_elapsed(start_time); | 320 time_elapsed(start_time); |
321 | 321 |
593 # NA -9 -9 NA | 593 # NA -9 -9 NA |
594 # dna_concentration registry_id result_folder_name plate_barcode mlg | 594 # dna_concentration registry_id result_folder_name plate_barcode mlg |
595 # NA NA PRO100175_PSU175_SAX_b02 P9SR10074 HG0227 | 595 # NA NA PRO100175_PSU175_SAX_b02 P9SR10074 HG0227 |
596 # affy_id percent_missing_data_coral percent_heterozygous_coral | 596 # affy_id percent_missing_data_coral percent_heterozygous_coral |
597 # a550962-436.CEL 1.06 19.10 | 597 # a550962-436.CEL 1.06 19.10 |
598 # percent_reference_coral percent_alternative_coral | 598 # percent_acerv_coral percent_apalm_coral |
599 # 40.10459 39.73396 | 599 # 40.10459 39.73396 |
600 sample_prep_data_frame <- affy_metadata_data_frame %>% | 600 sample_prep_data_frame <- affy_metadata_data_frame %>% |
601 left_join(stag_db_report %>% | 601 left_join(stag_db_report %>% |
602 select("user_specimen_id", "affy_id", "percent_missing_data_coral", "percent_heterozygous_coral", | 602 select("user_specimen_id", "affy_id", "percent_missing_data_coral", "percent_heterozygous_coral", |
603 "percent_reference_coral", "percent_alternative_coral"), | 603 "percent_acerv_coral", "percent_apalm_coral"), |
604 by='user_specimen_id'); | 604 by='user_specimen_id'); |
605 # Get the number of rows for all data frames. | 605 # Get the number of rows for all data frames. |
606 num_rows <- nrow(sample_prep_data_frame); | 606 num_rows <- nrow(sample_prep_data_frame); |
607 # Set the column names so that we can extract only those columns | 607 # Set the column names so that we can extract only those columns |
608 # needed for the sample table. | 608 # needed for the sample table. |
612 "spawning", "collector_last_name", "collector_first_name", "organization", | 612 "spawning", "collector_last_name", "collector_first_name", "organization", |
613 "collection_date", "email", "seq_facility", "array_version", "public", | 613 "collection_date", "email", "seq_facility", "array_version", "public", |
614 "public_after_date", "sperm_motility", "healing_time", "dna_extraction_method", | 614 "public_after_date", "sperm_motility", "healing_time", "dna_extraction_method", |
615 "dna_concentration", "registry_id", "result_folder_name", "plate_barcode", | 615 "dna_concentration", "registry_id", "result_folder_name", "plate_barcode", |
616 "mlg", "affy_id", "percent_missing_data_coral", "percent_heterozygous_coral", | 616 "mlg", "affy_id", "percent_missing_data_coral", "percent_heterozygous_coral", |
617 "percent_reference_coral", "percent_alternative_coral"); | 617 "percent_acerv_coral", "percent_apalm_coral"); |
618 | 618 |
619 # Output the data frame for updating the alleles table. | 619 # Output the data frame for updating the alleles table. |
620 # Subset to only the new plate data. | 620 # Subset to only the new plate data. |
621 i <- ifelse(is.na(stag_db_report[3]), "", stag_db_report[[3]]); | 621 i <- ifelse(is.na(stag_db_report[3]), "", stag_db_report[[3]]); |
622 # Create a vector indicating the number of individuals desired | 622 # Create a vector indicating the number of individuals desired |
778 # Output the file needed for populating the sample table. | 778 # Output the file needed for populating the sample table. |
779 sample_table_data_frame <- data.frame(matrix(ncol=20, nrow=num_rows)); | 779 sample_table_data_frame <- data.frame(matrix(ncol=20, nrow=num_rows)); |
780 colnames(sample_table_data_frame) <- c("affy_id", "colony_location", "collection_date", "user_specimen_id", | 780 colnames(sample_table_data_frame) <- c("affy_id", "colony_location", "collection_date", "user_specimen_id", |
781 "registry_id", "depth", "dna_extraction_method", "dna_concentration", | 781 "registry_id", "depth", "dna_extraction_method", "dna_concentration", |
782 "public", "public_after_date", "percent_missing_data_coral", | 782 "public", "public_after_date", "percent_missing_data_coral", |
783 "percent_missing_data_sym", "percent_reference_coral", | 783 "percent_missing_data_sym", "percent_acerv_coral", |
784 "percent_reference_sym", "percent_alternative_coral", | 784 "percent_reference_sym", "percent_apalm_coral", |
785 "percent_alternative_sym", "percent_heterozygous_coral", | 785 "percent_alternative_sym", "percent_heterozygous_coral", |
786 "percent_heterozygous_sym", "field_call", "bcoral_genet_id"); | 786 "percent_heterozygous_sym", "field_call", "bcoral_genet_id"); |
787 for (i in 1:num_rows) { | 787 for (i in 1:num_rows) { |
788 sample_table_data_frame$affy_id[i] <- sample_prep_data_frame$affy_id[i]; | 788 sample_table_data_frame$affy_id[i] <- sample_prep_data_frame$affy_id[i]; |
789 sample_table_data_frame$colony_location[i] <- sample_prep_data_frame$colony_location[i]; | 789 sample_table_data_frame$colony_location[i] <- sample_prep_data_frame$colony_location[i]; |
795 sample_table_data_frame$dna_concentration[i] <- sample_prep_data_frame$dna_concentration[i]; | 795 sample_table_data_frame$dna_concentration[i] <- sample_prep_data_frame$dna_concentration[i]; |
796 sample_table_data_frame$public[i] <- sample_prep_data_frame$public[i]; | 796 sample_table_data_frame$public[i] <- sample_prep_data_frame$public[i]; |
797 sample_table_data_frame$public_after_date[i] <- sample_prep_data_frame$public_after_date[i]; | 797 sample_table_data_frame$public_after_date[i] <- sample_prep_data_frame$public_after_date[i]; |
798 sample_table_data_frame$percent_missing_data_coral[i] <- sample_prep_data_frame$percent_missing_data_coral[i]; | 798 sample_table_data_frame$percent_missing_data_coral[i] <- sample_prep_data_frame$percent_missing_data_coral[i]; |
799 sample_table_data_frame$percent_missing_data_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; | 799 sample_table_data_frame$percent_missing_data_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; |
800 sample_table_data_frame$percent_reference_coral[i] <- sample_prep_data_frame$percent_reference_coral[i]; | 800 sample_table_data_frame$percent_acerv_coral[i] <- sample_prep_data_frame$percent_acerv_coral[i]; |
801 sample_table_data_frame$percent_reference_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; | 801 sample_table_data_frame$percent_reference_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; |
802 sample_table_data_frame$percent_alternative_coral[i] <- sample_prep_data_frame$percent_alternative_coral[i]; | 802 sample_table_data_frame$percent_apalm_coral[i] <- sample_prep_data_frame$percent_apalm_coral[i]; |
803 sample_table_data_frame$percent_alternative_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; | 803 sample_table_data_frame$percent_alternative_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; |
804 sample_table_data_frame$percent_heterozygous_coral[i] <- sample_prep_data_frame$percent_heterozygous_coral[i]; | 804 sample_table_data_frame$percent_heterozygous_coral[i] <- sample_prep_data_frame$percent_heterozygous_coral[i]; |
805 sample_table_data_frame$percent_heterozygous_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; | 805 sample_table_data_frame$percent_heterozygous_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; |
806 sample_table_data_frame$field_call[i] <- sample_prep_data_frame$field_call[i]; | 806 sample_table_data_frame$field_call[i] <- sample_prep_data_frame$field_call[i]; |
807 sample_table_data_frame$bcoral_genet_id[i] <- sample_prep_data_frame$bcoral_genet_id[i]; | 807 sample_table_data_frame$bcoral_genet_id[i] <- sample_prep_data_frame$bcoral_genet_id[i]; |