Mercurial > repos > greg > coral_multilocus_genotype
comparison coral_multilocus_genotype.R @ 9:3c42de11ea1d draft default tip
Uploaded
author | greg |
---|---|
date | Wed, 24 Nov 2021 20:17:10 +0000 |
parents | 33d759858625 |
children |
comparison
equal
deleted
inserted
replaced
8:33d759858625 | 9:3c42de11ea1d |
---|---|
173 select("id", "coral_mlg_clonal_id", "coral_mlg_rep_sample_id", "genetic_coral_species_call"), | 173 select("id", "coral_mlg_clonal_id", "coral_mlg_rep_sample_id", "genetic_coral_species_call"), |
174 by=c("genotype_id"="id")); | 174 by=c("genotype_id"="id")); |
175 # Name the columns. | 175 # Name the columns. |
176 smlg_data_frame <- as.data.frame(smlg); | 176 smlg_data_frame <- as.data.frame(smlg); |
177 colnames(smlg_data_frame) <- c("user_specimen_id", "affy_id", "bcoral_genet_id", "genotype_id", | 177 colnames(smlg_data_frame) <- c("user_specimen_id", "affy_id", "bcoral_genet_id", "genotype_id", |
178 "coral_mlg_clonal_id", "coral_mlg_rep_sample_id", "genetic_coral_species_call"); | 178 "coral_mlg_clonal_id", "coral_mlg_rep_sample_id", "genetic_coral_species_call"); |
179 log_data_frame("smlg_data_frame", smlg_data_frame); | 179 log_data_frame("smlg_data_frame", smlg_data_frame); |
180 # Missing GT in samples submitted. | 180 # Missing GT in samples submitted. |
181 start_time <- time_start("Discovering missing GT in samples"); | 181 start_time <- time_start("Discovering missing GT in samples"); |
182 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE); | 182 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE); |
183 missing_gt <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))}); | 183 missing_gt <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))}); |
774 select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"), | 774 select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"), |
775 by='coral_mlg_clonal_id') %>% | 775 by='coral_mlg_clonal_id') %>% |
776 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>% | 776 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id.x),coral_mlg_rep_sample_id.y,coral_mlg_rep_sample_id.x)) %>% |
777 ungroup() %>% | 777 ungroup() %>% |
778 dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>% | 778 dplyr::select(-coral_mlg_rep_sample_id.x,-coral_mlg_rep_sample_id.y, -group.x,-group.y) %>% |
779 distinct(); | 779 group_by(coral_mlg_clonal_id) %>% |
780 arrange(coral_mlg_rep_sample_id) %>% | |
781 slice(1); | |
780 | 782 |
781 # Confirm that the representative mlg is the same between runs. | 783 # Confirm that the representative mlg is the same between runs. |
782 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]); | 784 uniques2 <- unique(prep_genotype_tibble[c("group", "coral_mlg_rep_sample_id")]); |
783 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),]; | 785 uniques2 <- uniques2[!is.na(uniques2$coral_mlg_rep_sample_id),]; |
784 na.mlg3 <- which(is.na(prep_genotype_tibble$coral_mlg_rep_sample_id)); | 786 na.mlg3 <- which(is.na(prep_genotype_tibble$coral_mlg_rep_sample_id)); |
794 # <chr> <chr> | 796 # <chr> <chr> |
795 # A.palmata C1651 | 797 # A.palmata C1651 |
796 representative_mlg_tibble <- prep_genotype_tibble %>% | 798 representative_mlg_tibble <- prep_genotype_tibble %>% |
797 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>% | 799 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_rep_sample_id) & (db_match =="no_match"), affy_id, coral_mlg_rep_sample_id)) %>% |
798 ungroup() %>% | 800 ungroup() %>% |
799 select(-group)%>% | 801 select(-group); |
800 distinct(); | |
801 # prep_genotype_table_tibble looks like this: | 802 # prep_genotype_table_tibble looks like this: |
802 # affy_id coral_mlg_clonal_id user_specimen_id db_match | 803 # affy_id coral_mlg_clonal_id user_specimen_id db_match |
803 # a550962...CEL HG0120 1090 match | 804 # a550962...CEL HG0120 1090 match |
804 # genetic_coral_species_call coral_mlg_rep_sample_id | 805 # genetic_coral_species_call coral_mlg_rep_sample_id |
805 # A.palmata 1104 | 806 # A.palmata 1104 |
806 prep_genotype_table_tibble <- stag_db_report %>% | 807 prep_genotype_table_tibble <- stag_db_report %>% |
807 select("affy_id", "coral_mlg_clonal_id", "user_specimen_id", "db_match", "genetic_coral_species_call") %>% | 808 select("affy_id", "coral_mlg_clonal_id", "user_specimen_id", "db_match", "genetic_coral_species_call") %>% |
808 left_join(representative_mlg_tibble %>% | 809 left_join(representative_mlg_tibble %>% |
809 select("affy_id", "coral_mlg_rep_sample_id"), | 810 select("coral_mlg_rep_sample_id", "coral_mlg_clonal_id"), |
810 by='affy_id'); | 811 by='coral_mlg_clonal_id'); |
811 # genotype_table_tibble looks like this: | 812 # genotype_table_tibble looks like this: |
812 # affy_id coral_mlg_clonal_id user_specimen_id db_match | 813 # affy_id coral_mlg_clonal_id user_specimen_id db_match |
813 # a550962-436.CEL HG0120 1090 match | 814 # a550962-436.CEL HG0120 1090 match |
814 # genetic_coral_species_call coral_mlg_rep_sample_id bcoral_genet_id | 815 # genetic_coral_species_call coral_mlg_rep_sample_id bcoral_genet_id |
815 # A.palmata 1104 <NA> | 816 # A.palmata 1104 <NA> |
816 genotype_table_tibble <- prep_genotype_table_tibble %>% | 817 genotype_table_tibble <- prep_genotype_table_tibble %>% |
817 left_join(affy_metadata_data_frame %>% | 818 left_join(affy_metadata_data_frame %>% |
818 select("user_specimen_id", "bcoral_genet_id"), | 819 select("user_specimen_id", "bcoral_genet_id"), |
819 by='user_specimen_id') %>% | 820 by='user_specimen_id'); |
820 drop_na(coral_mlg_rep_sample_id); | |
821 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble); | 821 write_data_frame(output_data_dir, "genotype.tabular", genotype_table_tibble); |
822 | 822 |
823 # Output the file needed for populating the person table. | 823 # Output the file needed for populating the person table. |
824 person_table_data_frame <- data.frame(matrix(ncol=4, nrow=num_rows)); | 824 person_table_data_frame <- data.frame(matrix(ncol=4, nrow=num_rows)); |
825 colnames(person_table_data_frame) <- c("last_name", "first_name", "organization", "email"); | 825 colnames(person_table_data_frame) <- c("last_name", "first_name", "organization", "email"); |
900 sample_table_data_frame$percent_apalm_coral[i] <- sample_prep_data_frame$percent_apalm_coral[i]; | 900 sample_table_data_frame$percent_apalm_coral[i] <- sample_prep_data_frame$percent_apalm_coral[i]; |
901 sample_table_data_frame$percent_alternative_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; | 901 sample_table_data_frame$percent_alternative_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; |
902 sample_table_data_frame$percent_heterozygous_coral[i] <- sample_prep_data_frame$percent_heterozygous_coral[i]; | 902 sample_table_data_frame$percent_heterozygous_coral[i] <- sample_prep_data_frame$percent_heterozygous_coral[i]; |
903 sample_table_data_frame$percent_heterozygous_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; | 903 sample_table_data_frame$percent_heterozygous_sym[i] <- DEFAULT_MISSING_NUMERIC_VALUE; |
904 sample_table_data_frame$field_call[i] <- sample_prep_data_frame$field_call[i]; | 904 sample_table_data_frame$field_call[i] <- sample_prep_data_frame$field_call[i]; |
905 sample_table_data_frame$bcoral_genet_id[i] <- sample_prep_data_frame$bcoral_genet_id[i]; | 905 sample_table_data_frame$bcoral_genet_id[i] <- sample_prep_data_frame$bcoral_genet_id[i]; |
906 } | 906 } |
907 write_data_frame(output_data_dir, "sample.tabular", sample_table_data_frame); | 907 write_data_frame(output_data_dir, "sample.tabular", sample_table_data_frame); |
908 | 908 |
909 # Output the file needed for populating the taxonomy table. | 909 # Output the file needed for populating the taxonomy table. |
910 # taxonomy_table_data_frame looks like this: | 910 # taxonomy_table_data_frame looks like this: |