Mercurial > repos > pmac > iterativepca
diff iterative_pca_plot.R @ 0:64e75e21466e draft default tip
Uploaded
author | pmac |
---|---|
date | Wed, 01 Jun 2016 03:38:39 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/iterative_pca_plot.R Wed Jun 01 03:38:39 2016 -0400 @@ -0,0 +1,97 @@ +library(flashpcaR) +library(dbscan) +library(cluster) + +## MAIN ### +# get command line arguments +CLI_FLAG = 1 +if (CLI_FLAG == 1) { + n = commandArgs(trailingOnly=TRUE)[1] + outdir = commandArgs(trailingOnly=TRUE)[2] + basename = commandArgs(trailingOnly=TRUE)[3] + data_source = commandArgs(trailingOnly=TRUE)[4] + data_type = commandArgs(trailingOnly=TRUE)[5] + eth_filename = commandArgs(trailingOnly=TRUE)[6] + control_tag = commandArgs(trailingOnly=TRUE)[7] + if (control_tag == "None") {control_tag = NULL} + cases_tag = commandArgs(trailingOnly=TRUE)[8] + if (cases_tag == "None") {cases_tag = NULL} + numsds = as.numeric(commandArgs(trailingOnly=TRUE)[9]) + cmethod = commandArgs(trailingOnly=TRUE)[10] + tmethod = commandArgs(trailingOnly=TRUE)[11] + path_to_r_functions = commandArgs(trailingOnly=TRUE)[12] + xsamples_filename = commandArgs(trailingOnly=TRUE)[13] + xsnps_filename = commandArgs(trailingOnly=TRUE)[14] +} else { + n = 10 + basename = "test_eth2" + data_source = "./data/halo1_numeric.ped" + data_type = "numeric_ped" + outdir = paste0(getwd(), "/full_output_", basename) + #data_source = "./data/HapMap3_flashPCA_data.rds" + #data_type = "rds" + #eth_filename = "./data/HapMap3_ethnicity_rf.txt" + eth_filename = "./data/Halo_ethnicity_rf.txt" + control_tag = "HAPS" + cases_tag = NULL + numsds = 1.1 + cmethod = "hclust" + tmethod = "mcd" + path_to_r_functions = paste0(getwd(), "/R_functions/") + xsamples_filename = "./xfiles/halo1_xsamples.txt" + xsnps_filename = "./xfiles/halo1_xsnps.txt" +} + +# get source code +source(paste0(path_to_r_functions, "/", "plotting_functions.R")) +source(paste0(path_to_r_functions, "/", "pca_helpers.R")) +source(paste0(path_to_r_functions, "/", "pipeline_code.R")) +source(paste0(path_to_r_functions, "/", "clustering_functions.R")) +source(paste0(path_to_r_functions, "/", "outlier_trimming.R")) + +if (CLI_FLAG != 1) { + unlink(paste0(getwd(), "/", "full_output_", basename), recursive=TRUE) +} + +# read in data +ped_data = get_source_data(data_source, data_type) +eth_data = parse_ethnicity_file(eth_filename) +xsamples = get_first_column(xsamples_filename) +xsnps = get_first_column(xsnps_filename) + +# do the pca and prepare plots +iterations = list() +for(i in 1:n) { + fpd = filter_ped_data(ped_data, xsamples, xsnps) + iterations[[i]] = single_iteration(outdir, basename, fpd, xsamples, numsds, + cmethod, tmethod, control_tag, cases_tag, ethnicity_data=eth_data) + iterations[[i]]$dirname = generate_directory_name(outdir, basename, i) + xsamples = iterations[[i]]$xsamples +} + +# create folders and plots +for (i in 1:n) { + titer = iterations[[i]] + dir.create(titer$dirname, recursive=TRUE) + num_plots = titer$num_plots + + for (j in 1:num_plots) { + plot_filename = sprintf("%s/%s_plot_number_%d.png", titer$dirname, basename, j) + plot_by_groups(titer$pca_data$values[, c(1, 2)], + titer$plots[[j]]$groups, + titer$plots[[j]]$tags, + titer$plots[[j]]$plot_colors, + titer$plots[[j]]$plot_symbols, + titer$plots[[j]]$plot_title, + plot_filename=plot_filename + ) + } + + # write outliers to file + xfilename = paste0(titer$dirname, "/", basename, "_xfile.txt") + outliers_filename = paste0(titer$dirname, "/", basename, "_outliers.txt") + xscon = add_ethnicity_data(titer$old_xsamples, eth_data) + olcon = add_ethnicity_data(titer$outliers, eth_data) + write.table(xscon, file=xfilename, row.names=FALSE, col.names=TRUE, sep=",") + write.table(olcon, file=outliers_filename, row.names=FALSE, col.names=TRUE, sep=",") +} \ No newline at end of file