Mercurial > repos > pmac > iterativepca
comparison iterative_pca_plot.R @ 0:64e75e21466e draft default tip
Uploaded
author | pmac |
---|---|
date | Wed, 01 Jun 2016 03:38:39 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:64e75e21466e |
---|---|
1 library(flashpcaR) | |
2 library(dbscan) | |
3 library(cluster) | |
4 | |
5 ## MAIN ### | |
6 # get command line arguments | |
7 CLI_FLAG = 1 | |
8 if (CLI_FLAG == 1) { | |
9 n = commandArgs(trailingOnly=TRUE)[1] | |
10 outdir = commandArgs(trailingOnly=TRUE)[2] | |
11 basename = commandArgs(trailingOnly=TRUE)[3] | |
12 data_source = commandArgs(trailingOnly=TRUE)[4] | |
13 data_type = commandArgs(trailingOnly=TRUE)[5] | |
14 eth_filename = commandArgs(trailingOnly=TRUE)[6] | |
15 control_tag = commandArgs(trailingOnly=TRUE)[7] | |
16 if (control_tag == "None") {control_tag = NULL} | |
17 cases_tag = commandArgs(trailingOnly=TRUE)[8] | |
18 if (cases_tag == "None") {cases_tag = NULL} | |
19 numsds = as.numeric(commandArgs(trailingOnly=TRUE)[9]) | |
20 cmethod = commandArgs(trailingOnly=TRUE)[10] | |
21 tmethod = commandArgs(trailingOnly=TRUE)[11] | |
22 path_to_r_functions = commandArgs(trailingOnly=TRUE)[12] | |
23 xsamples_filename = commandArgs(trailingOnly=TRUE)[13] | |
24 xsnps_filename = commandArgs(trailingOnly=TRUE)[14] | |
25 } else { | |
26 n = 10 | |
27 basename = "test_eth2" | |
28 data_source = "./data/halo1_numeric.ped" | |
29 data_type = "numeric_ped" | |
30 outdir = paste0(getwd(), "/full_output_", basename) | |
31 #data_source = "./data/HapMap3_flashPCA_data.rds" | |
32 #data_type = "rds" | |
33 #eth_filename = "./data/HapMap3_ethnicity_rf.txt" | |
34 eth_filename = "./data/Halo_ethnicity_rf.txt" | |
35 control_tag = "HAPS" | |
36 cases_tag = NULL | |
37 numsds = 1.1 | |
38 cmethod = "hclust" | |
39 tmethod = "mcd" | |
40 path_to_r_functions = paste0(getwd(), "/R_functions/") | |
41 xsamples_filename = "./xfiles/halo1_xsamples.txt" | |
42 xsnps_filename = "./xfiles/halo1_xsnps.txt" | |
43 } | |
44 | |
45 # get source code | |
46 source(paste0(path_to_r_functions, "/", "plotting_functions.R")) | |
47 source(paste0(path_to_r_functions, "/", "pca_helpers.R")) | |
48 source(paste0(path_to_r_functions, "/", "pipeline_code.R")) | |
49 source(paste0(path_to_r_functions, "/", "clustering_functions.R")) | |
50 source(paste0(path_to_r_functions, "/", "outlier_trimming.R")) | |
51 | |
52 if (CLI_FLAG != 1) { | |
53 unlink(paste0(getwd(), "/", "full_output_", basename), recursive=TRUE) | |
54 } | |
55 | |
56 # read in data | |
57 ped_data = get_source_data(data_source, data_type) | |
58 eth_data = parse_ethnicity_file(eth_filename) | |
59 xsamples = get_first_column(xsamples_filename) | |
60 xsnps = get_first_column(xsnps_filename) | |
61 | |
62 # do the pca and prepare plots | |
63 iterations = list() | |
64 for(i in 1:n) { | |
65 fpd = filter_ped_data(ped_data, xsamples, xsnps) | |
66 iterations[[i]] = single_iteration(outdir, basename, fpd, xsamples, numsds, | |
67 cmethod, tmethod, control_tag, cases_tag, ethnicity_data=eth_data) | |
68 iterations[[i]]$dirname = generate_directory_name(outdir, basename, i) | |
69 xsamples = iterations[[i]]$xsamples | |
70 } | |
71 | |
72 # create folders and plots | |
73 for (i in 1:n) { | |
74 titer = iterations[[i]] | |
75 dir.create(titer$dirname, recursive=TRUE) | |
76 num_plots = titer$num_plots | |
77 | |
78 for (j in 1:num_plots) { | |
79 plot_filename = sprintf("%s/%s_plot_number_%d.png", titer$dirname, basename, j) | |
80 plot_by_groups(titer$pca_data$values[, c(1, 2)], | |
81 titer$plots[[j]]$groups, | |
82 titer$plots[[j]]$tags, | |
83 titer$plots[[j]]$plot_colors, | |
84 titer$plots[[j]]$plot_symbols, | |
85 titer$plots[[j]]$plot_title, | |
86 plot_filename=plot_filename | |
87 ) | |
88 } | |
89 | |
90 # write outliers to file | |
91 xfilename = paste0(titer$dirname, "/", basename, "_xfile.txt") | |
92 outliers_filename = paste0(titer$dirname, "/", basename, "_outliers.txt") | |
93 xscon = add_ethnicity_data(titer$old_xsamples, eth_data) | |
94 olcon = add_ethnicity_data(titer$outliers, eth_data) | |
95 write.table(xscon, file=xfilename, row.names=FALSE, col.names=TRUE, sep=",") | |
96 write.table(olcon, file=outliers_filename, row.names=FALSE, col.names=TRUE, sep=",") | |
97 } |