annotate iterative_pca_plot.R @ 0:64e75e21466e draft default tip

Uploaded
author pmac
date Wed, 01 Jun 2016 03:38:39 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
64e75e21466e Uploaded
pmac
parents:
diff changeset
1 library(flashpcaR)
64e75e21466e Uploaded
pmac
parents:
diff changeset
2 library(dbscan)
64e75e21466e Uploaded
pmac
parents:
diff changeset
3 library(cluster)
64e75e21466e Uploaded
pmac
parents:
diff changeset
4
64e75e21466e Uploaded
pmac
parents:
diff changeset
5 ## MAIN ###
64e75e21466e Uploaded
pmac
parents:
diff changeset
6 # get command line arguments
64e75e21466e Uploaded
pmac
parents:
diff changeset
7 CLI_FLAG = 1
64e75e21466e Uploaded
pmac
parents:
diff changeset
8 if (CLI_FLAG == 1) {
64e75e21466e Uploaded
pmac
parents:
diff changeset
9 n = commandArgs(trailingOnly=TRUE)[1]
64e75e21466e Uploaded
pmac
parents:
diff changeset
10 outdir = commandArgs(trailingOnly=TRUE)[2]
64e75e21466e Uploaded
pmac
parents:
diff changeset
11 basename = commandArgs(trailingOnly=TRUE)[3]
64e75e21466e Uploaded
pmac
parents:
diff changeset
12 data_source = commandArgs(trailingOnly=TRUE)[4]
64e75e21466e Uploaded
pmac
parents:
diff changeset
13 data_type = commandArgs(trailingOnly=TRUE)[5]
64e75e21466e Uploaded
pmac
parents:
diff changeset
14 eth_filename = commandArgs(trailingOnly=TRUE)[6]
64e75e21466e Uploaded
pmac
parents:
diff changeset
15 control_tag = commandArgs(trailingOnly=TRUE)[7]
64e75e21466e Uploaded
pmac
parents:
diff changeset
16 if (control_tag == "None") {control_tag = NULL}
64e75e21466e Uploaded
pmac
parents:
diff changeset
17 cases_tag = commandArgs(trailingOnly=TRUE)[8]
64e75e21466e Uploaded
pmac
parents:
diff changeset
18 if (cases_tag == "None") {cases_tag = NULL}
64e75e21466e Uploaded
pmac
parents:
diff changeset
19 numsds = as.numeric(commandArgs(trailingOnly=TRUE)[9])
64e75e21466e Uploaded
pmac
parents:
diff changeset
20 cmethod = commandArgs(trailingOnly=TRUE)[10]
64e75e21466e Uploaded
pmac
parents:
diff changeset
21 tmethod = commandArgs(trailingOnly=TRUE)[11]
64e75e21466e Uploaded
pmac
parents:
diff changeset
22 path_to_r_functions = commandArgs(trailingOnly=TRUE)[12]
64e75e21466e Uploaded
pmac
parents:
diff changeset
23 xsamples_filename = commandArgs(trailingOnly=TRUE)[13]
64e75e21466e Uploaded
pmac
parents:
diff changeset
24 xsnps_filename = commandArgs(trailingOnly=TRUE)[14]
64e75e21466e Uploaded
pmac
parents:
diff changeset
25 } else {
64e75e21466e Uploaded
pmac
parents:
diff changeset
26 n = 10
64e75e21466e Uploaded
pmac
parents:
diff changeset
27 basename = "test_eth2"
64e75e21466e Uploaded
pmac
parents:
diff changeset
28 data_source = "./data/halo1_numeric.ped"
64e75e21466e Uploaded
pmac
parents:
diff changeset
29 data_type = "numeric_ped"
64e75e21466e Uploaded
pmac
parents:
diff changeset
30 outdir = paste0(getwd(), "/full_output_", basename)
64e75e21466e Uploaded
pmac
parents:
diff changeset
31 #data_source = "./data/HapMap3_flashPCA_data.rds"
64e75e21466e Uploaded
pmac
parents:
diff changeset
32 #data_type = "rds"
64e75e21466e Uploaded
pmac
parents:
diff changeset
33 #eth_filename = "./data/HapMap3_ethnicity_rf.txt"
64e75e21466e Uploaded
pmac
parents:
diff changeset
34 eth_filename = "./data/Halo_ethnicity_rf.txt"
64e75e21466e Uploaded
pmac
parents:
diff changeset
35 control_tag = "HAPS"
64e75e21466e Uploaded
pmac
parents:
diff changeset
36 cases_tag = NULL
64e75e21466e Uploaded
pmac
parents:
diff changeset
37 numsds = 1.1
64e75e21466e Uploaded
pmac
parents:
diff changeset
38 cmethod = "hclust"
64e75e21466e Uploaded
pmac
parents:
diff changeset
39 tmethod = "mcd"
64e75e21466e Uploaded
pmac
parents:
diff changeset
40 path_to_r_functions = paste0(getwd(), "/R_functions/")
64e75e21466e Uploaded
pmac
parents:
diff changeset
41 xsamples_filename = "./xfiles/halo1_xsamples.txt"
64e75e21466e Uploaded
pmac
parents:
diff changeset
42 xsnps_filename = "./xfiles/halo1_xsnps.txt"
64e75e21466e Uploaded
pmac
parents:
diff changeset
43 }
64e75e21466e Uploaded
pmac
parents:
diff changeset
44
64e75e21466e Uploaded
pmac
parents:
diff changeset
45 # get source code
64e75e21466e Uploaded
pmac
parents:
diff changeset
46 source(paste0(path_to_r_functions, "/", "plotting_functions.R"))
64e75e21466e Uploaded
pmac
parents:
diff changeset
47 source(paste0(path_to_r_functions, "/", "pca_helpers.R"))
64e75e21466e Uploaded
pmac
parents:
diff changeset
48 source(paste0(path_to_r_functions, "/", "pipeline_code.R"))
64e75e21466e Uploaded
pmac
parents:
diff changeset
49 source(paste0(path_to_r_functions, "/", "clustering_functions.R"))
64e75e21466e Uploaded
pmac
parents:
diff changeset
50 source(paste0(path_to_r_functions, "/", "outlier_trimming.R"))
64e75e21466e Uploaded
pmac
parents:
diff changeset
51
64e75e21466e Uploaded
pmac
parents:
diff changeset
52 if (CLI_FLAG != 1) {
64e75e21466e Uploaded
pmac
parents:
diff changeset
53 unlink(paste0(getwd(), "/", "full_output_", basename), recursive=TRUE)
64e75e21466e Uploaded
pmac
parents:
diff changeset
54 }
64e75e21466e Uploaded
pmac
parents:
diff changeset
55
64e75e21466e Uploaded
pmac
parents:
diff changeset
56 # read in data
64e75e21466e Uploaded
pmac
parents:
diff changeset
57 ped_data = get_source_data(data_source, data_type)
64e75e21466e Uploaded
pmac
parents:
diff changeset
58 eth_data = parse_ethnicity_file(eth_filename)
64e75e21466e Uploaded
pmac
parents:
diff changeset
59 xsamples = get_first_column(xsamples_filename)
64e75e21466e Uploaded
pmac
parents:
diff changeset
60 xsnps = get_first_column(xsnps_filename)
64e75e21466e Uploaded
pmac
parents:
diff changeset
61
64e75e21466e Uploaded
pmac
parents:
diff changeset
62 # do the pca and prepare plots
64e75e21466e Uploaded
pmac
parents:
diff changeset
63 iterations = list()
64e75e21466e Uploaded
pmac
parents:
diff changeset
64 for(i in 1:n) {
64e75e21466e Uploaded
pmac
parents:
diff changeset
65 fpd = filter_ped_data(ped_data, xsamples, xsnps)
64e75e21466e Uploaded
pmac
parents:
diff changeset
66 iterations[[i]] = single_iteration(outdir, basename, fpd, xsamples, numsds,
64e75e21466e Uploaded
pmac
parents:
diff changeset
67 cmethod, tmethod, control_tag, cases_tag, ethnicity_data=eth_data)
64e75e21466e Uploaded
pmac
parents:
diff changeset
68 iterations[[i]]$dirname = generate_directory_name(outdir, basename, i)
64e75e21466e Uploaded
pmac
parents:
diff changeset
69 xsamples = iterations[[i]]$xsamples
64e75e21466e Uploaded
pmac
parents:
diff changeset
70 }
64e75e21466e Uploaded
pmac
parents:
diff changeset
71
64e75e21466e Uploaded
pmac
parents:
diff changeset
72 # create folders and plots
64e75e21466e Uploaded
pmac
parents:
diff changeset
73 for (i in 1:n) {
64e75e21466e Uploaded
pmac
parents:
diff changeset
74 titer = iterations[[i]]
64e75e21466e Uploaded
pmac
parents:
diff changeset
75 dir.create(titer$dirname, recursive=TRUE)
64e75e21466e Uploaded
pmac
parents:
diff changeset
76 num_plots = titer$num_plots
64e75e21466e Uploaded
pmac
parents:
diff changeset
77
64e75e21466e Uploaded
pmac
parents:
diff changeset
78 for (j in 1:num_plots) {
64e75e21466e Uploaded
pmac
parents:
diff changeset
79 plot_filename = sprintf("%s/%s_plot_number_%d.png", titer$dirname, basename, j)
64e75e21466e Uploaded
pmac
parents:
diff changeset
80 plot_by_groups(titer$pca_data$values[, c(1, 2)],
64e75e21466e Uploaded
pmac
parents:
diff changeset
81 titer$plots[[j]]$groups,
64e75e21466e Uploaded
pmac
parents:
diff changeset
82 titer$plots[[j]]$tags,
64e75e21466e Uploaded
pmac
parents:
diff changeset
83 titer$plots[[j]]$plot_colors,
64e75e21466e Uploaded
pmac
parents:
diff changeset
84 titer$plots[[j]]$plot_symbols,
64e75e21466e Uploaded
pmac
parents:
diff changeset
85 titer$plots[[j]]$plot_title,
64e75e21466e Uploaded
pmac
parents:
diff changeset
86 plot_filename=plot_filename
64e75e21466e Uploaded
pmac
parents:
diff changeset
87 )
64e75e21466e Uploaded
pmac
parents:
diff changeset
88 }
64e75e21466e Uploaded
pmac
parents:
diff changeset
89
64e75e21466e Uploaded
pmac
parents:
diff changeset
90 # write outliers to file
64e75e21466e Uploaded
pmac
parents:
diff changeset
91 xfilename = paste0(titer$dirname, "/", basename, "_xfile.txt")
64e75e21466e Uploaded
pmac
parents:
diff changeset
92 outliers_filename = paste0(titer$dirname, "/", basename, "_outliers.txt")
64e75e21466e Uploaded
pmac
parents:
diff changeset
93 xscon = add_ethnicity_data(titer$old_xsamples, eth_data)
64e75e21466e Uploaded
pmac
parents:
diff changeset
94 olcon = add_ethnicity_data(titer$outliers, eth_data)
64e75e21466e Uploaded
pmac
parents:
diff changeset
95 write.table(xscon, file=xfilename, row.names=FALSE, col.names=TRUE, sep=",")
64e75e21466e Uploaded
pmac
parents:
diff changeset
96 write.table(olcon, file=outliers_filename, row.names=FALSE, col.names=TRUE, sep=",")
64e75e21466e Uploaded
pmac
parents:
diff changeset
97 }