annotate protein_rna_correlation.r @ 2:412403eec79c draft

planemo upload
author pravs
date Sun, 17 Jun 2018 04:44:29 -0400
parents fc89f8c3b777
children 6bf0203ee17e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
2 # About the script
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
3 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
4 # Version: V1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
5 # This script works for single sample only
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
6 # It takes GE (Gene Expression) and PE (Protein expression) data of one sample and perform correlation, regression analysis between PE and GE data
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
7 # Input data can be of tsv format
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
8 # Script also need a parameter or option file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
9
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
10 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
11 # Dependencies
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
12 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
13 # Following R package has to be installed.
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
14 # data.table
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
15 # gplots
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
16 # MASS
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
17 # DMwR
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
18 # mgcv
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
19 # It can be installed by following R command in R session. e.g. install.packages("data.table")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
20
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
21 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
22 # How to Run
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
23 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
24 # Rscript PE_GE_association_singleSample_V1.r <PE_file> <GE_file> <Option_file containing tool parameters> <Ensembl map file containing directory path> <outdir>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
25
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
26 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
27 # Arguments
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
28 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
29 # Arg1. <PE file>: PE data (tsv format)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
30 # Arg2. <GE file>: GE data (tsv format)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
31 # Arg3. <Option file>: tsv format, key\tvalue
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
32 # Options are
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
33 # PE_idcolno: Column number of PE file containing protein IDs
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
34 # GE_idcolno: Column number of GE file containing transcript IDs
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
35 # PE_expcolno: Column number of PE file containing protein expression values
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
36 # GE_expcolno: Column number of GE file containing transcript expression values
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
37 # PE_idtype: protein id type. It can be either Uniprot or Ensembl or HGNC_symbol
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
38 # GE_idtype: transcript id type. At present it is only one type i.e. Ensembl or HGNC_symbol
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
39 # Organism: Organism
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
40 # writeMapUnmap: Whether to write mapped and unmapped data in input data format. It takes value as 1 or 0. If 1, mapped and unmapped data is written. Default is 1.
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
41 # doscale: Whether perform scaling to input data or not. If yet, abundance values are normalized by standard normalization. Default 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
42 # Arg4. <Ensembl map file containg directory>: Path to Ensembl map file containg directory e.g. /home/user/Ensembl/mapfiles
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
43 # Arg5. <Outdir>: output directory (e.g. /home/user/out1)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
44
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
45 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
46 # Sample option file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
47 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
48 #PE_idcolno 7
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
49 #GE_idcolno 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
50 #PE_expcolno 2
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
51 #GE_expcolno 3
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
52 #PE_idtype Ensembl
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
53 #GE_idtype Ensembl
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
54 #Organism mmusculus
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
55 #writeMapUnmap 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
56 #doscale 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
57
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
58 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
59 # Output
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
60 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
61 # The script outputs image and data folder along with Correlation_result.html and Result.log file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
62 # Result.log: Log file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
63 # Correlation_result.html; main result file in html format
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
64
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
65 # data folder contains following output files
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
66
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
67 # PE_abundance.tsv: 2 column tsv file containing mapped id and protein expression values
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
68 # GE_abundance.tsv: 2 column tsv file containing mapped id and transcript expression values
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
69
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
70 # If writeMapUnmap is 1 i.e. to write mapped and unmapped data, 4 additional file will be written
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
71 # PE_unmapped.tsv: Output format is same as input, PE unmapped data is written
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
72 # GE_unmapped.tsv: Output format is same as input, GE unmapped data is written
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
73 # PE_mapped.tsv: Output format is same as input, PE mapped data is written
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
74 # GE_mapped.tsv: Output format is same as input, GE mapped data is written
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
75
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
76 # PE_GE_influential_observation.tsv: Influential observations based on cook's distance
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
77 # PE_GE_kmeans_clusterpoints.txt: Observations clustered based on kmeans clustering. File contains cluster assignment of each observations
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
78
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
79 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
80 # ............................SCRIPT STARTS FROM HERE .............................
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
81 #==================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
82 # Warning Off
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
83 oldw <- getOption("warn")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
84 options(warn = -1)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
85 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
86 # Functions
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
87 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
88 usage <- function()
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
89 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
90 cat("\n\n###########\nERROR: Rscript PE_GE_association_singleSample_V1.r <PE file> <GE file> <Option file containing tool parameters> <Ensembl map file containing directory path> <outdir>\n###########\n\n");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
91 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
92
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
93 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
94 # Global variables
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
95 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
96 noargs = 13;
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
97
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
98 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
99 # Parse command line arguments in args object
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
100 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
101 args = commandArgs(trailingOnly = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
102
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
103
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
104 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
105 # Check for No of arguments
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
106 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
107 if(length(args) != noargs)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
108 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
109 usage();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
110 stop(paste("Please check usage. Number of arguments is not equal to ",noargs,sep="",collapse=""));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
111 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
112
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
113 #======================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
114 # Load libraries
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
115 #======================================================================
2
412403eec79c planemo upload
pravs
parents: 0
diff changeset
116 library(data.table);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
117 library(lattice);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
118 library(grid);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
119 library(nlme);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
120 library(gplots);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
121 library(MASS);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
122 library(DMwR);
412403eec79c planemo upload
pravs
parents: 0
diff changeset
123 library(mgcv);
0
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
124
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
125
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
126 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
127 # Set variables
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
128 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
129 #PE_file = args[1]; # Protein abundance data file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
130 #GE_file = args[2]; # Gene expression data file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
131 #option_file = args[3]; # Option file containing various parameters
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
132 #biomartdir = args[4]; # Biomart map file containing directory path in local system
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
133 #outdir = args[5]; # output directory
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
134
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
135 PE_file = args[1]; # Protein abundance data file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
136 GE_file = args[2]; # Gene expression data file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
137 #option_file = args[3]; # Option file containing various parameters
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
138 #biomartdir = args[4]; # Biomart map file containing directory path in local system
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
139 outdir = args[13]; # output directory
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
140
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
141 #imagesubdirprefix = "image";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
142 #datasubdirprefix = "data";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
143 #htmloutfile = "Correlation_result.html";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
144 htmloutfile = args[12]
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
145
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
146 #logfile = "Result.log";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
147 PE_outfile_mapped = "PE_mapped.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
148 GE_outfile_mapped = "GE_mapped.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
149 PE_outfile_unmapped = "PE_unmapped.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
150 GE_outfile_unmapped = "GE_unmapped.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
151 PE_expfile = "PE_abundance.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
152 GE_expfile = "GE_abundance.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
153 PE_outfile_excluded_naInf = "PE_excluded_NA_Inf.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
154 GE_outfile_excluded_naInf = "GE_excluded_NA_Inf.tsv";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
155
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
156 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
157 # Check input files existance
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
158 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
159 if(! file.exists(PE_file))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
160 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
161 usage();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
162 stop(paste("Input PE_file file does not exists. Path given: ",PE_file,sep="",collapse=""));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
163 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
164 if(! file.exists(GE_file))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
165 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
166 usage();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
167 stop(paste("Input GE_file does not exists. Path given: ",GE_file,sep="",collapse=""));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
168 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
169 #if(! file.exists(option_file))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
170 #{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
171 # usage();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
172 # stop(paste("Input option_file does not exists. Path given: ",option_file,sep="",collapse=""));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
173 #}
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
174
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
175 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
176 # Read param_file and set parameter/option data frame
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
177 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
178 #optiondf = read.table(option_file, header = FALSE, stringsAsFactors = FALSE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
179 #rownames(optiondf) = optiondf[,1];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
180
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
181 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
182 # Define option variables
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
183 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
184 #PE_idcolno = as.numeric(optiondf["PE_idcolno",2]);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
185 #GE_idcolno = as.numeric(optiondf["GE_idcolno",2]);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
186 #PE_expcolno = as.numeric(optiondf["PE_expcolno",2]);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
187 #GE_expcolno = as.numeric(optiondf["GE_expcolno",2]);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
188 #PE_idtype = optiondf["PE_idtype",2];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
189 #GE_idtype = optiondf["GE_idtype",2];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
190 #Organism = optiondf["Organism",2];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
191 #writeMapUnmap = as.logical(as.numeric(optiondf["writeMapUnmap",2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
192 #doscale = as.logical(as.numeric(optiondf["doscale",2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
193
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
194 PE_idcolno = as.numeric(args[3])
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
195 GE_idcolno = as.numeric(args[4])
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
196 PE_expcolno = as.numeric(args[5])
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
197 GE_expcolno = as.numeric(args[6])
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
198 PE_idtype = args[7]
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
199 GE_idtype = args[8]
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
200 #Organism = args[9]
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
201 writeMapUnmap = as.logical(as.numeric(args[10]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
202 doscale = as.logical(as.numeric(args[11]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
203
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
204
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
205 #1 PE_file = "test_data/PE_mouse_singlesample.txt"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
206 #2 GE_file = "test_data/GE_mouse_singlesample.txt"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
207 #3 PE_idcolno = 7
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
208 #4 GE_idcolno = 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
209 #5 PE_expcolno = 13
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
210 #6 GE_expcolno = 10
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
211 #7 PE_idtype = "Ensembl_with_version"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
212 #8 GE_idtype = "Ensembl_with_version"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
213 #10 writeMapUnmap = 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
214 #11 doscale = 1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
215 #9 biomart_mapfile = "test_data/mmusculus_gene_ensembl__GRCm38.p6.map"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
216 #12 htmloutfile = "html_out.html"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
217 #13 outdir = "output_elements"
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
218
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
219 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
220 # Set column name of biomart map file (idtype) based on whether Ensembl or Uniprot
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
221 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
222 if(PE_idtype == "Ensembl")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
223 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
224 PE_idtype = "ensembl_peptide_id";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
225 }else
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
226 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
227 if(PE_idtype == "Ensembl_with_version")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
228 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
229 PE_idtype = "ensembl_peptide_id_version";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
230 }else{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
231 if(PE_idtype == "HGNC_symbol")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
232 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
233 PE_idtype = "hgnc_symbol";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
234 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
235 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
236 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
237
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
238 if(GE_idtype == "Ensembl")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
239 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
240 GE_idtype = "ensembl_transcript_id";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
241 }else
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
242 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
243 if(GE_idtype == "Ensembl_with_version")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
244 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
245 GE_idtype = "ensembl_transcript_id_version";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
246 }else{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
247 if(GE_idtype == "HGNC_symbol")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
248 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
249 GE_idtype = "hgnc_symbol";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
250 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
251 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
252 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
253 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
254 # Identify biomart mapping file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
255 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
256 #biomartdir = gsub(biomartdir, pattern="/$", replacement="")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
257 #biomart_mapfilename = list.files(path = biomartdir, pattern = Organism);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
258 #biomart_mapfile = paste(biomartdir,"/",biomart_mapfilename,sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
259 #print(biomart_mapfile);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
260 biomart_mapfile = args[9];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
261 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
262 # Parse PE, GE, biomart file file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
263 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
264 PE_df = as.data.frame(fread(input=PE_file, header=T, sep="\t"));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
265 GE_df = as.data.frame(fread(input=GE_file, header=T, sep="\t"));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
266 biomart_df = as.data.frame(fread(input=biomart_mapfile, header=T, sep="\t"));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
267
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
268 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
269 # Create directory structures and then set the working directory to output directory
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
270 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
271 if(! file.exists(outdir))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
272 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
273 dir.create(outdir);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
274 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
275
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
276 #tempdir = paste(outdir,"/",imagesubdirprefix,sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
277 #if(! file.exists(tempdir))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
278 #{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
279 # dir.create(tempdir);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
280 #}
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
281
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
282 #tempdir = paste(outdir,"/",datasubdirprefix,sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
283 #if(! file.exists(tempdir))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
284 #{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
285 # dir.create(tempdir);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
286 #}
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
287
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
288 #setwd(outdir);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
289 logfile = paste(outdir,"/", "Result.log",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
290
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
291 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
292 # Write initial data summary in html outfile
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
293 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
294 cat("<html><body>\n", file = htmloutfile);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
295 cat("<h1>Association between proteomics and transcriptomics data</h1>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
296 "<font color='blue'><h3>Input data summary</h3></font>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
297 "<ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
298 "<li>Abbrebiations used: PE (Proteomics) and GE (Transcriptomics)","</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
299 "<li>Input PE data dimension (Row Column): ", dim(PE_df),"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
300 "<li>Input GE data dimension (Row Column): ", dim(GE_df),"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
301 #"<li>Organism selected: ", Organism,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
302 "<li>Protein ID fetched from column: ", PE_idcolno,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
303 "<li>Transcript ID fetched from column: ", GE_idcolno,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
304 "<li>Protein ID type: ", PE_idtype,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
305 "<li>Transcript ID type: ", GE_idtype,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
306 "<li>Protein expression data fetched from column: ", PE_expcolno,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
307 "<li>Transcript expression data fetched from column: ", GE_expcolno,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
308 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
309
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
310 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
311 # Write initial data summary in logfile
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
312 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
313 #cat("Current work dir:", outdir,"\n");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
314 cat("Processing started\n---------------------\n", file=logfile);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
315 cat("Abbrebiations used: PE (Proteomics) and GE (Transcriptomics)\n", file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
316 cat("Input PE data dimension (Row Column): ", dim(PE_df),"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
317 cat("Input GE data dimension (Row Column): ", dim(GE_df),"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
318 #cat("Organism selected: ", Organism,"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
319 #cat("Biomart map file used: ", biomart_mapfilename,"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
320 cat("Ensembl Biomart mapping data dimension (Row Column): ", dim(biomart_df),"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
321 cat("\n\nProtein ID to Transcript ID mapping started\n----------------\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
322 cat("Protein ID fetched from column:", PE_idcolno,"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
323 cat("Transcript ID fetched from column:", GE_idcolno, "\n",file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
324 cat("Protein ID type:", PE_idtype, "\n",file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
325 cat("Transcript ID type:", GE_idtype,"\n", file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
326 cat("Protein expression data fetched from column:", PE_expcolno,"\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
327 cat("Transcript expression data fetched from column:", GE_expcolno, "\n",file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
328
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
329 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
330 # Mapping starts here
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
331 # Pseudocode
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
332 # Loop over each row of PE file, fetch protein_id
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
333 # Search the biomartmap file and obtain corresponding transcript_id
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
334 # Take the mapped transcript_id and search in GE file, which row it correspond
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
335 # Store the PE rowno and GE rowno in rowpair object
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
336 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
337 rowpair = data.frame(PE_rowno = 0, GE_rowno = 0);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
338 cat("Total rows:", nrow(PE_df),"\n");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
339 cat("\n\nTotal protein ids to be mapped: ", nrow(PE_df),"\n", file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
340 messagelog = "\n\nBelow are protein IDs, for which no match is observed in Ensembl Biomart Map file.\n\n";
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
341
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
342 # GE_id column
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
343 GE_ids = GE_df[,GE_idcolno];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
344 GE_ids = gsub(x=GE_ids, pattern=".\\d+$", replacement=""); # Remove version
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
345
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
346 # Loop over every row of PE data (PE_df)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
347 for(i in 1:nrow(PE_df))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
348 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
349
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
350 if(i%%100 ==0)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
351 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
352 cat("Total rows processed ", i,"\n", file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
353 print(i);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
354 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
355
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
356 queryid = PE_df[i,PE_idcolno];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
357 #queryid = gsub(x=queryid, pattern=".\\d+$", replacement=""); # Remove version
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
358 #print(queryid);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
359
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
360 PE_df_matchrowno = i; # Row number in PE_df which matches queryid
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
361
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
362 if(PE_idtype == "Uniprot")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
363 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
364 biomart_matchrowno = which(biomart_df[,8] == queryid | biomart_df[,9] == queryid); # Row number of biomart_df which matches queryid
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
365 }else{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
366 biomart_matchrowno = which(biomart_df[,PE_idtype] == queryid); # Row number of biomart_df which matches queryid
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
367 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
368
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
369 # If match found, map protein id to GE id and find corresponding match row number of GE_df
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
370 if(length(biomart_matchrowno)>0)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
371 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
372 GE_df_matchrowno = which(GE_ids %in% biomart_df[biomart_matchrowno[1],GE_idtype]);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
373 rowpair = rbind( rowpair, c(PE_df_matchrowno, GE_df_matchrowno));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
374 if(length(GE_df_matchrowno) > 1)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
375 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
376 cat("\nFor protein ID ", i," multiple transcript mapping found\n", file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
377
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
378 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
379 "<br><font color=",'red',">For protein ID", i," multiple transcript mapping found</font><br>",file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
380 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
381 }else{
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
382 messagelog = paste(messagelog, queryid, "\n",sep="", collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
383 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
384 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
385 rowpair = rowpair[-1,];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
386
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
387 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
388 # Write mapping summary, mapped and unmapped data
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
389 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
390 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
391 "<li>Total Protein ID mapped: ", length(intersect(1:nrow(PE_df), rowpair[,1])),"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
392 "<li>Total Protein ID unmapped: ", length(setdiff(1:nrow(PE_df), rowpair[,1])),"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
393 "<li>Total Transcript ID mapped: ", length(intersect(1:nrow(GE_df), rowpair[,2])),"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
394 "<li>Total Transcript ID unmapped: ", length(setdiff(1:nrow(GE_df), rowpair[,2])),"</li></ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
395 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
396
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
397 cat("\n\nMapping Statistics\n---------------------\n", file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
398 cat("Total Protein ID mapped:", length(intersect(1:nrow(PE_df), rowpair[,1])), "\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
399 cat("Total Protein ID unmapped:", length(setdiff(1:nrow(PE_df), rowpair[,1])), "\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
400 cat("Total Transcript ID mapped:", length(intersect(1:nrow(GE_df), rowpair[,2])), "\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
401 cat("Total Transcript ID unmapped:", length(setdiff(1:nrow(GE_df), rowpair[,2])), "\n", file=logfile, append=T)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
402
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
403 cat(messagelog,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
404
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
405 if(writeMapUnmap)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
406 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
407 write.table(PE_df[rowpair[,1], ], file=paste(outdir,"/",PE_outfile_mapped,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
408 write.table(GE_df[rowpair[,2], ], file= paste(outdir,"/",GE_outfile_mapped,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
409 write.table(PE_df[-rowpair[,1], ], file= paste(outdir,"/",PE_outfile_unmapped,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
410 write.table(GE_df[-rowpair[,2], ], file=paste(outdir,"/",GE_outfile_unmapped,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
411
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
412 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
413 "<font color='blue'><h3>Download mapped unmapped data</h3></font>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
414 "<ul><li>Protein mapped data: ", '<a href="',paste(outdir,"/",PE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
415 "<li>Protein unmapped data: ", '<a href="',paste(outdir,"/",PE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
416 "<li>Transcript mapped data: ", '<a href="',paste(outdir,"/",GE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
417 "<li>Transcript unmapped data: ", '<a href="',paste(outdir,"/",GE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
418 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
419 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
420
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
421 write.table(PE_df[rowpair[,1], c(PE_idcolno,PE_expcolno)], file=paste(outdir,"/",PE_expfile,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
422 write.table(GE_df[rowpair[,2], c(GE_idcolno,GE_expcolno)], file=paste(outdir,"/",GE_expfile,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
423
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
424 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
425 "<li>Protein abundance data: ", '<a href="',paste(outdir,"/",PE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
426 "<li>Transcript abundance data: ", '<a href="',paste(outdir,"/",GE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
427 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
428
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
429 #==========================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
430 # Analysis (correlation and regression) starts here
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
431 #==========================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
432 cat("Analysis started\n---------------------------\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
433 proteome_df = PE_df[rowpair[,1], c(PE_idcolno,PE_expcolno)];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
434 transcriptome_df = GE_df[rowpair[,2], c(GE_idcolno,GE_expcolno)];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
435 nPE = nrow(proteome_df);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
436 nGE = nrow(transcriptome_df)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
437
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
438 cat("Total Protein ID: ",nPE,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
439 cat("Total Transcript ID: ",nGE,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
440
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
441 #==========================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
442 # Data summary
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
443 #==========================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
444 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
445 "<ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
446 "<li>Number of entries in Transcriptome data used for correlation: ",nPE,"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
447 "<li>Number of entries in Proteome data used for correlation: ",nGE,"</li></ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
448 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
449
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
450 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
451 # Remove entries with NA or Inf or -Inf in Transcriptome and Proteome data which will create problem in correlation analysis
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
452 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
453 totna = sum(is.na(transcriptome_df[,2]) | is.na(proteome_df[,2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
454 totinf = sum(is.infinite(transcriptome_df[,2]) | is.infinite(proteome_df[,2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
455
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
456 cat("<font color='blue'><h3>Filtering</h3></font>","Checking for NA or Inf or -Inf in either Transcriptome or Proteome data, if found, remove those entry<br>","<ul>","<li>Number of NA found: ",totna,"</li>","<li>Number of Inf or -Inf found: ",totinf,"</li></ul>",file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
457
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
458 cat("Total NA observed in either Transcriptome or Proteome data: ",totna,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
459 cat("Total Inf or -Inf observed in either Transcriptome or Proteome data: ",totinf,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
460
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
461 if(totna > 0 | totinf > 0)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
462 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
463 excludeIndices_PE = which(is.na(proteome_df[,2]) | is.infinite(proteome_df[,2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
464 excludeIndices_GE = which(is.na(transcriptome_df[,2]) | is.infinite(transcriptome_df[,2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
465 excludeIndices = which(is.na(transcriptome_df[,2]) | is.infinite(transcriptome_df[,2]) | is.na(proteome_df[,2]) | is.infinite(proteome_df[,2]));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
466
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
467 # Write excluded transcriptomics and proteomics data to file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
468 write.table(proteome_df[excludeIndices_PE,], file=paste(outdir,"/",PE_outfile_excluded_naInf,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
469 write.table(transcriptome_df[excludeIndices_GE,], file=paste(outdir,"/",GE_outfile_excluded_naInf,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
470
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
471 # Write excluded transcriptomics and proteomics data link to html file
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
472 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
473 "<ul><li>Protein excluded data with NA or Inf or -Inf: ", '<a href="',paste(outdir,"/",PE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
474 "<li>Transcript excluded data with NA or Inf or -Inf: ", '<a href="',paste(outdir,"/",GE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
475 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
476
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
477 # Keep the unexcluded entries only
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
478 transcriptome_df = transcriptome_df[-excludeIndices,];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
479 proteome_df = proteome_df[-excludeIndices,];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
480 nPE = nrow(proteome_df);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
481 nGE = nrow(transcriptome_df)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
482
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
483 cat("<font color='blue'><h3>Filtered data summary</h3></font>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
484 "Excluding entires with abundance values: NA/Inf/-Inf<br>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
485 "<ul>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
486 "<li>Number of entries in Transcriptome data remained: ",nrow(transcriptome_df),"</li>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
487 "<li>Number of entries in Proteome data remained: ",nrow(proteome_df),"</li></ul>", file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
488
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
489 cat("Excluding entires with abundance values: NA/Inf/-Inf","\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
490
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
491 cat("Total Protein ID after filtering: ",nPE,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
492 cat("Total Transcript ID after filtering: ",nGE,"\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
493
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
494 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
495
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
496 #==========================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
497 # Scaling of data
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
498 #==========================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
499 if(doscale)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
500 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
501 proteome_df[,2] = scale(proteome_df[,2], center = TRUE, scale = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
502 transcriptome_df[,2] = scale(transcriptome_df[,2], center = TRUE, scale = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
503 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
504
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
505 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
506 # Proteome and Transcriptome data summary
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
507 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
508 cat("Calculating summary of PE and GE data\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
509 s1 = summary(proteome_df[,2]);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
510 s2 = summary(transcriptome_df[,2])
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
511
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
512 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
513 "<font color='blue'><h3>Proteome data summary</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
514 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Value</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
515 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
516
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
517 for(i in 1:length(s1))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
518 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
519 cat("<tr><td>",names(s1[i]),"</td><td>", s1[i],"</td></tr>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
520 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
521 cat("</table>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
522
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
523 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
524 "<font color='blue'><h3>Transcriptome data summary</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
525 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Value</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
526 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
527
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
528 for(i in 1:length(s2))
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
529 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
530 cat("<tr><td>",names(s2[i]),"</td><td>", s2[i],"</td></tr>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
531 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
532 cat("</table>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
533
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
534 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
535 # Distribution of proteome and transcriptome abundance (Box and Density plot)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
536 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
537 cat("Generating Box and Density plot\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
538 outplot = paste("./",outdir,"/AbundancePlot.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
539 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
540 par(mfrow=c(2,2));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
541 boxplot(proteome_df[,2], ylab="Abundance", main="Proteome abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
542 plot(density(proteome_df[,2]), xlab="Protein Abundance", ylab="Density", main="Proteome abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
543 boxplot(transcriptome_df[,2], ylab="Abundance", main="Transcriptome abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
544 plot(density(transcriptome_df[,2]), xlab="Transcript Abundance", ylab="Density", main="Transcriptome abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
545 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
546
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
547 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
548 "<font color='blue'><h3>Distribution of Proteome and Transcripome abundance (Box plot and Density plot)</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
549 '<img src="AbundancePlot.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
550 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
551
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
552 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
553 # Scatter plot
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
554 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
555 cat("Generating scatter plot\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
556 outplot = paste("./",outdir,"/AbundancePlot_scatter.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
557 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
558 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
559 scatter.smooth(transcriptome_df[,2], proteome_df[,2], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
560
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
561 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
562 "<font color='blue'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
563 '<img src="AbundancePlot_scatter.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
564 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
565
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
566 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
567 # Correlation testing
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
568 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
569 cat("Estimating correlation\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
570 cor_result_pearson = cor.test(transcriptome_df[,2], proteome_df[,2], method = "pearson");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
571 cor_result_spearman = cor.test(transcriptome_df[,2], proteome_df[,2], method = "spearman");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
572 cor_result_kendall = cor.test(transcriptome_df[,2], proteome_df[,2], method = "kendall");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
573
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
574 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
575 "<font color='blue'><h3>Correlation with all data</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
576 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Method 1</th><th>Method 2</th><th>Method 3</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
577 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
578
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
579 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
580 "<tr><td>Correlation method used</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
581 "<tr><td>Correlation</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
582 "<tr><td>Pvalue</td><td>",cor_result_pearson$p.value,"</td><td>",cor_result_spearman$p.value,"</td><td>",cor_result_kendall$p.value,"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
583 file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
584 cat("</table>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
585
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
586 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
587 '<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>cook\'s distance based approach</u> to identify such influential observations.</font>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
588 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
589
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
590 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
591 # Linear Regression
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
592 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
593 cat("Fitting linear regression model\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
594 PE_GE_data = proteome_df;
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
595 PE_GE_data = cbind(PE_GE_data, transcriptome_df);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
596 colnames(PE_GE_data) = c("PE_ID","PE_abundance","GE_ID","GE_abundance");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
597
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
598 regmodel = lm(PE_abundance~GE_abundance, data=PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
599 regmodel_summary = summary(regmodel);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
600
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
601 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
602 "<font color='blue'><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
603 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
604 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Value</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
605 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
606
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
607 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
608 "<tr><td>Formula</td><td>","PE_abundance~GE_abundance","</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
609 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
610 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
611 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
612 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
613 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
614 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
615 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
616 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
617 file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
618 cat("</table>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
619
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
620 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
621 # Plotting various regression diagnostics plots
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
622 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
623 outplot1 = paste("./",outdir,"/PE_GE_modelfit.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
624 pdf(outplot1);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
625 devnum = which(unlist(sapply(2:length(.Devices), function(x){attributes(.Devices[[x]])$filepath==outplot1})))+1
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
626 print(.Devices)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
627 print(c(devnum,"+++"));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
628
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
629 regmodel_predictedy = predict(regmodel, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
630 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Linear regression with all data");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
631 points(PE_GE_data[,"GE_abundance"], regmodel_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
632
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
633 cat("Generating regression diagnostics plot\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
634 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
635 "<font color='blue'><h3>Plotting various regression diagnostics plots</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
636 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
637
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
638 outplot = paste("./",outdir,"/PE_GE_lm_1.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
639 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
640 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
641 plot(regmodel, 1, cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
642 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
643
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
644 outplot = paste("./",outdir,"/PE_GE_lm_2.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
645 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
646 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
647 plot(regmodel, 2, cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
648 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
649
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
650 outplot = paste("./",outdir,"/PE_GE_lm_3.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
651 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
652 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
653 plot(regmodel, 3, cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
654 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
655
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
656 outplot = paste("./",outdir,"/PE_GE_lm_5.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
657 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
658 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
659 plot(regmodel, 5, cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
660 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
661
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
662 outplot = paste("./",outdir,"/PE_GE_lm.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
663 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
664 plot(regmodel);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
665 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
666 regmodel_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
667
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
668
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
669 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
670 "<u><font color='brown'><h4>Residuals vs Fitted plot</h4></font></u>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
671 '<img src="PE_GE_lm_1.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
672 '<br><br>This plot checks for linear relationship assumptions. If a horizontal line is observed without any distinct patterns, it indicates a linear relationship<br>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
673 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
674
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
675 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
676 "<u><font color='brown'><h4>Normal Q-Q plot of residuals</h4></font></u>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
677 '<img src="PE_GE_lm_2.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
678 '<br><br>This plot checks whether residuals are normally distributed or not. It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.<br>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
679 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
680
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
681 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
682 "<u><font color='brown'><h4>Scale-Location (or Spread-Location) plot</h4></font></u>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
683 '<img src="PE_GE_lm_3.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
684 '<br><br>This plot checks for homogeneity of residual variance (homoscedasticity). A horizontal line observed with equally spread residual points is a good indication of homoscedasticity.<br>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
685 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
686
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
687 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
688 "<u><font color='brown'><h4>Residuals vs Leverage plot</h4></font></u>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
689 '<img src="PE_GE_lm_3.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
690 '<br><br>This plot is useful to identify any influential cases, that is outliers or extreme values that might influence the regression results upon inclusion or exclusion from the analysis.<br>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
691 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
692
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
693 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
694 # Identification of influential observations
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
695 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
696 cat("Identifying influential observations\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
697 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
698 "<font color='blue'><h3>Identify influential observations</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
699 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
700 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
701 '<p><b>Cook’s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>cook’s distance > than 4 times the mean</b> may be classified as <b>influential.</b></p>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
702 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
703
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
704 cooksd <- cooks.distance(regmodel);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
705
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
706 cat("Generating cooksd plot\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
707 outplot = paste("./",outdir,"/PE_GE_lm_cooksd.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
708 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
709 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
710 plot(cooksd, pch="*", cex=2, cex.lab=1.5,main="Influential Obs. by Cooks distance", ylab="Cook\'s distance", xlab="Observations") # plot cooks distance
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
711 abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
712 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
713 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
714
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
715 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
716 '<img src="PE_GE_lm_cooksd.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
717 '<br>In the above plot, observations above red line (4*mean cook\'s distance) are influential, marked in <b>*</b>. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
718 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
719
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
720 tempind = which(cooksd>4*mean(cooksd, na.rm=T));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
721 PE_GE_data_no_outlier = PE_GE_data[-tempind,];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
722 PE_GE_data_no_outlier$cooksd = cooksd[-tempind]
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
723 PE_GE_data_outlier = PE_GE_data[tempind,];
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
724 PE_GE_data_outlier$cooksd = cooksd[tempind]
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
725
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
726 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
727 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Value</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
728 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
729
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
730 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
731 "<tr><td>Mean cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
732 "<tr><td>Total influential observations (cook\'s distance > 4 * mean cook\'s distance)</td><td>",length(tempind),"</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
733 "<tr><td>Total influential observations (cook\'s distance > 3 * mean cook\'s distance)</td><td>",length(which(cooksd>3*mean(cooksd, na.rm=T))),"</td></tr>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
734 "</table>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
735 '<font color="brown"><h4>Top 10 influential observations (cook\'s distance > 4 * mean cook\'s distance)</h4></font>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
736 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
737
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
738 cat("Writing influential observations\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
739
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
740 outdatafile = paste("./",outdir,"/PE_GE_influential_observation.tsv", sep="", collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
741 cat('<a href="',outdatafile, '" target="_blank">Download entire list</a>',file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
742 write.table(PE_GE_data_outlier, file=outdatafile, row.names=F, sep="\t", quote=F);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
743
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
744 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
745 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>PE_ID</th><th>PE_abundance</th><th>GE_ID</th><th>GE_abundance</th><th>cooksd</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
746 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
747
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
748 for(i in 1:10)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
749 {
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
750 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
751 '<tr>','<td>',PE_GE_data_outlier[i,1],'</td>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
752 '<td>',PE_GE_data_outlier[i,2],'</td>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
753 '<td>',PE_GE_data_outlier[i,3],'</td>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
754 '<td>',PE_GE_data_outlier[i,4],'</td>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
755 '<td>',PE_GE_data_outlier[i,5],'</td></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
756 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
757 }
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
758 cat('</table>',file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
759
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
760 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
761 # Correlation with removal of outliers
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
762 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
763
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
764 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
765 # Scatter plot
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
766 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
767 outplot = paste("./",outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
768 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
769 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
770 scatter.smooth(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
771
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
772 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
773 "<font color='blue'><h3>Scatter plot between Proteome and Transcriptome Abundance, after removal of outliers/influential observations</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
774 '<img src="AbundancePlot_scatter_without_outliers.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
775 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
776
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
777 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
778 # Correlation
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
779 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
780 cat("Estimating orrelation with removal of outliers \n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
781 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
782 "<font color='blue'><h3>Correlation with removal of outliers / influential observations</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
783 '<p>We removed the influential observations and reestimated the correlation values.</p>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
784 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
785
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
786 cor_result_pearson = cor.test(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], method = "pearson");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
787 cor_result_spearman = cor.test(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], method = "spearman");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
788 cor_result_kendall = cor.test(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], method = "kendall");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
789
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
790 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
791 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Method 1</th><th>Method 2</th><th>Method 3</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
792 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
793
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
794 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
795 "<tr><td>Correlation method used</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
796 "<tr><td>Correlation</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
797 "<tr><td>Pvalue</td><td>",cor_result_pearson$p.value,"</td><td>",cor_result_spearman$p.value,"</td><td>",cor_result_kendall$p.value,"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
798 file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
799 cat("</table>\n", file = htmloutfile, append = TRUE)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
800
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
801 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
802 # Heatmap
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
803 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
804 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
805 "<font color='blue'><h3>Heatmap of PE and GE abundance values</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
806 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
807
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
808 cat("Generating heatmap plot\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
809 outplot = paste("./",outdir,"/PE_GE_heatmap.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
810 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
811 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
812 heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
813 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
814
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
815 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
816 '<img src="PE_GE_heatmap.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
817 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
818
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
819 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
820 # kmeans clustering
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
821 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
822
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
823 PE_GE_data_kdata = PE_GE_data;
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
824
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
825
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
826 k1 = kmeans(PE_GE_data_kdata[,c("PE_abundance","GE_abundance")], 5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
827 cat("Generating kmeans plot\n",file=logfile, append=T);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
828 outplot = paste("./",outdir,"/PE_GE_kmeans.png",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
829 png(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
830 par(mfrow=c(1,1));
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
831 scatter.smooth(PE_GE_data_kdata[,"GE_abundance"], PE_GE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
832
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
833 ind=which(k1$cluster==1);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
834 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="red", pch=16);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
835 ind=which(k1$cluster==2);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
836 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="green", pch=16);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
837 ind=which(k1$cluster==3);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
838 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="blue", pch=16);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
839 ind=which(k1$cluster==4);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
840 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="cyan", pch=16);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
841 ind=which(k1$cluster==5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
842 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="black", pch=16);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
843 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
844
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
845 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
846 "<font color='blue'><h3>Kmean clustering</h3></font>\nNumber of Clusters: 5<br>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
847 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
848
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
849 tempind = order(k1$cluster);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
850 tempoutfile = paste(outdir,"/","PE_GE_kmeans_clusterpoints.txt",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
851
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
852 write.table(data.frame(PE_GE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
853
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
854 cat('<a href="',tempoutfile, '" target="_blank">Download cluster list</a><br>',file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
855
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
856 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
857 '<img src="PE_GE_kmeans.png">',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
858 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
859
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
860 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
861 # Other Regression fit
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
862 #=============================================================================================================
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
863 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
864 # Linear regression with removal of outliers
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
865 regmodel_no_outlier = lm(PE_abundance~GE_abundance, data=PE_GE_data_no_outlier);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
866 regmodel_no_outlier_predictedy = predict(regmodel_no_outlier, PE_GE_data_no_outlier);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
867 outplot = paste("./",outdir,"/PE_GE_lm_without_outliers.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
868 plot(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Linear regression with removal of outliers");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
869 points(PE_GE_data_no_outlier[,"GE_abundance"], regmodel_no_outlier_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
870
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
871 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
872 plot(regmodel_no_outlier);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
873 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
874 regmodel_no_outlier_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_no_outlier$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
875
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
876 # Resistant regression (lqs / least trimmed squares method)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
877 regmodel_lqs = lqs(PE_abundance~GE_abundance, data=PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
878 regmodel_lqs_predictedy = predict(regmodel_lqs, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
879 outplot = paste("./",outdir,"/PE_GE_lqs.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
880 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
881 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Resistant regression (lqs / least trimmed squares method)");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
882 points(PE_GE_data[,"GE_abundance"], regmodel_lqs_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
883 #plot(regmodel_lqs);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
884 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
885 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
886 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Resistant regression (lqs / least trimmed squares method)");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
887 points(PE_GE_data[,"GE_abundance"], regmodel_lqs_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
888 regmodel_lqs_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_lqs$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
889
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
890 # Robust regression (rlm / Huber M-estimator method)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
891 regmodel_rlm = rlm(PE_abundance~GE_abundance, data=PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
892 regmodel_rlm_predictedy = predict(regmodel_rlm, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
893 outplot = paste("./",outdir,"/PE_GE_rlm.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
894 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Robust regression (rlm / Huber M-estimator method)");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
895 points(PE_GE_data[,"GE_abundance"], regmodel_rlm_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
896 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
897 plot(regmodel_rlm);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
898 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
899 regmodel_rlm_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_rlm$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
900
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
901 # polynomical reg
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
902 regmodel_poly2 = lm(PE_abundance ~ poly(GE_abundance, 2, raw = TRUE), data = PE_GE_data)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
903 regmodel_poly3 = lm(PE_abundance ~ poly(GE_abundance, 3, raw = TRUE), data = PE_GE_data)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
904 regmodel_poly4 = lm(PE_abundance ~ poly(GE_abundance, 4, raw = TRUE), data = PE_GE_data)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
905 regmodel_poly5 = lm(PE_abundance ~ poly(GE_abundance, 5, raw = TRUE), data = PE_GE_data)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
906 regmodel_poly6 = lm(PE_abundance ~ poly(GE_abundance, 6, raw = TRUE), data = PE_GE_data)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
907 regmodel_poly2_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_poly2$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
908 regmodel_poly3_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_poly3$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
909 regmodel_poly4_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_poly4$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
910 regmodel_poly5_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_poly5$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
911 regmodel_poly6_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_poly6$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
912 regmodel_poly2_predictedy = predict(regmodel_poly2, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
913 regmodel_poly3_predictedy = predict(regmodel_poly3, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
914 regmodel_poly4_predictedy = predict(regmodel_poly4, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
915 regmodel_poly5_predictedy = predict(regmodel_poly5, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
916 regmodel_poly6_predictedy = predict(regmodel_poly6, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
917
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
918 outplot = paste("./",outdir,"/PE_GE_poly2.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
919 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
920 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 2");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
921 points(PE_GE_data[,"GE_abundance"], regmodel_poly2_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
922 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
923 plot(regmodel_poly2);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
924 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
925
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
926 outplot = paste("./",outdir,"/PE_GE_poly3.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
927 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
928 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 3");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
929 points(PE_GE_data[,"GE_abundance"], regmodel_poly3_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
930 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
931 plot(regmodel_poly3);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
932 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
933
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
934 outplot = paste("./",outdir,"/PE_GE_poly4.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
935 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
936 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 4");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
937 points(PE_GE_data[,"GE_abundance"], regmodel_poly4_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
938 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
939 plot(regmodel_poly4);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
940 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
941
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
942 outplot = paste("./",outdir,"/PE_GE_poly5.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
943 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
944 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 5");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
945 points(PE_GE_data[,"GE_abundance"], regmodel_poly5_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
946 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
947 plot(regmodel_poly5);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
948 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
949
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
950 outplot = paste("./",outdir,"/PE_GE_poly6.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
951 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
952 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 6");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
953 points(PE_GE_data[,"GE_abundance"], regmodel_poly6_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
954 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
955 plot(regmodel_poly6);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
956 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
957
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
958 # GAM Generalized additive models
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
959 regmodel_gam <- gam(PE_abundance ~ s(GE_abundance), data = PE_GE_data)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
960 regmodel_gam_predictedy = predict(regmodel_gam, PE_GE_data);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
961
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
962 regmodel_gam_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_gam$fitted.values)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
963 outplot = paste("./",outdir,"/PE_GE_gam.pdf",sep="",collapse="");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
964 dev.set(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
965 plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Generalized additive models");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
966 points(PE_GE_data[,"GE_abundance"], regmodel_gam_predictedy, col="red");
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
967 pdf(outplot);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
968 plot(regmodel_gam,pages=1,residuals=TRUE); ## show partial residuals
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
969 plot(regmodel_gam,pages=1,seWithMean=TRUE) ## `with intercept' CIs
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
970 dev.off();
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
971 dev.off(devnum);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
972
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
973 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
974 "<font color='blue'><h3>Other regression model fitting</h3></font>\n",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
975 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
976
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
977 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
978 "<ul>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
979 <li>MAE:mean absolute error</li>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
980 <li>MSE: mean squared error</li>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
981 <li>RMSE:root mean squared error ( sqrt(MSE) )</li>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
982 <li>MAPE:mean absolute percentage error</li>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
983 </ul>
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
984 ",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
985 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
986
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
987 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
988 '<h4><a href="PE_GE_modelfit.pdf" target="_blank">Comparison of model fits</a></h4>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
989 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
990
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
991 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
992 '<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Model</th><th>MAE</th><th>MSE</th><th>RMSE</th><th>MAPE</th><th>Diagnostics Plot</th></tr>',
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
993 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
994
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
995 cat(
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
996 "<tr><td>Linear regression with all data</td><td>",regmodel_metrics[1],"</td><td>",regmodel_metrics[2],"</td><td>",regmodel_metrics[3],"</td><td>",regmodel_metrics[4],"</td><td>",'<a href="PE_GE_lm.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
997
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
998 "<tr><td>Linear regression with removal of outliers</td><td>",regmodel_no_outlier_metrics[1],"</td><td>",regmodel_no_outlier_metrics[2],"</td><td>",regmodel_no_outlier_metrics[3],"</td><td>",regmodel_no_outlier_metrics[4],"</td><td>",'<a href="PE_GE_lm_without_outliers.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
999
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1000 "<tr><td>Resistant regression (lqs / least trimmed squares method)</td><td>",regmodel_lqs_metrics[1],"</td><td>",regmodel_lqs_metrics[2],"</td><td>",regmodel_lqs_metrics[3],"</td><td>",regmodel_lqs_metrics[4],"</td><td>", '<a href="PE_GE_lqs.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1001
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1002 "<tr><td>Robust regression (rlm / Huber M-estimator method)</td><td>",regmodel_rlm_metrics[1],"</td><td>",regmodel_rlm_metrics[2],"</td><td>",regmodel_rlm_metrics[3],"</td><td>",regmodel_rlm_metrics[4],"</td><td>",'<a href="PE_GE_rlm.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1003
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1004
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1005 "<tr><td>Polynomial regression with degree 2</td><td>",regmodel_poly2_metrics[1],"</td><td>",regmodel_poly2_metrics[2],"</td><td>",regmodel_poly2_metrics[3],"</td><td>",regmodel_poly2_metrics[4],"</td><td>",'<a href="PE_GE_poly2.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1006
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1007 "<tr><td>Polynomial regression with degree 3</td><td>",regmodel_poly3_metrics[1],"</td><td>",regmodel_poly3_metrics[2],"</td><td>",regmodel_poly3_metrics[3],"</td><td>",regmodel_poly3_metrics[4],"</td><td>",'<a href="PE_GE_poly3.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1008
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1009 "<tr><td>Polynomial regression with degree 4</td><td>",regmodel_poly4_metrics[1],"</td><td>",regmodel_poly4_metrics[2],"</td><td>",regmodel_poly4_metrics[3],"</td><td>",regmodel_poly4_metrics[4],"</td><td>",'<a href="PE_GE_poly4.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1010
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1011 "<tr><td>Polynomial regression with degree 5</td><td>",regmodel_poly5_metrics[1],"</td><td>",regmodel_poly5_metrics[2],"</td><td>",regmodel_poly5_metrics[3],"</td><td>",regmodel_poly5_metrics[4],"</td><td>",'<a href="PE_GE_poly5.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1012
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1013 "<tr><td>Polynomial regression with degree 6</td><td>",regmodel_poly6_metrics[1],"</td><td>",regmodel_poly6_metrics[2],"</td><td>",regmodel_poly6_metrics[3],"</td><td>",regmodel_poly6_metrics[4],"</td><td>",'<a href="PE_GE_poly6.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1014
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1015 "<tr><td>Generalized additive models</td><td>",regmodel_gam_metrics[1],"</td><td>",regmodel_gam_metrics[2],"</td><td>",regmodel_gam_metrics[3],"</td><td>",regmodel_gam_metrics[4],"</td><td>",'<a href="PE_GE_gam.pdf" target="_blank">Link</a>',"</td></tr>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1016
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1017 "</table>",
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1018 file = htmloutfile, append = TRUE);
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1019
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1020
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1021 # Warning On
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1022 options(warn = oldw)
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1023
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1024
fc89f8c3b777 planemo upload
pravs
parents:
diff changeset
1025