comparison RM_html_report.R @ 0:a4cd8608ef6b draft

Uploaded
author petr-novak
date Mon, 01 Apr 2019 07:56:36 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a4cd8608ef6b
1 #!/usr/bin/env Rscript
2 ### this script is expected to run from clustering directory! ######
3
4 ## assume RM-custom_output_tablesummary.csv file in active directory
5
6 suppressPackageStartupMessages(library(R2HTML))
7 ######################################################################################
# Static HTML preamble for the report: opens <html>, sets the page title and
# embeds the CSS that styles the summary table. The string ends right after
# </head>; the rest of the page is appended by the output section at the end
# of this script.
# Every CSS declaration must end with ';' -- a missing one makes the browser
# discard both that declaration and the one following it.
htmlheader <- "
<html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">
<head>
<title> Clustering summary </title>
<style>
<!--
table { background:#FFFFFF;
border:1px solid gray;
border-collapse:collapse;
color:#fff;
font:normal 10px verdana, arial, helvetica, sans-serif;
}
caption { border:1px solid #5C443A;
color:#5C443A;
font-weight:bold;
font-size:20pt;
padding:6px 4px 8px 0px;
text-align:center;

}
td, th { color:#363636;
padding:.4em;
}
tr { border:1px dotted gray;
}
thead th, tfoot th { background:#5C443A;
color:#FFFFFF;
padding:3px 10px 3px 10px;
text-align:left;
text-transform:uppercase;
}
tbody td a { color:#3636FF;
text-decoration:underline;
}
tbody td a:visited { color:gray;
text-decoration:line-through;
}
tbody td a:hover { text-decoration:underline;
}
tbody th a { color:#3636FF;
font-weight:normal;
text-decoration:none;
}
tbody th a:hover { color:#363636;
}
tbody td+td+td+td a { background-image:url('bullet_blue.png');
background-position:left center;
background-repeat:no-repeat;
color:#FFFFFF;
padding-left:15px;
}
tbody td+td+td+td a:visited { background-image:url('bullet_white.png');
background-position:left center;
background-repeat:no-repeat;
}
tbody th, tbody td { text-align:left;
vertical-align:top;
}
tfoot td { background:#5C443A;
color:#FFFFFF;
padding-top:3px;
}
.odd { background:#fff;
}
tbody tr:hover { background:#EEEEEE;
border:1px solid #03476F;
color:#000000;
}
-->
</style>

</head>

"
82 ######################################################################################
83 ######################################################################################
84
85
86
# ---- Build the per-cluster RepeatMasker summary table (RM2) ----
#
# Input: RM-custom_output_tablesummary.csv (tab separated) in the current
# directory. Rows are consumed in groups of three: the first row of each
# group supplies the read totals and the hit counts, the second row the hit
# percentages; the third row is not used here. The first three columns are
# skipped when collecting hits; every further column is treated as one
# repeat class.
#
# Result: data frame RM2 with the columns
#   cluster, 'total length [bp]', 'number of reads', 'Repeat Masker'
# where 'Repeat Masker' is an HTML fragment listing the classes with a
# percentage > 0, highest percentage first, one per line (<br />).
# (The genome-proportion columns computed by earlier revisions were always
# NA and never reached the report, so they are no longer computed.)

# Join the per-class entries of one cluster into a single HTML cell.
# Lists longer than `max_chars` are cut after the last complete entry that
# fits into `keep_chars` and marked with "......"; cutting on entry
# boundaries guarantees that no <b> or <br /> tag is split or left open.
join_hits <- function(entries, max_chars = 240, keep_chars = 220) {
  if (length(entries) == 0) {
    return("")
  }
  separator <- "<br />"
  joined <- paste(entries, collapse = separator)
  if (nchar(joined) <= max_chars) {
    return(joined)
  }
  # Length of the joined string if only the first k entries were kept.
  joined_len <- cumsum(nchar(entries)) +
    nchar(separator) * (seq_along(entries) - 1)
  # Always keep at least the first entry, even if it alone is too long.
  n_keep <- max(1, sum(joined_len <= keep_chars))
  paste0(paste(entries[seq_len(n_keep)], collapse = separator), "......")
}

RM <- read.table("RM-custom_output_tablesummary.csv", sep = "\t",
                 header = TRUE, as.is = TRUE, check.names = FALSE)

# Logical masks are recycled over the rows of RM.
count_rows <- c(TRUE, FALSE, FALSE)    # 1st row of every triple
percent_rows <- c(FALSE, TRUE, FALSE)  # 2nd row of every triple

RM2 <- data.frame(
  "total length [bp]" = RM$All_Reads_Length[count_rows],
  "number of reads" = RM$All_Reads_Number[count_rows],
  check.names = FALSE, stringsAsFactors = FALSE
)

RMpart1 <- RM[count_rows, -c(1:3), drop = FALSE]    # hit counts
RMpart2 <- RM[percent_rows, -c(1:3), drop = FALSE]  # hit percentages

# One HTML cell per cluster; stays "" for clusters without any hit and for
# input files that contain no repeat-class columns at all.
RMstring <- character(nrow(RM2))

if (ncol(RM) > 3) {
  # Use the names exactly as they appear in the input file. Routing them
  # through as.data.frame() would rewrite e.g. "LTR/Gypsy" to "LTR.Gypsy".
  hit_names <- colnames(RMpart1)

  # Per class (column): "<count>hits, <percent>%", in bold when the
  # percentage exceeds 3. Built column-wise so that every count keeps the
  # text form of its own column type.
  hit_labels <- lapply(seq_along(hit_names), function(j) {
    label <- paste0(RMpart1[[j]], "hits, ", signif(RMpart2[[j]], 3), "%")
    ifelse(RMpart2[[j]] > 3, paste0("<b>", label, "</b>"), label)
  })
  hit_labels <- matrix(unlist(hit_labels),
                       nrow = nrow(RM2), ncol = length(hit_names))
  hit_percent <- as.matrix(RMpart2)

  for (i in seq_len(nrow(RM2))) {
    pct <- hit_percent[i, ]
    shown <- which(pct > 0)  # which() also drops NA percentages
    if (length(shown) == 0) {
      next
    }
    # Sort a plain numeric vector (not a one-row data frame).
    shown <- shown[order(pct[shown], decreasing = TRUE)]
    RMstring[i] <- join_hits(
      paste0(hit_names[shown], " (", hit_labels[i, shown], ")")
    )
  }
}

RM2 <- data.frame(
  cluster = paste0("CL", seq_len(nrow(RM2))),
  RM2,
  "Repeat Masker" = RMstring,
  check.names = FALSE
)
154
155
156 ##################################################################################################
157 #################### HTML output #####
158 ##################################################################################################
159
160
# The report path is the first command-line argument (expected to be a full
# absolute path). Fail early with a usage message instead of passing NA on
# to cat().
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1 || is.na(args[1]) || !nzchar(args[1])) {
  stop("usage: RM_html_report.R <output.html>", call. = FALSE)
}
htmlout <- args[1]

# cat() starts the file with the static header; the R2HTML calls then add
# the title, the summary table and the end-of-file markup to the same file.
cat(htmlheader, file = htmlout)

HTML.title("RepeatMasker search against custom database",
           file = htmlout, HR = 1)

HTML(RM2, file = htmlout, align = "left", caption = "", captionalign = "")
HTMLEndFile(htmlout)
169