#!/usr/bin/env Rscript
### This script is expected to be run from the clustering directory! ######

## Assumes the file RM-custom_output_tablesummary.csv is present in the
## working directory.

suppressPackageStartupMessages(library(R2HTML))
######################################################################################
# htmlheader: static opening markup of the HTML report.
#
# A single character string holding the <html> and <head> elements: the page
# title and an embedded stylesheet for the summary table. It is written
# verbatim at the start of the output file.
#
# Fix relative to the earlier text: the `caption` rule's `font-size:20pt`
# declaration was missing its terminating semicolon, so it merged with the
# following `padding` declaration and CSS parsers discarded both.
htmlheader="
<html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">
<head>
<title> Clustering summary </title>
<style>
<!--
table { background:#FFFFFF;
border:1px solid gray;
border-collapse:collapse;
color:#fff;
font:normal 10px verdana, arial, helvetica, sans-serif;
}
caption { border:1px solid #5C443A;
color:#5C443A;
font-weight:bold;
font-size:20pt;
padding:6px 4px 8px 0px;
text-align:center;

}
td, th { color:#363636;
padding:.4em;
}
tr { border:1px dotted gray;
}
thead th, tfoot th { background:#5C443A;
color:#FFFFFF;
padding:3px 10px 3px 10px;
text-align:left;
text-transform:uppercase;
}
tbody td a { color:#3636FF;
text-decoration:underline;
}
tbody td a:visited { color:gray;
text-decoration:line-through;
}
tbody td a:hover { text-decoration:underline;
}
tbody th a { color:#3636FF;
font-weight:normal;
text-decoration:none;
}
tbody th a:hover { color:#363636;
}
tbody td+td+td+td a { background-image:url('bullet_blue.png');
background-position:left center;
background-repeat:no-repeat;
color:#FFFFFF;
padding-left:15px;
}
tbody td+td+td+td a:visited { background-image:url('bullet_white.png');
background-position:left center;
background-repeat:no-repeat;
}
tbody th, tbody td { text-align:left;
vertical-align:top;
}
tfoot td { background:#5C443A;
color:#FFFFFF;
padding-top:3px;
}
.odd { background:#fff;
}
tbody tr:hover { background:#EEEEEE;
border:1px solid #03476F;
color:#000000;
}
-->
</style>

</head>

"
######################################################################################
######################################################################################

# ---- Load the RepeatMasker summary --------------------------------------------------
# Tab-separated table read from the working directory. Column names are kept
# exactly as written in the header line (check.names = FALSE) and text columns
# stay character (as.is = TRUE).
RM = read.table("RM-custom_output_tablesummary.csv", sep = "\t", header = TRUE,
                as.is = TRUE, check.names = FALSE)

# Placeholder kept from the original script (presumably the total number of
# reads used for clustering -- TODO confirm). It is NA here, so the
# genome-proportion columns that used to be derived from it were always NA,
# and they were dropped before the report was written; they are therefore
# no longer computed.
N = NA

# ---- Split the table into counts and percentages ------------------------------------
# Rows are consumed in groups of three: the first row of each group holds the
# hit counts and the second row the same hits as percentages; the third row is
# not used here. The first three columns are excluded from the hit columns.
row_in_group = seq_len(nrow(RM)) %% 3
is_count_row = row_in_group == 1
is_percent_row = row_in_group == 2

RMpart1 = RM[is_count_row, -c(1:3), drop = FALSE]    # counts
RMpart2 = RM[is_percent_row, -c(1:3), drop = FALSE]  # percent

if (nrow(RMpart1) != nrow(RMpart2)) {
  stop("RM-custom_output_tablesummary.csv: every count row must be followed ",
       "by a percent row", call. = FALSE)
}
n_clusters = nrow(RMpart1)

# One row per cluster, labelled CL1, CL2, ... in file order.
# sprintf() (unlike paste()) yields an empty vector when there are no clusters.
RM2 = data.frame(
  cluster = sprintf("CL%d", seq_len(n_clusters)),
  "total length [bp]" = RM$All_Reads_Length[is_count_row],
  "number of reads" = RM$All_Reads_Number[is_count_row],
  check.names = FALSE, stringsAsFactors = FALSE
)

# Build the "Repeat Masker" cell text for one cluster.
#
# class_names: names of the hit columns (taken verbatim from the input header).
# labels:      formatted "<count>hits, <percent>%" text, one per hit column.
# pct:         numeric percentages, one per hit column.
# max_chars:   joined text longer than this is shortened.
# keep_chars:  length budget for the entries kept when shortening.
#
# Returns one string: the entries with a percentage above zero, sorted by
# decreasing percentage and separated by "<br />". Entries whose percentage
# is NA are left out.
rm_cluster_hit_summary = function(class_names, labels, pct,
                                  max_chars = 240, keep_chars = 220) {
  separator = "<br />"
  entries = paste0(class_names, " (", labels, ")")
  has_hit = !is.na(pct) & pct > 0
  # Rank on a plain numeric vector: calling order() on a data-frame row is
  # rejected (or warned about) by newer R releases ("cannot xtfrm data frames").
  ranking = order(pct, decreasing = TRUE)
  entries = entries[ranking[has_hit[ranking]]]
  joined = paste(entries, collapse = separator)
  if (nchar(joined) > max_chars) {
    # Shorten at entry boundaries so that no <b> or <br /> tag is cut in half.
    # running[k] is the length of the first k entries joined by the separator.
    running = cumsum(nchar(entries) + nchar(separator)) - nchar(separator)
    n_keep = max(1L, sum(running <= keep_chars))
    joined = paste0(paste(entries[seq_len(n_keep)], collapse = separator),
                    separator, "......")
  }
  joined
}

if (ncol(RM) > 3) {  # not empty: there is at least one hit column
  # Per hit column: "<count>hits, <percent>%", wrapped in <b> when the
  # percentage exceeds 3.
  hit_labels = Map(function(n_hits, pct) {
    label = paste0(n_hits, "hits, ", signif(pct, 3), "%")
    ifelse(pct > 3, paste0("<b>", label, "</b>"), label)
  }, RMpart1, RMpart2)

  # clusters x hit-columns matrices, so that row i describes cluster i
  label_matrix = do.call(cbind, unname(hit_labels))
  percent_matrix = as.matrix(RMpart2)
  # Names come straight from the input header; they are not passed through
  # make.names(), so e.g. "A/B" is reported as "A/B" rather than "A.B".
  class_names = colnames(RMpart1)

  # join hits to one column
  RMstring = vapply(seq_len(n_clusters), function(i) {
    rm_cluster_hit_summary(class_names, label_matrix[i, ], percent_matrix[i, ])
  }, character(1))
} else {  # no RM hits
  RMstring = rep("", n_clusters)
}

# Final report table: cluster id, total length, number of reads, hit summary.
RM2 = data.frame(RM2, "Repeat Masker" = RMstring,
                 check.names = FALSE, stringsAsFactors = FALSE)

##################################################################################################
####################                      HTML output                                        #####
##################################################################################################

# Destination of the report is the first trailing command-line argument
# (expected to be a full absolute path).
cli_args = commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 1 || is.na(cli_args[1]) || !nzchar(cli_args[1])) {
  # Without this check a missing argument leaves the path NA and the script
  # fails inside cat() with an unrelated-looking error.
  stop("missing argument: path of the HTML file to write", call. = FALSE)
}
htmlout = cli_args[1]

# cat() without append starts the file afresh with the static header; the
# R2HTML calls then write the title, the summary table and the closing markup
# to the same file.
cat(htmlheader, file = htmlout)

HTML.title("RepeatMasker search against custom database", file = htmlout, HR = 1)

HTML(RM2, file = htmlout, align = 'left', caption = "", captionalign = '')
HTMLEndFile(htmlout)