diff RM_html_report.R @ 0:a4cd8608ef6b draft

Uploaded
author petr-novak
date Mon, 01 Apr 2019 07:56:36 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/RM_html_report.R	Mon Apr 01 07:56:36 2019 -0400
@@ -0,0 +1,169 @@
+#!/usr/bin/env Rscript
+### this script is expected to run from clustering directory! ######
+
+## assumes that the file RM-custom_output_tablesummary.csv exists in the current working directory
+
+suppressPackageStartupMessages(library(R2HTML))
+######################################################################################
+# Opening part of the report page: the <html>/<head> section together with the
+# embedded style sheet that formats the summary table.
+# Every CSS declaration has to end with ';' - a missing one makes the browser
+# drop that declaration and the one following it.
+htmlheader="
+		<html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">
+		<head>
+		<title> Clustering summary </title>
+		<style>
+		<!--
+		table { background:#FFFFFF;
+		border:1px solid gray;
+		border-collapse:collapse;
+		color:#fff;
+		font:normal 10px verdana, arial, helvetica, sans-serif;
+		}
+		caption { border:1px solid #5C443A;
+		color:#5C443A;
+		font-weight:bold;
+		font-size:20pt;
+		padding:6px 4px 8px 0px;
+		text-align:center;
+		
+		}
+		td, th { color:#363636;
+		padding:.4em;
+		}
+		tr { border:1px dotted gray;
+		}
+		thead th, tfoot th { background:#5C443A;
+		color:#FFFFFF;
+		padding:3px 10px 3px 10px;
+		text-align:left;
+		text-transform:uppercase;
+		}
+		tbody td a { color:#3636FF;
+		text-decoration:underline;
+		}
+		tbody td a:visited { color:gray;
+		text-decoration:line-through;
+		}
+		tbody td a:hover { text-decoration:underline;
+		}
+		tbody th a { color:#3636FF;
+		font-weight:normal;
+		text-decoration:none;
+		}
+		tbody th a:hover { color:#363636;
+		}
+		tbody td+td+td+td a { background-image:url('bullet_blue.png');
+		background-position:left center;
+		background-repeat:no-repeat;
+		color:#FFFFFF;
+		padding-left:15px;
+		}
+		tbody td+td+td+td a:visited { background-image:url('bullet_white.png');
+		background-position:left center;
+		background-repeat:no-repeat;
+		}
+		tbody th, tbody td { text-align:left;
+		vertical-align:top;
+		}
+		tfoot td { background:#5C443A;
+		color:#FFFFFF;
+		padding-top:3px;
+		}
+		.odd { background:#fff;
+		}
+		tbody tr:hover { background:#EEEEEE;
+		border:1px solid #03476F;
+		color:#000000;
+		}
+		-->
+		</style>
+		
+		</head>
+		
+		"
+######################################################################################
+######################################################################################
+
+
+
+# ---- per-cluster summary of the RepeatMasker hits ------------------------------
+# The input table holds three consecutive rows per cluster. Only the first row
+# (hit counts, plus the cluster totals in All_Reads_Length / All_Reads_Number)
+# and the second row (the same hits in percent) are used here.
+RM=read.table("RM-custom_output_tablesummary.csv",sep="\t",header=TRUE,as.is=TRUE,check.names=FALSE)
+
+count_rows=c(TRUE,FALSE,FALSE)    # recycled over all rows: 1st row of each triplet
+percent_rows=c(FALSE,TRUE,FALSE)  # recycled over all rows: 2nd row of each triplet
+
+# cluster totals, one row per cluster
+RM2=data.frame(
+		'total length [bp]'=RM$All_Reads_Length[count_rows],
+		'number of reads'=RM$All_Reads_Number[count_rows],
+		check.names=FALSE,stringsAsFactors=FALSE
+)
+n_clusters=nrow(RM2)
+# seq_len() stays correct for an empty table (1:0 would give c(1,0))
+RM2=cbind(cluster=paste("CL",seq_len(n_clusters),sep=''),RM2)
+
+# one HTML string per cluster listing its hits; stays "" when there are none
+RMstring=rep("",n_clusters)
+
+if (ncol(RM)>3 && n_clusters>0){  # at least one repeat class column and one cluster
+	RMpart1=RM[count_rows,-c(1:3),drop=FALSE]   # counts
+	RMpart2=RM[percent_rows,-c(1:3),drop=FALSE] # percent
+
+	# text for every cluster / repeat class combination
+	RMjoined=list()
+	for (repeat_class in colnames(RMpart1)){
+		RMjoined[[repeat_class]]=paste(RMpart1[,repeat_class],"hits, ",signif(RMpart2[,repeat_class],3),"%",sep='')
+	}
+	# NOTE(review): as.data.frame() appears to sanitise the column names here
+	# (default check.names), so the labels shown in the report can differ from
+	# the header of the input file - confirm that this is intended.
+	hits=as.data.frame(RMjoined,stringsAsFactors=FALSE)
+	hit_labels=colnames(hits)
+
+	##### formatting for html output: #####
+	#######################################
+	# repeat classes whose percentage exceeds 3 are printed in bold
+	bold=RMpart2>3
+	for (j in seq_along(hits)){
+		hits[,j]=ifelse(bold[,j],paste("<b>",hits[,j],"</b>",sep=''),hits[,j])
+	}
+
+	# join the hits of each cluster into one string
+	for (i in seq_len(n_clusters)){
+		# plain vector of percentages: order() is not meant to be called on a
+		# data frame (recent R versions warn about it)
+		percent=unlist(RMpart2[i,],use.names=FALSE)
+		entry=paste(hit_labels," (",unlist(hits[i,],use.names=FALSE),")",sep='')
+
+		# keep only the classes with hits, largest percentage first
+		x=ifelse(percent>0,entry,"")
+		x=x[order(percent,decreasing=TRUE)]
+		x=x[x!='']
+
+		cell=paste(x,collapse="<br />")
+		if (nchar(cell)>240){
+			# shorten long lists by dropping whole entries, so that no HTML tag
+			# is cut in half; joined_len is the length of the joined text after
+			# each successive entry
+			joined_len=cumsum(nchar(x)+nchar("<br />"))-nchar("<br />")
+			cell=paste(c(x[joined_len<=220],"......"),collapse="<br />")
+		}
+		RMstring[i]=cell
+	}
+}
+
+
+# final report table: cluster id, cluster totals and the hit summary
+RM2=data.frame(RM2,'Repeat Masker'=RMstring,check.names=FALSE)
+
+
+##################################################################################################
+####################                              HTML output                                #####
+##################################################################################################
+
+
+# The only command line argument is the path of the HTML file to create.
+# Fail early with a clear message instead of passing NA on as a file name.
+args=commandArgs(TRUE)
+if (length(args)<1 || is.na(args[1]) || !nzchar(args[1])){
+	stop("Usage: RM_html_report.R <output.html>",call.=FALSE)
+}
+htmlout=args[1]  # full absolute path
+
+# cat() creates (or overwrites) the file with the page header; the R2HTML
+# calls below then add the title, the table and the closing part of the page
+cat(htmlheader,file=htmlout)
+
+HTML.title("RepeatMasker search against custom database",file=htmlout,HR=1)
+
+HTML(RM2,file=htmlout,align='left',caption="",captionalign='')
+HTMLEndFile(htmlout)
+