Mercurial > repos > daumsoft > htseq_count_normalized2
comparison expression_calculator/expression.r @ 0:4f85170b4fd3 draft default tip
Uploaded
author | daumsoft |
---|---|
date | Wed, 18 Apr 2018 04:24:45 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f85170b4fd3 |
---|---|
1 args <- commandArgs(TRUE) | |
2 | |
3 if (length(args)!=1) { | |
4 stop("USAGE: Rscript expression.r htseq-count.out(mapping count)", call.=FALSE) | |
5 } | |
6 | |
7 | |
8 htseq_count.result=args[1] | |
9 expression.out<-paste("sample",".txt",sep="") | |
10 expression.out<-paste("./out_tmp/",expression.out,sep="") | |
11 #gene_region.dat="$GALAXY_HOME/package/DAUMSOFT/RNA-seq/Normalization/gene_hg38_region.txt"; | |
12 #gene_symbol.dat="$GALAXY_HOME/package/DAUMSOFT/RNA-seq/Normalization/gene_symbol_20170220.txt"; | |
13 gene_region.dat="/storage/home/galaxy/package/DAUMSOFT/RNA-seq/Normalization/gene_hg38_region.txt"; | |
14 gene_symbol.dat="/storage/home/galaxy/package/DAUMSOFT/RNA-seq/Normalization/gene_symbol_20170220.txt"; | |
15 | |
16 gene_region<-NULL | |
17 gene_len<-NULL | |
18 htseq_count<-NULL | |
19 expression<-NULL | |
20 expression_FPKM<-NULL | |
21 expression_FPKM_sort<-NULL | |
22 expression_FPKM_sort.tmp<-NULL | |
23 Quartile75<-NULL | |
24 Quartile75.tmp<-NULL | |
25 expression_FPKM_UQ<-NULL | |
26 expression_FPKM_UQ.tmp<-NULL | |
27 expression.tmp<-NULL | |
28 expression.tmp.1<-NULL | |
29 hgnc_swissprot<-NULL | |
30 gene_symbol<-NULL | |
31 | |
32 gene_region<-read.delim(gene_region.dat,header=F,sep="\t") | |
33 gene_len<-cbind(gene_region[1],gene_region[2]) | |
34 htseq_count<-read.delim(htseq_count.result,header=F,sep="\t") | |
35 colnames(gene_len)<- c("GeneName","Len") | |
36 colnames(htseq_count)<- c("GeneName","Count") | |
37 htseq_count<-htseq_count[ !grepl("__", htseq_count$GeneName) ,] | |
38 total_count<- sum(htseq_count$Count) | |
39 oneB<-10^9 | |
40 expression <- merge(gene_len,htseq_count,by="GeneName") | |
41 | |
42 expression$FPKM<-0 | |
43 expression[,2:4] <- (sapply(expression[,2:4], as.double)) | |
44 expression$FPKM<- (oneB*expression$Count)/(total_count*expression$Len) | |
45 | |
46 expression$TPM<-0 | |
47 expression[,2:5] <- (sapply(expression[,2:5], as.double)) | |
48 expression$TPM<-(expression$FPKM / sum(expression$FPKM) ) * (10^6) | |
49 | |
50 expression$FPKM_UQ<-0 | |
51 expression[,2:6] <- (sapply(expression[,2:6], as.double)) | |
52 expression_FPKM<-cbind(expression[1],expression[4]) | |
53 expression_FPKM[,2] <- (sapply(expression_FPKM[,2], as.double)) | |
54 #expression_FPKM_sort.tmp<-expression_FPKM[which(expression_FPKM$FPKM > 0),] | |
55 expression_FPKM_sort.tmp<-expression_FPKM | |
56 expression_FPKM_sort<-expression_FPKM_sort.tmp[order(expression_FPKM_sort.tmp$FPKM) , ] | |
57 | |
58 Quartile75.tmp<-t(quantile(expression_FPKM_sort$FPKM)) | |
59 Quartile75<-Quartile75.tmp[4] | |
60 | |
61 #expression_FPKM_UQ.tmp<-expression_FPKM[2]/Quartile75 | |
62 expression_FPKM_UQ.tmp<-(oneB*expression$Count)/(Quartile75*expression$Len) | |
63 expression_FPKM_UQ<-cbind(expression_FPKM[1],expression_FPKM_UQ.tmp) | |
64 colnames(expression_FPKM_UQ)<-c("GeneName","FPKM_UQ") | |
65 | |
66 expression$FPKM_UQ<-expression_FPKM_UQ$FPKM_UQ | |
67 | |
68 colnames(expression) <- c("ENSEMBL_GENE_ID","GENE_LENGTH","HTSEQ_COUNT","FPKM","TPM","FPKM_UQ") | |
69 colnames(gene_region)<-c("ENSEMBL_GENE_ID","GENE_LENGTH","CHROMOSOME","START","END","STRAND") | |
70 | |
71 | |
72 expression.tmp<-NULL | |
73 expression.tmp<-merge(expression,gene_region[-2],by="ENSEMBL_GENE_ID") | |
74 expression<-NULL | |
75 expression<-expression.tmp | |
76 | |
77 gene_symbol<-read.delim(gene_symbol.dat,header=T,sep="\t") | |
78 ensembl_gene_id.split<-strsplit(as.character(expression$ENSEMBL_GENE_ID),'.',fixed=TRUE) | |
79 ensembl_gene_id.split.1.1<-as.data.frame(unlist(ensembl_gene_id.split)[2*(1:length(as.character(expression$ENSEMBL_GENE_ID)))-1]) | |
80 colnames(ensembl_gene_id.split.1.1)<-c("ENSEMBL_GENE_ID2") | |
81 ensembl_gene_id.split.1<-cbind(expression[1],ensembl_gene_id.split.1.1) | |
82 | |
83 expression.tmp.0<-NULL | |
84 expression.tmp.0<-merge(ensembl_gene_id.split.1,expression,by="ENSEMBL_GENE_ID") | |
85 expression.tmp.1<-NULL | |
86 expression.tmp.1<-merge(gene_symbol,expression.tmp.0,by="ENSEMBL_GENE_ID2") | |
87 expression.tmp.2<-expression.tmp.1[-1] | |
88 expression.tmp.3<-cbind(expression.tmp.2[2],expression.tmp.2[-2]) | |
89 expression<-NULL | |
90 expression<-expression.tmp.3 | |
91 | |
92 write.table(expression, expression.out, sep="\t", quote=F, row.names=F, col.names=T) | |
93 |