comparison goseq.r @ 18:5fb82111ec62 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author mvdbeek
date Fri, 26 Feb 2016 11:42:43 -0500
parents 1b03f6232900
children 9442d1bf6d93
comparison
equal deleted inserted replaced
17:1b03f6232900 18:5fb82111ec62
1 sink(stdout(), type = "message") 1 sink(stdout(), type = "message")
2 library(goseq) 2 suppressWarnings(suppressMessages(library(goseq)))
3 library(optparse) 3 suppressWarnings(suppressMessages(library(optparse)))
4 4
5 option_list <- list( 5 option_list <- list(
6 make_option(c("-d", "--dge_file"), type="character", help="Path to file with differential gene expression result"), 6 make_option(c("-d", "--dge_file"), type="character", help="Path to file with differential gene expression result"),
7 make_option(c("-w","--wallenius_tab"), type="character", help="Path to output file with P-values estimated using wallenius distribution."), 7 make_option(c("-w","--wallenius_tab"), type="character", help="Path to output file with P-values estimated using wallenius distribution."),
8 make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using wallenius distribution."), 8 make_option(c("-s","--sampling_tab"), type="character", default=FALSE, help="Path to output file with P-values estimated using wallenius distribution."),
32 nobias_tab = args$nobias_tab 32 nobias_tab = args$nobias_tab
33 length_bias_plot = args$length_bias_plot 33 length_bias_plot = args$length_bias_plot
34 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot 34 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot
35 repcnt = args$repcnt 35 repcnt = args$repcnt
36 36
37
38 # format DE genes into vector suitable for use with goseq 37 # format DE genes into vector suitable for use with goseq
39 dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE) 38 dge_table = read.delim(dge_file, header = TRUE, sep="\t", check.names = FALSE)
40 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff) 39 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff)
41 names(genes) = dge_table[,1] # Assuming first row contains gene names 40 names(genes) = dge_table[,1] # Assuming first row contains gene names
42 41
43 # Get gene lengths 42 # Get gene lengths
44 43 if (length_file !=FALSE) {
45 if (length_file) {
46 length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE, row.names=TRUE) 44 length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE, row.names=TRUE)
47 gene_lengths = length_table[names(genes),]$length 45 gene_lengths = length_table[names(genes),]$length
48 } else { 46 } else {
49 gene_lengths = getlength(names(genes), genome, gene_id) 47 gene_lengths = getlength(names(genes), genome, gene_id)
50 } 48 }
51 49
52 # Estimate PWF 50 # Estimate PWF
53 51
54 pdf(length_bias_plot) 52 pdf(length_bias_plot)
55 pwf=nullp(genes, genome, gene_id, gene_lengths) 53 pwf=nullp(genes, genome, gene_id, gene_lengths)
56 dev.off() 54 message = dev.off()
57 # wallenius approximation of p-values 55 # wallenius approximation of p-values
58 GO.wall=goseq(pwf, genome, gene_id) 56 GO.wall=goseq(pwf, genome, gene_id)
59 57
60 GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric") 58 GO.nobias=goseq(pwf, genome, gene_id, method="Hypergeometric")
61 59
66 pdf(sample_vs_wallenius_plot) 64 pdf(sample_vs_wallenius_plot)
67 plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]), 65 plot(log10(GO.wall[,2]), log10(GO.samp[match(GO.samp[,1],GO.wall[,1]),2]),
68 xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)", 66 xlab="log10(Wallenius p-values)",ylab="log10(Sampling p-values)",
69 xlim=c(-3,0)) 67 xlim=c(-3,0))
70 abline(0,1,col=3,lty=2) 68 abline(0,1,col=3,lty=2)
71 dev.off() 69 message = dev.off()
72 write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE) 70 write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE)
73 } 71 }
74 72
75 73
76 write.table(GO.wall, wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE) 74 write.table(GO.wall, wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE)