diff goseq.r @ 16:8ce951313688 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author mvdbeek
date Thu, 25 Feb 2016 08:49:20 -0500
parents fe71b97cc1a5
children 1b03f6232900
line wrap: on
line diff
--- a/goseq.r	Thu Feb 25 07:24:31 2016 -0500
+++ b/goseq.r	Thu Feb 25 08:49:20 2016 -0500
@@ -13,6 +13,7 @@
     make_option(c("-c", "--cutoff"), type="double",dest="p_adj_cutoff",
                 help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"),
     make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"),
+    make_option(c("-lf", "--length_file"), default=FALSE, help = "Path to "),
     make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"),
     make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file")
   )
@@ -23,6 +24,7 @@
 dge_file = args$dge_file
 p_adj_column = args$p_adj_colum
 p_adj_cutoff = args$p_adj_cutoff
+length_file = args$length_file
 genome = args$genome
 gene_id = args$gene_id
 wallenius_tab = args$wallenius_tab
@@ -38,10 +40,21 @@
 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff)
 names(genes) = dge_table[,1] # Assuming first row contains gene names
 
+# Get gene lengths
+
+if (length_file) {
+  length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE, row.names=TRUE)
+  gene_lengths = length_table[names(genes),]$length
+  } else {
+  gene_lengths = getlength(names(genes),genome, gene_id)
+  }
+
 # Estimate PWF
 
+print(gene_lengths)
+
 pdf(length_bias_plot)
-pwf=nullp(genes, genome , gene_id)
+pwf=nullp(genes, genome, gene_id, gene_lengths)
 dev.off()
 # Null dstribution wallenius
 GO.wall=goseq(pwf, genome, gene_id)
@@ -64,6 +77,8 @@
 write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE)
 write.table(GO.nobias, nobias_tab, sep="\t", row.names = FALSE, quote = FALSE)
 
+sessionInfo()
+
 # Use the following to get a list of supported genomes / gene ids
 
 # write.table(supportedGenomes(), "available_genomes.tab", row.names = FALSE, quote=FALSE)