diff cpm_tpm_rpk.R @ 1:b74bab5157c4 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit d46436d5d73356c8803d6d97a110a2754e8a03fb
author artbio
date Tue, 05 Feb 2019 19:51:38 -0500
parents 35d032c46a4e
children 563337e780ce
line wrap: on
line diff
--- a/cpm_tpm_rpk.R	Wed Jul 25 13:05:17 2018 -0400
+++ b/cpm_tpm_rpk.R	Tue Feb 05 19:51:38 2019 -0500
@@ -3,14 +3,17 @@
   q("no")
 }
 
-# Load necessary packages (install them if it's not the case)
-requiredPackages = c('optparse')
-for (p in requiredPackages) {
-  if (!require(p, character.only = TRUE, quietly = T)) {
-    install.packages(p)
-  }
-  suppressPackageStartupMessages(suppressMessages(library(p, character.only = TRUE)))
-}
+
+# Load packages provided by the conda env; on any error print it and exit 1.
+options(show.error.messages = FALSE, # default printing off; the handler below reports
+        error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") # request en_US messages
+warnings() # print warnings recorded so far, if any
+library(optparse)
+library(ggplot2)
+library(reshape2)
+library(Rtsne)
+
 
 
 #Arguments
@@ -68,10 +71,39 @@
     default = "res.tab",
     type = 'character',
     help = "Output name [default : '%default' ]"
+  ),
+  make_option(
+    "--tsne",
+    default = FALSE,
+    type = 'logical',
+    help = "performs T-SNE [default : '%default' ]"
+  ),
+  make_option(
+    "--seed",
+    default = 42,
+    type = 'integer',
+    help = "Seed value for reproducibility [default : '%default' ]"
+  ),
+  make_option(
+    "--perp",
+    default = 5.0,
+    type = 'numeric',
+    help = "perplexity [default : '%default' ]"
+  ),
+  make_option(
+    "--theta",
+    default = 1.0,
+    type = 'numeric',
+    help = "theta [default : '%default' ]"
+  ),
+  make_option(
+    c("-D", "--tsne_out"),
+    default = "tsne.pdf",
+    type = 'character',
+    help = "T-SNE pdf [default : '%default' ]"
   )
 )
 
-
 opt = parse_args(OptionParser(option_list = option_list),
                  args = commandArgs(trailingOnly = TRUE))
 
@@ -107,7 +139,7 @@
 
 data = read.table(
   opt$data,
-  h = opt$colnames,
+  header = opt$colnames,
   row.names = 1,
   sep = opt$sep
 )
@@ -147,3 +179,38 @@
   sep = "\t"
 )
 
+## Optional t-SNE on log2(CPM + 1): one point per input column, saved as a PDF.
+if (isTRUE(opt$tsne)) {
+  df = cpm(data)
+  # Remove all-zero rows; drop = FALSE keeps two dimensions if one row/column remains.
+  df = df[rowSums(df) != 0, , drop = FALSE]
+  # Rtsne embeds the rows of its input, so transpose before the log transform.
+  tdf = log2(t(df) + 1)
+  set.seed(opt$seed) # Rtsne is stochastic; a fixed seed makes the layout reproducible
+  tsne_out <- Rtsne(tdf, perplexity = opt$perp, theta = opt$theta)
+  embedding <- as.data.frame(tsne_out$Y)
+  # Point labels: row names of the transposed table, minus the first "Class_" match.
+  embedding$Class <- as.factor(sub("Class_", "", rownames(tdf)))
+  gg_legend = theme(legend.position = "none")
+  tsne_plot <- ggplot(embedding, aes(x = V1, y = V2)) +
+    geom_point(size = 1.25, color = "red") +
+    geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = 2.5, color = "darkblue") +
+    gg_legend +
+    xlab("") +
+    ylab("") +
+    ggtitle("t-SNE of data (log2CPM transformed)")
+  # Name the plot and the filename argument; no reliance on last_plot() or partial matching.
+  ggsave(filename = opt$tsne_out, plot = tsne_plot, device = "pdf")
+}
+  
+
+
+
+
+
+
+
+
+
+
+