Repository 'digital_dge'
hg clone https://toolshed.g2.bx.psu.edu/repos/fubar/digital_dge

Changeset 0:1959becd0592 (2011-09-09)
Next changeset 1:ba8c9980350b (2011-09-09)
Commit message:
Initial upload of files for DGE tool
added:
rgDGE.py
b
diff -r 000000000000 -r 1959becd0592 rgDGE.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgDGE.py Fri Sep 09 01:07:48 2011 -0400
[
b'@@ -0,0 +1,338 @@\n+# Copyright ross lazarus\n+# september 2011 \n+# all rights reserved\n+# for the Rgenetics project\n+# all rights reserved\n+# licensed to you under the terms of the LGPL as documented at http://www.gnu.org/copyleft/lesser.html\n+\n+import sys \n+import shutil \n+import subprocess \n+import os \n+import time \n+import tempfile \n+import optparse\n+\n+progname = os.path.split(sys.argv[0])[1] \n+myversion = \'V000.2 September 2011\' \n+verbose = False \n+debug = False\n+\n+\n+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> \n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> \n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> \n+<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> \n+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> \n+<title></title> \n+<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> \n+</head> \n+<body> \n+<div class="document"> \n+""" \n+galhtmlattr = """<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output %s run at %s</b><br/>""" \n+galhtmlpostfix = """</div></body></html>\\n"""\n+\n+DGESCRIPT="""\n+#### edgeR.Rscript\n+#\n+# Performs DGE on a count table containing n replicates of two conditions\n+#\n+# Parameters\n+#\n+# 1 - Output Dir\n+\n+# Writen by: S.Lunke and A.Kaspi\n+####\n+\n+\n+usage <- function(){\n+  print("#### edgeR.R", quote=F)\n+  print("", quote=F)\n+  print("Performs DGE on a count table containing n replicates of two conditions", quote=F)\n+  print("", quote=F)\n+  print("USAGE: Rscript edgeR.R <OUT_DIR> <INPUT> <TreatmentName> <ControlName> <Treatcol1,2,3> <Controlcol1,2,3>", quote=F)\n+  print("", quote=F)\n+  print(" Parameters", quote=F)\n+  print("", quote=F)\n+  print(" 1 - Output Dir", quote=F)\n+  print(" 2 - Input File", quote=F)\n+  print(" 3 - Treatment name", quote=F)\n+  print(" 4 - Treatment Columns i.e. 3,4,6", quote=F)\n+  print(" 5 - Control name", quote=F)\n+  print(" 6 - Control Columns i.e. 2,7,8", quote=F)\n+  print(" 7 - Output tabular file name", quote=F)\n+\n+\n+  print("", quote=F)\n+  print(" Interface writen by: S.Lunke and A.Kaspi", quote=F)\n+  print(" Makes extensive use of the edgeR software - Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818", quote=F)\n+  print("####", quote=F)\n+  q()\n+   }\n+\n+\n+## edgeIt runs edgeR\n+edgeIt <- function (Input,group,outputfilename) {\n+\n+\n+        ## Error handling\n+        if (length(unique(group))!=2){\n+                print("Number of conditions identified in experiment does not equal 2")\n+                q()\n+        }\n+\n+        #The workhorse  \n+        require(edgeR)\n+\n+        ## Setup DGEList object\n+        DGEList <- DGEList(Count_Matrix, group = group)\n+\n+        #Extract T v C names\n+        TName=unique(group)[1]\n+        CName=unique(group)[2]\n+\n+        # Outfile name - we need something more predictable than   \n+        # outfile <- paste(Out_Dir,"/",TName,"_Vs_",CName, sep="")\n+        # since it needs to be renamed and squirted into the history so added as a paramter\n+ \n+        # Filter all tags that have less than one read per million in half + 1 or more libraries. n = ceiling(length(colnames(DGEList))/2)+1\n+        n = ceiling(length(colnames(DGEList))/2)+1\n+        DGEList <- DGEList[rowSums(1e+06 * DGEList$counts/expandAsMatrix(DGEList$samples$lib.size, dim(DGEList)) > 1) >= n, ]\n+\n+\n+        ## Run tagwise dispersion test and DGE analysis\n+        prior.n <- ceiling(50/(length(group)-length(2)))\n+        DGEList <- calcNormFactors(DGEList)\n+        DGEList <- estimateCommonDisp(DGEList)\n+        DGEList <- estimateTagwiseDisp(DGEList, prior.n=prior.n, trend=T, grid.length=1000)\n+        DE <- exactTest(DGEList, common.disp=F)\n+  \n+        ## Normalized data RPM\n+        normData <- (1e+06 * DGEList$counts/expandAsMatrix(DGEList$samples$lib.size, dim(DGEList)))\n+        colnames(normData) <- paste( "norm",colnames(normData),sep='..b'Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)\n+        retval2 = x.wait()\n+        sto.close()\n+        retval = retval1 or retval2\n+        return retval\n+\n+    def runDGE(self):\n+        """\n+        """\n+        sto = open(self.tlog,\'w\')\n+        x = subprocess.Popen(\' \'.join(self.cl),shell=True,stdout=sto,stderr=sto,cwd=self.opts.output_dir)\n+        retval = x.wait()\n+        sto.close()\n+        flist = os.listdir(self.opts.output_dir)\n+        flist.sort()\n+        html = [galhtmlprefix % progname,]\n+        html.append(\'<h2>Galaxy DGE outputs run at %s<h2></br>Click on the images below to download high quality PDF versions</br>\\n\' % timenow())\n+        if len(flist) > 0:\n+            html.append(\'<table>\\n\')\n+            for fname in flist:\n+                dname,e = os.path.splitext(fname)\n+                if e.lower() == \'.pdf\' : # compress and make a thumbnail\n+                    thumb = \'%s.%s\' % (dname,self.thumbformat)\n+                    pdff = os.path.join(self.opts.output_dir,fname)\n+                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)\n+                    if retval == 0:\n+                        s= \'<tr><td><a href="%s"><img src="%s" title="Click to download a print quality PDF of %s" hspace="10" width="600"></a></td></tr>\' \\\n+                         % (fname,thumb,fname)\n+                    else:\n+                        dname = \'%s (thumbnail image not_found)\' % fname\n+                        s= \'<tr><td><a href="%s">%s</a></td></tr>\' % (fname,dname)\n+                    html.append(s)\n+                else:\n+                   html.append(\'<tr><td><a href="%s">%s</a></td></tr>\' % (fname,fname))\n+            html.append(\'</table>\\n\')\n+        else:\n+            html.append(\'<h2>### Error - R returned no files - please confirm that parameters are sane</h1>\')\n+        html.append(\'<h3>R log follows below</h3><hr><pre>\\n\')\n+        rlog = open(self.tlog,\'r\').readlines()\n+        html += rlog\n+        html.append(\'%s CL = %s</br>\\n\' % (self.myName,\' \'.join(sys.argv)))\n+        html.append(\'DGE.R CL = %s</br>\\n\' % (\' \'.join(self.cl)))\n+        html.append(\'</pre>\\n\')\n+        html.append(galhtmlattr % (progname,timenow()))\n+        html.append(galhtmlpostfix)\n+        htmlf = file(self.opts.output_html,\'w\')\n+        htmlf.write(\'\\n\'.join(html))\n+        htmlf.write(\'\\n\')\n+        htmlf.close()\n+  \n+\n+def main():\n+    u = """\n+    This is a Galaxy wrapper. It expects to be called by DGE.xml as:\n+    <command interpreter="python">rgDGE.py --output_html "$html_file.files_path" --input_matrix "$input1" --treatcols "$Treat_cols" --treatname "$treatment_name" \n+--ctrlcols "$Control_cols"\n+      --ctrlname "$control_name" --output_tab "$outtab" --output_html "$html_file" --output_dir "$html_file.files_path" --method "edgeR"\n+    </command>\n+    """\n+    op = optparse.OptionParser()\n+    a = op.add_option\n+    a(\'--input_matrix\',default=None)\n+    a(\'--output_tab\',default=None)\n+    a(\'--output_html\',default=None)\n+    a(\'--treatcols\',default=None)\n+    a(\'--treatname\',default=\'Treatment\')\n+    a(\'--ctrlcols\',default=None)\n+    a(\'--ctrlname\',default=\'Control\')\n+    a(\'--output_dir\',default=None)\n+    a(\'--method\',default=\'edgeR\')\n+    opts, args = op.parse_args()\n+    assert opts.input_matrix and opts.output_html,u\n+    assert os.path.isfile(opts.input_matrix),\'## DGE runner unable to open supplied input file %s\' % opts.input_matrix\n+    assert opts.treatcols,\'## DGE runner requires a comma separated list of treatment columns eg --treatcols 4,5,6\'\n+    assert opts.treatcols,\'## DGE runner requires a comma separated list of control columns eg --ctlcols 2,3,7\'\n+    myName=os.path.split(sys.argv[0])[-1]\n+    if not os.path.exists(opts.output_dir):\n+        os.makedirs(opts.output_dir)\n+    m = DGE(myName, opts=opts)\n+    retcode = m.runDGE()\n+    if retcode:\n+        sys.exit(retcode) # indicate failure to job runner\n+ \n+ \n+ \n+if __name__ == "__main__":\n+    main()\n+\n+\n+\n'