Next changeset 1:ba8c9980350b (2011-09-09) |
Commit message:
Initial upload of files for DGE tool |
added:
rgDGE.py |
b |
diff -r 000000000000 -r 1959becd0592 rgDGE.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgDGE.py Fri Sep 09 01:07:48 2011 -0400 |
[ |
b'@@ -0,0 +1,338 @@\n+# Copyright ross lazarus\n+# september 2011 \n+# all rights reserved\n+# for the Rgenetics project\n+# all rights reserved\n+# licensed to you under the terms of the LGPL as documented at http://www.gnu.org/copyleft/lesser.html\n+\n+import sys \n+import shutil \n+import subprocess \n+import os \n+import time \n+import tempfile \n+import optparse\n+\n+progname = os.path.split(sys.argv[0])[1] \n+myversion = \'V000.2 September 2011\' \n+verbose = False \n+debug = False\n+\n+\n+galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> \n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> \n+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> \n+<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> \n+<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> \n+<title></title> \n+<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> \n+</head> \n+<body> \n+<div class="document"> \n+""" \n+galhtmlattr = """<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output %s run at %s</b><br/>""" \n+galhtmlpostfix = """</div></body></html>\\n"""\n+\n+DGESCRIPT="""\n+#### edgeR.Rscript\n+#\n+# Performs DGE on a count table containing n replicates of two conditions\n+#\n+# Parameters\n+#\n+# 1 - Output Dir\n+\n+# Writen by: S.Lunke and A.Kaspi\n+####\n+\n+\n+usage <- function(){\n+ print("#### edgeR.R", quote=F)\n+ print("", quote=F)\n+ print("Performs DGE on a count table containing n replicates of two conditions", quote=F)\n+ print("", quote=F)\n+ print("USAGE: Rscript edgeR.R <OUT_DIR> <INPUT> <TreatmentName> <ControlName> <Treatcol1,2,3> <Controlcol1,2,3>", quote=F)\n+ print("", quote=F)\n+ print(" Parameters", quote=F)\n+ print("", quote=F)\n+ print(" 1 - Output Dir", quote=F)\n+ print(" 2 - Input File", quote=F)\n+ print(" 3 - Treatment name", quote=F)\n+ print(" 4 - Treatment Columns i.e. 3,4,6", quote=F)\n+ print(" 5 - Control name", quote=F)\n+ print(" 6 - Control Columns i.e. 2,7,8", quote=F)\n+ print(" 7 - Output tabular file name", quote=F)\n+\n+\n+ print("", quote=F)\n+ print(" Interface writen by: S.Lunke and A.Kaspi", quote=F)\n+ print(" Makes extensive use of the edgeR software - Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818", quote=F)\n+ print("####", quote=F)\n+ q()\n+ }\n+\n+\n+## edgeIt runs edgeR\n+edgeIt <- function (Input,group,outputfilename) {\n+\n+\n+ ## Error handling\n+ if (length(unique(group))!=2){\n+ print("Number of conditions identified in experiment does not equal 2")\n+ q()\n+ }\n+\n+ #The workhorse \n+ require(edgeR)\n+\n+ ## Setup DGEList object\n+ DGEList <- DGEList(Count_Matrix, group = group)\n+\n+ #Extract T v C names\n+ TName=unique(group)[1]\n+ CName=unique(group)[2]\n+\n+ # Outfile name - we need something more predictable than \n+ # outfile <- paste(Out_Dir,"/",TName,"_Vs_",CName, sep="")\n+ # since it needs to be renamed and squirted into the history so added as a paramter\n+ \n+ # Filter all tags that have less than one read per million in half + 1 or more libraries. n = ceiling(length(colnames(DGEList))/2)+1\n+ n = ceiling(length(colnames(DGEList))/2)+1\n+ DGEList <- DGEList[rowSums(1e+06 * DGEList$counts/expandAsMatrix(DGEList$samples$lib.size, dim(DGEList)) > 1) >= n, ]\n+\n+\n+ ## Run tagwise dispersion test and DGE analysis\n+ prior.n <- ceiling(50/(length(group)-length(2)))\n+ DGEList <- calcNormFactors(DGEList)\n+ DGEList <- estimateCommonDisp(DGEList)\n+ DGEList <- estimateTagwiseDisp(DGEList, prior.n=prior.n, trend=T, grid.length=1000)\n+ DE <- exactTest(DGEList, common.disp=F)\n+ \n+ ## Normalized data RPM\n+ normData <- (1e+06 * DGEList$counts/expandAsMatrix(DGEList$samples$lib.size, dim(DGEList)))\n+ colnames(normData) <- paste( "norm",colnames(normData),sep='..b'Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)\n+ retval2 = x.wait()\n+ sto.close()\n+ retval = retval1 or retval2\n+ return retval\n+\n+ def runDGE(self):\n+ """\n+ """\n+ sto = open(self.tlog,\'w\')\n+ x = subprocess.Popen(\' \'.join(self.cl),shell=True,stdout=sto,stderr=sto,cwd=self.opts.output_dir)\n+ retval = x.wait()\n+ sto.close()\n+ flist = os.listdir(self.opts.output_dir)\n+ flist.sort()\n+ html = [galhtmlprefix % progname,]\n+ html.append(\'<h2>Galaxy DGE outputs run at %s<h2></br>Click on the images below to download high quality PDF versions</br>\\n\' % timenow())\n+ if len(flist) > 0:\n+ html.append(\'<table>\\n\')\n+ for fname in flist:\n+ dname,e = os.path.splitext(fname)\n+ if e.lower() == \'.pdf\' : # compress and make a thumbnail\n+ thumb = \'%s.%s\' % (dname,self.thumbformat)\n+ pdff = os.path.join(self.opts.output_dir,fname)\n+ retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)\n+ if retval == 0:\n+ s= \'<tr><td><a href="%s"><img src="%s" title="Click to download a print quality PDF of %s" hspace="10" width="600"></a></td></tr>\' \\\n+ % (fname,thumb,fname)\n+ else:\n+ dname = \'%s (thumbnail image not_found)\' % fname\n+ s= \'<tr><td><a href="%s">%s</a></td></tr>\' % (fname,dname)\n+ html.append(s)\n+ else:\n+ html.append(\'<tr><td><a href="%s">%s</a></td></tr>\' % (fname,fname))\n+ html.append(\'</table>\\n\')\n+ else:\n+ html.append(\'<h2>### Error - R returned no files - please confirm that parameters are sane</h1>\')\n+ html.append(\'<h3>R log follows below</h3><hr><pre>\\n\')\n+ rlog = open(self.tlog,\'r\').readlines()\n+ html += rlog\n+ html.append(\'%s CL = %s</br>\\n\' % (self.myName,\' \'.join(sys.argv)))\n+ html.append(\'DGE.R CL = %s</br>\\n\' % (\' \'.join(self.cl)))\n+ html.append(\'</pre>\\n\')\n+ html.append(galhtmlattr % (progname,timenow()))\n+ html.append(galhtmlpostfix)\n+ htmlf = file(self.opts.output_html,\'w\')\n+ htmlf.write(\'\\n\'.join(html))\n+ htmlf.write(\'\\n\')\n+ htmlf.close()\n+ \n+\n+def main():\n+ u = """\n+ This is a Galaxy wrapper. It expects to be called by DGE.xml as:\n+ <command interpreter="python">rgDGE.py --output_html "$html_file.files_path" --input_matrix "$input1" --treatcols "$Treat_cols" --treatname "$treatment_name" \n+--ctrlcols "$Control_cols"\n+ --ctrlname "$control_name" --output_tab "$outtab" --output_html "$html_file" --output_dir "$html_file.files_path" --method "edgeR"\n+ </command>\n+ """\n+ op = optparse.OptionParser()\n+ a = op.add_option\n+ a(\'--input_matrix\',default=None)\n+ a(\'--output_tab\',default=None)\n+ a(\'--output_html\',default=None)\n+ a(\'--treatcols\',default=None)\n+ a(\'--treatname\',default=\'Treatment\')\n+ a(\'--ctrlcols\',default=None)\n+ a(\'--ctrlname\',default=\'Control\')\n+ a(\'--output_dir\',default=None)\n+ a(\'--method\',default=\'edgeR\')\n+ opts, args = op.parse_args()\n+ assert opts.input_matrix and opts.output_html,u\n+ assert os.path.isfile(opts.input_matrix),\'## DGE runner unable to open supplied input file %s\' % opts.input_matrix\n+ assert opts.treatcols,\'## DGE runner requires a comma separated list of treatment columns eg --treatcols 4,5,6\'\n+ assert opts.treatcols,\'## DGE runner requires a comma separated list of control columns eg --ctlcols 2,3,7\'\n+ myName=os.path.split(sys.argv[0])[-1]\n+ if not os.path.exists(opts.output_dir):\n+ os.makedirs(opts.output_dir)\n+ m = DGE(myName, opts=opts)\n+ retcode = m.runDGE()\n+ if retcode:\n+ sys.exit(retcode) # indicate failure to job runner\n+ \n+ \n+ \n+if __name__ == "__main__":\n+ main()\n+\n+\n+\n' |