changeset 34:c6fdf2c6d0f4 draft

Citations added (thanks John!) and a few more output formats for Alistair Chilcott
author fubar
date Thu, 28 Aug 2014 02:33:05 -0400
parents ca60c96f0beb
children 117a5ada6a6a
files old.xml rgToolFactoryMultIn.py rgToolFactoryMultIn.xml
diffstat 3 files changed, 1914 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/old.xml	Thu Aug 28 02:33:05 2014 -0400
@@ -0,0 +1,835 @@
+<tool id="rgedgeRpaired" name="edgeR" version="0.20">
+  <description>1 or 2 level models for count data</description>
+  <requirements>
+      <requirement type="package" version="2.12">biocbasics</requirement>
+      <requirement type="package" version="3.0.1">package_r3</requirement>
+  </requirements>
+  
+  <command interpreter="python">
+     rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "edgeR" 
+    --output_dir "$html_file.files_path" --output_html "$html_file" --output_tab "$outtab" --make_HTML "yes"
+  </command>
+  <inputs>
+    <param name="input1"  type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample"
+       help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/>
+    <param name="title" type="text" value="edgeR" size="80" label="Title for job outputs" help="Supply a meaningful name here to remind you what the outputs contain">
+      <sanitizer invalid_char="">
+        <valid initial="string.letters,string.digits"><add value="_" /> </valid>
+      </sanitizer>
+    </param>
+    <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/>
+    <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" 
+         multiple="true" use_header_names="true" size="120" display="checkboxes">
+        <validator type="no_options" message="Please select at least one column."/>
+    </param>
+    <param name="control_name" type="text" value="Control" size="50" label="Control Name"/>
+    <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True" 
+         multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true">
+    </param>
+    <param name="subjectids" type="text" optional="true" size="120" value = ""
+       label="IF SUBJECTS NOT ALL INDEPENDENT! Enter integers to indicate sample pairing for every column in input"
+       help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter '1,2,1,2'">
+      <sanitizer>
+        <valid initial="string.digits"><add value="," /> </valid>
+      </sanitizer>
+    </param>
+    <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs"
+     help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/>
+    <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1" 
+              label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples"
+     help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/>
+    <conditional name="DESeq">
+    <param name="doDESeq" type="select" 
+       label="Run the same model with DESeq2 and compare findings"
+       help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR">
+      <option value="F" selected="true">Do not run DESeq2</option>
+      <option value="T">Run DESeq2 (only works if NO second GLM factor supplied at present)</option>
+     </param>
+     <when value="T">
+         <param name="DESeq_fitType" type="select">
+            <option value="parametric" selected="true">Parametric (default) fit for dispersions</option>
+            <option value="local">Local fit - use this if parametric fails</option>
+            <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual</option>
+         </param> 
+     </when>
+     <when value="F"> </when>
+    </conditional>
+    <param name="doVoom" type="boolean" truevalue="T" checked='false' falsevalue="F" size="1" label="Run the same model with VOOM transformation and limma."/>
+    <conditional name="camera">
+    <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets" 
+        help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history">
+    <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option>
+    <option value="T">Run GSEA tests with the Camera algorithm</option>
+    </param>
+     <when value="T">
+     <conditional name="gmtSource">
+      <param name="refgmtSource" type="select" 
+         label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set">
+        <option value="indexed" selected="true">Use a built-in gene set</option>
+        <option value="history">Use a gene set from my history</option>
+        <option value="both">Add a gene set from my history to a built in gene set</option>
+      </param>
+      <when value="indexed">
+        <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
+          <options from_data_table="gseaGMT_3.1">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No GMT v3.1 files are available - please install them"/>
+          </options>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" />
+      </when>
+      <when value="both">
+        <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" />
+        <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis">
+          <options from_data_table="gseaGMT_3.1">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No GMT v3.1 files are available - please fix tool_data_table and loc files"/>
+          </options>
+        </param>
+       </when>
+     </conditional>
+     </when>
+     <when value="F"> 
+     </when>
+    </conditional>
+    <param name="priordf" type="integer" value="20" size="3" label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n  and prior.df = prior.n * residual.df"
+     help="0 = Use edgeR default. Use a small value to 'smooth' small samples. See edgeR docs and note below"/>
+    <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control"
+     help="Conventional default value of 0.05 recommended"/>
+    <param name="fdrtype" type="select" label="FDR (Type II error) control method" 
+         help="Use fdr or bh typically to control for the number of tests in a reliable way">
+            <option value="fdr" selected="true">fdr</option>
+            <option value="BH">Benjamini Hochberg</option>
+            <option value="BY">Benjamini Yukateli</option>
+            <option value="bonferroni">Bonferroni</option>
+            <option value="hochberg">Hochberg</option>
+            <option value="holm">Holm</option>
+            <option value="hommel">Hommel</option>
+            <option value="none">no control for multiple tests</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outtab" label="${title}.xls"/>
+    <data format="html" name="html_file" label="${title}.html"/>
+  </outputs>
+ <stdio>
+     <exit_code range="4"   level="fatal"   description="Number of subject ids must match total number of samples in the input matrix" />
+ </stdio>
+ <tests>
+<test>
+<param name='input1' value='test_bams2mx.xls' ftype='tabular' />
+ <param name='treatment_name' value='case' />
+ <param name='title' value='edgeRtest' />
+ <param name='useNDF' value='' />
+ <param name='fdrtype' value='fdr' />
+ <param name='priordf' value="0" />
+ <param name='fdrthresh' value="0.05" />
+ <param name='control_name' value='control' />
+ <param name='subjectids' value='' />
+  <param name='Treat_cols' value='3,4,5,9' />
+ <param name='Control_cols' value='2,6,7,8' />
+ <output name='outtab' file='edgeRtest1out.xls' compare='diff' />
+ <output name='html_file' file='edgeRtest1out.html'  compare='diff' lines_diff='20' />
+</test>
+</tests>
+
+<configfiles>
+<configfile name="runme">
+<![CDATA[
+## 
+## edgeR.Rscript
+## updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross
+## Performs DGE on a count table containing n replicates of two conditions
+##
+### Original edgeR code by: S.Lunke and A.Kaspi
+reallybig = log10(.Machine\$double.xmax)
+reallysmall = log10(.Machine\$double.xmin)
+library('stringr')
+library('gplots')
+library('edgeR')
+
+hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here')
+{
+    ### Perform clustering for significant pvalues after controlling FWER
+    samples = colnames(cmat)
+    gu = unique(group)
+    if (length(gu) == 2) {
+        col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"}
+        pcols = unlist(lapply(group,col.map))        
+        } else {
+        colours = rainbow(length(gu),start=0,end=4/6)
+        pcols = colours[match(group,gu)]        
+    }
+    gn = rownames(cmat)
+    dm = cmat[(! is.na(gn)),] 
+    ### remove unlabelled hm rows
+    nprobes = nrow(dm)
+    if (nprobes > nsamp) {
+      dm =dm[1:nsamp,]
+    }
+    newcolnames = substr(colnames(dm),1,20)
+    colnames(dm) = newcolnames
+    pdf(outpdfname)
+    heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none',
+         Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5)
+    dev.off()
+}
+
+hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here")
+{
+    ## for 2 groups only was
+    ## col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"}
+    ## pcols = unlist(lapply(group,col.map))
+    gu = unique(group)
+    colours = rainbow(length(gu),start=0.3,end=0.6)
+    pcols = colours[match(group,gu)]
+    nrows = nrow(cmat)
+    mtitle = paste(myTitle,'Heatmap: n contigs =',nrows)
+    if (nrows > nsamp)  {
+               cmat = cmat[c(1:nsamp),]
+               mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='')
+          }
+    newcolnames = substr(colnames(cmat),1,20)
+    colnames(cmat) = newcolnames
+    pdf(outpdfname)
+    heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols)
+    dev.off()
+}
+
+qqPlot = function(descr='Title',pvector, ...)
+## stolen from https://gist.github.com/703512
+{
+    o = -log10(sort(pvector,decreasing=F))
+    e = -log10( 1:length(o)/length(o) )
+    o[o==-Inf] = reallysmall
+    o[o==Inf] = reallybig
+    pdfname = paste(gsub(" ","", descr , fixed=TRUE),'pval_qq.pdf',sep='_')
+    maint = paste(descr,'QQ Plot')
+    pdf(pdfname)
+    plot(e,o,pch=19,cex=1, main=maint, ...,
+        xlab=expression(Expected~~-log[10](italic(p))),
+        ylab=expression(Observed~~-log[10](italic(p))),
+        xlim=c(0,max(e)), ylim=c(0,max(o)))
+    lines(e,e,col="red")
+    grid(col = "lightgray", lty = "dotted")
+    dev.off()
+}
+
+smearPlot = function(DGEList,deTags, outSmear, outMain)
+        {
+        pdf(outSmear)
+        plotSmear(DGEList,de.tags=deTags,main=outMain)
+        grid(col="blue")
+        dev.off()
+        }
+        
+boxPlot = function(rawrs,cleanrs,maint,myTitle)
+{    
+        nc = ncol(rawrs)
+        for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA}
+        fullnames = colnames(rawrs)
+        newcolnames = substr(colnames(rawrs),1,20)
+        colnames(rawrs) = newcolnames
+        newcolnames = substr(colnames(cleanrs),1,20)
+        colnames(cleanrs) = newcolnames
+        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"sampleBoxplot.pdf",sep='_')
+        defpar = par(no.readonly=T)
+        pdf(pdfname)
+        l = layout(matrix(c(1,2),1,2,byrow=T))
+        print.noquote('raw contig counts by sample:')
+        print.noquote(summary(rawrs))
+        print.noquote('normalised contig counts by sample:')
+        print.noquote(summary(cleanrs))
+        boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint))
+        grid(col="blue")
+        boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint))
+        grid(col="blue")
+        dev.off()
+        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"samplehistplot.pdf",sep='_') 
+        nc = ncol(rawrs)
+        print.noquote(paste('Using ncol rawrs=',nc))
+        ncroot = round(sqrt(nc))
+        if (ncroot*ncroot < nc) { ncroot = ncroot + 1 }
+        m = c()
+        for (i in c(1:nc)) {
+              rhist = hist(rawrs[,i],breaks=100,plot=F)
+              m = append(m,max(rhist\$counts))
+             }
+        ymax = max(m)
+        pdf(pdfname)
+        par(mfrow=c(ncroot,ncroot))
+        for (i in c(1:nc)) {
+                 hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw count', col="maroon", 
+                 breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax))
+             }
+        dev.off()
+        par(defpar)
+      
+}
+
+cumPlot = function(rawrs,cleanrs,maint,myTitle)
+{   
+        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
+        defpar = par(no.readonly=T)
+        pdf(pdfname)
+        par(mfrow=c(2,1))
+        lrs = log(rawrs,10) 
+        lim = max(lrs)
+        hist(lrs,breaks=100,main=paste('Before:',maint),xlab="Reads (log)",
+             ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1)
+        grid(col="blue")
+        lrs = log(cleanrs,10) 
+        hist(lrs,breaks=100,main=paste('After:',maint),xlab="Reads (log)",
+             ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1)
+        grid(col="blue")
+        dev.off()
+        par(defpar)
+}
+
+cumPlot1 = function(rawrs,cleanrs,maint,myTitle)
+{   
+        pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_')
+        pdf(pdfname)
+        par(mfrow=c(2,1))
+        lastx = max(rawrs)
+        rawe = knots(ecdf(rawrs))
+        cleane = knots(ecdf(cleanrs))
+        cy = 1:length(cleane)/length(cleane)
+        ry = 1:length(rawe)/length(rawe)
+        plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads",
+             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
+        grid(col="blue")
+        plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads",
+             ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle)
+        grid(col="blue")
+        dev.off()
+}
+
+
+
+doGSEA = function(y=NULL,design=NULL,histgmt="",
+                  bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
+                  ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH")
+{
+  genesets = c()
+  if (bigmt > "")
+  {
+    bigenesets = readLines(bigmt)
+    genesets = bigenesets
+  }
+  if (histgmt > "")
+  {
+    hgenesets = readLines(histgmt)
+    if (bigmt > "") {
+      genesets = rbind(genesets,hgenesets)
+    } else {
+      genesets = hgenesets
+    } 
+  }
+  print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt))
+  genesets = strsplit(genesets,'\t') 
+  ##### tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n
+  outf = outfname
+  head=paste(myTitle,'edgeR GSEA')
+  write(head,file=outfname,append=F)
+  ntest=length(genesets)
+  urownames = toupper(rownames(y))
+  upcam = c()
+  downcam = c()
+  for (i in 1:ntest) {
+    gs = unlist(genesets[i])
+    g = gs[1] #### geneset_id
+    u = gs[2]
+    if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") }
+    glist = gs[3:length(gs)] #### member gene symbols
+    glist = toupper(glist)
+    inglist = urownames %in% glist
+    nin = sum(inglist)
+    if ((nin > minnin) && (nin < maxnin)) {
+      ### print(paste('@@found',sum(inglist),'genes in glist'))
+      camres = camera(y=y,index=inglist,design=design)
+      if (camres) {
+      rownames(camres) = g 
+      ##### gene set name
+      camres = cbind(GeneSet=g,URL=u,camres)
+      if (camres\$Direction == "Up") 
+        {
+        upcam = rbind(upcam,camres) } else {
+          downcam = rbind(downcam,camres)
+        }
+      }
+   }
+  }
+  uscam = upcam[order(upcam\$PValue),]
+  unadjp = uscam\$PValue
+  uscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
+  nup = max(10,sum((uscam\$adjPValue < fdrthresh)))
+  dscam = downcam[order(downcam\$PValue),]
+  unadjp = dscam\$PValue
+  dscam\$adjPValue = p.adjust(unadjp,method=fdrtype)
+  ndown = max(10,sum((dscam\$adjPValue < fdrthresh)))
+  write.table(uscam,file=paste('upCamera',outfname,sep='_'),quote=F,sep='\t',row.names=F)
+  write.table(dscam,file=paste('downCamera',outfname,sep='_'),quote=F,sep='\t',row.names=F)
+  print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:'))
+  write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F)
+  print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:'))
+  write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F)
+}
+
+
+
+edgeIt = function (Count_Matrix,group,outputfilename,fdrtype='fdr',priordf=5, 
+        fdrthresh=0.05,outputdir='.', myTitle='edgeR',libSize=c(),useNDF=F,
+        filterquantile=0.2, subjects=c(),mydesign=NULL,
+        doDESeq=T,doVoom=T,doCamera=T,org='hg19',
+        histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt",
+        doCook=F,DESeq_fittype="parameteric") 
+{
+  if (length(unique(group))!=2){
+    print("Number of conditions identified in experiment does not equal 2")
+    q()
+    }
+  require(edgeR)
+  options(width = 512) 
+  mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ")
+  allN = nrow(Count_Matrix)
+  nscut = round(ncol(Count_Matrix)/2)
+  colTotmillionreads = colSums(Count_Matrix)/1e6
+  rawrs = rowSums(Count_Matrix)
+  nonzerod = Count_Matrix[(rawrs > 0),] 
+  nzN = nrow(nonzerod)
+  nzrs = rowSums(nonzerod)
+  zN = allN - nzN
+  print('**** Quantiles for non-zero row counts:',quote=F)
+  print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F)
+  if (useNDF == "T")
+  {
+    gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut
+    lo = colSums(Count_Matrix[!gt1rpin3,])
+    workCM = Count_Matrix[gt1rpin3,]
+    cleanrs = rowSums(workCM)
+    cleanN = length(cleanrs)
+    meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="")
+    print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F)
+    maint = paste('Filter >= 1/million reads in >=',nscut,'samples')
+  }   else {        
+    useme = (nzrs > quantile(nzrs,filterquantile))
+    workCM = nonzerod[useme,]
+    lo = colSums(nonzerod[!useme,])
+    cleanrs = rowSums(workCM)
+    cleanN = length(cleanrs)
+    meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="")
+    print(paste('Read',allN,"contigs. Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F)
+    maint = paste('Filter below',filterquantile,'quantile')
+  }
+  cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle)
+  allgenes <- rownames(workCM)
+  print(paste("*** Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) 
+  rsums = rowSums(workCM)
+  TName=unique(group)[1]
+  CName=unique(group)[2]
+  DGEList = DGEList(counts=workCM, group = group)
+  DGEList = calcNormFactors(DGEList)
+
+  if (is.null(mydesign)) {
+    if (length(subjects) == 0) 
+    {
+      mydesign = model.matrix(~group)
+    } 
+    else { 
+      subjf = factor(subjects)
+      mydesign = model.matrix(~subjf+group) 
+      ### we block on subject so make group last to simplify finding it
+    }
+  } 
+  print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=',')))
+  print.noquote('Using design matrix:')
+  print.noquote(mydesign)
+  DGEList = estimateGLMCommonDisp(DGEList,mydesign)
+  comdisp = DGEList\$common.dispersion
+  DGEList = estimateGLMTrendedDisp(DGEList,mydesign)
+  if (priordf > 0) {
+    print.noquote(paste("prior.df =",priordf))
+    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = priordf)
+  } else {
+    DGEList = estimateGLMTagwiseDisp(DGEList,mydesign)
+  }
+  lastcoef=ncol(mydesign)
+  print.noquote(paste('*** lastcoef = ',lastcoef))
+  estpriorn = getPriorN(DGEList)
+  predLFC1 = predFC(DGEList,prior.count=1,design=mydesign,dispersion=DGEList\$tagwise.dispersion,offset=getOffset(DGEList))
+  predLFC3 = predFC(DGEList,prior.count=3,design=mydesign,dispersion=DGEList\$tagwise.dispersion,offset=getOffset(DGEList))
+  predLFC5 = predFC(DGEList,prior.count=5,design=mydesign,dispersion=DGEList\$tagwise.dispersion,offset=getOffset(DGEList))
+  DGLM = glmFit(DGEList,design=mydesign)
+  DE = glmLRT(DGLM) 
+  #### always last one - subject is first if needed
+  logCPMnorm = cpm(DGEList,log=T,normalized.lib.sizes=T)
+  logCPMraw = cpm(DGEList,log=T,normalized.lib.sizes=F) 
+  uoutput = cbind( 
+    Name=as.character(rownames(DGEList\$counts)),
+    DE\$table,
+    adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype),
+    Dispersion=DGEList\$tagwise.dispersion,totreads=rsums,
+    predLFC1=predLFC1[,lastcoef],
+    predLFC3=predLFC3[,lastcoef],
+    predLFC5=predLFC5[,lastcoef],
+    logCPMnorm,
+    DGEList\$counts
+  )
+  soutput = uoutput[order(DE\$table\$PValue),]
+  heatlogcpmnorm = logCPMnorm[order(DE\$table\$PValue),]
+  goodness = gof(DGLM, pcutoff=fdrthresh)
+  noutl = (sum(goodness\$outlier) > 0)
+  if (noutl > 0) {
+        print.noquote(paste('***',noutl,'GLM outliers found'))
+        print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F)
+    } else { 
+      print('*** No GLM fit outlier genes found')
+    }
+  z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2)
+  pdf(paste(mt,"GoodnessofFit.pdf",sep='_'))
+  qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion")
+  abline(0,1,lwd=3)
+  points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon")
+  dev.off()
+  print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F)
+  uniqueg = unique(group)
+  sample_colors =  match(group,levels(group))
+  pdf(paste(mt,"MDSplot.pdf",sep='_'))
+  sampleTypes = levels(factor(group))
+  print.noquote(sampleTypes)
+  plotMDS.DGEList(DGEList,main=paste("MDS Plot for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors)
+  legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19)
+  grid(col="blue")
+  dev.off()
+  colnames(logCPMnorm) = paste( colnames(logCPMnorm),'N',sep="_")
+  print(paste('Raw sample CPM',paste(colSums(logCPMraw,na.rm=T),collapse=',')))
+  try(boxPlot(rawrs=logCPMraw,cleanrs=logCPMnorm,maint='TMM Normalisation',myTitle=myTitle))
+  nreads = soutput\$totreads 
+  print('*** writing output',quote=F)
+  write.table(soutput,outputfilename, quote=FALSE, sep="\t",row.names=F)
+  rn = row.names(workCM)
+  print.noquote('@@ rn')
+  print.noquote(head(rn))
+  reg = "^chr([0-9]+):([0-9]+)-([0-9]+)"
+  genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/"
+  ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='')
+  testreg = str_match(rn,reg)
+  nreads = uoutput\$totreads 
+  if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) 
+  {
+    print("@@ using ucsc substitution for urls")
+    urls = paste0(ucsc,"&amp;position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",rn,"</a>")
+  } else {
+    print("@@ using genecards substitution for urls")
+    urls = paste0(genecards,rn,"\'>",rn,"</a>")
+  }
+  tt = uoutput
+  print.noquote("*** edgeR Top tags\n")
+  tt = cbind(tt,ntotreads=nreads,URL=urls) 
+  tt = tt[order(DE\$table\$PValue),] 
+  print.noquote(tt[1:50,])
+  ### Plot MAplot
+  deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,])
+  nsig = length(deTags)
+  print(paste('***',nsig,'tags significant at adj p=',fdrthresh),quote=F)
+  if (nsig > 0) { 
+      print('*** deTags',quote=F) 
+      print(head(deTags))
+    }
+  deColours = ifelse(deTags,'red','black')
+  pdf(paste(mt,"BCV_vs_abundance.pdf",sep='_'))
+  plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance")
+  dev.off()
+  dg = DGEList[order(DE\$table\$PValue),]
+  outpdfname=paste(mt,"heatmap.pdf",sep='_')
+  hmap2(heatlogcpmnorm,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=myTitle)
+  outSmear = paste(mt,"Smearplot.pdf",sep='_')
+  outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='')
+  smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain)
+  qqPlot(descr=myTitle,pvector=DE\$table\$PValue)
+  if (doDESeq == T)
+  {
+    ### DESeq2
+    require('DESeq2')
+    print.noquote(paste('****subjects=',subjects,'length=',length(subjects)))
+    if (length(subjects) == 0)
+        {
+        pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM))
+        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ Rx))
+        } else {
+        pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM))
+        deSEQds = DESeqDataSetFromMatrix(countData = workCM,  colData = pdata, design = formula(~ subjects + Rx))
+        }
+    deSeqDatsizefac <- estimateSizeFactors(deSEQds)
+    deSeqDatdisp <- estimateDispersions(deSeqDatsizefac,fitType=DESeq_fittype)
+    resDESeq <- nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype)
+    rDESeq = as.data.frame(results(resDESeq))
+    srDESeq = rDESeq[order(rDESeq\$pvalue),]
+    write.table(srDESeq,paste(mt,'DESeq2_TopTable.xls',sep='_'), quote=FALSE, sep="\t",row.names=F)
+    topresults.DESeq <- rDESeq[which(rDESeq\$padj < fdrthresh), ]
+    DESeqcountsindex <- which(allgenes %in% rownames(topresults.DESeq))
+    DESeqcounts <- rep(0, length(allgenes))
+    DESeqcounts[DESeqcountsindex] <- 1
+    pdf(paste(mt,"DESeq2_dispersion_estimates.pdf",sep='_'))
+    plotDispEsts(resDESeq)
+    dev.off()
+    if (doCook) {
+       pdf(paste(mt,"DESeq2_cooks_distance.pdf",sep='_'))
+       W <- mcols(resDESeq)\$WaldStatistic_condition_treated_vs_untreated
+       maxCooks <- mcols(resDESeq)\$maxCooks
+       idx <- !is.na(W)
+       plot(rank(W[idx]), maxCooks[idx], xlab="rank of Wald statistic", ylab="maximum Cook's distance per gene",
+          ylim=c(0,5), cex=.4, col="maroon")
+       m <- ncol(dds)
+       p <- 3
+       abline(h=qf(.75, p, m - p),col="darkblue")
+       grid(col="lightgray",lty="dotted")
+    }    
+  }
+  counts.dataframe = as.data.frame(c()) 
+  norm.factor = DGEList\$samples\$norm.factors
+  topresults.edgeR <- soutput[which(soutput\$adj.p.value < fdrthresh), ]
+  edgeRcountsindex <- which(allgenes %in% rownames(topresults.edgeR))
+  edgeRcounts <- rep(0, length(allgenes))
+  edgeRcounts[edgeRcountsindex] <- 1  
+  if (doVoom == T) {
+      pdf(paste(mt,"voomplot.pdf",sep='_'))
+      dat.voomed <- voom(DGEList, mydesign, plot = TRUE, normalize.method="quantil", lib.size = NULL) 
+      dev.off()
+      fit <- lmFit(dat.voomed, mydesign)
+      fit <- eBayes(fit)
+      rvoom <- topTable(fit, coef = length(colnames(mydesign)), adj = "BH", n = Inf)
+      write.table(rvoom,paste(mt,'VOOM_topTable.xls',sep='_'), quote=FALSE, sep="\t",row.names=F)
+      topresults.voom <- rvoom[which(rvoom\$adj.P.Val < fdrthresh), ]
+      voomcountsindex <- which(allgenes %in% rownames(topresults.voom))
+      voomcounts <- rep(0, length(allgenes))
+      voomcounts[voomcountsindex] <- 1
+  }
+  if ((doDESeq==T) || (doVoom==T)) {
+    if ((doVoom==T) && (doDESeq==T)) {
+        vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh)
+        counts.dataframe <- data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, 
+                                       VOOM_limma = voomcounts, row.names = allgenes)
+       } else if (doDESeq==T) {
+         vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh)
+         counts.dataframe <- data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes)
+       } else if (doVoom==T) {
+        vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh)
+        counts.dataframe <- data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, row.names = allgenes)
+       }
+    
+    if (nrow(counts.dataframe > 1)) {
+      counts.venn <- vennCounts(counts.dataframe)
+      vennf = paste(mt,'venn.pdf',sep='_')
+      pdf(vennf)
+      vennDiagram(counts.venn,main=vennmain,col="maroon")
+      dev.off()
+    }
+  } ### doDESeq or doVoom
+  if (doDESeq==T) {
+    cat("*** DESeq top 50\n")
+    print(srDESeq[1:50,])
+  }
+  if (doVoom==T) {
+      cat("*** VOOM top 50\n")
+      print(rvoom[1:50,])
+  }
+  if (doCamera) {
+  doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle,
+    outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype)
+  }
+  uoutput 
+  
+}
+#### Done
+
+#### sink(stdout(),append=T,type="message")
+
+doDESeq = $DESeq.doDESeq 
+### make these 'T' or 'F'
+doVoom = $doVoom
+doCamera = $camera.doCamera
+Out_Dir = "$html_file.files_path"
+Input =  "$input1"
+TreatmentName = "$treatment_name"
+TreatmentCols = "$Treat_cols"
+ControlName = "$control_name"
+ControlCols= "$Control_cols"
+outputfilename = "$outtab"
+org = "$input1.dbkey"
+if (org == "") { org = "hg19"}
+fdrtype = "$fdrtype"
+priordf = $priordf
+fdrthresh = $fdrthresh
+useNDF = "$useNDF"
+fQ = $fQ 
+myTitle = "$title"
+sids = strsplit("$subjectids",',')
+subjects = unlist(sids)
+nsubj = length(subjects)
+builtin_gmt=""
+history_gmt=""
+
+builtin_gmt = ""
+history_gmt = ""
+DESeq_fittype=""
+#if $DESeq.doDESeq == "T"
+  DESeq_fittype = "$DESeq.DESeq_fitType"
+#end if
+#if $camera.doCamera == 'T'
+  #if $camera.gmtSource.refgmtSource == "indexed" or $camera.gmtSource.refgmtSource == "both":
+     builtin_gmt = "${camera.gmtSource.builtinGMT.fields.path}"
+  #end if
+  #if $camera.gmtSource.refgmtSource == "history" or $camera.gmtSource.refgmtSource == "both":
+    history_gmt = "${camera.gmtSource.ownGMT}"
+    history_gmt_name = "${camera.gmtSource.ownGMT.name}"
+  #end if
+#end if
+if (nsubj > 0) {
+if (doDESeq) {
+ print('WARNING - cannot yet use DESeq2 for 2 way anova - see the docs')
+ doDESeq = F
+ }
+}
+TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1
+CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 
+cat('Got TCols=')
+cat(TCols)
+cat('; CCols=')
+cat(CCols)
+cat('\n')
+useCols = c(TCols,CCols)
+if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
+Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
+snames = colnames(Count_Matrix)
+nsamples = length(snames)
+if (nsubj >  0 & nsubj != nsamples) {
+options("show.error.messages"=T)
+mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','),
+   'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=','))
+write(mess, stderr())
+quit(save="no",status=4)
+}
+
+Count_Matrix = Count_Matrix[,useCols] ### reorder columns
+if (length(subjects) != 0) {subjects = subjects[useCols]}
+rn = rownames(Count_Matrix)
+islib = rn %in% c('librarySize','NotInBedRegions')
+LibSizes = Count_Matrix[subset(rn,islib),][1] # take first
+Count_Matrix = Count_Matrix[subset(rn,! islib),]
+group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) )             
+group = factor(group, levels=c(ControlName,TreatmentName))
+colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_")                   
+results = edgeIt(Count_Matrix=Count_Matrix,group=group,outputfilename=outputfilename,
+                 fdrtype='BH',priordf=priordf,fdrthresh=fdrthresh,outputdir='.',
+                 myTitle='edgeR',useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
+                 doDESeq=doDESeq,doVoom=doVoom,doCamera=doCamera,org=org,
+                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fittype=DESeq_fittype)
+sessionInfo()
+]]>
+</configfile>
+</configfiles>
+<help>
+
+**What it does**
+
+Performs digital gene expression analysis between a treatment and control on a count matrix.
+Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
+
+**Input**
+
+A matrix consisting of non-negative integers. The matrix must have a unique header row identifiying the samples, and a unique set of row names 
+as  the first column. Typically the row names are gene symbols or probe id's for downstream use in GSEA and other methods.
+
+If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
+put a comma separated list of indicators for every sample (whether modelled or not!) indicating (eg) the subject number or 
+A list of integers, one for each subject or an empty string if samples are all independent.
+If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix.
+Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
+
+So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones
+eg if you had 6 samples with the first two independent but the second and third pairs each being from independent subjects. you might use
+8,9,1,1,2,2 
+as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6
+
+**Output**
+
+A summary html page with links to the R source code and all the outputs, nice grids of helpful plot thumbnails, and lots
+of logging and the top 50 rows of the topTable.
+
+The main topTables of results are provided as separate excelish tabular files. 
+
+They include adjusted p values and dispersions for each region, raw and cpm sample data counts and shrunken (predicted) log fold change estimates.
+These are provided for downstream analyses such as GSEA and are predictions of the logFC you might expect to see 
+in an independent replication of your original experiment. Higher number means more shrinkage. Shrinkage is more extreme for low coverage features 
+so downstream analyses are more robust against strong effect size estimates based on relatively little experimental information.
+
+**Note on prior.N**
+
+http://seqanswers.com/forums/showthread.php?t=5591 says:
+
+*prior.n*
+
+The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. 
+You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood 
+in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your 
+tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the 
+common likelihood the weight of one observation.
+
+In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, 
+or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that 
+you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation 
+(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? 
+What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. 
+If you have more samples, then the tagwise dispersion estimates will be more reliable, 
+so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. 
+
+
+From Bioconductor Digest, Vol 118, Issue 5, Gordon writes:
+
+Dear Dorota,
+
+The important settings are prior.df and trend.
+
+prior.n and prior.df are related through prior.df = prior.n * residual.df,
+and your experiment has residual.df = 36 - 12 = 24.  So the old setting of
+prior.n=10 is equivalent for your data to prior.df = 240, a very large
+value.  Going the other way, the new setting of prior.df=10 is equivalent
+to prior.n=10/24.
+
+To recover old results with the current software you would use
+
+  estimateTagwiseDisp(object, prior.df=240, trend="none")
+
+To get the new default from old software you would use
+
+  estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE)
+
+Actually the old trend method is equivalent to trend="loess" in the new
+software. You should use plotBCV(object) to see whether a trend is
+required.
+
+Note you could also use
+
+  prior.n = getPriorN(object, prior.df=10)
+
+to map between prior.df and prior.n.
+
+** Old rant on variable name changes in bioconductor versions**
+
+BioC authors sometimes make small mostly cosmetic changes to variable names (eg: from p.value to PValue) 
+often to make them more internally consistent or self describing. Unfortunately, these improvements
+break existing code in ways that can take a while to track down that relies on the library in ways that can take a while to track down, 
+increasing downstream tool maintenance effort uselessly.
+
+Please, don't do that. It hurts us.
+
+
+</help>
+
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgToolFactoryMultIn.py	Thu Aug 28 02:33:05 2014 -0400
@@ -0,0 +1,736 @@
+# rgToolFactoryMultIn.py
+# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
+# 
+# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
+# 
+# all rights reserved
+# Licensed under the LGPL
+# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
+#
+# august 2014
+# Allows arbitrary number of input files
+# NOTE positional parameters are now passed to script
+# and output (may be "None") is *before* arbitrary number of inputs
+#
+# march 2014
+# had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
+# grrrrr - night before a demo
+# added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
+#
+# added ghostscript and graphicsmagick as dependencies 
+# fixed a wierd problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
+# errors ensued
+#
+# august 2013
+# found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
+#
+# july 2013
+# added ability to combine images and individual log files into html output
+# just make sure there's a log file foo.log and it will be output
+# together with all images named like "foo_*.pdf
+# otherwise old format for html
+#
+# January 2013
+# problem pointed out by Carlos Borroto
+# added escaping for <>$ - thought I did that ages ago...
+#
+# August 11 2012 
+# changed to use shell=False and cl as a sequence
+
+# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
+# It also serves as the wrapper for the new tool.
+# 
+# you paste and run your script
+# Only works for simple scripts that read one input from the history.
+# Optionally can write one new history dataset,
+# and optionally collect any number of outputs into links on an autogenerated HTML page.
+
+# DO NOT install on a public or important site - please.
+
+# installed generated tools are fine if the script is safe.
+# They just run normally and their user cannot do anything unusually insecure
+# but please, practice safe toolshed.
+# Read the fucking code before you install any tool 
+# especially this one
+
+# After you get the script working on some test data, you can
+# optionally generate a toolshed compatible gzip file
+# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
+# safe and largely automated installation in a production Galaxy.
+
+# If you opt for an HTML output, you get all the script outputs arranged
+# as a single Html history item - all output files are linked, thumbnails for all the pdfs.
+# Ugly but really inexpensive.
+# 
+# Patches appreciated please. 
+#
+#
+# long route to June 2012 product
+# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
+# derived from an integrated script model  
+# called rgBaseScriptWrapper.py
+# Note to the unwary:
+#   This tool allows arbitrary scripting on your Galaxy as the Galaxy user
+#   There is nothing stopping a malicious user doing whatever they choose
+#   Extremely dangerous!!
+#   Totally insecure. So, trusted users only
+#
+# preferred model is a developer using their throw away workstation instance - ie a private site.
+# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
+#
+
+import sys 
+import shutil 
+import subprocess 
+import os 
+import time 
+import tempfile 
+import optparse
+import tarfile
+import re
+import shutil
+import math
+
+progname = os.path.split(sys.argv[0])[1] 
+myversion = 'V001.1 March 2014' 
+verbose = False 
+debug = False
+toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
+
+# if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
+# tool xml
+toolhtmldepskel = """<?xml version="1.0"?>
+<tool_dependency>
+    <package name="ghostscript" version="9.10">
+        <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
+    </package>
+    <package name="graphicsmagick" version="1.3.18">
+        <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
+    </package>
+        <readme>
+           %s
+       </readme>
+</tool_dependency>
+"""
+
+protorequirements = """<requirements>
+      <requirement type="package" version="9.10">ghostscript</requirement>
+      <requirement type="package" version="1.3.18">graphicsmagick</requirement>
+  </requirements>"""
+
+def timenow():
+    """return current time as a string
+    """
+    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+html_escape_table = {
+     "&": "&amp;",
+     ">": "&gt;",
+     "<": "&lt;",
+     "$": "\$"
+     }
+
+def html_escape(text):
+     """Produce entities within text."""
+     return "".join(html_escape_table.get(c,c) for c in text)
+
+def cmd_exists(cmd):
+     return subprocess.call("type " + cmd, shell=True, 
+           stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
+
+
+class ScriptRunner:
+    """class is a wrapper for an arbitrary script
+    """
+
+    def __init__(self,opts=None,treatbashSpecial=True):
+        """
+        cleanup inputs, setup some outputs
+        
+        """
+        self.useGM = cmd_exists('gm')
+        self.useIM = cmd_exists('convert')
+        self.useGS = cmd_exists('gs')
+        self.temp_warned = False # we want only one warning if $TMP not set
+        self.treatbashSpecial = treatbashSpecial
+        if opts.output_dir: # simplify for the tool tarball
+            os.chdir(opts.output_dir)
+        self.thumbformat = 'png'
+        self.opts = opts
+        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
+        self.toolid = self.toolname
+        self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
+        self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
+        self.xmlfile = '%s.xml' % self.toolname
+        s = open(self.opts.script_path,'r').readlines()
+        s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
+        self.script = '\n'.join(s)
+        fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
+        tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
+        tscript.write(self.script)
+        tscript.close()
+        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s]) # for restructured text in help
+        self.escapedScript = '\n'.join([html_escape(x) for x in s])
+        self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
+        if opts.output_dir: # may not want these complexities 
+            self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
+            art = '%s.%s' % (self.toolname,opts.interpreter)
+            artpath = os.path.join(self.opts.output_dir,art) # need full path
+            artifact = open(artpath,'w') # use self.sfile as script source for Popen
+            artifact.write(self.script)
+            artifact.close()
+        self.cl = []
+        self.html = []
+        self.test1Inputs = [] # now a list
+        a = self.cl.append
+        a(opts.interpreter)
+        if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
+            a(self.sfile)
+        else:
+            a('-') # stdin
+        # if multiple inputs - positional or need to distinguish them with cl params
+        if opts.output_tab:
+            a('%s' % opts.output_tab) 
+        if opts.input_tab:
+            tests = []
+            for i,intab in enumerate(opts.input_tab): # if multiple, make tests
+                if intab.find(',') <> -1:
+                    (gpath,uname) = intab.split(',')
+                else:
+                    gpath = uname = intab
+                a('"%s"' % (intab))
+                tests.append(os.path.basename(gpath))
+            self.test1Inputs =  '<param name="input_tab" value="%s" />' % (','.join(tests))
+        else:
+            self.test1Inputs = ''
+        self.outFormats = opts.output_format
+        self.inputFormats = opts.input_formats
+        self.test1Output = '%s_test1_output.xls' % self.toolname
+        self.test1HTML = '%s_test1_output.html' % self.toolname
+
+    def makeXML(self):
+        """
+        Create a Galaxy xml tool wrapper for the new script as a string to write out
+        fixme - use templating or something less fugly than this example of what we produce
+
+        <tool id="reverse" name="reverse" version="0.01">
+            <description>a tabular file</description>
+            <command interpreter="python">
+            reverse.py --script_path "$runMe" --interpreter "python" 
+            --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file" 
+            </command>
+            <inputs>
+            <param name="input1"  type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
+
+            </inputs>
+            <outputs>
+            <data format="tabular" name="tab_file" label="${job_name}"/>
+
+            </outputs>
+            <help>
+            
+**What it Does**
+
+Reverse the columns in a tabular file
+
+            </help>
+            <configfiles>
+            <configfile name="runMe">
+            
+# reverse order of columns in a tabular file
+import sys
+inp = sys.argv[1]
+outp = sys.argv[2]
+i = open(inp,'r')
+o = open(outp,'w')
+for row in i:
+     rs = row.rstrip().split('\t')
+     rs.reverse()
+     o.write('\t'.join(rs))
+     o.write('\n')
+i.close()
+o.close()
+ 
+
+            </configfile>
+            </configfiles>
+            </tool>
+        
+        """ 
+        newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
+%(tooldesc)s
+%(requirements)s
+<command interpreter="python">
+%(command)s
+</command>
+<inputs>
+%(inputs)s
+</inputs>
+<outputs>
+%(outputs)s
+</outputs>
+<configfiles>
+<configfile name="runMe">
+%(script)s
+</configfile>
+</configfiles>
+<tests>
+%(tooltests)s
+</tests>
+<help>
+
+%(help)s
+
+</help>
+</tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
+
+        newCommand="""
+        %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" 
+            --tool_name "%(toolname)s"
+            %(command_inputs)s
+            %(command_outputs)s
+        """
+        # may NOT be an input or htmlout - appended later
+        tooltestsTabOnly = """
+        <test>
+        %(test1Inputs)s
+        <param name="job_name" value="test1"/>
+        <param name="runMe" value="$runMe"/>
+        <output name="tab_file" file="%(test1Output)s" ftype="tabular"/>
+        </test>
+        </tests>
+        """
+        tooltestsHTMLOnly = """
+        <test>
+        %(test1Inputs)s
+        <param name="job_name" value="test1"/>
+        <param name="runMe" value="$runMe"/>
+        <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
+        </test>
+        </tests>
+        """
+        tooltestsBoth = """
+        <test>
+        %(test1Inputs)s
+        <param name="job_name" value="test1"/>
+        <param name="runMe" value="$runMe"/>
+        <output name="tab_file" file="%(test1Output)s" ftype="tabular" />
+        <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
+        </test>
+        </tests>
+        """
+        xdict = {}
+        xdict['requirements'] = ''
+        if self.opts.make_HTML:
+            if self.opts.include_dependencies == "yes":
+                xdict['requirements'] = protorequirements
+        xdict['tool_version'] = self.opts.tool_version
+        xdict['test1HTML'] = self.test1HTML
+        xdict['test1Output'] = self.test1Output
+        xdict['test1Inputs'] = self.test1Inputs
+        if self.opts.make_HTML and self.opts.output_tab <> 'None':
+            xdict['tooltests'] = tooltestsBoth % xdict
+        elif self.opts.make_HTML:
+            xdict['tooltests'] = tooltestsHTMLOnly % xdict
+        else:
+            xdict['tooltests'] = tooltestsTabOnly % xdict
+        xdict['script'] = self.escapedScript 
+        # configfile is least painful way to embed script to avoid external dependencies
+        # but requires escaping of <, > and $ to avoid Mako parsing
+        if self.opts.help_text:
+            helptext = open(self.opts.help_text,'r').readlines()
+            helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek
+            xdict['help'] = ''.join([x for x in helptext])
+        else:
+            xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (self.opts.user_email)
+        coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
+        coda.append('\n')
+        coda.append(self.indentedScript)
+        coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow()))
+        coda.append('See %s for details of that project' % (toolFactoryURL))
+        coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
+        coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
+        xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
+        if self.opts.tool_desc:
+            xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
+        else:
+            xdict['tooldesc'] = ''
+        xdict['command_outputs'] = '' 
+        xdict['outputs'] = '' 
+        if self.opts.input_tab <> 'None':
+            cins = ['\n',]
+            cins.append('#for intab in $input1:')
+            cins.append('--input_tab "$intab"')
+            cins.append('#end for\n')
+            xdict['command_inputs'] = '\n'.join(cins)
+            xdict['inputs'] = '''<param name="input1" multiple="true"  type="data" format="%s" label="Select a suitable input file from your history"
+                    help="Multiple inputs may be selected if the script can deal with them..."/> \n''' % self.inputFormats
+        else:
+            xdict['command_inputs'] = '' # assume no input - eg a random data generator       
+            xdict['inputs'] = ''
+        xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
+        xdict['toolname'] = self.toolname
+        xdict['toolid'] = self.toolid
+        xdict['interpreter'] = self.opts.interpreter
+        xdict['scriptname'] = self.sfile
+        if self.opts.make_HTML:
+            xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
+            xdict['outputs'] +=  ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
+        else:
+            xdict['command_outputs'] += ' --output_dir "./"' 
+        if self.opts.output_tab <> 'None':
+            xdict['command_outputs'] += ' --output_tab "$tab_file"'
+            xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
+        xdict['command'] = newCommand % xdict
+        xmls = newXML % xdict
+        xf = open(self.xmlfile,'w')
+        xf.write(xmls)
+        xf.write('\n')
+        xf.close()
+        # ready for the tarball
+
+
+    def makeTooltar(self):
+        """
+        a tool is a gz tarball with eg
+        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
+        """
+        retval = self.run()
+        if retval:
+            print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
+            sys.exit(1)
+        tdir = self.toolname
+        os.mkdir(tdir)
+        self.makeXML()
+        if self.opts.make_HTML:
+            if self.opts.help_text:
+                hlp = open(self.opts.help_text,'r').read()
+            else:
+                hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
+            if self.opts.include_dependencies == "yes":
+                tooldepcontent = toolhtmldepskel  % hlp
+                depf = open(os.path.join(tdir,'tool_dependencies.xml'),'w')
+                depf.write(tooldepcontent)
+                depf.write('\n')
+                depf.close()
+        if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
+            testdir = os.path.join(tdir,'test-data')
+            os.mkdir(testdir) # make tests directory
+            for i,intab in enumerate(self.opts.input_tab):
+                si = self.opts.input_tab[i]
+                if si.find(',') <> -1:
+                    s = si.split(',')[0]
+                    si = s
+                dest = os.path.join(testdir,os.path.basename(si))
+                if si <> dest:
+                    shutil.copyfile(si,dest)
+            if self.opts.output_tab <> 'None':
+                shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
+            if self.opts.make_HTML:
+                shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
+            if self.opts.output_dir:
+                shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
+        outpif = '%s.py' % self.toolname # new name
+        outpiname = os.path.join(tdir,outpif) # path for the tool tarball
+        pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
+        notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
+        notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
+        notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
+        pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
+        notes += pi
+        outpi = open(outpiname,'w')
+        outpi.write(''.join(notes))
+        outpi.write('\n')
+        outpi.close()
+        stname = os.path.join(tdir,self.sfile)
+        if not os.path.exists(stname):
+            shutil.copyfile(self.sfile, stname)
+        xtname = os.path.join(tdir,self.xmlfile)
+        if not os.path.exists(xtname):
+            shutil.copyfile(self.xmlfile,xtname)
+        tarpath = "%s.gz" % self.toolname
+        tar = tarfile.open(tarpath, "w:gz")
+        tar.add(tdir,arcname=self.toolname)
+        tar.close()
+        shutil.copyfile(tarpath,self.opts.new_tool)
+        shutil.rmtree(tdir)
+        ## TODO: replace with optional direct upload to local toolshed?
+        return retval
+
+
+    def compressPDF(self,inpdf=None,thumbformat='png'):
+        """need absolute path to pdf
+           note that GS gets confoozled if no $TMP or $TEMP
+           so we set it
+        """
+        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
+        hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
+        sto = open(hlog,'a')
+        our_env = os.environ.copy()
+        our_tmp = our_env.get('TMP',None)
+        if not our_tmp:
+            our_tmp = our_env.get('TEMP',None)
+        if not (our_tmp and os.path.exists(our_tmp)):
+            newtmp = os.path.join(self.opts.output_dir,'tmp')
+            try:
+                os.mkdir(newtmp)
+            except:
+                sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
+            our_env['TEMP'] = newtmp
+            if not self.temp_warned:
+               sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
+               self.temp_warned = True          
+        outpdf = '%s_compressed' % inpdf
+        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
+        x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
+        retval1 = x.wait()
+        sto.close()
+        if retval1 == 0:
+            os.unlink(inpdf)
+            shutil.move(outpdf,inpdf)
+            os.unlink(hlog)
+        hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
+        sto = open(hlog,'w')
+        outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
+        if self.useGM:        
+            cl2 = ['gm', 'convert', inpdf, outpng]
+        else: # assume imagemagick
+            cl2 = ['convert', inpdf, outpng]
+        x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
+        retval2 = x.wait()
+        sto.close()
+        if retval2 == 0:
+             os.unlink(hlog)
+        retval = retval1 or retval2
+        return retval
+
+
+    def getfSize(self,fpath,outpath):
+        """
+        format a nice file size string
+        """
+        size = ''
+        fp = os.path.join(outpath,fpath)
+        if os.path.isfile(fp):
+            size = '0 B'
+            n = float(os.path.getsize(fp))
+            if n > 2**20:
+                size = '%1.1f MB' % (n/2**20)
+            elif n > 2**10:
+                size = '%1.1f KB' % (n/2**10)
+            elif n > 0:
+                size = '%d B' % (int(n))
+        return size
+
+    def makeHtml(self):
+        """ Create an HTML file content to list all the artifacts found in the output_dir
+        """
+
+        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
+        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
+        <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
+        <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
+        <title></title> 
+        <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
+        </head> 
+        <body> 
+        <div class="toolFormBody"> 
+        """ 
+        galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>""" 
+        galhtmlpostfix = """</div></body></html>\n"""
+
+        flist = os.listdir(self.opts.output_dir)
+        flist = [x for x in flist if x <> 'Rplots.pdf']
+        flist.sort()
+        html = []
+        html.append(galhtmlprefix % progname)
+        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
+        fhtml = []
+        if len(flist) > 0:
+            logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
+            logfiles.sort()
+            logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)]
+            logfiles.append(os.path.abspath(self.tlog)) # make it the last one
+            pdflist = []
+            npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
+            for rownum,fname in enumerate(flist):
+                dname,e = os.path.splitext(fname)
+                sfsize = self.getfSize(fname,self.opts.output_dir)
+                if e.lower() == '.pdf' : # compress and make a thumbnail
+                    thumb = '%s.%s' % (dname,self.thumbformat)
+                    pdff = os.path.join(self.opts.output_dir,fname)
+                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
+                    if retval == 0:
+                        pdflist.append((fname,thumb))
+                    else:
+                        pdflist.append((fname,fname))
+                if (rownum+1) % 2 == 0:
+                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
+                else:
+                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
+            for logfname in logfiles: # expect at least tlog - if more
+                if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later
+                    sectionname = 'All tool run'
+                    if (len(logfiles) > 1):
+                        sectionname = 'Other'
+                    ourpdfs = pdflist
+                else:
+                    realname = os.path.basename(logfname)
+                    sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
+                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
+                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove
+                nacross = 1
+                npdf = len(ourpdfs)
+
+                if npdf > 0:
+                    nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
+                    if int(nacross)**2 != npdf:
+                        nacross += 1
+                    nacross = int(nacross)
+                    width = min(400,int(1200/nacross))
+                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
+                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
+                    ntogo = nacross # counter for table row padding with empty cells
+                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
+                    for i,paths in enumerate(ourpdfs): 
+                        fname,thumb = paths
+                        s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" 
+                           alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
+                        if ((i+1) % nacross == 0):
+                            s += '</tr>\n'
+                            ntogo = 0
+                            if i < (npdf - 1): # more to come
+                               s += '<tr>'
+                               ntogo = nacross
+                        else:
+                            ntogo -= 1
+                        html.append(s)
+                    if html[-1].strip().endswith('</tr>'):
+                        html.append('</table></div>\n')
+                    else:
+                        if ntogo > 0: # pad
+                           html.append('<td>&nbsp;</td>'*ntogo)
+                        html.append('</tr></table></div>\n')
+                logt = open(logfname,'r').readlines()
+                logtext = [x for x in logt if x.strip() > '']
+                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
+                if len(logtext) > 1:
+                    html.append('\n<pre>\n')
+                    html += logtext
+                    html.append('\n</pre>\n')
+                else:
+                    html.append('%s is empty<br/>' % logfname)
+        if len(fhtml) > 0:
+           fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
+           fhtml.append('</table></div><br/>')
+           html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
+           html += fhtml # add all non-pdf files to the end of the display
+        else:
+            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
+        html.append(galhtmlpostfix)
+        htmlf = file(self.opts.output_html,'w')
+        htmlf.write('\n'.join(html))
+        htmlf.write('\n')
+        htmlf.close()
+        self.html = html
+
+
+    def run(self):
+        """
+        scripts must be small enough not to fill the pipe!
+        """
+        if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
+          retval = self.runBash()
+        else:
+            if self.opts.output_dir:
+                ste = open(self.elog,'w')
+                sto = open(self.tlog,'w')
+                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
+                sto.flush()
+                p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
+            else:
+                p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
+            p.stdin.write(self.script)
+            p.stdin.close()
+            retval = p.wait()
+            if self.opts.output_dir:
+                sto.close()
+                ste.close()
+                err = open(self.elog,'r').readlines()
+                if retval <> 0 and err: # problem
+                    print >> sys.stderr,err
+            if self.opts.make_HTML:
+                self.makeHtml()
+        return retval
+
+    def runBash(self):
+        """
+        cannot use - for bash so use self.sfile
+        """
+        if self.opts.output_dir:
+            s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
+            sto = open(self.tlog,'w')
+            sto.write(s)
+            sto.flush()
+            p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+        else:
+            p = subprocess.Popen(self.cl,shell=False)            
+        retval = p.wait()
+        if self.opts.output_dir:
+            sto.close()
+        if self.opts.make_HTML:
+            self.makeHtml()
+        return retval
+  
+
+def main():
+    u = """
+    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
+    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
+    </command>
+    """
+    op = optparse.OptionParser()
+    a = op.add_option
+    a('--script_path',default=None)
+    a('--tool_name',default=None)
+    a('--interpreter',default=None)
+    a('--output_dir',default='./')
+    a('--output_html',default=None)
+    a('--input_tab',default=[], action="append")    
+    a("--input_formats",default="tabular")
+    a('--output_tab',default="None")
+    a('--output_format',default='tabular')
+    a('--user_email',default='Unknown')
+    a('--bad_user',default=None)
+    a('--make_Tool',default=None)
+    a('--make_HTML',default=None)
+    a('--help_text',default=None)
+    a('--tool_desc',default=None)
+    a('--new_tool',default=None)
+    a('--tool_version',default=None)
+    a('--include_dependencies',default=None)   
+    opts, args = op.parse_args()
+    assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user)
+    assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq'
+    assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript'
+    assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
+    if opts.output_dir:
+        try:
+            os.makedirs(opts.output_dir)
+        except:
+            pass
+    opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab]
+    r = ScriptRunner(opts)
+    if opts.make_Tool:
+        retcode = r.makeTooltar()
+    else:
+        retcode = r.run()
+    os.unlink(r.sfile)
+    if retcode:
+        sys.exit(retcode) # indicate failure to job runner
+
+
+if __name__ == "__main__":
+    main()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgToolFactoryMultIn.xml	Thu Aug 28 02:33:05 2014 -0400
@@ -0,0 +1,343 @@
+<tool id="rgTFM" name="Tool Factory Multiple Inputs" version="1.12">
+  <description>Makes scripts into tools</description>
+   <requirements>
+      <requirement type="package" version="9.10">ghostscript</requirement>
+      <requirement type="package" version="1.3.18">graphicsmagick</requirement>
+  </requirements>
+  <command interpreter="python">
+#if ( $__user_email__ not in $__admin_users__ ):
+     rgToolFactoryMultIn.py --bad_user $__user_email__
+#else:
+    rgToolFactoryMultIn.py --script_path "$runme" --interpreter "$interpreter" 
+     --tool_name "$tool_name"  --user_email "$__user_email__"
+    #if $make_TAB.value=="yes":
+          --output_tab "$output1"
+          --output_format "$output_format"
+    #end if
+    #if $makeMode.make_Tool=="yes":
+      --make_Tool "$makeMode.make_Tool"
+      --tool_desc "$makeMode.tool_desc"
+      --tool_version "$makeMode.tool_version"
+      --new_tool "$new_tool"
+      --help_text "$helpme"
+      #if $make_HTML.value=="yes":
+          #if $makeMode.include_deps.value=="yes":
+             --include_dependencies "yes"
+          #end if
+      #end if
+    #end if
+    #if $make_HTML.value=="yes":
+      --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
+    #else:
+       --output_dir "."
+    #end if
+    #if $input1 != 'None':
+        --input_formats "$input_formats"
+        #for intab in $input1:
+          #if $add_names.value == "yes":
+              --input_tab "$intab,$intab.name"
+          #else:
+              --input_tab "$intab"
+          #end if
+        #end for
+        --input_formats = "$input_formats"
+    #end if
+#end if 
+  </command>
+  <inputs>
+    <param name="input1"  type="data"  label="Select an input file from your history" optional="true" size="120" multiple="true"
+       help="Use the multiple input widget (above/right of input box) for multiple inputs - your script MUST be ready to parse the command line right - see samples below"/>
+    <param name="input_formats" type="text" value="tabular" label="Select allowable Galaxy input formats for your inputs passed to your script - default is tabular"
+         help="Multiple input formats are allowed as a comma separated list (eg 'tabular,txt'), but your script must be able to deal with whatever is passed in!">
+        <sanitizer invalid_char="">
+            <valid initial="string.letters">
+                <add value=","/>
+                <add value=" "/>
+            </valid>
+        </sanitizer>
+    </param>
+    <param name="add_names" type="select" label="Pass input file(s) as path,name - useful if you need the user supplied Galaxy name for your data sets" 
+         help="Your script is responsible for parsing and dealing with these comma separated values!">
+        <option value="yes">Pass inputs as comma separated path,name values on the script command line</option>
+        <option value="" selected="true">Pass input parameters as path only - do not include the user supplied name</option>
+    </param>
+    <param name="tool_name" type="text" value="My dynamic script"   label="New tool ID and title for outputs" size="60"
+         help="This will become the toolshed repository name so please choose thoughtfully to avoid namespace clashes with other tool writers">
+        <sanitizer invalid_char="">
+            <valid initial="string.letters,string.digits">
+                <add value="_"/>
+            </valid>
+        </sanitizer>
+    </param>
+    <conditional name="makeMode">
+        <param name="make_Tool" type="select" label="Create a tar.gz file ready for local toolshed entry" help="Ready to deploy securely!" size="60">
+        <option value="yes">Generate a Galaxy ToolShed compatible toolshed.gz</option>
+        <option value="" selected="true">No. Just run the script please</option>
+        </param>
+        <when value = "yes">
+            <param name="tool_version" label="Tool Version - bump this to warn users trying to redo old analyses" type="text" value="0.01"
+            help="If you change your script and regenerate the 'same' tool, you should inform Galaxy (and users) by changing (bumping is traditional) this number"/>
+            <param name="tool_desc" label="Tool Description" type="text" value="" size="40" 
+             help="Supply a brief tool description for the Galaxy tool menu entry (optional - appears after the tool name)" />
+            <param name="help_text" label="Tool form documentation and help text for users" type="text" area="true" 
+             size="8x120" value="**What it Does**" 
+             help="Supply the brief user documentation to appear on the new tool form as reStructured text - http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html" >           
+                <sanitizer>
+                    <valid initial="string.printable">
+                    </valid>
+                    <mapping initial="none"/>
+                </sanitizer>
+            </param>
+            <param name="include_deps" type="select" label="Include ghostscript and graphicsmagick dependencies in generated tool" size="60"
+            help="If an HTML file is being created, including dependencies is recommended. Otherwise this setting has no effect">
+                <option value="">Rely on system ghostscript and graphicsmagick rather than include these as dependencies</option>
+                <option value="yes" selected="true">Include dependencies so target installations will always work if HTML is being generated</option>
+            </param>
+
+        </when>
+        <when value = "">
+        </when>
+    </conditional> 
+    <param name="make_HTML" type="select" label="Create an HTML report with links to all output files and thumbnail links to PDF images" size="60"
+         help="Recommended for presenting complex outputs in an accessible manner. Turn off for simple tools so they just create one output">
+        <option value="yes">Yes, arrange all outputs produced by my script as an HTML output</option>
+        <option value="" selected="true">No, no HTML output file thanks</option>
+    </param>
+    <param name="make_TAB" type="select" label="Create a new (default tabular) history output with or without an HTML file specified above" 
+         help="This is useful if your script creates a single new tabular file you want to appear in the history after the tool executes">
+        <option value="yes" selected="true">My script writes to a new history output</option>
+        <option value="">I do not want a new history output file</option>
+    </param>
+    <param name="output_format" type="select" label="Galaxy datatype for your tool's output file if any" help="You may need to edit the xml to extend this list">
+    <option value="tabular" selected="true">Tabular</option>
+    <option value="text">text</option>
+    <option value="interval">Interval</option>
+    <option value="gz">gz</option>
+    </param>
+    <param name="interpreter" type="select" label="Select the interpreter for your code. This must be available on the path of the execution host">
+        <option value="Rscript" selected="true">Rscript</option>
+        <option value="python">python</option>
+        <option value="perl">perl</option>
+        <option value="sh">sh</option>
+    </param>   
+    <param name="dynScript" label="Cut and paste the script to be executed here" type="text" value="" area="True" size="8x120"  
+      help="Script must deal with two command line parameters: Path to input tabular file path (or 'None' if none selected) and path to output tabular history file (or 'None').">
+      <sanitizer>
+         <valid initial="string.printable">
+         </valid>
+         <mapping initial="none"/>
+      </sanitizer>
+     </param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output1" label="${tool_name}.${output_format}">
+        <filter>make_TAB=="yes"</filter>
+        <change_format>
+            <when input="output_format" value="interval"  format="interval" />
+            <when input="output_format" value="gz" format="gz" />
+            <when input="output_format" value="text"  format="text"  />
+         </change_format>
+    </data>
+    <data format="html" name="html_file" label="${tool_name}.html">
+        <filter>make_HTML == "yes"</filter>
+    </data>
+    <data format="toolshed.gz" name="new_tool" label="${tool_name}.toolshed.gz">
+        <filter>makeMode['make_Tool'] == "yes"</filter>
+    </data>
+  </outputs>
+<configfiles>
+<configfile name="runme">$dynScript</configfile>
+<configfile name="helpme">
+#if $makeMode.make_Tool == "yes":
+${makeMode.help_text}
+#end if
+</configfile>
+</configfiles>
+<help>
+    
+.. class:: warningmark
+
+**Details and attribution** GTF_
+
+**Local Admins ONLY** Only users whose IDs found in the local admin_user configuration setting in universe_wsgi.ini can run this tool.
+
+**If you find a bug** please raise an issue at the bitbucket repository GTFI_
+
+**What it does** This tool enables a user to paste and submit an arbitrary R/python/perl script to Galaxy.
+
+**Input options** This version is limited to simple transformation or reporting requiring only a single input file selected from the history.
+
+**Output options** Optional script outputs include one single new history tabular file, or for scripts that create multiple outputs,
+a new HTML report linking all the files and images created by the script can be automatically generated.
+
+**Tool Generation option** Once the script is working with test data, this tool will optionally generate a new Galaxy tool in a gzip file
+ready to upload to your local toolshed for sharing and installation. Provide a small sample input when you run generate the tool because
+it will become the input for the generated functional test.
+
+.. class:: warningmark
+
+**Note to system administrators** This tool offers *NO* built in protection against malicious scripts. It should only be installed on private/personnal Galaxy instances.
+Admin_users will have the power to do anything they want as the Galaxy user if you install this tool.
+
+.. class:: warningmark
+
+**Use on public servers**  is STRONGLY discouraged for obvious reasons
+
+The tools generated by this tool will run just as securely as any other normal installed Galaxy tool but like any other new tools, should always be checked carefully before installation.
+We recommend that you follow the good code hygiene practices associated with safe toolshed.
+
+**Scripting conventions** The pasted script will be executed with the path to the (optional) input tabular data file path or NONE if you do not select one, and the path to the optional
+output file or None if none is wanted, as the first and second command line parameters. The script must deal appropriately with these - see Rscript examples below.
+Note that if an optional HTML output is selected, all the output files created by the script will be nicely presented as links, with pdf images linked as thumbnails in that output.
+This can be handy for complex scripts creating lots of output.
+
+**Examples**
+<![CDATA[
+
+Each of these following trivial examples can be cut and pasted into the script box for testing.
+Please make sure you choose the appropriate interpreter and upload and select a suitable small matching test data input
+
+A simple Rscript "filter" showing how the command line parameters can be handled, takes an input file, does something (transpose in this case) and 
+writes the results to a new tabular file. Note the use of colClasses to ensure that no fiddling takes place with numeric values by treating everything
+as a string::
+
+ # transpose a tabular input file and write as a tabular output file
+ ourargs = commandArgs(TRUE)
+ inf = ourargs[1]
+ outf = ourargs[2]
+ inp = read.table(inf,head=F,row.names=NULL,sep='\t',colClasses="character")
+ outp = t(inp)
+ write.table(outp,outf, quote=FALSE, sep="\t",row.names=F,col.names=F)
+
+Calculate a multiple test adjusted p value from a column of p values - for this script to be useful, it needs the right column for the input to be specified in the code for the
+given input file type(s) specified when the tool is generated ::
+
+ # use p.adjust - assumes a HEADER row and column 1 - please fix for any real use
+ column = 1 # adjust if necessary for some other kind of input
+ ourargs = commandArgs(TRUE)
+ inf = ourargs[1]
+ outf = ourargs[2]
+ inp = read.table(inf,head=T,row.names=NULL,sep='\t')
+ p = inp[,column]
+ q = p.adjust(p,method='BH')
+ outp = cbind(inp,'BH Adjusted p-value'=q)
+ write.table(outp,outf, quote=FALSE, sep="\t",row.names=F,col.names=T) 
+
+
+A demonstration Rscript example takes no input file but generates some random data based pdf images
+You must make sure the option to create an HTML output file is
+turned on for this to work. Images (pdf) are linked via thumbnails and
+all files have a link on the resulting HTML page::
+
+ # note this script takes NO input or output because it generates random data
+ for (i in 1:10) {
+    foo = runif(100)
+    bar = rnorm(100)
+    bar = foo + 0.05*bar
+    pdf(paste('yet',i,"anotherplot.pdf",sep='_'))
+    plot(foo,bar,main=paste("Foo by Bar plot #",i),col="maroon", pch=3,cex=0.6)
+    dev.off()
+    foo = data.frame(a=runif(100),b=runif(100),c=runif(100),d=runif(100),e=runif(100),f=runif(100))
+    bar = as.matrix(foo)
+    pdf(paste('yet',i,"anotherheatmap.pdf",sep='_'))
+    heatmap(bar,main='Random Heatmap')
+    dev.off()
+ }
+
+A slight variation taking an input tabular file from which we read the first number as nreps
+
+# note this script takes a single parameter
+# number of replicates
+ourargs = commandArgs(TRUE)
+infname = ourargs[1]
+nreps = read.table(infname,head=F)
+nreps = unlist(nreps)[1]
+nreps = max(c(1,nreps))
+nreps = min(c(20,nreps))
+print(paste("Using nreps=",nreps))
+for (i in 1:nreps) {
+   foo = runif(100)
+   bar = rnorm(100)
+   bar = foo + 0.2*bar
+   pdf(paste("yet",i,"anotherplot.pdf",sep="_"))
+   plot(foo,bar,main=paste("Foo by Bar plot ",i),col="maroon", pch=3,cex=0.6)
+   dev.off()
+   foo = data.frame(a=runif(100),b=runif(100),c=runif(100),d=runif(100),e=runif(100),f=runif(100))
+   bar = as.matrix(foo)
+   pdf(paste("yet",i,"anotherheatmap.pdf",sep="_"))
+   heatmap(bar,main="Random Heatmap")
+   dev.off()
+}
+
+A Python example that reverses each row of a tabular file (you'll need to remove the leading spaces 
+for this to work if cut and pasted into the script box)::
+
+ # reverse order of columns in a tabular file
+ import sys
+ inp = sys.argv[1]
+ outp = sys.argv[2]
+ i = open(inp,'r')
+ o = open(outp,'w')
+ for row in i:
+     rs = row.rstrip().split('\t')
+     rs.reverse()
+     o.write('\t'.join(rs))
+     o.write('\n')
+ i.close()
+ o.close()
+ 
+A trivial shell script example to show that it works::
+
+ #!/bin/bash
+ INF=$1
+ OUTF=$2
+ cut -c2,4,6,8,10,12 $INF > $OUTF 
+
+A trivial perl script example to show that even perl works::
+
+ #
+ # change all occurances of a string in a file to another string
+ #
+ $oldfile = $ARGV[0];
+ $newfile = $ARGV[1];
+ $old = "gene";
+ $new = "foo";
+ open(OF, $oldfile);
+ open(NF, ">$newfile");
+ # read in each line of the file
+ while ($line = <OF>) {
+    $line =~ s/$old/$new/;
+    print NF $line;
+ }
+ close(OF);
+ close(NF);
+
+]]>
+
+**Citation**
+
+
+Paper_ :
+
+Creating re-usable tools from scripts: The Galaxy Tool Factory
+Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team
+Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573
+
+
+**Licensing** 
+
+Copyright Ross Lazarus (ross period lazarus at gmail period com) May 2012
+All rights reserved.
+Licensed under the LGPL_
+
+.. _LGPL: http://www.gnu.org/copyleft/lesser.html
+.. _GTF:  https://bitbucket.org/fubar/galaxytoolfactory
+.. _GTFI:  https://bitbucket.org/fubar/galaxytoolfactory/issues
+.. _Paper: http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573?ijkey=lczQh1sWrMwdYWJ&amp;keytype=ref
+
+
+</help>
+
+</tool>
+
+