diff mytools/genomeView.xml @ 0:39217fa39ff2

Uploaded
author xuebing
date Tue, 13 Mar 2012 23:34:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mytools/genomeView.xml	Tue Mar 13 23:34:52 2012 -0400
@@ -0,0 +1,108 @@
+<tool id="genomeview" name="whole genome">
+  <description>plot and correlation</description>
+  <command>R --vanilla --slave &lt; $script_file 2> err.log </command> <!-- feeds the generated script to R on stdin; R's stderr is written to err.log -->
+  <inputs>
+    <param name="genome" type="select" label="Select genome"> <!-- each option value is an absolute path on the Galaxy host; the script loads it with read.table -->
+      <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/mouse.mm9.genome" selected="true">mm9</option>
+      <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/mouse.mm8.genome">mm8</option>
+      <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/human.hg18.genome">hg18</option>
+      <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/human.hg19.genome">hg19</option>
+    </param>
+    <param name="resolution" type="integer" label="resolution" value="5000" help="resolution in bps. It must be between 200 and 1,000,000,000"> <!-- help and message mirror the validator limits below -->
+      <validator type="in_range" max="1000000000" min="200" message="Resolution is out of range; it must be between 200 and 1000000000" />
+    </param>
+    <param name="log" label="plot the log" type="boolean" truevalue="log" falsevalue="none" checked="False"/>
+    <param name="union" label="compute correlation in union regions" help="ignore regions covered by neither interval sets. Recommended for sparse data under high resolution when most regions are empty" type="boolean" truevalue="union" falsevalue="none" checked="False"/>
+    <repeat name="series" title="input file"> <!-- one label/dataset pair per plotted series -->
+      <param name="label" type="text" value="" size="30" label="Data Label"/>
+      <param name="input" type="data" format="interval" label="Dataset"/>
+    </repeat>
+  </inputs>
+
+  <configfiles>
+    <configfile name="script_file">
+      ## Silence R warnings. The helpers caloffset, coverage, plotcov and union_correlation are presumably defined in the sourced genomeview.r.
+      options(warn=-1)
+      source("/Users/xuebing/galaxy-dist/tools/mytools/genomeview.r")
+      genome = read.table( "${genome}")
+      uselog = as.character("${log}")
+      union = as.character("${union}")
+      resolution = as.integer("${resolution}")
+      cat('resolution=',resolution,'\n')
+      offset = caloffset(genome)
+      mcov = matrix(ncol=1,nrow=as.integer(offset[length(offset)] / resolution))
+      ## First output PDF: one coverage plot per input series
+      pdf( "${out_file1}" ,height=4,width=20)
+      labels = character(0)
+      ## For every series: read the intervals, plot the coverage, and append the coverage as a new column of mcov
+      #for $i, $s in enumerate( $series )
+        x = read.table( "${s.input.file_name}" )
+        res = coverage(x,genome,offset,resolution)
+        plotcov(res,genome,offset,"${s.label.value}",uselog)
+        labels = c(labels,"${s.label.value}")
+        attach(res)
+        mcov = cbind(mcov,cov)
+        detach(res)
+      #end for
+      dev.off()
+      pdf("${out_file2}")
+      mcov = mcov[,-1]  ## drop the placeholder first column created with the matrix
+      nSample = length(labels)
+      if (nSample > 1) {
+          if (union == 'union') {
+              cm = matrix(0,nrow=nSample,ncol=nSample)
+              for (i in 1:(nSample-1)) {
+                  cm[i,i] = 1
+                  for (j in (i+1):nSample){
+                      cm[i,j] = union_correlation(mcov[,i],mcov[,j])
+                      cm[j,i] = cm[i,j]
+                  }
+              }
+              cm[nSample,nSample] = 1
+          } else {
+              cm = cor(mcov)
+          }
+          rm(mcov)
+          ## The first heatmap call supplies the clustering order (rowInd, colInd) of the correlation matrix.
+          ## The matrix is then reordered and drawn again with the rounded correlation values overlaid as text.
+          x = cm
+          h = heatmap(-x,scale='none',sym=T,margins=c(8,8),labRow=labels,labCol=labels)
+          attach(h)
+          x = x[rowInd,colInd]
+          tx = numeric(0)
+          ty = numeric(0)
+          txt = character(0)
+          for (i in 1:nrow(x)){
+              for (j in 1:ncol(x)){
+                  tx = c(tx,i)
+                  ty = c(ty,ncol(x)-j+1)
+                  txt = c(txt,round(x[i,j]*100)/100)
+              }
+          }
+          heatmap(-x,scale='none',sym=T,margins=c(8,8),labRow=labels[rowInd],labCol=labels[colInd],add.expr=text(tx,ty,txt,col='black'))
+          library(gplots)
+          heatmap.2(cm,margins=c(8,8),scale='none',key=TRUE,trace='none', symkey=T,symbreaks=T,col=bluered,labRow=labels,labCol=labels,symm=T)
+      }
+      dev.off()
+    </configfile>
+  </configfiles>
+
+  <outputs> <!-- both PDFs are written by the R script: out_file1 first, then out_file2 -->
+    <data name="out_file1" format="pdf" label="${tool.name} on ${on_string}: (plot)"/>
+    <data name="out_file2" format="pdf" label="${tool.name} on ${on_string}: (correlation)"/>
+  </outputs>
+
+<help>
+.. class:: infomark
+
+This tool plots one or more interval datasets across all chromosomes at a chosen resolution. When more than one dataset is provided, it also plots the correlation matrix between them.
+
+-----
+
+**Example**
+
+.. image:: ./static/images/correlationmatrix.png
+.. image:: ./static/images/wholegenome.png
+
+</help>
+</tool>