view tools/mytools/genomeView.xml @ 1:cdcb0ce84a1b

author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
line wrap: on
line source

<tool id="genomeview" name="whole genome">
  <!-- Galaxy tool wrapper: plots interval datasets genome-wide and, when several
       datasets are given, the correlation between them (see the help text). -->
  <description>plot and correlation</description>
  <!-- Pipes the rendered "script_file" configfile into R started with
       the vanilla and slave options; R's stderr is redirected to err.log
       (a relative path, so it lands in the job's current working directory). -->
  <command>cat $script_file | R --vanilla --slave 2> err.log </command>
    <!-- Genome selector. The chosen option's value is a file path that the
         script_file configfile passes to read.table(); mm9 is the default.
         NOTE(review): the paths are absolute and point into one user's
         installation (/Users/xuebing/galaxy-dist), so they will not resolve on
         other machines; presumably these are chromosome-size tables (the
         directory is named "chrsize") - confirm and consider a site-level lookup. -->
    <param name="genome" type="select" label="Select genome">
     <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/mouse.mm9.genome" selected="true">mm9</option>
     <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/mouse.mm8.genome">mm8</option>
     <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/human.hg18.genome">hg18</option>
     <option value="/Users/xuebing/galaxy-dist/tool-data/genome/chrsize/human.hg19.genome">hg19</option>
    <!-- Resolution in base pairs (default 5000). The script divides the total
         genome offset by this value to size the coverage matrix.
         The in_range validator is the enforced limit (200 to 1000000000); the
         help text and the error message now state that same range, where they
         previously quoted two other, mutually inconsistent upper bounds. -->
    <param name="resolution" type="integer" label="resolution" value="5000" help="resolution in bps. It must be between 200 and 1,000,000,000">
      <validator type="in_range" max="1000000000" min="200" message="Resolution is out of range. It must be between 200 and 1000000000" />
    <!-- Boolean switch, unchecked by default. Its value ("log" or "none") is
         read into the R variable "uselog" by the script_file configfile. -->
    <param name="log" label="plot the log" type="boolean" truevalue="log" falsevalue="none" checked="False"/>
    <!-- Boolean switch, unchecked by default. When its value is "union" the
         script fills the correlation matrix pairwise with union_correlation();
         otherwise it calls cor() on the whole coverage matrix. -->
    <param name="union" label="compute correlation in union regions" help="ignore regions covered by neither interval sets. Recommended for sparse data under high resolution when most regions are empty" type="boolean" truevalue="union" falsevalue="none" checked="False"/>    
    <!-- Repeatable input group: one entry per dataset. The script loops over
         these entries, reading each "input" file and collecting each "label".
         NOTE(review): the closing tag of this repeat element is not present in
         this view of the file - confirm against the full source. -->
    <repeat name="series" title="input file">
      <param name="label" type="text" value="" size="30" label="Data Label"/>
      <param name="input" type="data" format="interval" label="Dataset"/>

    <configfile name="script_file">
      ## R script template: the parameter placeholders and the for-loop directive
      ## below are expanded before the text is piped to R by the command element.
      ## NOTE(review): caloffset(), coverage() and union_correlation() are not
      ## defined anywhere in the visible script; presumably a helper file is
      ## sourced by a line missing from this view - confirm.
      ## Load the selected genome table and convert the tool parameters to R values.
      genome = read.table( "${genome}")
      uselog = as.character("${log}")
      union = as.character("${union}")
      resolution = as.integer("${resolution}")
      offset = caloffset(genome)
      ## Start with a single placeholder column; the row count is the last
      ## offset divided by the resolution. The placeholder is dropped after the loop.
      mcov = matrix(ncol=1,nrow=as.integer(offset[length(offset)] / resolution))
      ## Open the first output as a 20 x 4 inch PDF device.
      ## NOTE(review): no matching dev.off() is visible in this view - confirm.
      pdf( "${out_file1}" ,height=4,width=20)
      labels = character(0)
      ## For every entry of the "series" repeat: read the dataset, compute its
      ## coverage, record its label and append one column to mcov.
      #for $i, $s in enumerate( $series )
        x = read.table( "${s.input.file_name}" )
        res = coverage(x,genome,offset,resolution)
        labels = c(labels,"${s.label.value}")
        ## NOTE(review): "cov" is never assigned in the visible script (the
        ## coverage result is stored in "res"). Unless a missing line exposes it,
        ## this name resolves to R's built-in cov function - confirm.
        mcov = cbind(mcov,cov)
      #end for 
      ## Drop the placeholder first column; one label was recorded per series.
      mcov = mcov[,-1]
      nSample = length(labels)
      ## Correlation is only computed when more than one series was supplied.
      ## NOTE(review): the closing braces of both for loops, of the else branch
      ## and of the outer if are not present in this view of the file - confirm
      ## against the full source before editing the control flow.
      if (nSample > 1) {
          if (union == 'union') {
              ## Build a symmetric nSample x nSample matrix: ones on the diagonal,
              ## union_correlation() of each column pair off the diagonal.
              cm = matrix(0,nrow=nSample,ncol=nSample)
              for (i in 1:(nSample-1)) {
                  cm[i,i] = 1
                  for (j in (i+1):nSample){
                      cm[i,j] = union_correlation(mcov[,i],mcov[,j])
                      cm[j,i] = cm[i,j]        
              ## The outer loop stops at nSample-1, so the last diagonal cell is set here.
              cm[nSample,nSample] = 1
          } else {
          ## Default: correlation of all coverage columns in one call.
          cm = cor(mcov)
          ## Work on a copy so cm stays in its original order for heatmap.2 below.
          x = cm
          ## Draw a clustered heatmap of the negated correlation matrix and keep
          ## the returned list, which carries the row/column ordering.
          ## The column-label argument is labCol; it was misspelled "labRol", so
          ## the column labels were never applied.
          h = heatmap(-x,scale='none',symm=TRUE,margins=c(8,8),labRow=labels,labCol=labels)
    ## Reorder the matrix to the order shown in the heatmap. The indices are read
    ## from the list returned by heatmap() instead of relying on free variables
    ## rowInd/colInd, which are not defined in the visible script.
    x = x[h$rowInd,h$colInd]
    ## Collect, for every cell of the reordered matrix, an x position (row index),
    ## a y position (column index counted from the far end) and the cell value
    ## rounded to two decimals as text.
    ## NOTE(review): tx, ty and txt are not used by any line visible here, and
    ## the closing braces of both loops are not present in this view - confirm
    ## against the full source.
    tx = numeric(0)
    ty = numeric(0)
    txt = character(0)
    for (i in 1:nrow(x)){
        for (j in 1:ncol(x)){
            tx = c(tx,i)
            ty = c(ty,ncol(x)-j+1)
            txt = c(txt,round(x[i,j]*100)/100)
          ## Second heatmap of the unreordered correlation matrix with a colour key.
          ## NOTE(review): heatmap.2 and bluered are presumably provided by the
          ## gplots package; no library() call is visible in this view - confirm.
          heatmap.2(cm,margins=c(8,8),scale='none',key=TRUE,trace='none', symkey=T,symbreaks=T,col=bluered,labRow=labels,labCol=labels,symm=T)

    <!-- Output datasets, both PDF. The label previously started with an empty
         placeholder; it now uses the tool name so history items are identifiable.
         NOTE(review): only out_file1 is opened by the visible part of the script;
         confirm where out_file2 is written. -->
    <data format="pdf" name="out_file1" label="${tool.name} on ${on_string}: (plot)" />
    <data format="pdf" name="out_file2" label="${tool.name} on ${on_string}: (correlation)" />

.. class:: infomark

This tool plots multiple interval datasets across all chromosomes at a chosen resolution. When more than one dataset is provided, it also plots the correlation matrix between them.



.. image:: ./static/images/correlationmatrix.png
.. image:: ./static/images/wholegenome.png