view tools/mytools/align2multiple.xml @ 1:cdcb0ce84a1b

author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
line wrap: on
line source

<!-- Galaxy tool wrapper: aligns one or more interval datasets to the centers of a query interval set. -->
<tool id="align2multiple" name="align-to-multiple">
  <!-- Pipe the rendered "script_file" configfile into R; R's standard output is redirected to the logfile dataset. -->
  <command>cat $script_file | R --vanilla --slave > $logfile </command>
      <!-- Query interval set; every other dataset is aligned against it. -->
      <param name="query" type="data" format="interval" label="Query intervals" help= "keep it small (less than 1,000,000 lines)"/>
      <!-- Label for the query dataset; the script uses it as the first entry of its label list and in output file names. -->
      <param name="label" type="text" value="" size="30" label="Data Label"/>
    <!-- Window radius, passed to the alignment command as the -w option. -->
    <param name="windowsize" size="10" type="integer" value="5000" label="radius of the window"  help="will create new intervals of w bp flanking the original center. set to 0 will not change input interval size)"/>
    <!-- Number of bins per window, passed to the alignment command as the -n option. -->
    <param name="nbins" size="10" type="integer" value="20" label="Number of bins dividing the window"/>
    <!-- Boolean rendered as the string "sort" or "none"; the script compares it against "sort". -->
    <param name="sort" label="Sort intervals" help="Sort by the center of the first input, then the second input, then third..." type="boolean" truevalue="sort" falsevalue="none" checked="True"/>
    <!-- Repeatable group: each entry supplies one additional dataset and its label. -->
    <!-- NOTE(review): no closing tag for this repeat (nor an enclosing inputs element) is visible in this view; confirm against the original file. -->
    <repeat name="series" title="input file">
      <param name="label" type="text" value="" size="30" label="Data Label"/>
      <param name="input" type="data" format="interval" label="Dataset"/>

    <configfile name="script_file">
      ## Template for the R script that the tool command pipes into R.
      ## Dollar-prefixed names are filled in from the tool parameters before R sees the text.
      ## NOTE(review): closing braces for the R loops and the closing configfile tag are not
      ## visible in this view; confirm against the original file before editing.
      ## Setup R error handling to go to stderr
      ## NOTE(review): no error-handling code is visible after the comment above.
      cat('\n[',date(),'] Start running job\n')
      ## Window radius as an integer (the parameter value is substituted into the quoted string).
      windowsize = as.integer("$windowsize")
      ## labels accumulates the data labels, starting with the query label.
      labels = '$label'
      ## align query to itself
      ## NOTE(review): the command string is only echoed with cat() in the code visible here;
      ## nothing executes it, and the path ends at the mytools directory with no script name.
      cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/ -a $query -b $query -o $label-$label --profile-only -q -w $windowsize -n $nbins'
      cat('\n[',date(),'] ',cmd,'\n')
      ## align other sets to query
      ## Template-level loop: emits one copy of the three R lines below per entry of the series repeat.
      #for $i,$s in enumerate( $series )
        labels = c(labels,'$s.label.value')
        cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/ -a $s.input.file_name -b $query -o $label-$s.label.value --profile-only -q -w $windowsize -n $nbins'
        cat('\n[',date(),'] ',cmd,'\n')
      #end for
      cat('\n[',date(),'] Read output\n')
      ## read output of query2query
      ## The file name is the query label joined to itself with a dash.
      x = read.table(paste(labels[1],labels[1],sep='-'))
      ## Column 1 holds the interval ids; columns 3 onward become the matrix x.
      ids = as.character(x[,1])
      nfeat = nrow(x)
      x = as.matrix(x[,3:ncol(x)])
      nbin = ncol(x)
      ## a table mapping id to position
      ## (position = row index of that id in the query table)
      ind = list()
      for (i in 1:nfeat){
          ind[[ids[i]]] = i
      ## read other output files
      ## Each file is named query label, dash, series label. Its rows are copied into a zero
      ## matrix of nfeat by nbin at the row of the matching query id, then appended to x column-wise.
      ## NOTE(review): when no series entry exists labels has length 1, and 2:length(labels)
      ## then counts down over 2 and 1 instead of being empty; confirm whether that can occur.
      for (i in 2:length(labels)){
          x0 = read.table(paste(labels[1],labels[i],sep='-'))
          ids0 = as.character(x0[,1])
          x0 = as.matrix(x0[,3:ncol(x0)])
          x1 = matrix(0,nfeat,nbin)
          for (j in 1:nrow(x0)){
              x1[ind[[ids0[j]]],] = x0[j,]                    
          x = cbind(x,x1)          
      ## reorder
      ## Runs only when the sort parameter rendered as "sort". Going from the last label down to
      ## the second, rows are reordered by whether column i*nbin-nbin/2 is greater than zero;
      ## order() on a logical vector places FALSE rows before TRUE rows.
      if ("${sort}" == "sort"){
          cat('\n[',date(),'] Sort intervals\n')
          for (i in rev(2:length(labels))){
              x = x[order(x[,i*nbin-nbin/2]>0),]
      cat('\n[',date(),'] Plot summary\n')
      ## NOTE(review): the two loops below have no visible body in this view; the plotting
      ## code is presumably missing here, confirm against the original file.
      for (i in 1:length(labels)){
      cat('\n[',date(),'] Plot heatmap\n')
      for (i in 1:length(labels)){
      cat('\n[',date(),'] Finished\n')


    <!-- Output datasets: a text log (receives R's standard output via the tool command) and a PNG plot. -->
    <!-- NOTE(review): both labels contain an empty "${}" substitution; a variable name was presumably lost, confirm against the original file. -->
    <!-- NOTE(review): no code visible in this view writes to out_file1; confirm where the plot is produced. -->
    <data format="txt" name="logfile" label="${} on ${on_string}: (log)" />
    <data format="png" name="out_file1" label="${} on ${on_string}: (plot)" />

.. class:: infomark

This tool allows you to check the co-localization pattern of multiple interval sets. All interval sets are aligned to the center of the intervals in the query interval set.

Each row represents a window of a given size around the center of one interval in the query set (for example, a ChIP peak). Each heatmap shows the positions of other features in the SAME window (the same row in every heatmap represents the same interval/genomic position).

The example below shows that half of all Fox2 peaks are within 1 kb of a TSS. Of the half that lie outside TSSs, about one half have H3K4me1, and two thirds of those are also depleted of H3K4me3.



.. image:: ./static/images/align2multiple.png