view tools/mytools/binaverage.xml @ 1:cdcb0ce84a1b

author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
line wrap: on
line source

<tool id="binaverage" name="bin and average">
  <description>of numeric columns</description>
  <!-- Streams the rendered "script_file" config file into R on standard input
       and redirects R's standard output to the "out_log" dataset. -->
  <command>cat $script_file | R --vanilla --slave > $out_log </command>
      <!-- Tabular dataset to analyse -->
      <param name="input" type="data" format="tabular" label="Data file"/>
      <!-- Settings for the column to average -->
      <param name="data_avg" type="integer" label="Column number of the data to average" value="1"/>
      <param name="label_avg" type="text" label="data label" value="label-avg" size="30"/>
      <param name="log_avg" type="boolean" label="log2 transform the data" truevalue="logavg" falsevalue="none" checked="False"/>
      <!-- Settings for the column used to make bins -->
      <param name="data_bin" type="integer" label="Column number of the data used to make bins" value="2"/>
      <param name="label_bin" type="text" label="data label" value="label-bin" size="30"/>
      <param name="log_bin" type="boolean" label="log2 transform the data" truevalue="logbin" falsevalue="none" checked="False"/>
      <param name="nbin" type="integer" label="number of bins" value="3"/>
      <!-- Binning strategy: equal-count bins (rank) or bins defined on the
           value scale (value). -->
      <param name="bintype" type="select" label="Bin by rank or by value">
        <option value="rank" selected="true">by rank: bins have the same number of data points</option>
        <option value="value">by value: bins may have different number of data points</option>
      </param>
      <!-- Position keyword for the legend drawn on the CDF plot -->
      <param name="legendloc" type="select" label="legend location on CDF plot">
        <option value="bottomright" selected="true">bottomright</option>
        <option value="bottomleft">bottomleft</option>
        <option value="bottom">bottom</option>
        <option value="left">left</option>
        <option value="topleft">topleft</option>
        <option value="top">top</option>
        <option value="topright">topright</option>
        <option value="right">right</option>
        <option value="center">center</option>
      </param>
      <!-- Free-text title; it is interpolated into the output dataset labels -->
      <param name="title" type="text" label="title of this analysis" value="bin-average" size="50"/>

    <!-- Template for the R script that the command pipes into R.
         Visible steps:
           1. read the input dataset as a tab-separated table;
           2. keep two columns, the binning column first and the column to
              average second;
           3. when the matching checkbox is set, replace a column by
              log2(1 + value) and prefix its label with 'log2';
           4. call binaverage(x, nbin, bintype) and loop over the bins.
         NOTE(review): binaverage is not defined in the visible part of this
         script, and the if/for blocks and the configfile element are not
         closed here. The script looks truncated in this view; confirm
         against the full file before editing.
         NOTE(review): parameter values are substituted directly into R string
         literals; presumably Galaxy sanitises them, but verify. -->
    <configfile name="script_file">
      ## Setup R error handling to go to stderr
      x = read.table("${input}",sep='\t')
      x = x[,c($data_bin,$data_avg)]
      label_avg = "${label_avg}"
      label_bin = "${label_bin}"
      if ("${log_bin}" == "logbin"){
          x[,1] = log2(1+x[,1])
          label_bin = paste('log2',label_bin)
      if ("${log_avg}" == "logavg"){
          x[,2] = log2(1+x[,2])
          label_avg = paste('log2',label_avg)
      res = binaverage(x,$nbin,"${bintype}")
      for (i in 1:${nbin}){

    <!-- Output datasets, both labelled with the user-supplied title.
         out_log receives R's standard output via the command's redirect.
         NOTE(review): out_file is not referenced in the visible command or
         script; presumably the plot is written by a part of the R script not
         shown here. Confirm. -->
    <data name="out_log" format="txt" label="${title}: (log)"/>
    <data name="out_file" format="pdf" label="${title}: (plot)"/>


.. class:: infomark

This tool generates a barplot and a CDF plot comparing the values in one numeric column across bins defined by a second numeric column. The input should have at least two numeric columns. One of the columns is used to group rows into bins, and the values in the other column are then compared across bins using a barplot, a CDF plot, and a KS test.