view idrToolDef.xml @ 20:6f6a9fbe264e draft default tip

author modencode-dcc
date Mon, 21 Jan 2013 13:36:24 -0500
parents aecff0dd0f16
line wrap: on
line source


Script Usage:
Rscript batch-consistency-analysis.r
genome_table.txt [ drop down to select ]

<tool id="batch_consistency_analysis_2" name="IDR">
    <!-- Environment requirement: SCRIPT_PATH is referenced on the tool's
         command line as the first argument handed to the R script. -->
    <requirements>
      <requirement type="set_environment">SCRIPT_PATH</requirement>
    </requirements>
  <!-- One-line summary of the tool. -->
  <description>Consistency Analysis on a pair of narrowPeak files</description>
  <!-- Runs batch-consistency-analysis.r through the Rscript interpreter.
       Positional arguments, in order:
         1      \$SCRIPT_PATH (escaped; presumably left for the shell to expand
                from the SCRIPT_PATH environment requirement; confirm)
         2-3    input1, input2
         4-8    halfwidth, overlap, option, sigvalue, gtable
         9-13   rout, aboveIDR, ratio, emSav, uriSav (the five output datasets)
       Note that overlap is passed BEFORE option, which is the reverse of the
       order in which those two params are declared. -->
  <command interpreter="Rscript">batch-consistency-analysis.r \$SCRIPT_PATH $input1 $input2 $halfwidth $overlap $option $sigvalue $gtable $rout $aboveIDR $ratio $emSav $uriSav </command>
    <!-- User-supplied inputs. Each name declared here is referenced as a
         $variable on the tool's command line. -->
    <inputs>
      <!-- The two replicate peak files to compare. -->
      <param format="narrowPeak" name="input1" type="data" label="First NarrowPeak File"/>
      <param format="narrowPeak" name="input2" type="data" label="Second NarrowPeak File"/>
      <param name="halfwidth" size="4" type="integer" value="1000" label="Half-Width" help="-1 if using reported peak width"/>
<!--    <param name="outputprefix" type="text" size="50" label="Output Prefix" value="3066_rep1_VS_rep2"/> -->
      <!-- Select params are closed explicitly so that following params are
           siblings rather than children, and the default choice is marked
           with selected="true" on the option itself. -->
      <param name="option" type="select" label="File Type" value="F">
        <option value="F" selected="true">Narrow Peak</option>
        <option value="T">Broad Peak</option>
      </param>
      <param name="overlap" size="4" type="float" value="0" label="Over-Lap Ratio" help="Between 0 and 1, inclusively" min="0" max="1"/>
      <param name="sigvalue" type="select" label="Significant Value" value="p.value" help="Select p-value if the input peak files are generated by MACS. Select q-value if the input peak files are generated by SPP.">
        <option value="p.value" selected="true">p-value</option>
        <option value="q.value">q-value</option>
        <option value="signal.value">Signal Value</option>
      </param>
      <!-- Default is worm ws220, as stated by the value attribute. -->
      <param name="gtable" type="select" label="Genome Table" value="genome_table.worm.ws220.txt">
        <option value="genome_table.human.hg19.txt">human hg19</option>
        <option value="genome_table.mm9.txt">mouse mm9</option>
        <option value="genome_table.worm.ws220.txt" selected="true">worm ws220</option>
        <option value="genome_table.dmel.r5.32.txt">dmel r5.32</option>
      </param>
    </inputs>
    <!-- Output datasets. Their names are passed as the last five positional
         arguments on the tool's command line, in this same order. -->
    <outputs>
      <data format="txt" name="rout" label="IDR.Rout.txt"/>
      <data format="txt" name="aboveIDR" label="IDR.npeaks-aboveIDR.txt"/>
      <data format="txt" name="ratio" label="IDR.overlapped-peaks.txt"/>
      <data format="txt" name="emSav" label="IDR.em.sav"/>
      <data format="txt" name="uriSav" label="IDR.uri.sav"/>
    </outputs>

      <!-- Test fixture: supplies a value for a param named "input" and compares
           an output named "out_file1" against fa_gc_content_output.txt.
           NOTE(review): neither name matches anything declared in this file
           (params are input1/input2/...; outputs are rout, aboveIDR, ratio,
           emSav, uriSav), and the fixture file names refer to FASTA GC content.
           This looks like a leftover from another tool's template; confirm and
           replace with a real narrowPeak test case.
           NOTE(review): no enclosing tests/test elements are visible here;
           verify they exist in the original file. -->
      <param name="input" value="fa_gc_content_input.fa"/>
      <output name="out_file1" file="fa_gc_content_output.txt"/>

Reproducibility is essential to reliable scientific discovery in high-throughput experiments. The IDR (Irreproducible Discovery Rate) framework is a unified approach to measure the reproducibility of findings identified from replicate experiments and provide highly stable thresholds based on reproducibility. Unlike the usual scalar measures of reproducibility, the IDR approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. In layman's terms, the IDR method compares a pair of ranked lists of identifications (such as ChIP-seq peaks). These ranked lists should not be pre-thresholded i.e. they should provide identifications across the entire spectrum of high confidence/enrichment (signal) and low confidence/enrichment (noise). The IDR method then fits the bivariate rank distributions over the replicates in order to separate signal from noise based on a defined confidence of rank consistency and reproducibility of identifications i.e. the IDR threshold. For more information on IDR, see