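<!--
Provenance (this header appears to have been captured from a repository web view
of the file; it is kept as a comment so the document stays well-formed XML):
  file:     idrToolDef.xml @ 1:c6bfb8bcff93 (draft)
  summary:  Uploaded
  author:   modencode-dcc
  date:     Thu, 17 Jan 2013 15:44:53 -0500
  parents:  369b8aa2f7bd
  children: (none listed)
-->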

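<!--

Script usage (one command-line argument per line; the values shown are an example):
Rscript batch-consistency-analysis.r
../3066_rep1_VS_input0.macs14.out.regionPeak    [ first peak file ]
../3066_rep2_VS_input0.macs14.out.regionPeak    [ second peak file ]
1000                                            [ half-width ]
3066_rep1_VS_rep2                               [ output prefix ]
0                                               [ overlap ratio ]
F                                               [ file type: F = narrow peak, T = broad peak ]
p.value                                         [ significance measure: p.value, q.value or signal.value ]
genome_table.txt                                [ genome table, chosen from a drop-down in the tool form ]

Note: the <command> line of the tool definition below does not pass an output
prefix; after the genome table it passes five explicit output file paths instead.
-->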

<tool id="batch_consistency_analysis_2" name="IDR">
  <!--
    Galaxy wrapper that runs batch-consistency-analysis.r (via Rscript) on a pair
    of peak files.

    Positional arguments handed to the script, in order:
      1, 2      input1, input2   the two peak datasets
      3         halfwidth
      4         overlap
      5         option           F or T
      6         sigvalue         p.value, q.value or signal.value
      7         gtable           path of the selected genome table
      8 to 12   rout, aboveIDR, ratio, emSav, uriSav   output dataset paths
  -->
  <description>Consistency Analysis on a pair of narrowPeak files</description>
  <command interpreter="Rscript">batch-consistency-analysis.r $input1 $input2 $halfwidth $overlap $option $sigvalue $gtable $rout $aboveIDR $ratio $emSav $uriSav</command>
  <inputs>
    <param format="narrowPeak" name="input1" type="data" label="First NarrowPeak File"/>
    <param format="narrowPeak" name="input2" type="data" label="Second NarrowPeak File"/>
    <param name="halfwidth" size="4" type="integer" value="1000" label="Half-Width" help="-1 if using reported peak width"/>
<!--    <param name="outputprefix" type="text" size="50" label="Output Prefix" value="3066_rep1_VS_rep2"/> -->
    <!--
      Defaults of select parameters are declared with selected="true" on the
      chosen option. A "value" attribute on a select <param> does not pick the
      default; without "selected" the first option is used.
    -->
    <param name="option" type="select" label="File Type">
      <option value="F" selected="true">Narrow Peak</option>
      <option value="T">Broad Peak</option>
    </param>
    <param name="overlap" size="4" type="float" value="0" label="Over-Lap Ratio" help="Between 0 and 1, inclusively" min="0" max="1"/>
    <param name="sigvalue" type="select" label="Significant Value" help="Select p-value if the input peak files are generated by MACS. Select q-value if the input peak files are generated by SPP.">
      <option value="p.value" selected="true">p-value</option>
      <option value="q.value">q-value</option>
      <option value="signal.value">Signal Value</option>
    </param>
    <!--
      Option values are absolute filesystem paths that are passed verbatim to the
      script, so they must exist on the host that runs the job.
      NOTE(review): these paths are installation-specific; confirm them when
      deploying the tool elsewhere.
      The worm ws220 table is the intended default (it was previously named in the
      ineffective "value" attribute of this parameter).
    -->
    <param name="gtable" type="select" label="Genome Table">
      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.human.hg19.txt">human hg19</option>
      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.mm9.txt">mouse mm9</option>
      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.worm.ws220.txt" selected="true">worm ws220</option>
      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.dmel.r5.32.txt">dmel r5.32</option>
    </param>
  </inputs>
  <!-- Five output datasets; their paths are the last five arguments of <command>. -->
  <outputs>
    <data format="txt" name="rout" label="IDR.Rout.txt"/>
    <data format="txt" name="aboveIDR" label="IDR.npeaks-aboveIDR.txt"/>
    <data format="txt" name="ratio" label="IDR.overlapped-peaks.txt"/>
    <data format="txt" name="emSav" label="IDR.em.sav"/>
    <data format="txt" name="uriSav" label="IDR.uri.sav"/>
  </outputs>

  <tests>
    <!--
      TODO: no functional test is defined yet.
      The previous placeholder was an empty <test> element (it supplied neither of
      the two required input datasets) wrapped around a commented-out template that
      referenced a parameter ("input", file fa_gc_content_input.fa) and an output
      ("out_file1", file fa_gc_content_output.txt) that this tool does not define.
      Add a <test> with two narrowPeak inputs and expected outputs once test data
      is available.
    -->
  </tests>

  <help>
Reproducibility is essential to reliable scientific discovery in high-throughput experiments. The IDR (Irreproducible Discovery Rate) framework is a unified approach to measure the reproducibility of findings identified from replicate experiments and provide highly stable thresholds based on reproducibility. Unlike the usual scalar measures of reproducibility, the IDR approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. In layman's terms, the IDR method compares a pair of ranked lists of identifications (such as ChIP-seq peaks). These ranked lists should not be pre-thresholded i.e. they should provide identifications across the entire spectrum of high confidence/enrichment (signal) and low confidence/enrichment (noise). The IDR method then fits the bivariate rank distributions over the replicates in order to separate signal from noise based on a defined confidence of rank consistency and reproducibility of identifications i.e. the IDR threshold. For more information on IDR, see https://sites.google.com/site/anshulkundaje/projects/idr
  </help>

</tool>