changeset 0:369b8aa2f7bd draft

Uploaded
author modencode-dcc
date Thu, 17 Jan 2013 15:44:33 -0500
parents
children c6bfb8bcff93
files idrToolDef.xml
diffstat 1 files changed, 61 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/idrToolDef.xml	Thu Jan 17 15:44:33 2013 -0500
@@ -0,0 +1,61 @@
+<!--
+
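+Script usage (stand-alone form; the Galaxy command line below omits the output-prefix argument and appends five output file paths instead):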
+Rscript batch-consistency-analysis.r
+../3066_rep1_VS_input0.macs14.out.regionPeak
+../3066_rep2_VS_input0.macs14.out.regionPeak
+1000
+3066_rep1_VS_rep2
+0
+F
+p.value
+genome_table.txt [ drop down to select ]
+-->
+
+<tool id="batch_consistency_analysis_2" name="IDR">
+  <description>Consistency Analysis on a pair of narrowPeak files</description>
+  <!-- Arguments are positional: peak file 1, peak file 2, half-width, overlap ratio, broad-peak flag (T/F), significance measure, genome table, then the five output files. -->
+  <command interpreter="Rscript">batch-consistency-analysis.r $input1 $input2 $halfwidth $overlap $option $sigvalue $gtable $rout $aboveIDR $ratio $emSav $uriSav</command>
+  <inputs>
+    <param format="narrowPeak" name="input1" type="data" label="First NarrowPeak File"/>
+    <param format="narrowPeak" name="input2" type="data" label="Second NarrowPeak File"/>
+    <param name="halfwidth" size="4" type="integer" value="1000" label="Half-Width" help="-1 if using reported peak width"/>
+    <!-- The default of each select is marked with selected="true" on an option; Galaxy does not read a value attribute on select params. -->
+    <param name="option" type="select" label="File Type">
+      <option value="F" selected="true">Narrow Peak</option>
+      <option value="T">Broad Peak</option>
+    </param>
+    <param name="overlap" size="4" type="float" value="0" label="Over-Lap Ratio" help="Between 0 and 1, inclusively" min="0" max="1"/>
+    <param name="sigvalue" type="select" label="Significant Value" help="Select p-value if the input peak files are generated by MACS. Select q-value if the input peak files are generated by SPP.">
+      <option value="p.value" selected="true">p-value</option>
+      <option value="q.value">q-value</option>
+      <option value="signal.value">Signal Value</option>
+    </param>
+    <param name="gtable" type="select" label="Genome Table">
+      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.human.hg19.txt">human hg19</option>
+      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.mm9.txt">mouse mm9</option>
+      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.worm.ws220.txt" selected="true">worm ws220</option>
+      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.dmel.r5.32.txt">dmel r5.32</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="txt" name="rout" label="IDR.Rout.txt"/>
+    <data format="txt" name="aboveIDR" label="IDR.npeaks-aboveIDR.txt"/>
+    <data format="txt" name="ratio" label="IDR.overlapped-peaks.txt"/>
+    <data format="txt" name="emSav" label="IDR.em.sav"/>
+    <data format="txt" name="uriSav" label="IDR.uri.sav"/>
+  </outputs>
+
+  <!--
+  No functional test is defined yet. TODO(review): add a pair of small narrowPeak
+  files plus expected outputs under test-data/ and enable a block such as:
+  <tests><test>
+    <param name="input1" value="rep1.narrowPeak"/>
+    <param name="input2" value="rep2.narrowPeak"/>
+    <output name="rout" file="IDR.Rout.txt"/>
+  </test></tests> -->
+
+  <help>
+Reproducibility is essential to reliable scientific discovery in high-throughput experiments. The IDR (Irreproducible Discovery Rate) framework is a unified approach to measure the reproducibility of findings identified from replicate experiments and provide highly stable thresholds based on reproducibility. Unlike the usual scalar measures of reproducibility, the IDR approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. In layman's terms, the IDR method compares a pair of ranked lists of identifications (such as ChIP-seq peaks). These ranked lists should not be pre-thresholded i.e. they should provide identifications across the entire spectrum of high confidence/enrichment (signal) and low confidence/enrichment (noise). The IDR method then fits the bivariate rank distributions over the replicates in order to separate signal from noise based on a defined confidence of rank consistency and reproducibility of identifications i.e. the IDR threshold. For more information on IDR, see https://sites.google.com/site/anshulkundaje/projects/idr
+  </help>
+</tool>