diff append_fdr.xml @ 0:ef7cc296f063 draft default tip

Initial commit.
author galaxyp
date Fri, 10 May 2013 16:42:08 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/append_fdr.xml	Fri May 10 16:42:08 2013 -0400
@@ -0,0 +1,103 @@
+<tool id="append_fdr" version="0.2.0" name="Compute False Discovery Rate (FDR)">
+  <description> and append it to tabular data.</description>
+  <!-- TODO: ESCAPE DECOY PREFIX -->
+  <stdio>
+    <exit_code range="1:"  level="fatal"   description="Error computing FDR" />
+  </stdio>
+  <command interpreter="python">
+    #if $decoy.specify_decoy_how == "prefix"
+    #set $decoy_prefix = $decoy.decoy_prefix
+    #set $identifiers_column = $decoy.identifiers_column
+    #else
+    #set $decoy_prefix = "1"
+    #set $identifiers_column = $decoy.decoy_column
+    #end if
+    #if $score.specify_score
+    #set $score_args = "--score_column='%s' '%s'" % ($score.score_column, $score.invert_scoring)
+    #else
+    #set $score_args = ""
+    #end if
+    append_fdr.py --input=${input} --output=${output} --decoy_prefix='${decoy_prefix}' --identifiers_column='${identifiers_column}' --fdr_type='${fdr_type}' $score_args
+    --scaling='${scaling}'
+  </command>
+  <inputs>
+    <param name="input" type="data" label="Input Table" />
+    <conditional name="decoy">
+      <param name="specify_decoy_how" type="select" label="How are decoys specified?">
+        <option value="prefix">By identifier prefix</option>
+        <option value="column">By indicator column (0 for target or 1 for decoy)</option>
+      </param>
+      <when value="prefix">
+        <param name="decoy_prefix" type="text" default="decoy" label="Decoy Prefix" value="REV_" />
+        <param name="identifiers_column" type="data_column" data_ref="input" multiple="false" numerical="false" label="Column containing identifiers." help="" />
+      </when>
+      <when value="column">
+        <param name="decoy_column" type="data_column" data_ref="input" multiple="false" numerical="true" label="Column containing decoy indicator." help="" />
+      </when>
+    </conditional>
+    <conditional name="score">
+      <param name="specify_score" type="boolean" truevalue="true" falsevalue="false" label="Specify score column?" help="Used to sort data and allows more percise handling of tied hits. If this is not checked, entries should be sorted (from best to worst) prior to use of this tool." value="false" />
+      <when value="false" />
+      <when value="true">
+        <param name="score_column" type="data_column" data_ref="input" multiple="false" numerical="true" label="Column containing scores or probabilities." help="" />
+        <param name="invert_scoring" type="boolean" truevalue="--invert_score" falsevalue="" label="Invert scoring (lower score indicates higher quality match)" value="" />
+      </when>
+    </conditional>
+    <param name="fdr_type" type="select" label="FDR Type">
+      <option value="global_conservative">Global (conservative)</option>
+      <option value="global_permissive">Global (permissive)</option>
+      <!-- <option value="pspep">Local FDR (PSPEP alogrithm) (UNDER DEVELOPMENT!)</option> -->
+    </param>
+    <param name="scaling" type="float" label="Scaling" value="2.0" help="Scaling factor used for FDR calculations, for a balanced databases some prefer 2.0 others 1.0.">
+    </param>
+  </inputs>
+  <outputs>
+    <data format="input" name="output" metadata_source="input" label="${input.name} with FDR"/>
+  </outputs>
+  <help>
+**What it does**
+
+Computes the false discovery rate (FDR) for database hits based on either a decoy prefix (e.g. entires starting with ``REV_`` or ``DECOY_``) or a column specifing whether the corresponding hit is a decoy or not (``0`` for target and ``1`` for decoy).
+
+To demonstrate the use of this tool, assume there is an input file with distributions of hits (``hit_1``, ``hit_2``, and ``hit_3``) and decoys (``rev_1``, ``rev_2``) as follows::
+
+    hit_1
+    hit_2
+    rev_1
+    hit_4
+    rev_2
+
+Then running this tool in ``Global (conservative)`` mode with a scaling factor of ``1`` will produce the following output::
+
+    hit_1 0.000000
+    hit_2 0.000000
+    rev_1 0.333333
+    hit_4 0.333333
+    rev_2 0.400000
+
+Frequently, it is desirable to assume that for each decoy hit there is one falsely identified hit. To incorporate this assumption, simply set the scaling factor to ``2`` and the following output will be produced::
+
+    hit_1 0.000000
+    hit_2 0.000000
+    rev_1 0.666667
+    hit_4 0.666667
+    rev_2 0.800000
+
+If one calls the raw rate rate ``scaling_factor*(#decoys/(#decoys+#hits))``, then ``Global (conservative)`` represents the highest raw rate of any set of any level above or including the current level. To instead compute the lowest raw rate of any level above or including the current level the ``Global (permissive)`` mode may be used. If permissive mode is used in conjuction with a scaling factor of 1, the above input will yield::
+
+    hit_1 0.000000
+    hit_2 0.000000
+    rev_1 0.250000
+    hit_4 0.250000
+    rev_2 0.400000
+
+By specifing a score column you eliminate the need for the hits and reverses to be sorted prior to running this tool and improve the handling of tied hits. 
+
+**Output**
+
+A new tabular file with the same initial columns as the input, but with a new column added at the end - the calculated FDR.
+
+  </help>
+  <!-- The local FDR rate is a reimplementation of the PSPEP alogrithm developed by ABSCIEX. It is largely untested and should not be used at this time. -->
+
+</tool>