comparison append_fdr.xml @ 0:ef7cc296f063 draft default tip

Initial commit.
author galaxyp
date Fri, 10 May 2013 16:42:08 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ef7cc296f063
1 <tool id="append_fdr" version="0.2.0" name="Compute False Discovery Rate (FDR)">
2 <description> and append it to tabular data.</description>
3 <!-- TODO: ESCAPE DECOY PREFIX -->
4 <stdio> <!-- Exit codes matched by range="1:" (1 and above in Galaxy's range syntax) are reported as fatal errors. -->
5 <exit_code range="1:" level="fatal" description="Error computing FDR" />
6 </stdio>
7 <!-- Cheetah template that assembles the append_fdr.py call. Decoys are passed either as an identifier prefix plus the identifiers column, or as the literal prefix "1" plus the 0/1 indicator column. Dataset paths are single-quoted like every other argument, and the optional invert flag is left unquoted so that an unchecked box contributes no empty argument. --> <command interpreter="python">
8 #if $decoy.specify_decoy_how == "prefix"
9 #set $decoy_prefix = $decoy.decoy_prefix
10 #set $identifiers_column = $decoy.identifiers_column
11 #else
12 #set $decoy_prefix = "1"
13 #set $identifiers_column = $decoy.decoy_column
14 #end if
15 #if $score.specify_score
16 #set $score_args = "--score_column='%s' %s" % ($score.score_column, $score.invert_scoring)
17 #else
18 #set $score_args = ""
19 #end if
20 append_fdr.py --input='${input}' --output='${output}' --decoy_prefix='${decoy_prefix}' --identifiers_column='${identifiers_column}' --fdr_type='${fdr_type}' $score_args
21 --scaling='${scaling}'
22 </command>
23 <inputs> <!-- Tool form: input table, how decoys are marked, optional score column, FDR variant and scaling factor. -->
24 <param name="input" type="data" label="Input Table" />
25 <conditional name="decoy"> <!-- Decoys are recognised either by an identifier prefix or by a 0/1 indicator column. -->
26 <param name="specify_decoy_how" type="select" label="How are decoys specified?">
27 <option value="prefix">By identifier prefix</option>
28 <option value="column">By indicator column (0 for target or 1 for decoy)</option>
29 </param>
30 <when value="prefix">
31 <param name="decoy_prefix" type="text" label="Decoy Prefix" value="REV_" /> <!-- value is the initial text shown in the form. -->
32 <param name="identifiers_column" type="data_column" data_ref="input" multiple="false" numerical="false" label="Column containing identifiers." help="" />
33 </when>
34 <when value="column">
35 <param name="decoy_column" type="data_column" data_ref="input" multiple="false" numerical="true" label="Column containing decoy indicator." help="" />
36 </when>
37 </conditional>
38 <conditional name="score"> <!-- Optional score column; both checkboxes start unchecked. -->
39 <param name="specify_score" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Specify score column?" help="Used to sort data and allows more precise handling of tied hits. If this is not checked, entries should be sorted (from best to worst) prior to use of this tool." />
40 <when value="false" />
41 <when value="true">
42 <param name="score_column" type="data_column" data_ref="input" multiple="false" numerical="true" label="Column containing scores or probabilities." help="" />
43 <param name="invert_scoring" type="boolean" truevalue="--invert_score" falsevalue="" checked="false" label="Invert scoring (lower score indicates higher quality match)" /> <!-- truevalue is the literal command-line flag inserted by the command template. -->
44 </when>
45 </conditional>
46 <param name="fdr_type" type="select" label="FDR Type">
47 <option value="global_conservative">Global (conservative)</option>
48 <option value="global_permissive">Global (permissive)</option>
49 <!-- <option value="pspep">Local FDR (PSPEP algorithm) (UNDER DEVELOPMENT!)</option> -->
50 </param>
51 <param name="scaling" type="float" label="Scaling" value="2.0" help="Scaling factor used for FDR calculations; for balanced databases some prefer 2.0, others 1.0.">
52 </param>
53 </inputs>
54 <outputs> <!-- One output dataset: metadata is copied from the input table and the history label is built from the input's name. NOTE(review): format="input" presumably makes the output inherit the input's datatype; confirm against the Galaxy schema. -->
55 <data format="input" name="output" metadata_source="input" label="${input.name} with FDR"/>
56 </outputs>
57 <help>
58 **What it does**
59
60 Computes the false discovery rate (FDR) for database hits based on either a decoy prefix (e.g. entries starting with ``REV_`` or ``DECOY_``) or a column specifying whether the corresponding hit is a decoy or not (``0`` for target and ``1`` for decoy).
61
62 To demonstrate the use of this tool, assume there is an input file with distributions of hits (``hit_1``, ``hit_2``, and ``hit_4``) and decoys (``rev_1``, ``rev_2``) as follows::
63
64 hit_1
65 hit_2
66 rev_1
67 hit_4
68 rev_2
69
70 Then running this tool in ``Global (conservative)`` mode with a scaling factor of ``1`` will produce the following output::
71
72 hit_1 0.000000
73 hit_2 0.000000
74 rev_1 0.333333
75 hit_4 0.333333
76 rev_2 0.400000
77
78 Frequently, it is desirable to assume that for each decoy hit there is one falsely identified hit. To incorporate this assumption, simply set the scaling factor to ``2`` and the following output will be produced::
79
80 hit_1 0.000000
81 hit_2 0.000000
82 rev_1 0.666667
83 hit_4 0.666667
84 rev_2 0.800000
85
86 If one calls the raw rate ``scaling_factor*(#decoys/(#decoys+#hits))``, then ``Global (conservative)`` represents the highest raw rate of any level above or including the current level. To instead compute the lowest raw rate of any level below or including the current level, the ``Global (permissive)`` mode may be used. If permissive mode is used in conjunction with a scaling factor of 1, the above input will yield::
87
88 hit_1 0.000000
89 hit_2 0.000000
90 rev_1 0.250000
91 hit_4 0.250000
92 rev_2 0.400000
93
94 By specifying a score column you eliminate the need for the hits and reverses to be sorted prior to running this tool and improve the handling of tied hits.
95
96 **Output**
97
98 A new tabular file with the same initial columns as the input, but with a new column added at the end - the calculated FDR.
99
100 </help>
101 <!-- The local FDR rate is a reimplementation of the PSPEP algorithm developed by ABSCIEX. It is largely untested and should not be used at this time. -->
102
103 </tool>