annotate append_fdr.xml @ 0:ef7cc296f063 draft default tip

Initial commit.
author galaxyp
date Fri, 10 May 2013 16:42:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
1 <tool id="append_fdr" version="0.2.0" name="Compute False Discovery Rate (FDR)">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
2 <description> and append it to tabular data.</description>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
3 <!-- TODO: ESCAPE DECOY PREFIX -->
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
4 <stdio>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
5 <exit_code range="1:" level="fatal" description="Error computing FDR" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
6 </stdio>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
7 <command interpreter="python">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
8 #if $decoy.specify_decoy_how == "prefix"
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
9 #set $decoy_prefix = $decoy.decoy_prefix
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
10 #set $identifiers_column = $decoy.identifiers_column
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
11 #else
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
12 #set $decoy_prefix = "1"
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
13 #set $identifiers_column = $decoy.decoy_column
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
14 #end if
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
15 #if $score.specify_score
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
16 #set $score_args = "--score_column='%s' '%s'" % ($score.score_column, $score.invert_scoring)
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
17 #else
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
18 #set $score_args = ""
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
19 #end if
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
20 append_fdr.py --input=${input} --output=${output} --decoy_prefix='${decoy_prefix}' --identifiers_column='${identifiers_column}' --fdr_type='${fdr_type}' $score_args
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
21 --scaling='${scaling}'
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
22 </command>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
23 <inputs>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
24 <param name="input" type="data" label="Input Table" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
25 <conditional name="decoy">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
26 <param name="specify_decoy_how" type="select" label="How are decoys specified?">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
27 <option value="prefix">By identifier prefix</option>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
28 <option value="column">By indicator column (0 for target or 1 for decoy)</option>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
29 </param>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
30 <when value="prefix">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
31 <param name="decoy_prefix" type="text" default="decoy" label="Decoy Prefix" value="REV_" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
32 <param name="identifiers_column" type="data_column" data_ref="input" multiple="false" numerical="false" label="Column containing identifiers." help="" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
33 </when>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
34 <when value="column">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
35 <param name="decoy_column" type="data_column" data_ref="input" multiple="false" numerical="true" label="Column containing decoy indicator." help="" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
36 </when>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
37 </conditional>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
38 <conditional name="score">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
39 <param name="specify_score" type="boolean" truevalue="true" falsevalue="false" label="Specify score column?" help="Used to sort data and allows more precise handling of tied hits. If this is not checked, entries should be sorted (from best to worst) prior to use of this tool." value="false" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
40 <when value="false" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
41 <when value="true">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
42 <param name="score_column" type="data_column" data_ref="input" multiple="false" numerical="true" label="Column containing scores or probabilities." help="" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
43 <param name="invert_scoring" type="boolean" truevalue="--invert_score" falsevalue="" label="Invert scoring (lower score indicates higher quality match)" value="" />
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
44 </when>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
45 </conditional>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
46 <param name="fdr_type" type="select" label="FDR Type">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
47 <option value="global_conservative">Global (conservative)</option>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
48 <option value="global_permissive">Global (permissive)</option>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
49 <!-- <option value="pspep">Local FDR (PSPEP algorithm) (UNDER DEVELOPMENT!)</option> -->
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
50 </param>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
51 <param name="scaling" type="float" label="Scaling" value="2.0" help="Scaling factor used for FDR calculations; for balanced databases some prefer 2.0, others 1.0.">
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
52 </param>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
53 </inputs>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
54 <outputs>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
55 <data format="input" name="output" metadata_source="input" label="${input.name} with FDR"/>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
56 </outputs>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
57 <help>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
58 **What it does**
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
59
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
60 Computes the false discovery rate (FDR) for database hits based on either a decoy prefix (e.g. entries starting with ``REV_`` or ``DECOY_``) or a column specifying whether the corresponding hit is a decoy or not (``0`` for target and ``1`` for decoy).
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
61
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
62 To demonstrate the use of this tool, assume there is an input file with distributions of hits (``hit_1``, ``hit_2``, and ``hit_4``) and decoys (``rev_1``, ``rev_2``) as follows::
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
63
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
64 hit_1
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
65 hit_2
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
66 rev_1
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
67 hit_4
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
68 rev_2
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
69
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
70 Then running this tool in ``Global (conservative)`` mode with a scaling factor of ``1`` will produce the following output::
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
71
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
72 hit_1 0.000000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
73 hit_2 0.000000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
74 rev_1 0.333333
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
75 hit_4 0.333333
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
76 rev_2 0.400000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
77
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
78 Frequently, it is desirable to assume that for each decoy hit there is one falsely identified hit. To incorporate this assumption, simply set the scaling factor to ``2`` and the following output will be produced::
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
79
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
80 hit_1 0.000000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
81 hit_2 0.000000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
82 rev_1 0.666667
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
83 hit_4 0.666667
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
84 rev_2 0.800000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
85
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
86 If one calls the raw rate ``scaling_factor*(#decoys/(#decoys+#hits))``, then ``Global (conservative)`` represents the highest raw rate of any level above or including the current level. To instead compute the lowest raw rate of any level above or including the current level the ``Global (permissive)`` mode may be used. If permissive mode is used in conjunction with a scaling factor of 1, the above input will yield::
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
87
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
88 hit_1 0.000000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
89 hit_2 0.000000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
90 rev_1 0.250000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
91 hit_4 0.250000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
92 rev_2 0.400000
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
93
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
94 By specifying a score column you eliminate the need for the hits and reverses to be sorted prior to running this tool and improve the handling of tied hits.
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
95
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
96 **Output**
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
97
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
98 A new tabular file with the same initial columns as the input, but with a new column added at the end - the calculated FDR.
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
99
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
100 </help>
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
101 <!-- The local FDR rate is a reimplementation of the PSPEP algorithm developed by ABSCIEX. It is largely untested and should not be used at this time. -->
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
102
ef7cc296f063 Initial commit.
galaxyp
parents:
diff changeset
103 </tool>