annotate tools/rgenetics/rgQQ.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="rgQQ1" name="QQ Plots:">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <code file="rgQQ_code.py"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <description>for p values from an analysis </description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 rgQQ.py "$input1" "$title" "$sample" "$cols" "$allqq" "$height" "$width" "$logtrans" "$allqq.id" "$__new_file_path__"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 <page>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 <param name="input1" type="data" label="Choose the History dataset containing p values to QQ plot"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 size="80" format="tabular" help="Dataset missing? See Tip below" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 <param name="title" type="text" size="80" label = "Descriptive title for QQ plot" value="QQ" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 <param name="logtrans" type="boolean" label = "Use a log scale - recommended for p values in range 0-1.0"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 truevalue="true" falsevalue="false"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <param name="sample" type="float" label="Random sample fraction - set to 1.0 for all data points" value="0.01"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 help="If you have a million values, the QQ plots will be huge - a random sample of 1% will be fine" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 <param name="height" type="integer" label="PDF image height (inches)" value="6" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 <param name="width" type="integer" label="PDF image width (inches)" value="6" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 </page>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <page>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <param name="cols" type="select" display="checkboxes" multiple="True"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 help="Choose from these numeric columns in the data file to make a quantile-quantile plot against a uniform distribution"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 label="Columns (p values 0-1 eg) to make QQ plots" dynamic_options="get_columns( input1 )" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 </page>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 <data format="pdf" name="allqq" label="${title}.html"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 <param name='input1' value='tinywga.pphe' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 <param name='title' value="rgQQtest1" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <param name='logtrans' value="false" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <param name='sample' value='1.0' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <param name='height' value='8' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <param name='width' value='10' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <param name='cols' value='3' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <output name='allqq' file='rgQQtest1.pdf' ftype='binary' compare="diff" lines_diff="29"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 **Explanation**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 A quantile-quantile (QQ) plot is a good way to see systematic departures from the null expectation of uniform p-values
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 from a genomic analysis. If the QQ plot shows departure from the null (ie a uniform 0-1 distribution), you hope that this will be
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 in the very smallest p-values suggesting that there might be some interesting results to look at. A log scale will help emphasise departures
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 from the null at low p values.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 **Syntax**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 This tool has 2 pages. On the first one you choose the data set and output options, then on the second page, the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 column names are shown so you can choose the column(s) containing the p values you wish to plot.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 - **History data** is one of your history tabular data sets
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 - **Descriptive Title** is the text to appear in the output file names to remind you what the plots are!
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 - **Use a Log scale** is recommended for p values in the range 0-1 as it highlights departures from the null at small p values
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 - **Random Sample Fraction** is the fraction of points to randomly sample - highly recommended for >5k or so values
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 - **Height and Width** will determine the scale of the pdf images
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 **Summary**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 Generate a uniform QQ plot for any large number of p values from an analysis.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 Essentially a plot of n ranked p values against their rank as a centile - ie rank/n
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 Works well where you have a column containing p values from
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 a statistical test of some sort. These will be plotted against the values expected under the null. Departure
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 from the diagonal suggests one distribution is more extreme than the other. You hope your p values are
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 smaller than expected under the null.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 The sampling fraction will help cut down the size of the pdfs. If there are fewer than 5k points on any plot, all will be shown.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 Otherwise, either the sampled fraction of points or 5k points will be plotted, whichever is larger.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 Note that the use of a log scale is ill-advised if you are plotting log transformed p values because the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 uniform distribution chosen for the qq plot is always 0-1 and log transformation is applied if required.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 The most useful plots for p values are log QQ plots of untransformed p values in the range 0-1.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 Originally designed and written for family based data from the CAMP Illumina run of 2007 by
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 ross lazarus (ross.lazarus@gmail.com)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 </tool>