annotate tools/rgenetics/rgEigPCA.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="rgEigPCA1" name="Eigensoft:">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>PCA Ancestry using SNP</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 <param name="i" type="data" label="Input genotype data file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 size="120" format="ldindep" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <param name="title" type="text" value="Ancestry PCA" label="Title for outputs from this run"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 size="80" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 <param name="k" type="integer" value="4" label="Number of principal components to output"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 size="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <param name="m" type="integer" value="0" label="Max. outlier removal iterations"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 help="To turn on outlier removal, set m=5 or so. Do this if you plan on adjusting any analyses"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 size="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 <param name="t" type="integer" value="5" label="# principal components used for outlier removal"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 size="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <param name="s" type="integer" value="6" label="#SDs for outlier removal"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 help = "Any individual with SD along one of k top principal components > s will be removed as an outlier."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 size="3" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 <data name="out_file1" format="html" label="${title}_rgEig.html"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <data name="pca" format="txt" label="${title}_rgEig.txt"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <param name='i' value='tinywga' ftype='ldindep' >
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 <metadata name='base_name' value='tinywga' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 <composite_data value='tinywga.bim' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <composite_data value='tinywga.bed' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <composite_data value='tinywga.fam' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <edit_attributes type='name' value='tinywga' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <param name='title' value='rgEigPCAtest1' />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <param name="k" value="4" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <param name="m" value="2" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <param name="t" value="2" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 <param name="s" value="2" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <output name='out_file1' file='rgtestouts/rgEigPCA/rgEigPCAtest1.html' ftype='html' compare='diff' lines_diff='195'>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 <extra_files type="file" name='rgEigPCAtest1_PCAPlot.pdf' value="rgtestouts/rgEigPCA/rgEigPCAtest1_PCAPlot.pdf" compare="sim_size" delta="3000"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 </output>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <output name='pca' file='rgtestouts/rgEigPCA/rgEigPCAtest1.txt' compare='diff'/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 **Syntax**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 - **Genotype data** is an input genotype dataset in Plink lped (http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml) format. See below for notes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 - **Title** is used to name the output files so you can remember what the outputs are for
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 - **Tuning parameters** are documented in the Eigensoft (http://genepath.med.harvard.edu/~reich/Software.htm) documentation - see below
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 **Summary**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 Eigensoft requires LD-reduced genotype data.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 Galaxy has an automatic converter for genotype data in Plink linkage pedigree (lped) format.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 For details of this generic genotype format, please see the Plink documentation at
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 Reading that documentation, you'll see that the linkage pedigree format is really two related files with the same
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 file base name - a map and ped file - e.g. 'mygeno.ped' and 'mygeno.map'.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 The map file has the chromosome, offset, genetic offset and snp name corresponding to each
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 genotype stored as separate alleles in the ped file. The ped file has family id, individual id, father id (or 0), mother id
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 (or 0), gender (1=male, 2=female, 0=unknown) and affection (1=unaffected, 2=affected, 0=unknown),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 then two separate allele columns for each genotype.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 Once you have your data in the right format, you can upload those into your Galaxy history using the "upload" tool.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 To upload your lped data in the upload tool, choose 'lped' as the 'file format'. The tool form will change to
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 allow you to navigate to and select each member of the pair of ped and map files stored on your local computer
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 (or available at a public URL for Galaxy to grab).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 Give the dataset a meaningful name (replace rgeneticsData with something more useful!) and click execute.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 When the upload is done, your new lped format dataset will appear in your history and then,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 when you choose the ancestry tool, that history dataset will be available as input.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 **Warning for the Impatient**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 When you execute the tool, it will look like it has not started running for a while as the automatic converter
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 reduces the amount of LD - otherwise eigenstrat gives biased results.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 **Attribution**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 This tool runs and relies on the work of many others, including the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 maintainers of the Eigensoft program, and the R and
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 Bioconductor projects. For full attribution, source code and documentation, please see
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 http://genepath.med.harvard.edu/~reich/Software.htm, http://cran.r-project.org/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 and http://www.bioconductor.org/ respectively.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 This implementation is a Galaxy tool wrapper around these third party applications.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 It was originally designed and written for family based data from the CAMP Illumina run of 2007 by
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 Ross Lazarus (ross.lazarus@gmail.com) and incorporated into the rgenetics toolkit.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 Copyright Ross Lazarus 2007
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 Licensed under the terms of the LGPL as documented at http://www.gnu.org/licenses/lgpl.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 but is about as useful as a sponge boat without EIGENSOFT pca code.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 **README from eigensoft2 distribution at http://genepath.med.harvard.edu/~reich/Software.htm**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 [rerla@beast eigensoft2]$ cat README
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 EIGENSOFT version 2.0, January 2008 (for Linux only)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 This is the same as our EIGENSOFT 2.0 BETA release with a few recent changes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 as described at http://genepath.med.harvard.edu/~reich/New_In_EIGENSOFT.htm.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 Features of EIGENSOFT version 2.0 include:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 -- Keeping track of ref/var alleles in all file formats: see CONVERTF/README
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 -- Handling data sets up to 8 billion genotypes: see CONVERTF/README
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 -- Output SNP weightings of each principal component: see POPGEN/README
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 The EIGENSOFT package implements methods from the following 2 papers:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 Patterson N. et al. 2006 PLoS Genetics in press (population structure)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 Price A.L. et al. 2006 NG 38:904-9 (EIGENSTRAT stratification correction)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 See POPGEN/README for documentation of population structure programs.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 See EIGENSTRAT/README for documentation of EIGENSTRAT programs.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 See CONVERTF/README for documentation of programs for converting file formats.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 Executables and source code:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 ----------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 All C executables are in the bin/ directory.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 We have placed source code for all C executables in the src/ directory,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 for users who wish to modify and recompile our programs. For example, to
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 recompile the eigenstrat program, type
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 "cd src"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 "make eigenstrat"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 "mv eigenstrat ../bin"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 Note that some of our software will only compile if your system has the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 lapack package installed. (This package is used to compute eigenvectors.)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 Some users may need to change "blas-3" to "blas" in the Makefile,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 depending on how blas and lapack are installed.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 If cc is not available on your system, try "cp Makefile.alt Makefile"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 and then recompile.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 If you have trouble compiling and running our code, try compiling and
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 running the pcatoy program in the src directory:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 "cd src"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 "make pcatoy"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 "./pcatoy"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 If you are unable to run the pcatoy program successfully, please contact
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 your system administrator for help, as this is a systems issue which is
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 beyond our scope. Your system administrator will be able to troubleshoot
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 your systems issue using this trivial program. [You can also try running
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 the pcatoy program in the bin directory, which we have already compiled.]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 </tool>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167