diff prepare_population_structure.xml @ 18:f04f40a36cc8

Latest changes from Belinda and Cathy. Webb's updates to the Fst tools.
author Richard Burhans <burhans@bx.psu.edu>
date Tue, 23 Oct 2012 12:41:52 -0400
parents 8ae67e9fb6ff
children 248b06e86022
line wrap: on
line diff
--- a/prepare_population_structure.xml	Fri Sep 28 11:57:18 2012 -0400
+++ b/prepare_population_structure.xml	Tue Oct 23 12:41:52 2012 -0400
@@ -19,13 +19,10 @@
 
   <inputs>
     <param name="input" type="data" format="gd_snp" label="SNP dataset" />
-    <param name="min_reads" type="integer" min="0" value="0" label="Minimum reads covering a SNP, per individual" />
-    <param name="min_qual" type="integer" min="0" value="0" label="Minimum quality value, per individual" />
-    <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs on the same scaffold" />
     <conditional name="individuals">
       <param name="choice" type="select" label="Individuals">
-        <option value="0" selected="true">All</option>
-        <option value="1">Choose</option>
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Specified populations</option>
       </param>
       <when value="0" />
       <when value="1">
@@ -34,6 +31,9 @@
         </repeat>
       </when>
     </conditional>
+    <param name="min_reads" type="integer" min="0" value="0" label="Minimum SNP coverage" />
+    <param name="min_qual" type="integer" min="0" value="0" label="Minimum SNP quality" />
+    <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs" />
   </inputs>
 
   <outputs>
@@ -62,11 +62,8 @@
 
 **Dataset formats**
 
-The input datasets are in gd_snp_ and gd_indivs_ formats.  It is important
-for the Individuals datasets to have unique names; rename them if
-necessary to make them unique.  These names are used by the later tools in
-the graphical displays.
-The output dataset is gd_ped_.  (`Dataset missing?`_)
+The input datasets are in gd_snp_ and gd_indivs_ formats.
+The output dataset is in gd_ped_ format.  (`Dataset missing?`_)
 
 .. _gd_snp: ./static/formatHelp.html#gd_snp
 .. _gd_indivs: ./static/formatHelp.html#gd_indivs
@@ -77,16 +74,22 @@
 
 **What it does**
 
-The tool converts a gd_snp dataset into two tables, called "admix.map" and
-"admix.ped", needed for estimating the population structure.  The user
-can read or download those files, or simply pass this tool's output on to
-other programs.  The user imposes conditions on which SNPs to consider,
-such as the minimum coverage and/or quality value for every individual,
-or the distance to the closest SNP in the same contig (as named in the
-first column of the SNP table).  A useful piece of information produced
-by the tool is the number of SNPs meeting those conditions, which can
-be found by clicking on the eye icon in the history panel after the program 
-runs.
+This tool converts a gd_snp dataset into the format needed for estimating
+the population structure.  You can select the individuals to be included
+by using "population" datasets created via the Specify Individuals tool.
+(It is important for these population datasets to have distinguishable names,
+since they will be stored in the output's metadata so that subsequent tools
+can use them as labels.  If necessary, rename the datasets to give them
+distinct and meaningful names before running this tool.)
+
+You can also filter the SNPs based on criteria such as minimum coverage
+(a qualifying SNP must have at least this many reads for every included
+individual), minimum quality score (for every included individual), and/or
+minimum spacing (SNPs that are too close together on the same chromosome or
+scaffold are discarded).  In addition to producing the filtered and formatted
+.map and .ped files for subsequent analysis, the tool reports the number of
+SNPs meeting these conditions, which can be seen by clicking on the eye icon
+in the history panel after the program runs.
 
 -----
 
@@ -94,25 +97,36 @@
 
 - input::
 
-    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382 C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
-    Contig48_chr1_10150253_10151311 11      A       G       94.3    chr1    10150264        A       1       0       2       30      1       0       2       30      1       0       2       30      3       0       2       36      1       0       2       30      1       0       2       30      Y       22      +99.    0
-    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45  10  0  2  57   Y  54  0.323  0
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30   1  0  2  30   Y  22  +99.   0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39   5  0  2  42   N   1  +99.   0
     etc.
 
-- output map file::
+- output cover page::
+
+    Prepare to look for population structure Galaxy Composite Dataset
+    Output completed: 2012-10-01 04:09:36 PM
+
+    Outputs
+        * admix.ped (link)
+        * admix.map (link)
+        * Using 222 of 400 SNPs
 
-    1 snp1 0 2
-    1 snp3 0 4
-    1 snp4 0 5
-    1 snp5 0 6
-    1 snp6 0 7
-    1 snp7 0 8
-    1 snp8 0 9
-    1 snp9 0 10
+    Inputs
+        * Minimum reads covering a SNP, per individual: 6
+        * Minimum quality value, per individual: 0
+        * Minimum spacing between SNPs on the same scaffold: 0
 
-- output ped file::
-
-    PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+    Populations
+        * Pop. A
+             1. PB1
+             2. PB2
+        * Pop. B
+             1. PB3
+             2. PB4
+        * Pop. C
+             1. PB6
+             2. PB8
 
   </help>
 </tool>