diff phylogenetic_tree.xml @ 18:f04f40a36cc8

Latest changes from Belinda and Cathy. Webb's updates to the Fst tools.
author Richard Burhans <burhans@bx.psu.edu>
date Tue, 23 Oct 2012 12:41:52 -0400
parents 8ae67e9fb6ff
children 248b06e86022
line wrap: on
line diff
--- a/phylogenetic_tree.xml	Fri Sep 28 11:57:18 2012 -0400
+++ b/phylogenetic_tree.xml	Tue Oct 23 12:41:52 2012 -0400
@@ -31,7 +31,7 @@
     <param name="input" type="data" format="gd_snp" label="SNP dataset" />
 
     <conditional name="individuals">
-      <param name="choice" type="select" label="Individuals">
+      <param name="choice" type="select" label="Compute for">
         <option value="0" selected="true">All individuals</option>
         <option value="1">Individuals in a population</option>
       </param>
@@ -41,16 +41,17 @@
       </when>
     </conditional>
 
-    <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum coverage" />
+    <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum SNP coverage" />
 
-    <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum quality" help="Note: minimum coverage and minimum quality cannot both be 0" />
+    <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum SNP quality"
+           help="Note: minimum coverage and minimum quality cannot both be 0" />
 
     <param name="include_reference" type="select" format="integer" label="Include reference sequence">
       <option value="1" selected="true">Yes</option>
       <option value="0">No</option>
     </param>
 
-    <param name="data_source" type="select" format="integer" label="Data source">
+    <param name="data_source" type="select" format="integer" label="Distance metric">
       <option value="0" selected="true">sequence coverage</option>
       <option value="1">estimated genotype</option>
     </param>
@@ -133,49 +134,49 @@
 The input parameters are:
 
 SNP dataset
-  A table of SNPs for various individuals, in gd_snp format.
+   A table of SNPs for various individuals, in gd_snp format.
 
 Individuals
-  By default all individuals are included in the analysis, but this can
-  optionally be restricted to a subset that has been defined using the
-  Specify Individuals tool.
+   By default all individuals are included in the analysis, but this can
+   optionally be restricted to a subset that has been defined using the
+   Specify Individuals tool.
 
-Minimum coverage
-  For each pair of individuals, the tool looks for informative SNPs, i.e.,
-  where the sequence data for both individuals is adequate according to
-  some criterion.  Specifying, say, 7 for this option instructs the tool
-  to consider only SNPs with coverage at least 7 in both individuals
-  when estimating their "genetic distance".
+Minimum SNP coverage
+   For each pair of individuals, the tool looks for informative SNPs, i.e.,
+   where the sequence data for both individuals is adequate.  Specifying,
+   say, 7 for this option instructs the tool to consider only SNPs with
+   at least 7 reads in each of the two individuals (regardless of the
+   alleles) when estimating their genetic distance.
 
-Minimum quality
-  Specifying, say, 37 for this option instructs the tool to consider
-  only SNPs with SAMtools quality value at least 37 in both individuals
-  when estimating their "genetic distance".
+Minimum SNP quality
+   Specifying, say, 37 for this option instructs the tool to consider
+   only SNPs with a quality score of at least 37 in both individuals
+   when estimating their genetic distance.
 
 Include reference sequence
-  For gd_snp datasets containing columns for a reference sequence, the
-  user can ask that the reference be indicated in the tree, to help with
-  rooting it.  If the dataset has no reference columns, this option has
-  no effect.
+   For gd_snp datasets containing columns for a reference sequence, the
+   user can ask that the reference be indicated in the tree, to help with
+   rooting it.  If the dataset has no reference columns, this option has
+   no effect.
 
-Data source
-  The genetic distance between two individuals at a given SNP can
-  be estimated two ways.  One method is to use the absolute value of the
-  difference in the frequency of the first allele (or equivalently, the
-  second allele).  For instance, if the first individual has 5 reads of
-  each allele and the second individual has respectively 3 and 6 reads,
-  then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that
-  SNP.  The other approach is to use the SAMtools genotypes to estimate
-  the difference in the number of occurrences of the first allele.
-  For instance, if the two genotypes are 2 and 1, i.e., the individuals
-  are estimated to have respectively 2 and 1 occurrences of the first
-  allele at this location, then the distance is 1 (the absolute value
-  of the difference of the two numbers).
+Distance metric
+   The genetic distance between two individuals at a given SNP can
+   be estimated in two ways.  One method is to use the absolute value of the
+   difference in the frequency of the first allele (or equivalently, the
+   second allele).  For instance, if the first individual has 5 reads of
+   each allele and the second individual has respectively 3 and 6 reads,
+   then the frequencies are 1/2 and 1/3, giving a distance of 1/6 at that
+   SNP.  The other approach is to use the genotype calls to estimate
+   the difference in the number of occurrences of the first allele.
+   For instance, if the two genotypes are 2 and 1, i.e., the individuals
+   are estimated to have respectively 2 and 1 occurrences of the first
+   allele at this location, then the distance is 1 (the absolute value
+   of the difference of the two numbers).
 
 Output options
-  The final four options apply mostly to the graphical drawing of the
-  tree, except that the branch lengths are also added to the Newick text
-  file.
+   The final four options apply mostly to the graphical drawing of the
+   tree, except that the branch lengths are also added to the Newick text
+   file.
 
 -----