diff aggregate_gd_indivs.xml @ 26:91e835060ad2

Updates to Admixture, Aggregate Individuals, and Restore Attributes to support gd_genotype
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 03 Jun 2013 12:29:29 -0400
parents 95a05c1ef5d5
children 8997f2ca8c7a
line wrap: on
line diff
--- a/aggregate_gd_indivs.xml	Wed May 29 13:49:19 2013 -0400
+++ b/aggregate_gd_indivs.xml	Mon Jun 03 12:29:29 2013 -0400
@@ -1,8 +1,13 @@
-<tool id="gd_sum_gd_snp" name="Aggregate Individuals" version="1.0.0">
+<tool id="gd_sum_gd_snp" name="Aggregate Individuals" version="1.1.0">
   <description>: Append summary columns for a population</description>
 
   <command interpreter="python">
     aggregate_gd_indivs.py "$input" "$p1_input" "$output"
+    #if $input_type.choice == '0'
+      "gd_snp"
+    #else if $input_type.choice == '1'
+      "gd_genotype"
+    #end if
     #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
         #set $arg = '%s:%s' % ($individual_col, $individual)
         "$arg"
@@ -10,12 +15,26 @@
   </command>
 
   <inputs>
-    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+  <conditional name="input_type">
+    <param name="choice" type="select" format="integer" label="Input format">
+      <option value="0" selected="true">gd_snp</option>
+      <option value="1">gd_genotype</option>
+    </param>
+
+    <when value="0">
+      <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+    </when>
+    <when value="1">
+      <param name="input" type="data" format="gd_genotype" label="Genotype dataset" />
+    </when>
+  </conditional>
+
     <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
   </inputs>
 
   <outputs>
-    <data name="output" format="gd_snp" metadata_source="input" />
+    <data name="output" format="input" format_source="input" metadata_source="input" />
   </outputs>
 
   <tests>
@@ -30,10 +49,11 @@
 
 **Dataset formats**
 
-The input datasets are in gd_snp_ and gd_indivs_ formats.
-The output dataset is in gd_snp_ format.  (`Dataset missing?`_)
+The input datasets are a gd_snp_ or gd_genotype_ dataset and a gd_indivs_ dataset.
+The output dataset is in the same format as the first input (gd_snp_ or gd_genotype_).  (`Dataset missing?`_)
 
 .. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_genotype: ./static/formatHelp.html#gd_genotype
 .. _gd_indivs: ./static/formatHelp.html#gd_indivs
 .. _Dataset missing?: ./static/formatHelp.html
 
@@ -41,17 +61,19 @@
 
 **What it does**
 
-The user specifies that some of the individuals in a gd_snp dataset form a
-"population", by supplying a list that has been previously created using the
-Specify Individuals tool.  The program appends a
-new "entity" (set of four columns) to the gd_snp table, analogous to the columns
-for an individual but containing summary data for the population as a group.
-These four columns give the total counts for the two alleles, the "genotype" for
-the population, and the maximum quality value, taken over all individuals in the
-population.  If all defined genotypes in the population are 2 (agree with the
-reference), then the population's genotype is 2, and similarly for 0; otherwise
-the genotype is 1 (unless all individuals have undefined genotype, in which case
-it is -1).
+The user specifies that some of the individuals in a gd_snp or gd_genotype
+dataset form a "population", by supplying a list that has been previously
+created using the Specify Individuals tool.  The program appends a new
+"entity" (set of four columns for a gd_snp table, or one column for a
+gd_genotype table), analogous to the column(s) for an individual but
+containing summary data for the population as a group.  For a gd_snp
+table, these four columns give the total counts for the two alleles,
+the "genotype" for the population, and the maximum quality value, taken
+over all individuals in the population.  If all defined genotypes in
+the population are 2 (agree with the reference), then the population's
+genotype is 2, and similarly for 0; otherwise the genotype is 1 (unless
+all individuals have undefined genotype, in which case it is -1).
+For a gd_genotype table, only a single column holding the aggregate genotype is appended.
 
 -----