diff prepare_population_structure.xml @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 8ae67e9fb6ff
children f04f40a36cc8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_population_structure.xml	Fri Sep 28 11:57:18 2012 -0400
@@ -0,0 +1,118 @@
+<tool id="gd_prepare_population_structure" name="Prepare Input" version="1.0.0">
+  <description>: Filter and convert to the format needed for these tools</description>
+  <!-- Runs prepare_population_structure.py. Population dataset names and individual names are user-controlled, so those arguments are emitted in single quotes with each embedded ' rewritten as '\'' (no shell expansion or quote breakout). -->
+  <command interpreter="python">
+    prepare_population_structure.py "$input" "$min_reads" "$min_qual" "$min_spacing" "$output" "$output.files_path"
+    #if $individuals.choice == '0'
+        "all_individuals"
+    #else if $individuals.choice == '1'
+        #for $population in $individuals.populations
+          #set $pop_arg = ('population:%s:%s' % (str($population.p_input), str($population.p_input.name))).replace("'", "'\\''")
+          '$pop_arg'
+        #end for
+    #end if
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = ('individual:%s:%s' % ($individual_col, $individual)).replace("'", "'\\''")
+        '$arg'
+    #end for
+  </command>
+  <!-- Form fields. The command template reads input, min_reads, min_qual, min_spacing, individuals.choice and individuals.populations (each entry's p_input) by these names. -->
+  <inputs>
+    <param type="data" name="input" format="gd_snp" label="SNP dataset" />
+    <param type="integer" name="min_reads" value="0" min="0" label="Minimum reads covering a SNP, per individual" />
+    <param type="integer" name="min_qual" value="0" min="0" label="Minimum quality value, per individual" />
+    <param type="integer" name="min_spacing" value="0" min="0" label="Minimum spacing between SNPs on the same scaffold" />
+    <conditional name="individuals">
+      <param type="select" name="choice" label="Individuals">
+        <option selected="true" value="0">All</option>
+        <option value="1">Choose</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <repeat min="1" name="populations" title="Population">
+          <param type="data" name="p_input" format="gd_indivs" label="Individuals" />
+        </repeat>
+      </when>
+    </conditional>
+  </inputs>
+  <!-- Single gd_ped output. The metadata action gives base_name the default "admix"; the help and the test name the resulting files admix.map and admix.ped. -->
+  <outputs>
+    <data format="gd_ped" name="output">
+      <actions>
+        <action name="base_name" type="metadata" default="admix" />
+      </actions>
+    </data>
+  </outputs>
+  <!-- Functional test: all individuals (choice 0), min_reads 3, min_qual 30, min_spacing 0. Diffs the main output with lines_diff 2 and checks the admix.map and admix.ped extra files. -->
+  <tests>
+    <test>
+      <param name="input" ftype="gd_snp" value="test_in/sample.gd_snp" />
+      <param name="min_reads" value="3" />
+      <param name="min_qual" value="30" />
+      <param name="min_spacing" value="0" />
+      <param name="choice" value="0" />
+      <output name="output" ftype="html" file="test_out/prepare_population_structure/prepare_population_structure.html" compare="diff" lines_diff="2">
+        <extra_files name="admix.map" type="file" value="test_out/prepare_population_structure/admix.map" />
+        <extra_files name="admix.ped" type="file" value="test_out/prepare_population_structure/admix.ped" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_ and gd_indivs_ formats.  It is important
+for the Individuals datasets to have unique names; rename them if
+necessary to make them unique.  These names are used by the later tools in
+the graphical displays.
+The output dataset is in gd_ped_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The tool converts a gd_snp dataset into two tables, called "admix.map" and
+"admix.ped", needed for estimating the population structure.  The user
+can read or download those files, or simply pass this tool's output on to
+other programs.  The user imposes conditions on which SNPs to consider,
+such as the minimum coverage and/or quality value for every individual,
+or the minimum distance to the closest SNP on the same contig or scaffold
+(as named in the first column of the SNP table).  A useful piece of
+information produced by the tool is the number of SNPs meeting those
+conditions, which can be found by clicking on the eye icon in the history
+panel after the program runs.
+
+-----
+
+**Example**
+
+- input::
+
+    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382 C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
+    Contig48_chr1_10150253_10151311 11      A       G       94.3    chr1    10150264        A       1       0       2       30      1       0       2       30      1       0       2       30      3       0       2       36      1       0       2       30      1       0       2       30      Y       22      +99.    0
+    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
+    etc.
+
+- output map file::
+
+    1 snp1 0 2
+    1 snp3 0 4
+    1 snp4 0 5
+    1 snp5 0 6
+    1 snp6 0 7
+    1 snp7 0 8
+    1 snp8 0 9
+    1 snp9 0 10
+
+- output ped file::
+
+    PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+
+  </help>
+</tool>