diff modify_snp_table.xml @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modify_snp_table.xml	Wed Sep 12 17:10:26 2012 -0400
@@ -0,0 +1,107 @@
+<tool id="gd_modify_gd_snp" name="Modify gd_snp" version="1.0.0">
+  <description>modify a gd_snp dataset</description>
+
+  <!--
+    Command line for modify_snp_table.py: the gd_snp input, the population
+    (gd_indivs) dataset and the output path, then four threshold arguments
+    (all "-1" when choice is "0"), then one "column:name" argument for each
+    individual listed in the input dataset's metadata.
+  -->
+  <command interpreter="python">
+    modify_snp_table.py "${input}" "${p1_input}" "${output}"
+    #if $limit_coverage.choice == "0"
+        "-1" "-1" "-1" "-1"
+    #else
+        "${limit_coverage.lo_coverage}" "${limit_coverage.hi_coverage}" "${limit_coverage.low_ind_cov}" "${limit_coverage.lo_quality}"
+    #end if
+    #for $indiv_name, $indiv_column in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $indiv_arg = '%s:%s' % ($indiv_column, $indiv_name)
+        "${indiv_arg}"
+    #end for
+  </command>
+
+  <inputs>
+    <!-- The gd_snp table to modify and the gd_indivs dataset listing the population's individuals. -->
+    <param name="input" type="data" format="gd_snp" label="gd_snp dataset" />
+    <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+    <!--
+      "choice" selects between appending columns (0) and discarding SNPs (1).
+      The four thresholds below are only collected for the discard option.
+      Note: "format" applies to data parameters only, so it is not set on this select.
+    -->
+    <conditional name="limit_coverage">
+      <param name="choice" type="select" label="Option">
+        <option value="0" selected="true">add columns to the gd_snp table</option>
+        <option value="1">discard some SNPs</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" />
+        <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" />
+        <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" />
+        <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <!-- Single gd_snp output dataset; metadata_source names the "input" parameter. -->
+  <outputs>
+    <data name="output" format="gd_snp" metadata_source="input" />
+  </outputs>
+
+  <!-- Functional test exercising the "discard some SNPs" option (choice = 1). -->
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <!-- Select the discard option and set its four thresholds. -->
+      <param name="choice" value="1" />
+      <param name="lo_coverage" value="0" />
+      <param name="hi_coverage" value="1000" />
+      <param name="low_ind_cov" value="3" />
+      <param name="lo_quality" value="30" />
+      <!-- Expected result is compared against this reference file. -->
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+**Dataset formats**
+
+The input datasets are gd_snp_ and gd_indivs_ formats.
+The output dataset is in gd_snp_ format.  (`Dataset missing?`_)
+
+.. _Dataset missing?: ./static/formatHelp.html
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+
+**What it does**
+
+The user specifies that some of the individuals in the selected gd_snp_ table
+form a "population" that has been previously defined using the Galaxy tool to
+select individuals from a gd_snp dataset.  One option is for the program to append
+four columns to the table, giving the total counts for the two alleles, the
+"genotype" for the population and the maximum quality value, taken over all
+individuals in the population.  If all defined genotypes in the population
+are 2 (agree with the reference), the population's genotype is 2; similarly
+for 0; otherwise the genotype is 1 (unless all individuals have undefined
+genotype, in which case it is -1).  The other option is to remove rows from
+the table for which the total coverage for the population is either too low
+or too high, and/or if the individual coverage or quality value is too low.
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+
+**Examples**
+
+- input gd_snp::
+
+    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382	    C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
+    Contig48_chr1_10150253_10151311 11      A       G       94.3    chr1    10150264        A       1       0       2       30      1       0       2       30      1       0       2       30      3       0       2       36      1       0       2       30      1       0       2       30      Y       22      +99.    0
+    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
+    etc.
+
+- input individuals::
+
+    9	PB1
+    13	PB2
+    17	PB3
+
+- output from appending columns::
+
+    Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	        C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0	29	0	2	72
+    Contig48_chr1_10150253_10151311	11	A	G	94.3	chr1	10150264	A	1	0	2	30	1	0	2	30	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	Y	22	+99.	0	3	0	2	30
+    Contig20_chr1_21313469_21313570	66	C	T	54.0	chr1	21313534	C	4	0	2	39	4	0	2	39	5	0	2	42	4	0	2	39	4	0	2	39	5	0	2	42	N	1	+99.	0	13	0	2	42
+    etc.
+
+- output from filtering SNPs with a minimum individual coverage of 3::
+
+    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382	    C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
+    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
+    etc.
+
+  </help>
+</tool>