diff filter_gd_snp.xml @ 13:fdb4240fb565

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:34:31 -0400
parents
children f04f40a36cc8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_gd_snp.xml	Fri Sep 28 11:34:31 2012 -0400
@@ -0,0 +1,83 @@
+<tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.0.0">
+  <description>: Discard some SNPs based on coverage or quality</description>
+
+  <command interpreter="python">
+    modify_snp_table.py "$input" "$p1_input" "$output" "$lo_coverage" "$hi_coverage" "$low_ind_cov" "$lo_quality"
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = '%s:%s' % ($individual_col, $individual)
+        "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+    <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+    <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" />
+    <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" />
+    <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" />
+    <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_snp" metadata_source="input" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="choice" value="1" />
+      <param name="lo_coverage" value="0" />
+      <param name="hi_coverage" value="1000" />
+      <param name="low_ind_cov" value="3" />
+      <param name="lo_quality" value="30" />
+      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_ and gd_indivs_ formats.
+The output dataset is in gd_snp_ format.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies that some of the individuals in a gd_snp dataset form a
+"population", by supplying a list that has been previously created using the
+Specify Individuals tool.  SNPs are then discarded if their total coverage
+for the population is too low or too high, or if their coverage or quality
+score for any individual in the population is too low.
+
+-----
+
+**Example**
+
+- input gd_snp::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
+    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
+    etc.
+
+- input individuals::
+
+    9   PB1
+    13  PB2
+    17  PB3
+
+- output when the lower bound on individual coverage is "3"::
+
+    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
+    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
+    etc.
+
+  </help>
+</tool>