view modify_snp_table.xml @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents
children
line wrap: on
line source

<tool id="gd_modify_gd_snp" name="Modify gd_snp" version="1.0.0">
  <description>modify a gd_snp dataset</description>

  <!--
    Command line passed to modify_snp_table.py, in order:
      1. the gd_snp input, the gd_indivs population file, and the output path;
      2. four bounds (total coverage low, total coverage high, individual
         coverage low, individual quality low), or four "-1" placeholders when
         the "add columns" option (choice 0) is selected;
      3. one "column:name" argument for every individual listed in the input
         dataset's metadata.
    NOTE(review): how the script interprets "-1" is not visible in this file;
    presumably it disables the corresponding filter. Confirm in the script.
    NOTE(review): individual names come from dataset metadata and are only
    wrapped in double quotes; verify they cannot contain shell metacharacters.
  -->
  <command interpreter="python">
    modify_snp_table.py "$input" "$p1_input" "$output"
    #if $limit_coverage.choice == "0"
        "-1" "-1" "-1" "-1"
    #else
        "${limit_coverage.lo_coverage}"
        "${limit_coverage.hi_coverage}"
        "${limit_coverage.low_ind_cov}"
        "${limit_coverage.lo_quality}"
    #end if
    #for $indiv_name, $indiv_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
        #set $column_spec = '%s:%s' % ($indiv_col, $indiv_name)
        "${column_spec}"
    #end for
  </command>

  <!--
    Tool inputs:
      input     the gd_snp table to modify
      p1_input  a gd_indivs dataset naming the individuals in the population
      limit_coverage / choice
                "0" appends columns to the table (no further parameters);
                "1" discards SNPs and exposes the four integer bounds below.
    All four bounds are non-negative integers (min="0").
  -->
  <inputs>
    <param name="input" type="data" format="gd_snp" label="gd_snp dataset" />
    <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
    <conditional name="limit_coverage">
      <param name="choice" type="select" label="Option">
        <option value="0" selected="true">add columns to the gd_snp table</option>
        <option value="1">discard some SNPs</option>
      </param>
      <when value="0" />
      <when value="1">
        <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" />
        <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" />
        <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" />
        <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" />
      </when>
    </conditional>
  </inputs>

  <!--
    Single output: a gd_snp dataset whose metadata is taken from the "input"
    dataset (metadata_source="input").
  -->
  <outputs>
    <data format="gd_snp" name="output" metadata_source="input" />
  </outputs>

  <!--
    Functional test for the "discard some SNPs" option (choice = 1): total
    coverage bounds 0..1000, individual coverage of at least 3, individual
    quality of at least 30. The result is compared with
    test_out/modify_snp_table/modify.gd_snp.
    NOTE(review): the parameters inside the limit_coverage conditional are
    referenced by their bare names here; confirm the target Galaxy release
    resolves them without a "limit_coverage|" prefix.
  -->
  <tests>
    <test>
      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
      <param name="choice" value="1" />
      <param name="lo_coverage" value="0" />
      <param name="hi_coverage" value="1000" />
      <param name="low_ind_cov" value="3" />
      <param name="lo_quality" value="30" />
      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
    </test>
  </tests>

  <help>
**Dataset formats**

The input datasets are gd_snp_ and gd_indivs_ formats.
The output dataset is in gd_snp_ format.  (`Dataset missing?`_)

.. _Dataset missing?: ./static/formatHelp.html
.. _gd_snp: ./static/formatHelp.html#gd_snp
.. _gd_indivs: ./static/formatHelp.html#gd_indivs

**What it does**

The user specifies that some of the individuals in the selected gd_snp_ table
form a "population" that has been previously defined using the Galaxy tool to
select individuals from a gd_snp dataset.  One option is for the program to append
four columns to the table, giving the total counts for the two alleles, the
"genotype" for the population and the maximum quality value, taken over all
individuals in the population.  If all defined genotypes in the population
are 2 (agree with the reference), the population's genotype is 2; similarly
for 0; otherwise the genotype is 1 (unless all individuals have undefined
genotype, in which case it is -1).  The other option is to remove rows from
the table for which the total coverage for the population is either too low
or too high, and/or if the individual coverage or quality value is too low.

.. _gd_snp: ./static/formatHelp.html#gd_snp

**Examples**

- input gd_snp::

    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382	    C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
    Contig48_chr1_10150253_10151311 11      A       G       94.3    chr1    10150264        A       1       0       2       30      1       0       2       30      1       0       2       30      3       0       2       36      1       0       2       30      1       0       2       30      Y       22      +99.    0
    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
    etc.

- input individuals::

    9	PB1
    13	PB2
    17	PB3

- output from appending columns::

    Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	        C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0	29	0	2	72
    Contig48_chr1_10150253_10151311	11	A	G	94.3	chr1	10150264	A	1	0	2	30	1	0	2	30	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	Y	22	+99.	0	3	0	2	30
    Contig20_chr1_21313469_21313570	66	C	T	54.0	chr1	21313534	C	4	0	2	39	4	0	2	39	5	0	2	42	4	0	2	39	4	0	2	39	5	0	2	42	N	1	+99.	0	13	0	2	42
    etc.

- output from filtering SNPs with a minimum count of 3 for the individuals::

    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382	    C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
    etc.

  </help>
</tool>