view snpFreq.xml @ 0:72ea0d13dd66 draft

Imported from capsule None
author devteam
date Mon, 28 Jul 2014 11:56:46 -0400
parents
children
line wrap: on
line source

<tool id="hgv_snpFreq" name="snpFreq" version="1.0.1">
  <description>significant SNPs in case-control data</description>

  <!-- Runtime dependencies: the R environment and the "qvalue" package that
       the help text below says are used for the statistics. -->
  <requirements>
    <requirement type="package" version="2.11.0">R</requirement>
    <requirement type="package" version="1.34.0">bioc_qvalue</requirement>
  </requirements>

  <!--
    Command line for snpFreq2.pl (Cheetah template).

    Positional arguments, in order:
      1. the selected input format ("tab" or "snp")
      2. the literal 0.05 (NOTE(review): presumably a significance
         threshold; confirm against snpFreq2.pl)
      3. the input dataset
      4. the output dataset
    followed by, for "tab": the six genotype-count column numbers
    (group 1 genotypes 1 to 3, then group 2 genotypes 1 to 3) and a
    trailing literal 0.05 (NOTE(review): meaning not visible here);
    for "snp": the Group 1 and Group 2 datasets.

    Every parameter declared inside the "inTypeCond" conditional is
    referenced with its fully qualified name.  The bare names only resolve
    through a legacy fallback, and "input" is declared in both branches.
    Dataset paths are single-quoted so the shell passes each one as a
    single argument.
  -->
  <command interpreter="perl">
    snpFreq2.pl $inTypeCond.inType 0.05 '$inTypeCond.input' '$output'
    #if $inTypeCond.inType == "tab"
       $inTypeCond.group1_1 $inTypeCond.group1_2 $inTypeCond.group1_3
       $inTypeCond.group2_1 $inTypeCond.group2_2 $inTypeCond.group2_3 0.05
    #else if $inTypeCond.inType == "snp"
       '$inTypeCond.group1' '$inTypeCond.group2'
    #end if
  </command>

  <!-- Tool form.  The "inType" selector decides which of the two parameter
       sets below is presented: pre-counted genotype columns in a tabular
       dataset, or a SNP table plus two group datasets. -->
  <inputs>
    <conditional name="inTypeCond">
      <param name="inType" type="select" label="Format of input">
        <option value="tab">Alleles pre-counted</option>
        <option value="snp">SNP table</option>
      </param>

      <!-- Pre-counted mode: one tabular dataset and six count columns,
           three genotypes for each of the two groups. -->
      <when value="tab">
        <param name="input" type="data" format="tabular" label="Dataset" />
        <param name="group1_1" type="data_column" data_ref="input"
               label="Column with genotype 1 count for group 1" />
        <param name="group1_2" type="data_column" data_ref="input"
               label="Column with genotype 2 count for group 1" />
        <param name="group1_3" type="data_column" data_ref="input"
               label="Column with genotype 3 count for group 1" />
        <param name="group2_1" type="data_column" data_ref="input"
               label="Column with genotype 1 count for group 2" />
        <param name="group2_2" type="data_column" data_ref="input"
               label="Column with genotype 2 count for group 2" />
        <param name="group2_3" type="data_column" data_ref="input"
               label="Column with genotype 3 count for group 2" />
      </when>

      <!-- SNP table mode: the SNP dataset plus one dataset per group. -->
      <when value="snp">
        <param name="input" type="data" format="snp" label="SNP Dataset" />
        <param name="group1" type="data" format="ind" label="Group 1" />
        <param name="group2" type="data" format="ind" label="Group 2" />
      </when>
    </conditional>
  </inputs>

  <!-- Single tabular result dataset; the command template passes it to the
       script as $output. -->
  <outputs>
    <data name="output" format="tabular" />
  </outputs>

  <!-- Functional test for the pre-counted ("tab") mode only: group 1
       genotype counts are read from columns 4 to 6 and group 2 counts from
       columns 7 to 9 of snpFreqInput.txt, and the result is compared with
       snpFreqTestOut.txt.  The "snp" mode has no test in this file. -->
  <tests>
    <test>
      <param name="inType" value="tab" />
      <param name="input" ftype="tabular" value="snpFreqInput.txt" dbkey="hg18" />
      <param name="group1_1" value="4" />
      <param name="group1_2" value="5" />
      <param name="group1_3" value="6" />
      <param name="group2_1" value="7" />
      <param name="group2_2" value="8" />
      <param name="group2_3" value="9" />
      <output name="output" file="snpFreqTestOut.txt" />
    </test>
  </tests>

  <help>

**Dataset formats**

The input is tabular_, with six columns of genotype counts (three for each
group).  The output is also tabular, and includes all of the input data plus
the additional columns described below.
(`Dataset missing?`_)

.. _tabular: ${static_path}/formatHelp.html#tab
.. _Dataset missing?: ${static_path}/formatHelp.html

-----

**What it does**

This tool performs a basic analysis of bi-allelic SNPs in case-control
data, using the R statistical environment and Fisher's exact test to
identify SNPs with a significant difference in the allele frequencies
between the two groups.  R's "qvalue" package is used to correct for
multiple testing.

The input file includes counts for each allele combination (AA aa Aa)
for each group at each SNP position.  The assignment of codes (1 2 3)
to these genotypes is arbitrary, as long as it is consistent for both
groups.  Any other input columns are ignored in the computation, but
are copied to the output.  The output appends eight additional columns,
namely the minimum expected counts of the three genotypes for each
group (six columns), followed by the p-value and the q-value.

-----

**Example**

- input file::

    chr1  210  211  38  4  15  56  0   1   x
    chr1  228  229  55  0  2   56  0   1   x
    chr1  230  231  46  0  11  55  0   2   x
    chr1  234  235  43  0  14  55  0   2   x
    chr1  236  237  55  0  2   13  10  34  x
    chr1  437  438  55  0  2   46  0   11  x
    chr1  439  440  56  0  1   55  0   2   x
    chr1  449  450  56  0  1   13  20  24  x
    chr1  518  519  56  0  1   38  4   15  x

Here the group 1 genotype counts are in columns 4 - 6, while those
for group 2 are in columns 7 - 9.

Note that the "x" column has no meaning.  It was added to this example
to show that extra columns can be included, and to make it easier
to see where the new columns are appended in the output.

- output file::

    chr1  210  211  38  4  15  56  0   1   x  47    2   8     47    2   8     1.50219088598917e-05  6.32501425679652e-06
    chr1  228  229  55  0  2   56  0   1   x  55.5  0   1.5   55.5  0   1.5   1                     0.210526315789474
    chr1  230  231  46  0  11  55  0   2   x  50.5  0   6.5   50.5  0   6.5   0.0155644201009862    0.00409590002657532
    chr1  234  235  43  0  14  55  0   2   x  49    0   8     49    0   8     0.00210854461554067   0.000739840215979182
    chr1  236  237  55  0  2   13  10  34  x  34    5   18    34    5   18    6.14613878554783e-17  4.31307984950725e-17
    chr1  437  438  55  0  2   46  0   11  x  50.5  0   6.5   50.5  0   6.5   0.0155644201009862    0.00409590002657532
    chr1  439  440  56  0  1   55  0   2   x  55.5  0   1.5   55.5  0   1.5   1                     0.210526315789474
    chr1  449  450  56  0  1   13  20  24  x  34.5  10  12.5  34.5  10  12.5  2.25757007974134e-18  2.37638955762246e-18
    chr1  518  519  56  0  1   38  4   15  x  47    2   8     47    2   8     1.50219088598917e-05  6.32501425679652e-06

  </help>
</tool>