view gfapts/gfap_r1.0_known_var_finder.xml @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children
line wrap: on
line source

<tool id="gfap_r1.0_known_var_finder" name="Known variants finder">
	<description>Search the GFAP database for known variants</description>
	<!--
	Command template: runs gfap_r1.0_known_var_finder.pl through perl.
	  -varfile / -outfile   : input VAR dataset and output dataset paths
	  -buildver             : genome assembly chosen in the form
	  -outdir               : "gfap" sub-directory of Galaxy's new-file path
	  -dir_* / -release_*   : database directory and release for 1000g, dbsnp and cosmic
	Path-valued arguments are single-quoted so that a path containing
	whitespace or shell metacharacters reaches the script as one argument.
	NOTE(review): the db/... directories are relative paths; how they are
	resolved is decided outside this file (confirm against the script).
	-->
	<command interpreter="perl">gfap_r1.0_known_var_finder.pl -varfile='$varfile' -buildver=$buildver -outdir='$__new_file_path__/gfap' -dir_1000g=db/1000g -dir_dbsnp=db/dbsnp -dir_cosmic=db/cosmic -release_1000g=$release_1000g -release_dbsnp=$release_dbsnp -release_cosmic=$release_cosmic -outfile='$outfile'</command>
	<!--
	Form fields shown to the user. Each param name is referenced as
	$name in the command template of this tool.
	-->
	<inputs>
		<!-- Dataset to annotate (txt format). -->
		<param name="varfile" format="txt" type="data" label="Input VAR file" />
		<!-- Reference assembly; hg19 is the only choice offered. -->
		<param name="buildver" type="select" label="Human reference genome assembly">
			<option value="hg19">GRCh37 ie. hg19</option>
		</param>
		<!-- 1000 Genomes release passed as -release_1000g; single choice. -->
		<param name="release_1000g" type="select" label="1000 Genomes data release">
			<option value="phase1_20101123">phase 1 r20101123 </option>
		</param>
		<!-- dbSNP release passed as -release_dbsnp; single choice. -->
		<param name="release_dbsnp" type="select" label="dbSNP data release">
			<option value="v135">v135</option>
		</param>
		<!-- COSMIC release passed as -release_cosmic; single choice. -->
		<param name="release_cosmic" type="select" label="COSMIC data release">
			<option value="v56">v56</option>
		</param>
	</inputs>
	<!--
	Single txt output dataset. Its path is handed to the script as
	-outfile, and its history label is the input dataset name with a
	".dbi" suffix appended.
	-->
	<outputs>
		<data format="txt" name="outfile" label="${varfile.name}.dbi" />
	</outputs>
	<help>
.. class:: infomark

**What it does**

Annotate a VAR-file with **1000G**, **dbSNP** and **COSMIC** data.

- This VAR-file has to be generated by the **SAMVCF_data_parser** gfap utility.
- The variant databases mentioned above are included in the gfap archive as **built-in pre-processed flat files**.

.. class:: infomark

**Third-party resources**

- 1000G: http://www.1000genomes.org
- dbSNP: http://www.ncbi.nlm.nih.gov/projects/SNP
- COSMIC: http://www.sanger.ac.uk/genetics/CGP/cosmic

----

**Input .var file**::

	#chr  start    end     ref  alt  ann  QC   NRF  NRR  NAF  NAR  VCF.FILTER  P.str     P.ref     P.alt      DP  AD  AF      VAR.FILTER
	chr1  14907    14907   A    G    het  9    4    0    0    3    NONE        3.33e-01  4.17e-02  8.33e-02   7   3   0.4290  SKIP
	chr1  14930    14930   A    G    het  37   4    2    0    5    NONE        1.83e-01  2.29e-01  2.08e-02   11  5   0.4550  SKIP
	chr1  68896    68896   G    A    hom  18   0    0    3    0    NONE        8.33e-02  3.33e-01  8.33e-02   3   3   1.0000  SKIP
	chr1  69270    69270   A    G    hom  179  0    0    31   0    NONE        3.10e-10  3.33e-01  3.10e-10   31  31  1.0000  SKIP
	chr1  69511    69511   A    G    hom  222  0    0    13   12   NONE        3.33e-01  3.33e-01  3.33e-01   25  25  1.0000  PASS
	chr1  69897    69897   T    C    het  14   1    0    0    3    NONE        2.08e-01  3.33e-01  8.33e-02   4   3   0.7500  SKIP
	chr1  129285   129285  G    A    het  56   0    4    0    4    NONE        2.60e-03  4.17e-02  4.17e-02   8   4   0.5000  SKIP
	chr1  567697   567697  G    A    hom  30   0    0    0    2    NONE        1.67e-01  3.33e-01  1.67e-01   2   2   1.0000  SKIP
	chr1  569803   569803  G    A    hom  50   0    0    4    0    NONE        4.17e-02  3.33e-01  4.17e-02   4   4   1.0000  SKIP
	chr1  808631   808631  G    A    hom  142  0    0    7    1    NONE        2.34e-02  3.33e-01  2.34e-02   8   8   1.0000  SKIP
	chr1  808922   808922  G    A    hom  222  0    0    15   26   NONE        3.91e-02  3.33e-01  3.91e-02   41  41  1.0000  PASS
	chr1  808928   808928  C    T    hom  222  0    0    14   31   NONE        5.36e-03  3.33e-01  5.36e-03   45  45  1.0000  PASS
	chr1  816725   816725  A    G    hom  22   0    0    2    0    NONE        1.67e-01  3.33e-01  1.67e-01   2   2   1.0000  SKIP
	chr1  821030   821030  G    T    hom  36   0    0    2    0    NONE        1.67e-01  3.33e-01  1.67e-01   2   2   1.0000  SKIP
	chr1  821143   821143  G    T    hom  8    0    0    0    2    NONE        1.67e-01  3.33e-01  1.67e-01   2   2   1.0000  SKIP

----

**Output .dbi file**::

	#From [chr] to [VAR.FILTER] = VAR file header; DPT.FILTER = check for heterogeneous depth in substituted blocks
	#AF_ALL = global AF in phase1_20101123 1000g data
	#AF_AFR = AF in AFR phase1_20101123 1000g data
	#AF_AMR = AF in AMR phase1_20101123 1000g data
	#AF_ASN = AF in ASN phase1_20101123 1000g data
	#AF_EUR = AF in EUR phase1_20101123 1000g data
	#AF_COS = AF in v56 cosmic data
	#cid = cosmic mutation identifier from v56 release
	#rs = dbsnp rs identifier(s) from v135 release
	#dbsnp = dbsnp build version(s) from v135 release
	#chr   start   end     ref  alt  NRF  NRR  NAF  NAR  DP  AD  AF      QC   P.str     P.ref     P.alt     VCF.FILTER DPT.FILTER VAR.FILTER  AF_ALL   AF_AFR   AF_AMR   AF_ASN   AF_EUR   AF_COS   cid rs dbsnp
	chr1   14907   14907   A    G    4    0    0    3    7   3   0.4290  9    3.33e-01  4.17e-02  8.33e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   14930   14930   A    G    4    2    0    5    11  5   0.4550  37   1.83e-01  2.29e-01  2.08e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   68896   68896   G    A    0    0    3    0    3   3   1.0000  18   8.33e-02  3.33e-01  8.33e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   69270   69270   A    G    0    0    31   0    31  31  1.0000  179  3.10e-10  3.33e-01  3.10e-10  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   69511   69511   A    G    0    0    13   12   25  25  1.0000  222  3.33e-01  3.33e-01  3.33e-01  NONE       PASS       PASS        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   69897   69897   T    C    1    0    0    3    4   3   0.7500  14   2.08e-01  3.33e-01  8.33e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   129285  129285  G    A    0    4    0    4    8   4   0.5000  56   2.60e-03  4.17e-02  4.17e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   567697  567697  G    A    0    0    0    2    2   2   1.0000  30   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   569803  569803  G    A    0    0    4    0    4   4   1.0000  50   4.17e-02  3.33e-01  4.17e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   808631  808631  G    A    0    0    7    1    8   8   1.0000  142  2.34e-02  3.33e-01  2.34e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   808922  808922  G    A    0    0    15   26   41  41  1.0000  222  3.91e-02  3.33e-01  3.91e-02  NONE       PASS       PASS        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   808928  808928  C    T    0    0    14   31   45  45  1.0000  222  5.36e-03  3.33e-01  5.36e-03  NONE       PASS       PASS        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   816725  816725  A    G    0    0    2    0    2   2   1.0000  22   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   821030  821030  G    T    0    0    2    0    2   2   1.0000  36   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na
	chr1   821143  821143  G    T    0    0    0    2    2   2   1.0000  8    1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na na  na

----

.. class:: infomark

**Feedback**: romain.daveau@curie.fr
	</help>
</tool>