view gfapts/gfap_r1.0_allvar_genomic_annotater.xml @ 1:028f435b6cfb draft default tip

Uploaded
author rdaveau
date Fri, 03 Aug 2012 05:50:41 -0400
parents f753b30013e6
children
line wrap: on
line source

<tool id="gfap_r1.0_allvar_genomic_annotater" name="Map to genomic features">
	<description>Annotate variants with ANNOVAR</description>
	<!-- Runs the Perl wrapper script with the input dataset (-varfile), the
	     selected genome build and resource releases, and the two output
	     datasets (-noncoding and -coding). The RefSeq and COSMIC directories
	     are hard-coded as the relative paths db/refseq and db/cosmic.
	     NOTE(review): those relative paths are presumably resolved against the
	     directory the script is launched from; confirm against the deployment.
	     NOTE(review): -outdir points at a gfap sub-directory of Galaxy's
	     $__new_file_path__; presumably used for intermediate files, verify in
	     the script. -->
	<command interpreter="perl">gfap_r1.0_allvar_genomic_annotater.pl -varfile=$varfile -buildver=$buildver -refseq_dir=db/refseq -refseq_release=$refseq_release -cosmic_dir=db/cosmic -cosmic_release=$cosmic_release -annovar_release=$annovar_release -outdir=$__new_file_path__/gfap -noncoding=$noncoding -coding=$coding</command>
	<!-- User-facing parameters. varfile is the input dataset (format txt).
	     Each of the four select parameters currently offers a single option,
	     so the genome build (hg19) and the RefSeq, COSMIC and ANNOVAR releases
	     are effectively fixed to the values listed here. -->
	<inputs>
		<param name="varfile" format="txt" type="data" label="Input VAR or DBI file" />
		<param name="buildver" type="select" label="Human reference genome assembly">
			<option value="hg19">GRCh37 ie. hg19</option>
		</param>
		<param name="refseq_release" type="select" label="human.protein.gpff release">
			<option value="r16012012">Jan 16, 2012</option>
		</param>
		<param name="cosmic_release" type="select" label="COSMIC db release">
			<option value="v56">v56</option>
		</param>		
		<param name="annovar_release" type="select" label="ANNOVAR db release">
			<option value="jan2012">Jan 2012</option>
		</param>
	</inputs>
	<!-- Two text outputs, labelled after the input dataset name: the name plus
	     ".nc" for the non-coding annotations and the name plus ".cds" for the
	     coding annotations. -->
	<outputs>
		<data format="txt" name="noncoding" label="${varfile.name}.nc" />
		<data format="txt" name="coding" label="${varfile.name}.cds" />
	</outputs>
	<help>
.. class:: infomark

**What it does**

Annotates a VAR or DBI file with **genomic features** and splits the variants into two outputs: one for **coding** regions (.cds) and one for **non-coding** regions (.nc).

- The input VAR or DBI file must have been generated by either the **SAMVCF_data_parser** or the **Known_variants_finder** gfap utility.
- The core annotation is performed mainly by the third-party program **annotate_variation.pl** from the ANNOVAR software tools.

.. class:: infomark

**Third-party resources**

- RefSeq: http://www.ncbi.nlm.nih.gov/RefSeq
- COSMIC: http://www.sanger.ac.uk/genetics/CGP/cosmic
- ANNOVAR: http://www.openbioinformatics.org/annovar

----

**Input .dbi file**::

	#chr   start   end     ref  alt  NRF  NRR  NAF  NAR  DP  AD  AF      QC   P.str     P.ref     P.alt     VCF.FILTER DPT.FILTER VAR.FILTER  AF_ALL   AF_AFR   AF_AMR   AF_ASN   AF_EUR   AF_COS   cid rs  dbsnp
	chr1   14907   14907   A    G    4    0    0    3    7   3   0.4290  9    3.33e-01  4.17e-02  8.33e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   14930   14930   A    G    4    2    0    5    11  5   0.4550  37   1.83e-01  2.29e-01  2.08e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   68896   68896   G    A    0    0    3    0    3   3   1.0000  18   8.33e-02  3.33e-01  8.33e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   69270   69270   A    G    0    0    31   0    31  31  1.0000  179  3.10e-10  3.33e-01  3.10e-10  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   69511   69511   A    G    0    0    13   12   25  25  1.0000  222  3.33e-01  3.33e-01  3.33e-01  NONE       PASS       PASS        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   69897   69897   T    C    1    0    0    3    4   3   0.7500  14   2.08e-01  3.33e-01  8.33e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   129285  129285  G    A    0    4    0    4    8   4   0.5000  56   2.60e-03  4.17e-02  4.17e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   567697  567697  G    A    0    0    0    2    2   2   1.0000  30   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   569803  569803  G    A    0    0    4    0    4   4   1.0000  50   4.17e-02  3.33e-01  4.17e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   808631  808631  G    A    0    0    7    1    8   8   1.0000  142  2.34e-02  3.33e-01  2.34e-02  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   808922  808922  G    A    0    0    15   26   41  41  1.0000  222  3.91e-02  3.33e-01  3.91e-02  NONE       PASS       PASS        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   808928  808928  C    T    0    0    14   31   45  45  1.0000  222  5.36e-03  3.33e-01  5.36e-03  NONE       PASS       PASS        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   816725  816725  A    G    0    0    2    0    2   2   1.0000  22   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   821030  821030  G    T    0    0    2    0    2   2   1.0000  36   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na
	chr1   821143  821143  G    T    0    0    0    2    2   2   1.0000  8    1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP        0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na


----

**Output .nc file [non-coding]**::

	#From [chr] to [dbsnp] = DBI file header
	#annot = ig:intergenic; pp:1kb-upstream; 5|3u:UTR; in:intronic; ss:splice; nc:ncRNA
	#ogs = official gene symbol(s)
	#cos = gene listed in cosmic v56 release
	#chr  start   end     ref  alt NRF NRR NAF NAR DP  AD  AF      QC  P.str     P.ref     P.alt     VCF.FILTER DPT.FILTER VAR.FILTER AF_ALL   AF_AFR   AF_AMR   AF_ASN   AF_EUR   AF_COS   cid rs  dbsnp annot ogs      cos
	chr1  14907   14907   A    G   4   0   0   3   7   3   0.4290  9   3.33e-01  4.17e-02  8.33e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    nc    WASH7P   FALSE
	chr1  14930   14930   A    G   4   2   0   5   11  5   0.4550  37  1.83e-01  2.29e-01  2.08e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    nc    WASH7P   FALSE
	chr1  68896   68896   G    A   0   0   3   0   3   3   1.0000  18  8.33e-02  3.33e-01  8.33e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    pp    OR4F5    TRUE
	chr1  129285  129285  G    A   0   4   0   4   8   4   0.5000  56  2.60e-03  4.17e-02  4.17e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  567697  567697  G    A   0   0   0   2   2   2   1.0000  30  1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  569803  569803  G    A   0   0   4   0   4   4   1.0000  50  4.17e-02  3.33e-01  4.17e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  808631  808631  G    A   0   0   7   1   8   8   1.0000  142 2.34e-02  3.33e-01  2.34e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    nc    FAM41C   FALSE
	chr1  808922  808922  G    A   0   0   15  26  41  41  1.0000  222 3.91e-02  3.33e-01  3.91e-02  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    nc    FAM41C   FALSE
	chr1  808928  808928  C    T   0   0   14  31  45  45  1.0000  222 5.36e-03  3.33e-01  5.36e-03  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    nc    FAM41C   FALSE
	chr1  816725  816725  A    G   0   0   2   0   2   2   1.0000  22  1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  821030  821030  G    T   0   0   2   0   2   2   1.0000  36  1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  821143  821143  G    T   0   0   0   2   2   2   1.0000  8   1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  846489  846489  T    C   0   0   1   1   2   2   1.0000  16  3.33e-01  3.33e-01  3.33e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ig    na       na
	chr1  866319  866319  G    A   0   0   2   1   3   3   1.0000  31  3.33e-01  3.33e-01  3.33e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    in    SAMD11   TRUE
	chr1  870903  870903  T    C   0   0   3   0   3   3   1.0000  65  8.33e-02  3.33e-01  8.33e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    in    SAMD11   TRUE

----

**Output .cds file [coding]**::

	#From [chr] to [dbsnp] = DBI file header
	#annot = fd:frameshift deletion; fi:frameshift insertion; nd:nonframeshift deletion; ni:nonframeshift insertion; bs:block substitution; ss:synonymous SNV; ns:nonsynonymous SNV; sg:stopgain; sl:stoploss; na:unknown
	#ogs = official gene symbol(s)
	#cos = gene listed in cosmic v56 release
	#mid = RefSeq mRNA identifier(s) from human.protein.gpff r16012012 release
	#pid = RefSeq protein identifier(s) from human.protein.gpff r16012012 release
	#c.x = ATG-based variant descriptor in mRNA
	#p.x = ATG-based variant descriptor in protein
	#chr  start   end     ref  alt NRF NRR NAF NAR DP  AD  AF      QC  P.str     P.ref     P.alt     VCF.FILTER DPT.FILTER VAR.FILTER AF_ALL   AF_AFR   AF_AMR   AF_ASN   AF_EUR   AF_COS   cid rs  dbsnp annot ogs      cos    mid                     pid                     c.x                p.x
	chr1  69270   69270   A    G   0   0   31  0   31  31  1.0000  179 3.10e-10  3.33e-01  3.10e-10  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    OR4F5    TRUE   NM_001005484            NP_001005484            c.A180G            p.S60S
	chr1  69511   69511   A    G   0   0   13  12  25  25  1.0000  222 3.33e-01  3.33e-01  3.33e-01  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ns    OR4F5    TRUE   NM_001005484            NP_001005484            c.A421G            p.T141A
	chr1  69897   69897   T    C   1   0   0   3   4   3   0.7500  14  2.08e-01  3.33e-01  8.33e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    OR4F5    TRUE   NM_001005484            NP_001005484            c.T807C            p.S269S
	chr1  881627  881627  G    A   0   0   3   8   11  11  1.0000  88  7.55e-02  3.33e-01  7.55e-02  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    NOC2L    TRUE   NM_015658               NP_056473               c.C1843T           p.L615L
	chr1  887801  887801  A    G   0   0   6   0   6   6   1.0000  56  1.04e-02  3.33e-01  1.04e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    NOC2L    TRUE   NM_015658               NP_056473               c.T1182C           p.T394T
	chr1  888639  888639  T    C   0   0   4   9   13  13  1.0000  142 8.89e-02  3.33e-01  8.89e-02  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    NOC2L    TRUE   NM_015658               NP_056473               c.A918G            p.E306E
	chr1  888659  888659  T    C   0   0   3   9   12  12  1.0000  146 4.87e-02  3.33e-01  4.87e-02  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ns    NOC2L    TRUE   NM_015658               NP_056473               c.A898G            p.I300V
	chr1  897325  897325  G    C   0   0   9   11  20  20  1.0000  188 2.75e-01  3.33e-01  2.75e-01  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    KLHL17   TRUE   NM_198317               NP_938073               c.G609C            p.A203A
	chr1  909238  909238  G    C   0   0   3   5   8   8   1.0000  130 2.42e-01  3.33e-01  2.42e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ns    PLEKHN1  FALSE  NM_032129|NM_001160184  NP_115505|NP_001153656  c.G1460C|c.G1355C  p.R487P|p.R452P
	chr1  909242  909242  A    G   2   4   1   2   9   3   0.3330  15  1.69e-01  2.29e-01  3.33e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    PLEKHN1  FALSE  NM_032129|NM_001160184  NP_115505|NP_001153656  c.A1464G|c.A1359G  p.G488G|p.G453G
	chr1  935222  935222  C    A   0   0   0   2   2   2   1.0000  10  1.67e-01  3.33e-01  1.67e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ns    HES4     FALSE  NM_001142467            NP_001135939            c.G132T            p.R44S
	chr1  949654  949654  A    G   0   0   11  9   20  20  1.0000  222 2.75e-01  3.33e-01  2.75e-01  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    ISG15    TRUE   NM_005101               NP_005092               c.A294G            p.V98V
	chr1  981931  981931  A    G   0   0   1   1   2   2   1.0000  36  3.33e-01  3.33e-01  3.33e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    AGRN     TRUE   NM_198576               NP_940978               c.A3066G           p.S1022S
	chr1  982994  982994  T    C   0   0   13  16  29  29  1.0000  146 2.37e-01  3.33e-01  2.37e-01  NONE       PASS       PASS       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    AGRN     TRUE   NM_198576               NP_940978               c.T3558C           p.F1186F
	chr1  1021346 1021346 A    G   0   2   2   3   7   5   0.7140  66  1.51e-01  1.67e-01  3.33e-01  NONE       PASS       SKIP       0.00000  0.00000  0.00000  0.00000  0.00000  0.00000  na  na  na    ss    C1orf159 FALSE  NM_017891               NP_060361               c.T357C            p.I119I

----

.. class:: infomark

**Feedback**: romain.daveau@curie.fr
	</help>
</tool>