annotate TV-vs-background.xml @ 4:58815aed4ec3 draft default tip

few bugfixes in VCF-2-variantlist
author saskia-hiltemann
date Wed, 04 Nov 2015 05:06:12 -0500
parents 885ba15c2564
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
1 <tool id="t-vs-vnormal" name="Virtual Normal Correction SmallVars" version="1.7">
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
2 <description> Filter small variants based on presence in Virtual Normal set </description>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
3
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
4 <requirements>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
5 <requirement type="package" version="1.7">cgatools</requirement>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
6 </requirements>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
7
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
<!-- Runs TV-vs-background.sh with the selected variants file, reference and Virtual Normal list. Every substituted value is double-quoted so that a path or value containing whitespace reaches the script as a single argument. -->
8 <command interpreter="bash">
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
9 TV-vs-background.sh
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
10 --variants "$variants"
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
11 --reference "${reference.fields.reference_crr_cgatools}"
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
12 --VN_varfiles "${reference.fields.VN_genomes_varfiles_list}${VNset}"
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
13 --threshold "$threshold"
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
14 --thresholdhc "$thresholdhc"
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
15 --outputfile_all "$output_all"
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
16 --outputfile_filtered "$output_filtered"
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
17 </command>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
18
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
19 <inputs>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
20 <param name="variants" type="data" format="tabular" label="List of Variants as produced by Listvariants program or VCF-2-LV conversion program"/>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
21 <!--select build-->
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
22 <param name="reference" type="select" label="Select Build">
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
23 <options from_data_table="virtual_normal_correction" />
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
24 </param>
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
25
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
26 <!-- Edit these options to reflect the sets of normals you have available. The values must name files within the directories specified in the data_table_conf.xml file -->
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
27 <param name="VNset" type="select" label="Select Virtual Normal set to use" help="1000Genomes set can only be used for hg19 samples, for hg18 54 genomes will be used.">
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
28 <option value="46_diversity.txt" > CG Diversity Panel and trios (54 Genomes) </option>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
29 <option value="433_1000g.txt" > CG 1000G project genomes (433 Genomes) (hg19 only) </option>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
30 <option value="479_diversity_1000g.txt" > Diversity and 1000G (479 genomes) (hg19 only) </option>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
31 <option value="10_tutorial.txt" > Small VN for tutorial (10 Genomes) </option>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
32 </param>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
33
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
<!-- Declared as an integer so Galaxy rejects non-numeric or negative thresholds in the form instead of passing them on to the script. -->
34 <param name="threshold" type="integer" value="1" min="0" label="Filter out variants present in at least this number of the virtual normal genomes"/>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
<!-- Declared as an integer so Galaxy rejects non-numeric or negative thresholds in the form instead of passing them on to the script. -->
35 <param name="thresholdhc" type="integer" value="10" min="0" label="High Confidence Threshold: Label a somatic variant as high-confidence if locus was fully called in at least this many normal genomes" help="Please adjust according to number of normals used and desired stringency. "/>
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
36 <param name="fname" type="text" value="" label="Prefix for your output file" help="Optional. For example sample name."/>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
37 </inputs>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
38
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
39 <outputs>
2
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
40 <data format="tabular" name="output_all" label="All variants for ${tool.name} on ${on_string}"/>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
41 <data format="tabular" name="output_filtered" label="Filtered variants for ${tool.name} on ${on_string}"/>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
42 <data format="tabular" name="output_filtered_highconf" label="${fname} High Confidence Filtered variants for ${tool.name} on ${on_string}" from_work_dir="output_filtered_highconf.tsv"/>
885ba15c2564 Uploaded
saskia-hiltemann
parents: 0
diff changeset
43
0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
44 </outputs>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
45
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
46 <help>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
47 **What it does**
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
48
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
49 This tool compares a list of variants to a set of normal genomes. Each variant will be annotated with the number of normal samples it appears in.
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
50 The tool will also output how often the variant was found in one or both alleles (01 or 11), and distinguish between a variant not being present in the normal (00)
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
51 or the location being no-called in the normal (NN) or half-called (0N,1N) etc.
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
52
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
53 This may take quite some time depending on the number of input variants and the number of normal genomes.
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
54
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
55 **Input Files**
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
56
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
57 This program takes as input a list of variants as produced by the ListVariants tool, or the vcf-to-LV preprocessing tool. Input must be a tab-separated file of the following format::
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
58
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
59 variantId - chromosome - begin - end - varType - reference - alleleSeq - xRef
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
60 1034 chr1 972803 972804 snp T C dbsnp:rs31238120
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
61
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
62 Valid entries in the varType column are: snp, sub, ins, del.
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
63
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
64 Chromosome coordinates must be zero-based half-open.
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
65
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
66 Column names must match the ones given above.
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
67
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
68
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
69 **Output Files**
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
70
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
71 1) Original input file annotated with presence (or lack thereof) in background genomes
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
72
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
73 2) Filtered version of output 1: variants are removed when present in at least *threshold* of the background normal genomes (default: 1) (filters on column 9 of the output file)
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
74
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
75 3) High Confidence filtered version of output 2. Of all the variants labelled somatic, any variants not fully called in at least *high confidence threshold* normals are filtered out (filters on column 11 of the output file)
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
76
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
77 Example output format::
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
78
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
79 variantId chromosome begin end varType reference alleleSeq xRef VN_occurrences VN_frequency VN_fullycalled_count VN_fullycalled_frequency VN_00 VN_01 VN_11 VN_0N VN_1N VN_NN VN_0 VN_1 VN_N
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
80 34 chr1 46661 46662 snp T C dbsnp.100:rs2691309 26 0.472727 33 0.6 7 19 7 1 0 20 0 0 0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
81 35 chr1 46850 46850 ins A 0 0 10 0.181818 10 0 0 5 0 39 0 0 0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
82 36 chr1 46895 46896 snp T C dbsnp.100:rs2691311 8 0.145455 40 0.727273 33 7 0 2 1 11 0 0 0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
83 37 chr1 46926 46927 snp G A dbsnp.100:rs2548884 7 0.127273 43 0.781818 36 7 0 2 0 9 0 0 0
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
84
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
85 </help>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
86
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
87 </tool>
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
88
1209f18a5a83 Uploaded
saskia-hiltemann
parents:
diff changeset
89