Mercurial > repos > nick > allele_counts
annotate allele-counts.xml @ 10:7f19e8c03358 draft
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 19875a8a45eb5e40a47fe177deafe690fb4f04fb"
author | nick |
---|---|
date | Tue, 31 Mar 2020 20:24:27 -0400 |
parents | 6cc488e11544 |
children | cf2af5c3118c |
rev | line source |
---|---|
9
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
1 <tool id="allele_counts_1" version="1.3" name="Variant Annotator"> |
5 | 2 <description> process variant counts</description> |
9
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
3 <stdio> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
4 <exit_code range="1:" level="fatal" /> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
5 <exit_code range=":-1" level="fatal" /> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
6 </stdio> |
10
7f19e8c03358
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 19875a8a45eb5e40a47fe177deafe690fb4f04fb"
nick
parents:
9
diff
changeset
|
7 <command>allele-counts.py -i '$input' -o '$output' -f $freq -c $covg $header $stranded $nofilt |
7
a72277535a2c
allele-counts.xml: Fix bug causing crash when no seed is given.
nicksto <nmapsy@gmail.com>
parents:
6
diff
changeset
|
8 #if $seed: |
10
7f19e8c03358
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 19875a8a45eb5e40a47fe177deafe690fb4f04fb"
nick
parents:
9
diff
changeset
|
9 -r '$seed' |
7
a72277535a2c
allele-counts.xml: Fix bug causing crash when no seed is given.
nicksto <nmapsy@gmail.com>
parents:
6
diff
changeset
|
10 #end if |
a72277535a2c
allele-counts.xml: Fix bug causing crash when no seed is given.
nicksto <nmapsy@gmail.com>
parents:
6
diff
changeset
|
11 </command> |
0 | 12 <inputs> |
13 <param name="input" type="data" format="vcf" label="Input variants from Naive Variant Detector"/> | |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
14 <param name="freq" type="float" value="1.0" min="0" max="100" label="Minor allele frequency threshold" help="in percent"/> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
15 <param name="covg" type="integer" value="10" min="0" label="Coverage threshold" help="in reads (per strand)"/> |
5 | 16 <param name="nofilt" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Do not filter sites or alleles" /> |
17 <param name="stranded" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output stranded base counts" /> | |
3 | 18 <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="True" label="Write header line" /> |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
19 <param name="seed" type="text" value="" label="PRNG seed" /> |
0 | 20 </inputs> |
21 <outputs> | |
9
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
22 <data name="output" format="tabular" /> |
0 | 23 </outputs> |
24 | |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
25 <tests> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
26 <test> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
27 <param name="input" value="tests/artificial.vcf.in" /> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
28 <param name="freq" value="10" /> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
29 <param name="covg" value="10" /> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
30 <param name="seed" value="1" /> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
31 <output name="output" file="tests/artificial.csv.out" /> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
32 </test> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
33 </tests> |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
34 |
0 | 35 <help> |
3 | 36 |
4 | 37 .. class:: infomark |
38 | |
39 **What it does** | |
40 | |
5 | 41 This tool parses variant counts from a special VCF file. It counts simple variants, calculates numbers of alleles, and calculates minor allele frequency. It can apply filters based on coverage, strand bias, and minor allele frequency cutoffs. |
4 | 42 |
43 ----- | |
44 | |
5 | 45 .. class:: infomark |
46 | |
47 **Input Format** | |
48 | |
3 | 49 .. class:: warningmark |
50 | |
5 | 51 **Note:** variants that are not A/C/G/T SNVs will be ignored! |
3 | 52 |
5 | 53 The input VCF should be like the output of the **Naive Variant Detector** tool (using the stranded option). The sample column(s) must give the read count for each variant **on each strand**. Below is an example of a valid sample column entry (the important part is after the last colon):: |
54 | |
55 0/0:1:0.02:+T=27,+G=1,-T=22, | |
3 | 56 |
57 ----- | |
58 | |
59 .. class:: infomark | |
60 | |
5 | 61 **Output** |
3 | 62 |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
63 Each row represents one site in one sample. For **unstranded** output, 13 fields give information about that site:: |
0 | 64 |
5 | 65 1. SAMPLE - Sample name (from VCF sample column labels) |
3 | 66 2. CHR - Chromosome of the site |
67 3. POS - Chromosomal coordinate of the site | |
68 4. A - Number of reads supporting an 'A' | |
5 | 69 5. C - 'C' reads |
70 6. G - 'G' reads | |
71 7. T - 'T' reads | |
3 | 72 8. CVRG - Total coverage (number of reads supporting one of the four bases above) |
73 9. ALLELES - Number of qualifying alleles | |
5 | 74 10. MAJOR - Major allele |
75 11. MINOR - Minor allele (2nd most prevalent variant) | |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
76 12. MAF - Frequency of minor allele |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
77 13. BIAS - Strand bias measure |
3 | 78 |
5 | 79 For **stranded** output, the 4 read-count columns are replaced by 8 columns giving the read counts per base on each strand, for 17 fields in total:: |
80 | |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
81 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
82 SAMPLE CHR POS +A +C +G +T -A -C -G -T CVRG ALLELES MAJOR MINOR MAF BIAS |
5 | 83 |
4 | 84 **Example** |
85 | |
5 | 86 Below is a header line, followed by some example data lines. Since the input contained three samples, the data for each site is reported on up to three consecutive lines, one per sample. However, if a sample falls below the coverage threshold at a site, its line is omitted (as for BLOOD_2 at position 100 in this example):: |
4 | 87 |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
88 #SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
89 BLOOD_1 chr20 99 0 101 1 2 104 1 C T 0.01923 0.33657 |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
90 BLOOD_2 chr20 99 82 44 0 1 127 2 A C 0.34646 0.07823 |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
91 BLOOD_3 chr20 99 0 110 1 0 111 1 C G 0.009 1.00909 |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
92 BLOOD_1 chr20 100 3 5 100 0 108 1 G C 0.0463 0.15986 |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
93 BLOOD_3 chr20 100 1 118 11 0 130 0 C G 0.08462 0.04154 |
3 | 94 |
95 ----- | |
96 | |
97 .. class:: warningmark | |
98 | |
99 **Site printing and allele tallying requirements** | |
100 | |
5 | 101 Coverage threshold: |
3 | 102 |
5 | 103 If a coverage threshold is used, the number of reads **on each strand** must be at or above the threshold. If either strand is below the threshold, the line will be omitted. **N.B.** this means the total coverage for each printed site will be at least twice the number you give in the "coverage threshold" option. Also, since only simple variants are counted, a site with 100 reads, all supporting a deletion variant, would not be printed. |
3 | 104 |
5 | 105 Frequency threshold: |
3 | 106 |
5 | 107 If a frequency threshold is used, alleles are only counted (in the ALLELES column) if they meet or exceed this minor allele frequency threshold. |
3 | 108 |
5 | 109 Strand bias: |
3 | 110 |
5 | 111 The alleles passing the threshold on each strand must match (though not necessarily in the same order), or the allele count will be 0. So a site with A, C, G on the plus strand and A, G on the minus strand will get an allele count of zero, though the (strand-independent) major allele, minor allele, and minor allele frequency will still be reported. If there is a tie for the minor allele, one will be randomly chosen. |
3 | 112 |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
113 Additionally, a measure of strand bias is given in the last column. This is calculated using the method of Guo et al., 2012. A value of "." is given when there is no valid result of the calculation due to a zero denominator. This occurs when there are no reads on one of the strands, or when there is no minor allele. |
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
114 |
0 | 115 </help> |
116 | |
9
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
117 <citations> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
118 <citation type="bibtex"> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
119 @article{Blankenberg2014, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
120 author = {Blankenberg, Daniel and {Von Kuster}, Gregory and Bouvier, Emil and Baker, Dannon and Afgan, Enis and Stoler, Nicholas and Taylor, James and Nekrutenko, Anton}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
121 doi = {10.1186/gb4161}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
122 issn = {1465-6906}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
123 journal = {Genome Biology}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
124 keywords = {galaxy}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
125 number = {2}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
126 pages = {403}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
127 title = {{Dissemination of scientific software with Galaxy ToolShed}}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
128 url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb4161}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
129 volume = {15}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
130 year = {2014} |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
131 } |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
132 </citation> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
133 <citation type="bibtex"> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
134 @article{Dickins2014, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
135 archivePrefix = {arXiv}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
136 arxivId = {15334406}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
137 author = {Dickins, Benjamin and Rebolledo-Jaramillo, Boris and Su, Marcia Shu Wei and Paul, Ian M and Blankenberg, Daniel and Stoler, Nicholas and Makova, Kateryna D and Nekrutenko, Anton}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
138 doi = {10.2144/000114146}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
139 eprint = {15334406}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
140 isbn = {5049880467}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
141 issn = {19409818}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
142 journal = {BioTechniques}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
143 number = {3}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
144 pages = {134--141}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
145 pmid = {24641477}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
146 title = {{Controlling for contamination in re-sequencing studies with a reproducible web-based phylogenetic approach}}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
147 volume = {56}, |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
148 year = {2014} |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
149 } |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
150 </citation> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
151 </citations> |
6cc488e11544
"planemo upload for repository https://github.com/galaxyproject/dunovo commit 5a2e08bc1213b0437d0adcb45f7f431bd3c735f4"
nick
parents:
7
diff
changeset
|
152 |
6
df3b28364cd2
allele-counts.{py,xml}: Add strand bias, documentation updates.
nicksto <nmapsy@gmail.com>
parents:
5
diff
changeset
|
153 </tool> |