annotate vcfsamplecompare.xml @ 1:cc6eda569fc4 draft

planemo upload commit 9c7264014db750b32a8fa78f511c7efbd12529d9-dirty
author hepcat72
date Mon, 15 Oct 2018 15:25:30 -0400
parents cdd7fecae37c
children 2260dedc4765
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
1 <tool id="vcfsamplecompare"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
2 name="vcfsamplecompare"
1
cc6eda569fc4 planemo upload commit 9c7264014db750b32a8fa78f511c7efbd12529d9-dirty
hepcat72
parents: 0
diff changeset
3 version="0.1.2">
0
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
4
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
5 <description>sort/filter variants that differ between sample groups</description>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
6
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
7 <requirements>
1
cc6eda569fc4 planemo upload commit 9c7264014db750b32a8fa78f511c7efbd12529d9-dirty
hepcat72
parents: 0
diff changeset
8 <requirement type="package" version="v2.008">vcfsamplecompare</requirement>
0
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
9 </requirements>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
10
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
11 <version_command>vcfSampleCompare.pl --version</version_command>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
12
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
13 <command detect_errors="aggressive">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
14 <![CDATA[
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
15 vcfSampleCompare.pl
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
16 '$infile'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
17
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
18 #for $comp in $rep_comparisons:
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
19 -s '$comp.groupone'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
20 -d '$comp.minone'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
21 -s '$comp.grouptwo'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
22 -d '$comp.mintwo'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
23 #end for
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
24
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
25 $genotypemode
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
26 $filtermode
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
27 $growmode
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
28 $header
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
29 -u '.vsc'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
30 -o '.vsc.vcf' &&
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
31 mv '${infile}.vsc.vcf' '$vcfoutfile' &&
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
32 mv '${infile}.vsc' '$rvcfoutfile'
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
33 ]]>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
34 </command>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
35
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
36 <inputs>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
37 <param format="vcf"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
38 name="infile"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
39 label="VCF file"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
40 argument="-i"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
41 type="data"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
42
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
43 help="A VCF file that has more than 1 sample column."/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
44
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
45 <repeat name="rep_comparisons" title="Sample Comparison" min="0">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
46 <param name="groupone"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
47 type="text"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
48 label="Sample Group 1"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
49 help="Space delimited list of sample names (exactly as they appear in the column headers of the VCF file), e.g. 'wt1 wt2 wt3'."/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
50 <param name="minone"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
51 type="integer"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
52 min="1"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
53 value="1"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
54 label="Minimum size of Sample Group 1"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
55 help="The minimum size of sample group 1 must be an integer between 1 and the number of samples in sample group 1. The minimum of 1 of the 2 sample groups must represent a majority of that group (to produce meaningful results). Use-case: If you have 3 wild type replicate samples in group 1 and you want to find at least 2 of 10 mutant samples from group 2 that differ from all of the wildtype samples, you would specify `-d 3` for group 1 and `-d 2` for group 2"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
56 <param name="grouptwo"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
57 type="text"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
58 label="Sample Group 2"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
59 help="Space delimited list of sample names (exactly as they appear in the column headers of the VCF file), e.g. 'mutant1 mutant2 mutant3'."/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
60 <param name="mintwo"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
61 type="integer"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
62 min="1"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
63 value="1"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
64 label="Minimum size of Sample Group 2"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
65 help="The minimum size of sample group 2 must be an integer between 1 and the number of samples in sample group 2. The minimum of 1 of the 2 sample groups must represent a majority of that group (to produce meaningful results). Use-case: If you have 3 wild type replicate samples in group 1 and you want to find at least 2 of 10 mutant samples from group 2 that differ from all of the wildtype samples, you would specify `-d 3` for group 1 and `-d 2` for group 2"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
66 </repeat>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
67
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
68 <param name="obsrat"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
69 label="Allelic frequency difference gap"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
70 argument="-a"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
71 type="float"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
72 value="0.6"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
73
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
74 help="A decimal value between 0.0 and 1.0 (inclusive) indicating the minimum difference in the observation ratio averages of a particular variant state (e.g. 'AO/DP') in the sample groups being compared. Not used in genotype mode." />
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
75
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
76 <param name="genotypemode"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
77 label="Comparison mode"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
78 argument="--genotype|--nogenotype"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
79 type="select"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
80
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
81 help="The basis for the degree of difference between the 2 sample groups can be either the genotype calls or allelic frequency (i.e. observation ratios (AO/DP)).">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
82 <option value="--genotype" selected="true">Genotype calls</option>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
83 <option value="--nogenotype">Allelic Frequencies</option>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
84 </param>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
85
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
86 <param name="filtermode"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
87 label="Filter mode"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
88 argument="--filter|--nofilter"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
89 type="select"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
90
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
91 help="Filtering will omit rows based on the comparison mode. If the comparison mode is based on genotype calls, the sample groups must not share any genotype calls in common. If the comparison mode is allelic frequencies, the difference between the average observation ratios in the sample groups must be greater than (or equal to) the allelic frequency difference gap (-a).">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
92 <option value="--filter" selected="true">Filter</option>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
93 <option value="--nofilter">Do not filter</option>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
94 </param>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
95
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
96 <param name="growmode"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
97 label="Grow mode"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
98 argument="--grow|--nogrow"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
99 type="select"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
100
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
101 help="Sample groups will initially be greedily created using their minimum size (-d). If the minimum sample group size is less than the number of samples available for a group, samples will be added when in grow mode (in a way consistent with the comparison mode). If the comparison mode is based on genotype calls, as long as the sample groups do not share any genotype calls in common, samples will be greedily added to each group. If the comparison mode is allelic frequencies, as long as the difference between the average observation ratios in the sample groups is greater than (or equal to) the allelic frequency difference gap (-a), samples will be greedily added to each group. Note, grow mode may lower the sort order of a variant in allelic frequency mode.">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
102 <option value="--grow" selected="true">Grow sample groups from the minimum size (-d)</option>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
103 <option value="--nogrow">Do not grow sample groups</option>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
104 </param>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
105
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
106 <param name="header"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
107 label="Header"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
108 argument="--header|--noheader"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
109 type="hidden"
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
110 value="--noheader"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
111 </inputs>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
112
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
113 <outputs>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
114 <data format="vcf" name="vcfoutfile" />
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
115 <data format="tabular" name="rvcfoutfile" />
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
116 </outputs>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
117
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
118 <tests>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
119 <test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
120 <param name="infile" value="fbse1.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
121 <repeat name="rep_comparisons">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
122 <param name="groupone" value="gDNA-PA14"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
123 <param name="minone" value="1"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
124 <param name="grouptwo" value="205w3 205w2 205w1"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
125 <param name="mintwo" value="1"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
126 </repeat>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
127 <param name="obsrat" value="0.6"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
128 <param name="genotypemode" value="--nogenotype"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
129 <param name="filtermode" value="--filter"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
130 <param name="growmode" value="--grow"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
131 <param name="header" value="--noheader"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
132 <output name="vcfoutfile" file="test7.in1.s1.s3.d1.d1.af.a6.nh.fy.gy.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
133 <output name="rvcfoutfile" file="test7.in1.s1.s3.d1.d1.af.a6.nh.fy.gy.rvcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
134 </test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
135 <test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
136 <param name="infile" value="fbse1.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
137 <repeat name="rep_comparisons">
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
138 <param name="groupone" value="gDNA-PA14"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
139 <param name="minone" value="1"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
140 <param name="grouptwo" value="205w3 205w2 205w1"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
141 <param name="mintwo" value="1"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
142 </repeat>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
143 <param name="obsrat" value="0.6"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
144 <param name="genotypemode" value="--genotype"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
145 <param name="filtermode" value="--filter"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
146 <param name="growmode" value="--grow"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
147 <param name="header" value="--noheader"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
148 <output name="vcfoutfile" file="test8.in1.s1.s3.d1.d1.gt.nh.fy.gy.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
149 <output name="rvcfoutfile" file="test8.in1.s1.s3.d1.d1.gt.nh.fy.gy.rvcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
150 </test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
151 <test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
152 <param name="infile" value="lest1.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
153 <param name="obsrat" value="0.6"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
154 <param name="genotypemode" value="--genotype"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
155 <param name="filtermode" value="--filter"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
156 <param name="growmode" value="--grow"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
157 <param name="header" value="--noheader"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
158 <output name="vcfoutfile" file="test9.in2.s0.d0.gt.hy.fy.gy.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
159 <output name="rvcfoutfile" file="test9.in2.s0.d0.gt.hy.fy.gy.rvcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
160 </test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
161 <test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
162 <param name="infile" value="lest2.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
163 <param name="obsrat" value="0.6"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
164 <param name="genotypemode" value="--nogenotype"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
165 <param name="filtermode" value="--filter"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
166 <param name="growmode" value="--grow"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
167 <param name="header" value="--noheader"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
168 <output name="vcfoutfile" file="test10.in3.s0.d0.af.a6.hy.fy.gy.vcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
169 <output name="rvcfoutfile" file="test10.in3.s0.d0.af.a6.hy.fy.gy.rvcf"/>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
170 </test>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
171 </tests>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
172
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
173 <help>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
174 <![CDATA[
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
175
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
176 This script sorts and (optionally) filters the rows/variants of a VCF file (containing data for 2 or more samples) based on the differences in the variant data between samples or sample groups. Degree of "difference" is determined by either the ratio of group-specific genotype calls over group size or the difference in average allelic frequency (with a gap size threshold). The pair of samples or sample groups used to represent the difference for a variant row is the one leading to the greatest difference in consistent genotype or average allelic frequencies (i.e. observation ratios, e.g. AO/DP) of the same variant state.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
177
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
178 This script works with VCF files generated by freeBayes (for SNP and small nucleotide variants) or svTyper (for structural variants). It will work with any other VCF data that includes GT or AO, RO, and DP tags in the FORMAT string.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
179
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
180 Each row in a VCF file will be assumed to represent a variant (or variant position). In the context of this script, there are two ways to look at differences among the samples: genotype calls and the ratio of observations of a particular variant out of the total observations. We'll refer to this as either "allelic frequency" or "observation ratios" throughout the documentation.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
181
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
182 DEFAULT SORTING BEHAVIOR
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
183
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
184 VCF rows/variants are sorted by the (maximum) degree of difference that exists between the pairs of sample groups you define. If multiple pairs are defined, the maximum difference computed among those pairs is used in the sort. How the degree of difference is calculated depends on whether the --genotype or --nogenotype flag is supplied.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
185
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
186 If --genotype is supplied, sorting will be based on the degree of difference in genotype calls between the 2 sample groups. Put simply, variants where all the genotype calls differ between sample group 1 and sample group 2 will be at the top of the results. If the genotype calls within a group are not consistent, the rank of the row falls and it will appear lower in the results. If all of the genotype calls between 2 sample groups are the same, the row will be at the bottom of the results. If samples do not have genotype calls, the rank falls even lower. The very bottom of the results will contain variants which have no genotype calls among any samples in the 2 groups.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
187
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
188 If --nogenotype is supplied, sorting will be based on the degree of difference in allelic frequencies between the 2 sample groups. The degree of difference between allelic frequencies will be the maximum difference in observation ratios among the samples, e.g. an 'A' in sample 1 is seen in 1 out of 10 reads that map over the variant position whereas an 'A' is seen in sample 2 in 10 out of 10 reads. The difference in those observation ratios is 9/10 or 0.9. The variant state (among all the observations in the 2 sample groups) with the largest difference in observation ratios between the samples in the 2 sample groups is selected to represent the row. The difference in average ratios of each group is what is used in the sort.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
189
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
190 SETTING A MINIMUM GROUP SIZE
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
191
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
192 Supplying a --min-group-size affects sorting and allows you to find which samples among 2 sample groups differ (by bringing them to the top of the sorted results). By default, all group members are used to compute maximum difference between 2 sample groups as described above.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
193
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
194 When --min-group-size is supplied with --nogenotype, the maximum difference between the sample groups' average observation ratios is computed twice, between N members of sample group 1 and M members of sample group 2. When comparing sample groups, the maximum difference is determined by taking the greater difference of 2 comparisons: 1. the top N observation ratios of sample group 1 versus the bottom M (inverse) observation ratios of sample group 2. 2. the bottom N observation ratios of sample group 1 versus top M observation ratios of sample group 2. In order to avoid meaningless results, either N or M must represent a majority of their respective sample groups. It is recommended to always set -d to the group size for 1 of the 2 groups. --min-group-size should only be used when the groups being compared are not 2 sets of replicates.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
195
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
196 When --min-group-size is supplied with --genotype, the maximum difference between the sample groups is computed in the same manner as described above for --genotype, except calls within a sample group are allowed to differ as long as there exists a subgroup of at least size --min-group-size with a consistent genotype call.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
197
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
198 DYNAMIC CREATION OF SAMPLE GROUPS
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
199
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
200 When pairs of sample groups are not supplied, sample groups are greedily determined on each row independently. Up to 2 --min-group-size's can be supplied, but must not sum to more than the number of samples. The default minimum group sizes are both 1. Sorting is performed in the same manner, except (in the case of --nogenotype) the top N and bottom M samples compared are selected from a single list. In the case of --genotype, the samples are ordered by genotype call abundance and assigned to the groups from either end (omitting those with no-calls).
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
201
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
202 GROWING SAMPLE GROUPS FROM THE MINIMUM GROUP SIZE
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
203
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
204 If you have supplied a --min-group-size that is less than the number of samples defined in the group, you can allow sample groups to grow using the --grow parameter. This allows you to identify groups of different (i.e. non-replicate) samples that share a difference with the comparison group. Growing groups behaves differently depending on whether --genotype or --nogenotype is supplied.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
205
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
206 If --nogenotype is supplied, growing groups is done using the --separation-gap threshold. It uses the difference in the observation ratio of 1 group and its inverse observation ratio in the comparison group. For example, if you supply `--grow --nogenotype --separation-gap 0.5`, samples will be greedily* added to the 2 groups in order of their difference with the current group's observation ratio average and stops just before the difference in the averages crosses the threshold of 0.5.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
207
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
208 If --genotype is supplied, all members of a sample group matching a genotype call in the sample group of size --min-group-size are added to the group. If sample groups are being created dynamically and the groups have genotype calls in common, no other samples of the common genotype call will be added.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
209
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
210 \* Sample groups are seeded with members from either the bottom or top set of observation ratios. Samples in different groups are seeded from opposite ends (top or bottom). Samples are then traversed top-down or bottom-up and greedily added to the respective sample group in order of ascending difference from the current group average.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
211
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
212 FILTERING
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
213
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
214 There are 2 threshold options that can be used to filter variants that do not contain differences between the sample groups that meet the thresholds. In --genotype mode, the threshold is --min-group-size. In --nogenotype mode the threshold is the combination of --separation-gap and --min-group-size.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
215
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
216 In --nogenotype mode, if the difference in the observation ratios between (all of the*) pairs of sample groups is less than the separation gap threshold, the row will not be printed.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
217
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
218 In --genotype mode, if (all of the*) pairs of sample groups share a common genotype call, the row will not be printed.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
219
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
220 \* In either case, if any pair of sample groups meets the threshold(s), the row will be printed regardless of whether or not any other pair fails the threshold(s).
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
221
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
222 EXAMPLE
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
223
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
224 To sort based on the difference between specific samples or groups of samples, those groups can be defined on the command line using -s. You can specify a minimum number of samples in the groups to differ. So for example, say you have 3 wildtype (WT) replicates and you would like to see differences that all 3 WT samples have with any one of a set of 10 mutant samples. You would do that on the command line using the sample names:
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
225
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
226 -s "wt1 wt2 wt3" -d 3 -s "m1 m2 m3 m4 m5 m6 m7 m8 m9 m10" -d 1
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
227
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
228 The largest difference that the average observation ratio of the WT samples has with 1 of the mutant samples will be at the top of the results.
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
229 ]]>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
230 </help>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
231
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
232 <citations>
1
cc6eda569fc4 planemo upload commit 9c7264014db750b32a8fa78f511c7efbd12529d9-dirty
hepcat72
parents: 0
diff changeset
233 <citation type="doi">10.5281/zenodo.1463080</citation>
0
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
234 </citations>
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
235
cdd7fecae37c planemo upload commit c786cb50e512783d81d4a9ad57add9151ac5904f
hepcat72
parents:
diff changeset
236 </tool>