# HG changeset patch
# User iuc
# Date 1467803014 14400
# Node ID 85dc172ae24f6693ca6bafa39291bad8440e5bca
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit ef90c4602bdb83ea7455946c9d175ea27284e643
diff -r 000000000000 -r 85dc172ae24f README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,68 @@
+# bcftools (v1.3)
+
+Copied from branch bcftools1.2:
+
+This aims to be a "faithful" rendering of the bcftools suite. I.e. options are
+presented essentially as close to the command line version as is useful.
+
+This may not appeal to all, if you'd like to see smaller and more dedicated
+tools (e.g. "intersect", "union" and "complement" being separate tools instead
+of all of them included in the "isec" tool), please feel free to file an issue.
+
+Updated for bcftools v1.3
+
+This was extended from the bcftools1.2 branch then greatly hand edited to
+group params and manage param interactions.
+
+In the macros.xml there are macros and tokens to handle file input and output.
+These use the datatypes currently available in galaxy: Vcf and Bcf
+The macros take care of bgzip and indexing of inputs.
+
+The convert command was split into 2 tools, "convert to vcf" and "convert from vcf"
+
+## TODO:
+
+- stats needs a matplotlib tool dependency and pdflatex for generating a pdf of plots
+- cnv needs a matplotlib tool dependency for generating images, then a means to consolidate those.
+- cnv needs an input.vcf for testing, runs with bcftools cnv -s "HG00101" -o 'HG00101/' -p 5 mpileup.vcf
+- roh needs a more useful input.vcf for testing
+- plugin color chrs
+- plugin frameshifts
+
+## Status
+
+The wrappers were automatically generated in bulk. That doesn't get them 100%
+of the way there (e.g. meaningful test cases), so the rest of the process is a
+bit slower.
+
+- [x] annotate
+- [x] call
+- [ ] cnv (needs real test data, needs plotting)
+- [x] concat
+- [x] consensus
+- [x] convert from vcf
+- [x] convert to vcf
+- [x] filter
+- [x] gtcheck
+- [x] isec
+- [x] merge
+- [x] norm
+- [x] query
+- [x] query list samples
+- [x] reheader
+- [x] roh
+- [x] stats (needs plotting)
+- [x] view
+- [ ] +color chrs
+- [x] +counts
+- [x] +dosage
+- [x] +fill an ac
+- [x] +fill tags
+- [x] +fixploidy
+- [ ] +frameshifts
+- [x] +impute info
+- [x] +mendelian
+- [x] +missing2ref
+- [x] +setgt
+- [x] +tag2tag
+- [x] +vcf2sex
diff -r 000000000000 -r 85dc172ae24f bcftools_gtcheck.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_gtcheck.xml Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,124 @@
+
+
+ Check sample identity
+
+ gtcheck
+ macros.xml
+
+
+
+ $genotypes_vcf &&
+ bcftools index $genotypes_vcf &&
+ #elif $section.genotypes.datatype.file_ext == 'bcf'
+ #set $genotypes_vcf = 'genotypes.bcf'
+ ln -s "$section.genotypes" $genotypes_vcf &&
+ #if $section.genotypes.metadata.bcf_index:
+ ln -s $section.genotypes.metadata.bcf_index ${genotypes_vcf}.csi &&
+ #else
+ bcftools index $genotypes_vcf &&
+ #end if
+ #end if
+#end if
+
+bcftools @EXECUTABLE@
+
+## Default section
+#set $section = $sec_default
+
+#if $genotypes_vcf:
+ --genotypes $genotypes_vcf
+#end if
+
+#if $section.genotypes:
+ --genotypes "${section.genotypes}"
+#end if
+
+${section.all_sites}
+
+#if $section.GTs_only:
+ --GTs-only "${section.GTs_only}"
+#end if
+
+${section.homs_only}
+
+#if $section.plot:
+ --plot "${section.plot}"
+#end if
+
+#if $section.query_sample:
+ --query-sample "${section.query_sample}"
+#end if
+
+#if $section.target_sample:
+ --target-sample "${section.target_sample}"
+#end if
+
+#set $section = $sec_restrict
+@REGIONS@
+@TARGETS@
+
+## Primary Input/Outputs
+
+@INPUT_FILE@
+> "$output_file"
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 85dc172ae24f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,674 @@
+
+
+ 1.3
+
+
+
+
+
+
+
+
+
+
+ bcftools
+
+ htslib
+ tabix
+ samtools
+
+
+
+ bcftools 2>&1 | grep 'Version:'
+
+
+
+
+ 10.1093/bioinformatics/btp352
+
+
+
+ https://github.com/samtools/bcftools/wiki
+ http://samtools.github.io/bcftools/bcftools.html
+
+ --threads \${GALAXY_SLOTS:-4}
+
+
+
+
+
+
+
+
+ $input_vcf &&
+ bcftools index $input_vcf &&
+#elif $input_file.datatype.file_ext == 'vcf_bgzip'
+ ln -s "$input_file" $input_vcf
+#elif $input_file.datatype.file_ext == 'bcf'
+ #set $input_vcf = 'input.bcf'
+ ln -s "$input_file" $input_vcf &&
+ #if $input_file.metadata.bcf_index:
+ ln -s $input_file.metadata.bcf_index ${input_vcf}.csi &&
+ #else
+ bcftools index $input_vcf &&
+ #end if
+#elif $input_file.datatype.file_ext == 'bcf_bgzip'
+ ln -s "$input_file" $input_vcf
+#end if
+]]>
+
+
+$input_vcf
+
+
+
+
+
+
+> $vcfs_list_file &&
+ #if $input_file.datatype.file_ext == 'vcf'
+ bgzip -c "$input_file" > $input_vcf &&
+ bcftools index $input_vcf &&
+ #elif $input_file.datatype.file_ext == 'vcf_bgz'
+ ln -s "$input_file" $input_vcf
+ #elif $input_file.datatype.file_ext == 'bcf'
+ #set $input_vcf = 'input' + str($i) + '.bcf.gz'
+ ## bgzip -c "$input_file" > $input_vcf &&
+ ln -s "$input_file" $input_vcf &&
+ #if $input_file.metadata.bcf_index:
+ ln -s $input_file.metadata.bcf_index ${input_vcf}.csi &&
+ #else
+ bcftools index $input_vcf &&
+ #end if
+ #elif $input_file.datatype.file_ext == 'bcfvcf_bgz'
+ ln -s "$input_file" $input_vcf &&
+ #end if
+ $input_vcfs.append($input_vcf)
+#end for
+]]>
+
+
+#echo ' '.join($input_vcfs)#
+
+
+$vcfs_list_file
+
+
+
+
+
+
+
+
+
+#if $input_fa_ref is not None:
+ --fasta-ref "$input_fa_ref"
+#elif 'fasta_ref' in $section and $section.fasta_ref:
+ --fasta-ref "${section.fasta_ref}"
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#if 'AF_file' in $section and $section.AF_file:
+ --AF-file "${section.AF_file}"
+#end if
+
+
+
+
+
+
+#if 'estimate_AF' in $section and $section.estimate_AF:
+ --estimate-AF "${section.estimate_AF}"
+#end if
+
+
+
+
+
+
+ $exons_path &&
+ tabix -s 1 -b 2 -e 3 $exons_path &&
+#end if
+]]>
+
+
+#if 'exons_file' in $section and $section.exons_file:
+ --exons $exons_path
+#end if
+
+
+
+
+
+
+#if 'ploidy_file' in $section and $section.ploidy_file:
+ --ploidy "${section.ploidy_file}"
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#if $section.collapse:
+ --collapse "${section.collapse}"
+#end if
+
+
+
+
+ ^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$
+
+
+
+#if $section.apply_filters:
+ --apply-filters "${section.apply_filters}"
+#end if
+
+
+
+
+
+
+
+
+
+
+#if str($output_type) != "__none__":
+ --output-type "${output_type}"
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$
+
+
+
+
+
+
+
+
+#if $section.regions.regions_src == 'regions' and $section.regions.regions != '':
+ --regions "$section.regions.regions"
+#elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
+ --regions-file "$section.regions.regions_file"
+#end if
+
+
+
+
+
+
+
+
+
+ $targets_path &&
+ tabix -s 1 -b 2 -e 2 $targets_path &&
+ #end if
+#elif $tgts_sec.targets_file:
+ #set $targets_path = 'targets_file.tab.gz'
+ bgzip -c "$section.targets_file" > $targets_path &&
+ tabix -s 1 -b 2 -e 2 $targets_path &&
+#end if
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$
+
+
+
+
+
+ true
+
+
+
+
+
+
+
+
+
+
+ ^(\w+(,\w+)*)?$
+
+
+
+
+
+
+#set $samples_defined = False
+#if str($section.samples) != '':
+ #set $samples_defined = True
+ --samples "${section.invert_samples}${section.samples}"
+#end if
+#if $section.samples_file:
+ #set $samples_defined = True
+ --samples-file "${section.invert_samples_file}${section.samples_file}"
+#end if
+
+
+
+
+
+
+#if $section.sample:
+ --sample "${section.sample}"
+#end if
+
+
+
+
+
+ ^[^']*$
+
+
+
+
+#if $section.include:
+ --include '${section.include}'
+#end if
+
+
+
+
+ ^[^']*$
+
+
+
+
+#if $section.exclude:
+ --exclude '${section.exclude}'
+#end if
+
+
+
+
+ ^([^,]+(,[^,]+)*)?$
+
+
+
+#if $section.columns != '':
+ --columns "${section.columns}"
+#end if
+
+
+
+
+
+
+
+
+
+
+${section.vcf_ids}
+
+
+
+ BCF conversion.
+
+This Galaxy tool recommends using the compressed BCF format
+as piping is not implemented, and uncompressed data would
+use unnecessary amounts of space.
+
+]]>
+
+
+ als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz
+ ]]>
+
+
+
+
+
+Collapse
+--------
+
+Controls how to treat records with duplicate positions and defines compatible
+records across multiple input files. Here by "compatible" we mean records which
+should be considered as identical by the tools. For example, when performing
+line intersections, the desire may be to consider as identical all sites with
+matching positions (bcftools isec -c all), or only sites with matching variant
+type (bcftools isec -c snps -c indels), or only sites with all alleles
+identical (bcftools isec -c none).
+
+
++------------+----------------------------------------------------------------+
+| Flag value | Result |
++============+================================================================+
+| none | only records with identical REF and ALT alleles are compatible |
++------------+----------------------------------------------------------------+
+| some | only records where some subset of ALT alleles match are |
+| | compatible |
++------------+----------------------------------------------------------------+
+| all | all records are compatible, regardless of whether the ALT |
+| | alleles match or not. In the case of records with the same |
+| | position, only the first will be considered and appear on |
+| | output. |
++------------+----------------------------------------------------------------+
+| snps | any SNP records are compatible, regardless of whether the ALT |
+| | alleles match or not. For duplicate positions, only the first |
+| | SNP record will be considered and appear on output. |
++------------+----------------------------------------------------------------+
+| indels | all indel records are compatible, regardless of whether the |
+| | REF and ALT alleles match or not. For duplicate positions, |
+| | only the first indel record will be considered and appear on |
+| | output. |
++------------+----------------------------------------------------------------+
+| both | abbreviation of "-c indels -c snps" |
++------------+----------------------------------------------------------------+
+| id | only records with identical ID column are compatible. |
+| | Supported by bcftools merge only. |
++------------+----------------------------------------------------------------+
+
+
+
+
+ , >=, <=, <, !=
+
+- regex operators "~" and its negation "!~"
+
+ ::
+
+ INFO/HAYSTACK ~ "needle"
+
+- parentheses
+
+ ::
+
+ (, )
+
+- logical operators
+
+ ::
+
+ && (same as &), ||, |
+
+- INFO tags, FORMAT tags, column names
+
+ ::
+
+ INFO/DP or DP
+ FORMAT/DV, FMT/DV, or DV
+ FILTER, QUAL, ID, REF, ALT[0]
+
+- 1 (or 0) to test the presence (or absence) of a flag
+
+ ::
+
+ FlagA=1 && FlagB=0
+
+- "." to test missing values
+
+ ::
+
+ DP=".", DP!=".", ALT="."
+
+- missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression
+
+ ::
+
+ GT="."
+
+- TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)
+
+ ::
+
+ TYPE="indel" | TYPE="snp"
+
+- array subscripts, "*" for any field
+
+ ::
+
+ (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
+ DP4[*] == 0
+ CSQ[*] ~ "missense_variant.*deleterious"
+
+- function on FORMAT tags (over samples) and INFO tags (over vector fields)
+
+ ::
+
+ MAX, MIN, AVG, SUM, STRLEN, ABS
+
+- variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes
+
+ ::
+
+ N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN
+
+**Notes:**
+
+- String comparisons and regular expressions are case-insensitive
+- If the subscript "*" is used in regular expression search, the whole field
+ is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
+ true for the string vector INFO/STR=AB,CD.
+- Variables and function names are case-insensitive, but not tag names. For
+ example, "qual" can be used instead of "QUAL", "strlen()" instead of
+ "STRLEN()", but not "dp" instead of "DP".
+
+**Examples:**
+
+ ::
+
+ MIN(DV)>5
+ MIN(DV/DP)>0.3
+ MIN(DP)>10 & MIN(DV)>3
+ FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
+ FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
+ QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
+ QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
+ TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
+ MIN(DP)>35 && AVG(GQ)>50
+ ID=@file .. selects lines with ID present in the file
+ ID!=@~/file .. skip lines with ID present in the ~/file
+ MAF[0]<0.05 .. select rare variants at 5% cutoff
+
+]]>
+
+
+
+
+
diff -r 000000000000 -r 85dc172ae24f test-data/23andme.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/23andme.fa Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,14 @@
+>1
+CACGTNACGGCTGAAGTCCAAGGTAC
+CGTATCGAGTTCACAGTCGATAGCTC
+GATCGATAGCATCGCTAGCNNNACTA
+CGATCGATCGCTCTCCGTAACACTCA
+AAAACGATCGATCGACTGCTCTTTAG
+CGATGACTTTAGGGGAAAAA
+>2
+CGCTCAGCCGTACAGCCGAGCAGGAC
+ACGCTATTTTAGATCGACTGGCTNNG
+CGCTAGCTACGCTTTAGCACGAGAA
+>Y
+NNNGCATACGTGTCCATCACGATGAT
+AGCGATGATCGATC
diff -r 000000000000 -r 85dc172ae24f test-data/annotate.hdr
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.hdr Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,4 @@
+##INFO=
+##INFO=
+##INFO=
+##INFO=
diff -r 000000000000 -r 85dc172ae24f test-data/annotate.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.tab Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,18 @@
+3 3212016 CTT C,CT indel_3212016 . . . 1
+4 3258448 TACACACAC T indel_3258448 . . . 1
+4 4000000 T C id1 . . . 1
+4 4000001 T C,A id2 . . . 1
+2 3199812 G GTT,GT indel_3199812 . . . 1
+1 3000150 C CT indel_3000150 . . . 1
+1 3000150 C T snp_3000150 999 1,2 1e-10,2e-10 .
+1 3000151 C T snp_3000151 999 1 2e-10 .
+1 3062915 G T,C snp_3062915 999 1 2e-10 .
+1 3062915 GTTT G indel_3062915 . . . 1
+1 3106154 A C snp_3106154 999 1 2e-10 .
+1 3106154 C CT indel_3106154 . . . 1
+1 3106154 CAAA C indel_3106154 . . . 1
+1 3157410 GA G indel_3157410 . . . 1
+1 3162006 GAA G indel_3162006 . . . 1
+1 3177144 G . ref_3177144 999 1 2e-10 .
+1 3177144 G T snp_3177144 999 1 2e-10 0
+1 3184885 TAAAA TA,T indel_3184885 . . . 1
diff -r 000000000000 -r 85dc172ae24f test-data/annotate.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##INFO=
+##FORMAT=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##contig=
+##contig=
+##contig=
+##contig=
+##test=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 . C T 59.2 PASS AN=4;AC=2 GT:GQ 0/1:245 0/1:245
+1 3000151 . C T 59.2 PASS AN=4;AC=2 GT:DP:GQ 0/1:32:245 0/1:32:245
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2;INDEL;STR=test GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=3;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 2:0,1:409:35:-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . C CT 59.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3157410 . GA G 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . GAA G 60.2 PASS AN=4;AC=2 GT:GQ:DP 0/1:212:22 0/1:212:22
+1 3177144 . G T 45 PASS AN=4;AC=2 GT:GQ:DP 0/0:150:30 1/1:150:30
+1 3177144 . G . 45 PASS AN=4;AC=0 GT:GQ:DP 0/0:150:30 0/0:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000000 . T A,C 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 4000001 . T A 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
diff -r 000000000000 -r 85dc172ae24f test-data/annotate2.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.tab Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,2 @@
+2 3000000 3199812 region_3000000_3199812
+1 3000150 3106154 region_3000150_3106154
diff -r 000000000000 -r 85dc172ae24f test-data/annotate2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate2.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C
+1 3000001 xx C T 11 PASS FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:xxx 0/0:11:1.1:x 0/0:11:1.1:x
+1 3000002 . C T . . . GT . . .
+1 3000003 xx C T 11 q11 FLAG;IINT=.;IFLT=.;ISTR=. GT:FINT:FFLT:FSTR 0/0:.:.:. 0/0:.:.:. 0/0:.:.:.
+1 3000004 xx C T 11 q11 FLAG;IINT=11;IFLT=1.1;ISTR=xxx GT:FINT:FFLT:FSTR 0/0:11:1.1:x 0/0:11:1.1:xxx 0/0:11:1.1:xxx
diff -r 000000000000 -r 85dc172ae24f test-data/annotate3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate3.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,21 @@
+##fileformat=VCFv4.1
+##FILTER=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FILTER=
+##FILTER=
+##FILTER=
+##FILTER=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000000 id C . 20 . AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000001 id C . 20 PASS AA=1;BB=2;X=3;Y=4 GT:X:PL:Y:AA 0/1:1:2:3:1 0/1:1:2:3:1
+1 3000002 id C . 20 fltY;fltA;fltB;fltX BB=2;X=3;Y=4;AA=1 GT:Y:X:PL:AA 0/1:3:1:2:1 0/1:3:1:2:1
diff -r 000000000000 -r 85dc172ae24f test-data/annotate4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate4.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1 . C T . . .
+1 2 . C T,G . . FA=.,9.9;FR=.,9.9,.;IA=.,99;IR=.,99,.;SA=.,99;SR=.,99,.
+1 3 . C A,T . . .
diff -r 000000000000 -r 85dc172ae24f test-data/annots.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,37 @@
+##fileformat=VCFv4.1
+##INFO=
+##FORMAT=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##contig=
+##contig=
+##contig=
+##contig=
+##test=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3000150 id1 C T 99 PASS STR=id1;AN=4;AC=0 GT:GQ 0|0:999 0|0:999
+1 3000151 id2 C T 99 PASS STR=id2;AN=4;AC=0 GT:DP:GQ 0|0:99:999 0|0:99:999
+1 3062915 idIndel GTTT G 99 PASS DP4=1,2,3,4;AN=4;AC=0;INDEL;STR=testIndel GT:GQ:DP:GL 0|0:999:99:-99,-9,-99 0|0:999:99:-99,-9,-99
+1 3062915 idSNP G T,C 99 PASS STR=testSNP;TEST=5;DP4=1,2,3,4;AN=3;AC=0,0 GT:TT:GQ:DP:GL 0|0:9,9:999:99:-99,-9,-99,-99,-9,-99 0:9,9:999:99:-99,-9,-99
+1 3106154 id4 CAAA C 99 PASS STR=id4;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3106154 id5 C CT 99 PASS STR=id5;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3157410 id6 GA GC,G 99 PASS STR=id6;AN=4;AC=0 GT:GQ:DP 0|0:99:99 0|0:99:99
+1 3162006 id7 GAA GG 99 PASS STR=id7;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id8 G T 99 PASS STR=id8;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3177144 id9 G . 99 PASS STR=id9;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
+1 3184885 id10 TAAAA TA,T 99 PASS STR=id10;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+2 3199812 id11 G GTT,GT 99 PASS STR=id11;AN=4;AC=0,0 GT:GQ:DP 0|0:999:99 0|0:999:99
+3 3212016 id12 CTT C,CT 99 PASS STR=id12;AN=4;AC=0,0 GT:GQ:DP 0|0:99:99 0|0:99:99
+4 3258448 id13 TACACACAC T 99 PASS STR=id13;AN=4;AC=0 GT:GQ:DP 0|0:999:99 0|0:999:99
diff -r 000000000000 -r 85dc172ae24f test-data/annots2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots2.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.1
+##FILTER=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT B A
+1 3000001 . C T . . . GT . .
+1 3000002 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000003 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
+1 3000004 id C T 99 q99 FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999 GT:FINT:FFLT:FSTR 1|1:88,99:8.8,9.9:888,999 0|1:77:7.7:77
diff -r 000000000000 -r 85dc172ae24f test-data/annots4.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots4.tab Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,3 @@
+1 1 C A,T,G 0,1.1,0 1.1,0,2.2,0 0,1,0 1,0,2,0 X,11,XXX 1,XX,222,XXX
+1 2 C T,G 1.1,2.2 1.1,2.2,3.3 1,2 1,2,3 11,2 111,22,3
+1 3 C T 1.1 1.1,2.2 1 1,2 11 11,2
diff -r 000000000000 -r 85dc172ae24f test-data/annots4.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annots4.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,13 @@
+##fileformat=VCFv4.2
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO
+1 1 . C A,T,G . . FA=0,1.1,0;FR=1.1,0,2.2,0;IA=0,1,0;IR=1,0,2,0;SA=X,11,XXX;SR=1,XX,222,XXX
+1 2 . C T,G . . FA=1.1,2.2;FR=1.1,2.2,3.3;IA=1,2;IR=1,2,3;SA=11,2;SR=111,22,3
+1 3 . C T . . FA=1.1;FR=1.1,2.2;IA=1;IR=1,2;SA=11;SR=11,2
diff -r 000000000000 -r 85dc172ae24f test-data/check.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/check.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,38 @@
+##fileformat=VCFv4.1
+##INFO=
+##FORMAT=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##FILTER=
+##contig=
+##contig=
+##contig=
+##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
+##readme=AAAAAA
+##readme=BBBBBB
+##INFO=
+##INFO=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+1 3062915 id3D GTTT G 12.9 q10 DP4=1,2,3,4;AN=4;AC=2 GT:GQ:DP:GL 0/1:409:35:-20,-5,-20 0/1:409:35:-20,-5,-20
+1 3062915 idSNP G T,C 12.6 test TEST=5;DP4=1,2,3,4;AN=4;AC=1,1 GT:TT:GQ:DP:GL 0/1:0,1:409:35:-20,-5,-20,-20,-5,-20 0/2:0,1:409:35:-20,-5,-20,-20,-5,-20
+1 3106154 . CAAA C 342 PASS AN=4;AC=2 GT:GQ:DP 0/1:245:32 0/1:245:32
+1 3106154 . G A 59.2 PASS AN=4;AC=1 GT:GQ:DP 0/1:245:32 0/0:245:32
+1 3157410 . G A 90.6 q10 AN=4;AC=4 GT:GQ:DP 1/1:21:21 1/1:21:21
+1 3162006 . G A 60.2 PASS AN=4;AC=3 GT:GQ:DP 1/1:212:22 0/1:212:22
+1 3177144 . GT G 45 PASS AN=4;AC=2 GT:GQ:DP 0/1:150:30 0/1:150:30
+1 3184885 . TAAAA TA,T 61.5 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:12:10 1/2:12:10
+2 3199812 . G GTT,GT 82.7 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:322:26 1/2:322:26
+3 3212016 . CTT C,CT 79 PASS AN=4;AC=2,2 GT:GQ:DP 1/2:91:26 1/2:91:26
+4 3258448 . TACACACAC T 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258449 . GCAAA GA,G 59.9 PASS DP=62;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258450 . AAAAGAAAAAG A,AAAAAAG 59.9 PASS DP=60;AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258451 . AAA AGT 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258452 . AAA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . AACA AGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258453 . ACA AAGA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
+4 3258454 . AACA AACA 59.9 PASS AN=4;AC=2 GT:GQ:DP 0/1:325:31 0/1:325:31
diff -r 000000000000 -r 85dc172ae24f test-data/concat.1.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.a.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.0
+##FILTER=
+##FILTER=
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##contig=
+##contig=
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 XX=11;DP=35 GT:GQ:DP 0/1:409:35
+1 110 . C T,G 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 110 . CAAA C 1792 Fail DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 Fail DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 Fail DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
diff -r 000000000000 -r 85dc172ae24f test-data/concat.1.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.1.b.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,19 @@
+##fileformat=VCFv4.0
+##samtoolsVersion=0.2.0-rc10+htslib-0.2.0-rc10
+##samtoolsCommand=samtools mpileup -t INFO/DPR -C50 -pm3 -F0.2 -d10000 -ug -r 1:1-1000000 -b mpileup.2014-07-03//lists/chr1-pooled.list -f human_g1k_v37.fasta
+##ALT=
+##bcftools_callVersion=0.2.0-rc10-2-gcd94fde+htslib-0.2.0-rc10
+##bcftools_callCommand=call -vm -f GQ -S mpileup.2014-07-03//pooled/1/1:1-1000000.samples -
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+3 142 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+3 152 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+3 162 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+3 172 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+3 182 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+3 192 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
diff -r 000000000000 -r 85dc172ae24f test-data/concat.2.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.a.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.0
+##INFO=
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+2 140 . A G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
+1 110 . C T,G 1792 Fail XX=11;DP=32 GT:GQ:DP 0/1:245:32
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
diff -r 000000000000 -r 85dc172ae24f test-data/concat.2.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.2.b.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.0
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0/1:409:35
+2 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0/1:245:32
+2 120 . GA G 628 q10 DP=21 GT:GQ:DP 1/1:21:21
+2 130 . GAA G 1016 PASS DP=22 GT:GQ:DP 0/1:212:22
+2 140 . GT G 727 PASS DP=30 GT:GQ:DP 0/1:150:30
+2 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 1/2:12:10
+2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
diff -r 000000000000 -r 85dc172ae24f test-data/concat.3.0.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.0.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.0
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
diff -r 000000000000 -r 85dc172ae24f test-data/concat.3.a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.a.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.0
+##INFO=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FILTER=
+##contig=
+##contig=
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
+9 202 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0|1:409:35 0|1
+9 212 . C T,G 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+9 212 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+9 222 . GA G 628 q10 DP=21 GT:GQ:DP 0|1:21:21 0|1
+9 232 . G T 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+9 232 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+9 242 . GT G 727 PASS DP=30 GT:GQ:DP 0|1:150:30 0|1
+9 252 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+9 262 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+1 100 . GTTT G 1806 q10 DP=35 GT:GQ:DP 0|1:409:35 0|1
+1 110 . C T,G 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+1 110 . CAAA C 1792 PASS DP=32 GT:GQ:DP 0|1:245:32 0|1
+1 120 . GA G 628 q10 DP=21 GT:GQ:DP 0|1:21:21 0|1
+1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0|1:212:22 0|1
+1 140 . GT G 727 PASS DP=30 GT:GQ:DP 0|1:150:30 0|1
+1 150 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
+1 160 . TAAAA TA,T 246 PASS DP=10 GT:GQ:DP 0|1:12:10 0|1
diff -r 000000000000 -r 85dc172ae24f test-data/concat.3.b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concat.3.b.vcf Wed Jul 06 07:03:34 2016 -0400
@@ -0,0 +1,223 @@
+##fileformat=VCFv4.0
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=