changeset 1:0bc0009f9ea0 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
author iuc
date Sun, 27 Sep 2020 10:08:14 +0000
parents 5e258fba246c
children a52b819aa990
files macros.xml test-data/NC_002945v4.fasta test-data/bam_input.bam test-data/fasta_indexes.loc test-data/output_metrics.tabular test-data/output_vcf.vcf test-data/vcf_input.vcf tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test vsnp_add_zero_coverage.py vsnp_build_tables.xml
diffstat 12 files changed, 533 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,30 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <token name="@PROFILE@">19.09</token>
+    <xml name="param_input_type">
+        <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
+            <option value="single" selected="true">Single files</option>
+            <option value="collection">Collections of files</option>
+        </param>
+    </xml>
+    <xml name="param_reference_source">
+        <param name="reference_source" type="select" label="Choose the source for the reference genome">
+            <option value="cached" selected="true">locally cached</option>
+            <option value="history">from history</option>
+        </param>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{None,
+                journal = {None},
+                author = {1. Stuber T},
+                title = {Manuscript in preparation},
+                year = {None},
+                url = {https://github.com/USDA-VS/vSNP},}
+            </citation>
+        </citations>
+    </xml>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NC_002945v4.fasta	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,101 @@
+>NC_002945.4 Mycobacterium bovis AF2122/97 genome assembly, chromosome: Mycobacterium_bovis_AF2122/97
+TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACC
+CTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTG
+GCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTTGCTCTGTTATCCGTGCCGAGCAGCTTTGTC
+CAAAACGAAATCGAGCGCCATCTGCGGGCCCCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGA
+TCCAACTCGGGGTCCGCATCGCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGA
+AAATCCTGCTACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG
+GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCGCAATACCGATTCCGCTACCGCTG
+GCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGCGCCTCCAACCGGTTCGCGCA
+CGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCTTACAACCCCCTGTTCATCTGGGGCGAGTCC
+GGTCTCGGCAAGACACACCTGCTACACGCGGCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGG
+TCAAATATGTCTCCACCGAGGAATTCACCAACGACTTCATTAACTCGCTCCGCGATGACCGCAAGGTCGC
+ATTCAAACGCAGCTACCGCGACGTAGACGTGCTGTTGGTCGACGACATCCAATTCATTGAAGGCAAAGAG
+GGTATTCAAGAGGAGTTCTTCCACACCTTCAACACCTTGCACAATGCCAACAAGCAAATCGTCATCTCAT
+CTGACCGCCCACCCAAGCAGCTCGCCACCCTCGAGGACCGGCTGAGAACCCGCTTTGAGTGGGGGCTGAT
+CACTGACGTACAACCACCCGAGCTGGAGACCCGCATCGCCATCTTGCGCAAGAAAGCACAGATGGAACGG
+CTCGCGATCCCCGACGATGTCCTCGAACTCATCGCCAGCAGTATCGAACGCAATATCCGTGAACTCGAGG
+GCGCGCTGATCCGGGTCACCGCGTTCGCCTCATTGAACAAAACACCAATCGACAAAGCGCTGGCCGAGAT
+TGTGCTTCGCGATCTGATCGCCGACGCCAACACCATGCAAATCAGCGCGGCGACGATCATGGCTGCCACC
+GCCGAATACTTCGACACTACCGTCGAAGAGCTTCGCGGGCCCGGCAAGACCCGAGCACTGGCCCAGTCAC
+GACAGATTGCGATGTACCTGTGTCGTGAGCTCACCGATCTTTCGTTGCCCAAAATCGGCCAAGCGTTCGG
+CCGTGATCACACAACCGTCATGTACGCCCAACGCAAGATCCTGTCCGAGATGGCCGAGCGCCGTGAGGTC
+TTTGATCACGTCAAAGAACTCACCACTCGCATCCGTCAGCGCTCCAAGCGCTAGCACGGCGTGTTCTTCC
+GACAACGTTCTTAAAAAAACTTCTCTCTCCCAGGTCACACCAGTCACAGAGATTGGCTGTGAGTGTCGCT
+GTGCACAAACCGCGCACAGACTCATACAGTCCCGGCGGTTCCGTTCACAACCCACGCCTCATCCCCACCG
+ACCCAACACACACCCCACAGTCATCGCCACCGTCATCCACAACTCCGACCGACGTCGACCTGCACCAAGA
+CCAGACTGTCCCCAAACTGCACACCCTCTAATACTGTTACCGAGATTTCTTCGTCGTTTGTTCTTGGAAA
+GACAGCGCTGGGGATCGTTCGCTGGATACCACCCGCATAACTGGCTCGTCGCGGTGGGTCAGAGGTCAAT
+GATGAACTTTCAAGTTGACGTGAGAAGCTCTACGGTTGTTGTTCGACTGCTGTTGCGGCCGTCGTGGCGG
+GTCACGCGTCATGGGCGTTCGTCGTTGGCAGTCCCCACGCTAGCGGGGCGCTAGCCACGGGATCGAACTC
+ATCGTGAGGTGAAAGGGCGCAATGGACGCGGCTACGACAAGAGTTGGCCTCACCGACTTGACGTTTCGTT
+TGCTACGAGAGTCTTTCGCCGATGCGGTGTCGTGGGTGGCTAAAAATCTGCCAGCCAGGCCCGCGGTGCC
+GGTGCTCTCCGGCGTGTTGTTGACCGGCTCGGACAACGGTCTGACGATTTCCGGATTCGACTACGAGGTT
+TCCGCCGAGGCCCAGGTTGGCGCTGAAATTGTTTCTCCTGGAAGCGTTTTAGTTTCTGGCCGATTGTTGT
+CCGATATTACCCGGGCGTTGCCTAACAAGCCCGTAGGCGTTCATGTCGAAGGTAACCGGGTCGCATTGAC
+CTGCGGTAACGCCAGGTTTTCGCTACCGACGATGCCAGTCGAGGATTATCCGACGCTGCCGACGCTGCCG
+GAAGAGACCGGATTGTTGCCTGCGGAATTATTCGCCGAGGCAATCAGTCAGGTCGCTATCGCCGCCGGCC
+GGGACGACACGCTGCCTATGTTGACCGGCATCCGGGTCGAAATCCTCGGTGAGACGGTGGTTTTGGCCGC
+TACCGACAGGTTTCGCCTGGCTGTTCGAGAACTGAAGTGGTCGGCGTCGTCGCCAGATATCGAAGCGGCT
+GTGCTGGTCCCGGCCAAGACGCTGGCCGAGGCCGCCAAAGCGGGCATCGGCGGCTCTGACGTTCGTTTGT
+CGTTGGGTACTGGGCCGGGGGTGGGCAAGGATGGCCTGCTCGGTATCAGTGGGAACGGCAAGCGCAGCAC
+CACGCGACTTCTTGATGCCGAGTTCCCGAAGTTTCGGCAGTTGCTACCAACCGAACACACCGCGGTGGCC
+ACCATGGACGTGGCCGAGTTGATCGAAGCGATCAAGCTGGTTGCGTTGGTAGCTGATCGGGGCGCGCAGG
+TGCGCATGGAGTTCGCTGATGGCAGCGTGCGGCTTTCTGCGGGTGCCGATGATGTTGGACGAGCCGAGGA
+AGATCTTGTTGTTGACTATGCCGGTGAACCATTGACGATTGCGTTTAACCCAACCTATCTAACGGACGGT
+TTGAGTTCGTTGCGCTCGGAGCGAGTGTCTTTCGGGTTTACGACTGCGGGTAAGCCTGCCTTGCTACGTC
+CGGTGTCCGGGGACGATCGCCCTGTGGCGGGTCTGAATGGCAACGGTCCGTTCCCGGCGGTGTCGACGGA
+CTATGTCTATCTGTTGATGCCGGTTCGGTTGCCGGGCTGAGCACTTGGCGCCCGGGTAGGTGTACGTCCG
+TCATTTGGGGCTGCGTGACTTCCGGTCCTGGGCATGTGTAGATCTGGAATTGCATCCAGGGCGGACGGTT
+TTTGTTGGGCCTAACGGTTATGGTAAGACGAATCTTATTGAGGCACTGTGGTATTCGACGACGTTAGGTT
+CGCACCGCGTTAGCGCCGATTTGCCGTTGATCCGGGTAGGTACCGATCGTGCGGTGATCTCCACGATCGT
+GGTGAACGACGGTAGAGAATGTGCCGTCGACCTCGAGATCGCCACGGGGCGAGTCAACAAAGCGCGATTG
+AATCGATCATCGGTCCGAAGTACACGTGATGTGGTCGGAGTGCTTCGAGCTGTGTTGTTTGCCCCTGAGG
+ATCTGGGGTTGGTTCGTGGGGATCCCGCTGACCGGCGGCGCTATCTGGATGATCTGGCGATCGTGCGTAG
+GCCTGCGATCGCTGCGGTACGAGCCGAATATGAGAGGGTGGTGCGCCAGCGGACGGCGTTATTGAAGTCC
+GTACCTGGAGCACGGTATCGGGGTGACCGGGGTGTGTTTGACACTCTTGAGGTATGGGACAGTCGTTTGG
+CGGAGCACGGGGCTGAACTGGTGGCCGCCCGCATCGATTTGGTCAACCAGTTGGCACCGGAAGTGAAGAA
+GGCATACCAGCTGTTGGCGCCGGAATCGCGATCGGCGTCTATCGGTTATCGGGCCAGCATGGATGTAACC
+GGTCCCAGCGAGCAGTCAGATACCGATCGGCAATTGTTAGCAGCTCGGCTGTTGGCGGCGCTGGCGGCCC
+GTCGGGATGCCGAACTCGAGCGTGGGGTTTGTCTAGTTGGTCCGCACCGTGACGACCTAATACTGCGACT
+AGGCGATCAACCCGCGAAAGGATTTGCTAGCCATGGGGAGGCGTGGTCGTTGGCGGTGGCACTGCGGTTG
+GCGGCCTATCAACTGTTACGCGTTGATGGTGGTGAGCCGGTGTTGTTGCTCGACGACGTGTTCGCCGAAC
+TGGATGTCATGCGCCGTCGAGCGTTGGCGACGGCGGCCGAGTCCGCCGAACAGGTGTTGGTGACTGCCGC
+GGTGCTCGAGGATATTCCCGCCGGCTGGGACGCCAGGCGGGTGCACATCGATGTGCGTGCCGATGACACC
+GGATCGATGTCGGTGGTTCTGCCATGACGGGTTCTGTTGACCGGCCCGACCAGAATCGCGGTGAGCGATT
+AATGAAGTCACCAGGGTTGGATTTGGTCAGGCGCACCCTGGACGAAGCTCGTGCTGCTGCCCGCGCGCGC
+GGACAAGACGCCGGTCGAGGGCGGGTCGCTTCCGTTGCGTCGGGTCGGGTGGCCGGACGGCGACGAAGCT
+GGTCGGGTCCGGGGCCCGACATTCGTGATCCACAACCGCTGGGTAAGGCCGCTCGTGAGCTGGCAAAGAA
+ACGCGGCTGGTCGGTGCGGGTCGCCGAGGGTATGGTGCTCGGCCAGTGGTCTGCGGTGGTCGGCCACCAG
+ATCGCCGAACATGCACGCCCGACTGCGCTAAACGACGGGGTGTTGAGCGTGATTGCGGAGTCGACGGCGT
+GGGCGACGCAGTTGAGGATCATGCAGGCCCAGCTTCTGGCCAAGATCGCCGCAGCGGTTGGCAACGATGT
+GGTGCGATCGCTAAAGATCACCGGGCCGGCGGCACCATCGTGGCGCAAGGGGCCTCGCCATATTGCCGGT
+AGGGGTCCGCGCGACACCTACGGATAACACGTCGATCGGCCCAGAACAAGGCGCTCCGGTCCCGGCCTGA
+GAGCCTCGAGGACGAAGCGGATCCGTATGCCGGACGTCGGGACGCACCAGGAAGAAAGATGTCCGACGCA
+CGGCGCGGTTAGATGGGTAAAAACGAGGCCAGAAGATCGGCCCTGGCGCCCGATCACGGTACAGTGGTGT
+GCGACCCCCTGCGGCGACTCAACCGCATGCACGCAACCCCTGAGGAGAGTATTCGGATCGTGGCTGCCCA
+GAAAAAGAAGGCCCAAGACGAATACGGCGCTGCGTCTATCACCATTCTCGAAGGGCTGGAGGCCGTCCGC
+AAACGTCCCGGCATGTACATTGGCTCGACCGGTGAGCGCGGTTTACACCATCTCATTTGGGAGGTGGTCG
+ACAACGCGGTCGACGAGGCGATGGCCGGTTATGCAACCACAGTGAACGTAGTGCTGCTTGAGGATGGCGG
+TGTCGAGGTCGCCGACGACGGCCGCGGCATTCCGGTCGCCACCCACGCCTCCGGCATACCGACCGTCGAC
+GTGGTGATGACACAACTACATGCCGGCGGCAAGTTCGACTCGGACGCGTATGCGATATCTGGTGGTCTGC
+ACGGCGTCGGCGTGTCGGTGGTTAACGCGCTATCCACCCGGCTCGAAGTCGAGATCAAGCGCGACGGGTA
+CGAGTGGTCTCAGGTTTATGAGAAGTCGGAACCCCTGGGCCTCAAGCAAGGGGCGCCGACCAAGAAGACG
+GGGTCAACGGTACGGTTCTGGGCCGACCCCGCTGTTTTCGAAACCACGGAATACGACTTCGAAACCGTCG
+CCCGCCGGCTGCAAGAGATGGCGTTCCTCAACAAGGGGCTGACCATCAACCTGACCGACGAGAGGGTGAC
+CCAAGACGAGGTCGTCGACGAAGTGGTCAGCGACGTCGCCGAGGCGCCGAAGTCGGCAAGTGAACGCGCA
+GCCGAATCCACTGCACCGCACAAAGTTAAGAGCCGCACCTTTCACTATCCGGGTGGCCTGGTGGACTTCG
+TGAAACACATCAACCGCACCAAGAACGCGATTCATAGCAGCATCGTGGACTTTTCCGGCAAGGGCACCGG
+GCACGAGGTGGAGATCGCGATGCAATGGAACGCCGGGTATTCGGAGTCGGTGCACACCTTCGCCAACACC
+ATCAACACCCACGAGGGCGGCACCCACGAAGAGGGCTTCCGCAGCGCGCTGACGTCGGTGGTGAACAAGT
+ACGCCAAGGACCGCAAGCTACTGAAGGACAAGGACCCCAACCTCACCGGTGACGATATCCGGGAAGGCCT
+GGCCGCTGTGATCTCGGTGAAGGTCAGCGAACCGCAGTTCGAGGGCCAGACCAAGACCAAGTTGGGCAAC
+ACCGAGGTCAAATCGTTTGTGCAGAAGGTCTGTAATGAACAGCTGACCCACTGGTTTGAAGCCAACCCCA
+CCGACTCGAAAGTCGTTGTGAACAAGGCTGTGTCCTCGGCGCAAGCCCGTATCGCGGCACGTAAGGCACG
+AGAGTTGGTGCGGCGTAAGAGCGCCACCGACATCGGTGGATTGCCCGGCAAGCTGGCCGATTGCCGTTCC
+ACGGATCCGCGCAAGTCCGAACTGTATGTCGTAGAAGGTGACTCGGCCGGCGGTTCTGCAAAAAGCGGTC
+GCGATTCGATGTTCCAGGCGATACTTCCGCTGCGCGGCAAGATCATCAATGTGGAGAAAGCGCGCATCGA
+CCGGGTGCTAAAGAACACCGAAGTTCAGGCGATCATCACGGCGCTGGGCACCGGGATCCACGACGAGTTC
+GATATCGGCAAGCTGCGCTACCACAAGATCGTGCTGATGGCCGACGCCGATGTTGACGGCCAACATATTT
+CCACGCTGTTGTTGACGTTGTTGTTCCGGTTCATGCGGCCGCTCATCGAGAACGGGCATGTGTTTTTGGC
+ACAACCGCCGCTGTACAAACTCAAGTGGCAGCGCAGTGACCCGGAATTCGCATACTCCGACCGCGAGCGC
Binary file test-data/bam_input.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,1 @@
+89	89	Mycobacterium_AF2122	${__HERE__}/NC_002945v4.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_metrics.tabular	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,2 @@
+# File	Number of Good SNPs	Average Coverage	Genome Coverage
+	0		
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_vcf.vcf	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,100 @@
+##fileformat=VCFv4.2
+##fileDate=20200302
+##source=freeBayes v1.3.1-dirty
+##reference=/home/galaxy/galaxy/tool-data/AF2122/seq/AF2122.fa
+##contig=<ID=NC_002945.4,length=4349904>
+##phasing=none
+##commandline="freebayes --region NC_002945.4:0..4349904 --bam b_0.bam --fasta-reference /home/galaxy/galaxy/tool-data/AF2122/seq/AF2122.fa --vcf ./vcf_output/part_NC_002945.4:0..4349904.vcf -u -n 0 --haplotype-length -1 --min-repeat-size 5 --min-repeat-entropy 1 -m 1 -q 0 -R 0 -Y 0 -e 1 -F 0.05 -C 2 -G 1 --min-alternate-qsum 0"
+##filter="QUAL > 0"
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
+##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype.">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype.">
+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">
+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">
+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">
+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">
+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">
+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">
+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">
+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">
+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">
+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">
+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele">
+##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele">
+##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio.  Ratio between depth in samples with each called alternate allele and those without.">
+##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
+##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout.">
+##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing.  Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
+##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
+##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
+##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
+##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
+##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
+##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
+##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
+##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record.">
+##INFO=<ID=technology.ILLUMINA,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from ILLUMINA">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	13-1941-6
+NC_002945.4	1	.	N	.	.	.	.	GT	./.
+NC_002945.4	2	.	N	.	.	.	.	GT	./.
+NC_002945.4	3	.	N	.	.	.	.	GT	./.
+NC_002945.4	4	.	N	.	.	.	.	GT	./.
+NC_002945.4	5	.	N	.	.	.	.	GT	./.
+NC_002945.4	6	.	N	.	.	.	.	GT	./.
+NC_002945.4	7	.	N	.	.	.	.	GT	./.
+NC_002945.4	8	.	N	.	.	.	.	GT	./.
+NC_002945.4	9	.	N	.	.	.	.	GT	./.
+NC_002945.4	10	.	N	.	.	.	.	GT	./.
+NC_002945.4	11	.	N	.	.	.	.	GT	./.
+NC_002945.4	12	.	N	.	.	.	.	GT	./.
+NC_002945.4	13	.	N	.	.	.	.	GT	./.
+NC_002945.4	14	.	N	.	.	.	.	GT	./.
+NC_002945.4	15	.	N	.	.	.	.	GT	./.
+NC_002945.4	16	.	N	.	.	.	.	GT	./.
+NC_002945.4	17	.	N	.	.	.	.	GT	./.
+NC_002945.4	18	.	N	.	.	.	.	GT	./.
+NC_002945.4	19	.	N	.	.	.	.	GT	./.
+NC_002945.4	20	.	N	.	.	.	.	GT	./.
+NC_002945.4	21	.	N	.	.	.	.	GT	./.
+NC_002945.4	22	.	N	.	.	.	.	GT	./.
+NC_002945.4	23	.	N	.	.	.	.	GT	./.
+NC_002945.4	24	.	N	.	.	.	.	GT	./.
+NC_002945.4	25	.	N	.	.	.	.	GT	./.
+NC_002945.4	26	.	N	.	.	.	.	GT	./.
+NC_002945.4	27	.	N	.	.	.	.	GT	./.
+NC_002945.4	28	.	N	.	.	.	.	GT	./.
+NC_002945.4	29	.	N	.	.	.	.	GT	./.
+NC_002945.4	30	.	N	.	.	.	.	GT	./.
+NC_002945.4	31	.	N	.	.	.	.	GT	./.
+NC_002945.4	32	.	N	.	.	.	.	GT	./.
+NC_002945.4	33	.	N	.	.	.	.	GT	./.
+NC_002945.4	34	.	N	.	.	.	.	GT	./.
+NC_002945.4	35	.	N	.	.	.	.	GT	./.
+NC_002945.4	36	.	N	.	.	.	.	GT	./.
+NC_002945.4	37	.	N	.	.	.	.	GT	./.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcf_input.vcf	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,64 @@
+##fileformat=VCFv4.2
+##fileDate=20200302
+##source=freeBayes v1.3.1-dirty
+##reference=/home/galaxy/galaxy/tool-data/AF2122/seq/AF2122.fa
+##contig=<ID=NC_002945.4,length=4349904>
+##phasing=none
+##commandline="freebayes --region NC_002945.4:0..4349904 --bam b_0.bam --fasta-reference /home/galaxy/galaxy/tool-data/AF2122/seq/AF2122.fa --vcf ./vcf_output/part_NC_002945.4:0..4349904.vcf -u -n 0 --haplotype-length -1 --min-repeat-size 5 --min-repeat-entropy 1 -m 1 -q 0 -R 0 -Y 0 -e 1 -F 0.05 -C 2 -G 1 --min-alternate-qsum 0"
+##filter="QUAL > 0"
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
+##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
+##INFO=<ID=RO,Number=1,Type=Integer,Description="Count of full observations of the reference haplotype.">
+##INFO=<ID=AO,Number=A,Type=Integer,Description="Count of full observations of this alternate haplotype.">
+##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">
+##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">
+##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">
+##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">
+##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">
+##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">
+##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">
+##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">
+##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">
+##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">
+##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
+##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
+##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele">
+##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele">
+##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
+##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio.  Ratio between depth in samples with each called alternate allele and those without.">
+##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
+##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout.">
+##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
+##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing.  Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
+##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
+##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
+##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
+##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
+##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
+##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
+##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
+##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record.">
+##INFO=<ID=technology.ILLUMINA,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from ILLUMINA">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	13-1941-6
+NC_002945.4	2898437	.	T	G	0.263449	.	AB=0;ABP=0;AC=0;AF=0;AN=2;AO=2;CIGAR=1X;DP=2;DPB=2;DPRA=0;EPP=3.0103;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=2.77259;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=0;QR=0;RO=0;RPL=2;RPP=7.35324;RPPR=0;RPR=0;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.ILLUMINA=1	GT:DP:AD:RO:QR:AO:QA:GL	0/0:2:0,2:0:0:2:0:0,-0.60206,-8.68589e-09
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool_data_table_conf.xml.sample	Fri May 08 12:59:27 2020 -0400
+++ b/tool_data_table_conf.xml.sample	Sun Sep 27 10:08:14 2020 +0000
@@ -1,4 +1,10 @@
 <tables>
+    <!-- Location of SAMTools indexes for FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+    <!-- Location of genbank files for vsnp_build_tables tool -->
     <table name="vsnp_genbank" comment_char="#">
         <columns>value, name, path, description</columns>
         <file path="tool-data/vsnp_genbank.loc" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vsnp_add_zero_coverage.py	Sun Sep 27 10:08:14 2020 +0000
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+
+import argparse
+import multiprocessing
+import os
+import pandas
+import pysam
+import queue
+import re
+import shutil
+from Bio import SeqIO
+
+INPUT_BAM_DIR = 'input_bam_dir'
+INPUT_VCF_DIR = 'input_vcf_dir'
+OUTPUT_VCF_DIR = 'output_vcf_dir'
+OUTPUT_METRICS_DIR = 'output_metrics_dir'
+
+
+def get_base_file_name(file_path):
+    base_file_name = os.path.basename(file_path)
+    if base_file_name.find(".") > 0:
+        # Eliminate the extension.
+        return os.path.splitext(base_file_name)[0]
+    elif base_file_name.endswith("_vcf"):
+        # The "." character has likely
+        # changed to an "_" character.
+        return base_file_name.rstrip("_vcf")
+    return base_file_name
+
+
+def get_coverage_and_snp_count(task_queue, reference, output_metrics, output_vcf, timeout):
+    while True:
+        try:
+            tup = task_queue.get(block=True, timeout=timeout)
+        except queue.Empty:
+            break
+        bam_file, vcf_file = tup
+        # Create a coverage dictionary.
+        coverage_dict = {}
+        coverage_list = pysam.depth(bam_file, split_lines=True)
+        for line in coverage_list:
+            chrom, position, depth = line.split('\t')
+            coverage_dict["%s-%s" % (chrom, position)] = depth
+        # Convert it to a data frame.
+        coverage_df = pandas.DataFrame.from_dict(coverage_dict, orient='index', columns=["depth"])
+        # Create a zero coverage dictionary.
+        zero_dict = {}
+        for record in SeqIO.parse(reference, "fasta"):
+            chrom = record.id
+            total_len = len(record.seq)
+            for pos in list(range(1, total_len + 1)):
+                zero_dict["%s-%s" % (str(chrom), str(pos))] = 0
+        # Convert it to a data frame with depth_x
+        # and depth_y columns - index is NaN.
+        zero_df = pandas.DataFrame.from_dict(zero_dict, orient='index', columns=["depth"])
+        coverage_df = zero_df.merge(coverage_df, left_index=True, right_index=True, how='outer')
+        # depth_x "0" column no longer needed.
+        coverage_df = coverage_df.drop(columns=['depth_x'])
+        coverage_df = coverage_df.rename(columns={'depth_y': 'depth'})
+        # Covert the NaN to 0 coverage and get some metrics.
+        coverage_df = coverage_df.fillna(0)
+        coverage_df['depth'] = coverage_df['depth'].apply(int)
+        total_length = len(coverage_df)
+        average_coverage = coverage_df['depth'].mean()
+        zero_df = coverage_df[coverage_df['depth'] == 0]
+        total_zero_coverage = len(zero_df)
+        total_coverage = total_length - total_zero_coverage
+        genome_coverage = "{:.2%}".format(total_coverage / total_length)
+        # Process the associated VCF input.
+        column_names = ["CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "Sample"]
+        vcf_df = pandas.read_csv(vcf_file, sep='\t', header=None, names=column_names, comment='#')
+        good_snp_count = len(vcf_df[(vcf_df['ALT'].str.len() == 1) & (vcf_df['REF'].str.len() == 1) & (vcf_df['QUAL'] > 150)])
+        base_file_name = get_base_file_name(vcf_file)
+        if total_zero_coverage > 0:
+            header_file = "%s_header.csv" % base_file_name
+            with open(header_file, 'w') as outfile:
+                with open(vcf_file) as infile:
+                    for line in infile:
+                        if re.search('^#', line):
+                            outfile.write("%s" % line)
+            vcf_df_snp = vcf_df[vcf_df['REF'].str.len() == 1]
+            vcf_df_snp = vcf_df_snp[vcf_df_snp['ALT'].str.len() == 1]
+            vcf_df_snp['ABS_VALUE'] = vcf_df_snp['CHROM'].map(str) + "-" + vcf_df_snp['POS'].map(str)
+            vcf_df_snp = vcf_df_snp.set_index('ABS_VALUE')
+            cat_df = pandas.concat([vcf_df_snp, zero_df], axis=1, sort=False)
+            cat_df = cat_df.drop(columns=['CHROM', 'POS', 'depth'])
+            cat_df[['ID', 'ALT', 'QUAL', 'FILTER', 'INFO']] = cat_df[['ID', 'ALT', 'QUAL', 'FILTER', 'INFO']].fillna('.')
+            cat_df['REF'] = cat_df['REF'].fillna('N')
+            cat_df['FORMAT'] = cat_df['FORMAT'].fillna('GT')
+            cat_df['Sample'] = cat_df['Sample'].fillna('./.')
+            cat_df['temp'] = cat_df.index.str.rsplit('-', n=1)
+            cat_df[['CHROM', 'POS']] = pandas.DataFrame(cat_df.temp.values.tolist(), index=cat_df.index)
+            cat_df = cat_df[['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', 'Sample']]
+            cat_df['POS'] = cat_df['POS'].astype(int)
+            cat_df = cat_df.sort_values(['CHROM', 'POS'])
+            body_file = "%s_body.csv" % base_file_name
+            cat_df.to_csv(body_file, sep='\t', header=False, index=False)
+            if output_vcf is None:
+                output_vcf_file = os.path.join(OUTPUT_VCF_DIR, "%s.vcf" % base_file_name)
+            else:
+                output_vcf_file = output_vcf
+            with open(output_vcf_file, "w") as outfile:
+                for cf in [header_file, body_file]:
+                    with open(cf, "r") as infile:
+                        for line in infile:
+                            outfile.write("%s" % line)
+        else:
+            if output_vcf is None:
+                output_vcf_file = os.path.join(OUTPUT_VCF_DIR, "%s.vcf" % base_file_name)
+            else:
+                output_vcf_file = output_vcf
+            shutil.copyfile(vcf_file, output_vcf_file)
+        bam_metrics = [base_file_name, "", "%4f" % average_coverage, genome_coverage]
+        vcf_metrics = [base_file_name, str(good_snp_count), "", ""]
+        if output_metrics is None:
+            output_metrics_file = os.path.join(OUTPUT_METRICS_DIR, "%s.tabular" % base_file_name)
+        else:
+            output_metrics_file = output_metrics
+        metrics_columns = ["File", "Number of Good SNPs", "Average Coverage", "Genome Coverage"]
+        with open(output_metrics_file, "w") as fh:
+            fh.write("# %s\n" % "\t".join(metrics_columns))
+            fh.write("%s\n" % "\t".join(bam_metrics))
+            fh.write("%s\n" % "\t".join(vcf_metrics))
+        task_queue.task_done()
+
+
+def set_num_cpus(num_files, processes):
+    num_cpus = int(multiprocessing.cpu_count())
+    if num_files < num_cpus and num_files < processes:
+        return num_files
+    if num_cpus < processes:
+        half_cpus = int(num_cpus / 2)
+        if num_files < half_cpus:
+            return num_files
+        return half_cpus
+    return processes
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('--output_metrics', action='store', dest='output_metrics', required=False, default=None, help='Output metrics text file')
+    parser.add_argument('--output_vcf', action='store', dest='output_vcf', required=False, default=None, help='Output VCF file')
+    parser.add_argument('--reference', action='store', dest='reference', help='Reference dataset')
+    parser.add_argument('--processes', action='store', dest='processes', type=int, help='User-selected number of processes to use for job splitting')
+
+    args = parser.parse_args()
+
+    # The assumption here is that the list of files
+    # in both INPUT_BAM_DIR and INPUT_VCF_DIR are
+    # equal in number and named such that they are
+    # properly matched if the directories contain
+    # more than 1 file (i.e., hopefully the bam file
+    # names and vcf file names will be something like
+    # Mbovis-01D6_* so they can be # sorted and properly
+    # associated with each other).
+    bam_files = []
+    for file_name in sorted(os.listdir(INPUT_BAM_DIR)):
+        file_path = os.path.abspath(os.path.join(INPUT_BAM_DIR, file_name))
+        bam_files.append(file_path)
+    vcf_files = []
+    for file_name in sorted(os.listdir(INPUT_VCF_DIR)):
+        file_path = os.path.abspath(os.path.join(INPUT_VCF_DIR, file_name))
+        vcf_files.append(file_path)
+
+    multiprocessing.set_start_method('spawn')
+    queue1 = multiprocessing.JoinableQueue()
+    num_files = len(bam_files)
+    cpus = set_num_cpus(num_files, args.processes)
+    # Set a timeout for get()s in the queue.
+    timeout = 0.05
+
+    # Add each associated bam and vcf file pair to the queue.
+    for i, bam_file in enumerate(bam_files):
+        vcf_file = vcf_files[i]
+        queue1.put((bam_file, vcf_file))
+
+    # Complete the get_coverage_and_snp_count task.
+    processes = [multiprocessing.Process(target=get_coverage_and_snp_count, args=(queue1, args.reference, args.output_metrics, args.output_vcf, timeout, )) for _ in range(cpus)]
+    for p in processes:
+        p.start()
+    for p in processes:
+        p.join()
+    queue1.join()
+
+    if queue1.empty():
+        queue1.close()
+        queue1.join_thread()
--- a/vsnp_build_tables.xml	Fri May 08 12:59:27 2020 -0400
+++ b/vsnp_build_tables.xml	Sun Sep 27 10:08:14 2020 +0000
@@ -1,5 +1,8 @@
-<tool id="vsnp_build_tables" name="vSNP: build tables" version="1.0.0">
+<tool id="vsnp_build_tables" name="vSNP: build tables" version="@WRAPPER_VERSION@.0" profile="@PROFILE@">
     <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <requirements>
         <requirement type="package" version="1.76">biopython</requirement>
         <requirement type="package" version="0.25.3">pandas</requirement>
@@ -52,10 +55,7 @@
 ]]></command>
     <inputs>
         <conditional name="input_type_cond">
-            <param name="input_type" type="select" label="Choose the category for the files to be analyzed">
-                <option value="single" selected="true">Single files</option>
-                <option value="collection">Collection of files</option>
-            </param>
+            <expand macro="param_input_type"/>
             <when value="single">
                 <param name="input_snps_json" type="data" format="json" label="SNPs json file">
                     <validator type="unspecified_build"/>
@@ -192,15 +192,6 @@
  * **Choose the category for the files to be analyzed** -  select "Single files" or "Collections of files", then select the appropriate history items (single SNPs json, average MQ json and newick files, or collections of each) based on the selected option.
  * **Use Genbank file** - Select "yes" to annotate the tables using the information in the Genbank file.  Locally cached files, if available, provide the most widely used annotations, but more custom Genbank files can be chosen from the current history.
     </help>
-    <citations>
-        <citation type="bibtex">
-            @misc{None,
-            journal = {None},
-            author = {1. Stuber T},
-            title = {Manuscript in preparation},
-            year = {None},
-            url = {https://github.com/USDA-VS/vSNP},}
-        </citation>
-    </citations>
+    <expand macro="citations"/>
 </tool>