Repository 'vcf2maf'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/vcf2maf

Changeset 0:2973994fecd6 (2022-06-28)
Next changeset 1:e8510e04a86a (2022-10-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vcf2maf commit 30046d5e0df4d80ac687edd03cf44b2afaa04550
added:
test-data/fasta_indexes.loc
test-data/input_test1.vcf
test-data/input_test2.vcf
test-data/output_test1.tabular
test-data/output_test2.tabular
test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt
test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt
test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz
test-data/test1.fa
test-data/test2.fa
test-data/vep_versioned_annotation_cache.loc
tool-data/dbkeys.loc.sample
tool-data/fasta_indexes.loc.sample
tool-data/vep_versioned_annotation_cache.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
vcf2maf.xml
b
diff -r 000000000000 -r 2973994fecd6 test-data/fasta_indexes.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,30 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon      hg18    Human (Homo sapiens): hg18 Canonical    /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full       hg18    Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full       hg19    Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
+hg19test hg19 Human (Homo sapiens): hg19 Test ${__HERE__}/test1.fa
\ No newline at end of file
b
diff -r 000000000000 -r 2973994fecd6 test-data/input_test1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_test1.vcf Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##contig=<ID=21,assembly=GCF_000001405.26,length=46709983>
+##contig=<ID=22,assembly=GCF_000001405.26,length=50818468>
+##ALT=<ID=CNV,Description="Copy Number Polymorphism">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##VEP="v105" time="2022-03-30 19:24:59" ensembl=105.525fbcb ensembl-io=105.2a0a40c ensembl-funcgen=105.660df8f ensembl-variation=105.ac8178e
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|SOURCE|custom_annotation.gtf.gz">
+##INFO=<ID=custom_annotation.gtf.gz,Number=.,Type=String,Description="custom_annotation.gtf.gz (overlap)">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
+21 5733 rs142513484 C T . . CSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|3/3||||159|||||||-1||||custom_annotation.gtf.gz|,T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|2/2||||70|70|24|A/T|Gca/Aca|||-1||||custom_annotation.gtf.gz|,T|upstream_gene_variant|MODIFIER|AP000223.42|ENSG00000260583|Transcript|ENST00000567517|antisense|||||||||||2407|-1||||custom_annotation.gtf.gz| GT 0|0
b
diff -r 000000000000 -r 2973994fecd6 test-data/input_test2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_test2.vcf Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chrM 5701 . GA G . . .
b
diff -r 000000000000 -r 2973994fecd6 test-data/output_test1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test1.tabular Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,3 @@
+#version 2.4
+Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED GENE_PHENO FILTER flanking_bps vcf_id vcf_qual gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos
+MRPL39 0 . GRCh37 21 5733 5733 + Missense_Mutation SNP C C T novel TUMOR NORMAL C C ENST00000352957 2/2 MRPL39,missense_variant,,ENST00000352957,;MRPL39,3_prime_UTR_variant,,ENST00000307301,;AP000223.42,upstream_gene_variant,,ENST00000567517,; T ENSG00000154719 ENST00000352957 Transcript missense_variant 70 70 24 A/T Gca/Aca -1 MRPL39 protein_coding 2/2 MODERATE . GCT rs142513484 . 5733
b
diff -r 000000000000 -r 2973994fecd6 test-data/output_test2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test2.tabular Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,3 @@
+#version 2.4
+Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED GENE_PHENO FILTER flanking_bps vcf_id vcf_qual gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos
+mt:ND3 0 . BDGP6.32 chrM 5702 5702 + Frame_Shift_Del DEL A A - novel TUMOR NORMAL A A c.95del p.Asp32AlafsTer28 p.D32Afs*28 FBtr0100870 1/1 mt:ND3,frameshift_variant,p.Asp32AlafsTer28,FBtr0100870,;mt:ND2,downstream_gene_variant,,FBtr0100857,;mt:CoI,downstream_gene_variant,,FBtr0100861,;mt:CoII,downstream_gene_variant,,FBtr0100863,;mt:ATPase8,downstream_gene_variant,,FBtr0100866,;mt:CoIII,downstream_gene_variant,,FBtr0100868,;mt:ND4L,downstream_gene_variant,,FBtr0100880,;mt:ND6,upstream_gene_variant,,FBtr0100883,;mt:ATPase6,downstream_gene_variant,,FBtr0433498,;mt:ND4,downstream_gene_variant,,FBtr0433500,;mt:ND5,downstream_gene_variant,,FBtr0433501,;mt:Cyt-b,upstream_gene_variant,,FBtr0433502,;mt:tRNA:Trp-TCA,downstream_gene_variant,,FBtr0100858,;mt:tRNA:Cys-GCA,upstream_gene_variant,,FBtr0100859,;mt:tRNA:Tyr-GTA,upstream_gene_variant,,FBtr0100860,;mt:tRNA:Leu-TAA,downstream_gene_variant,,FBtr0100862,;mt:tRNA:Lys-CTT,downstream_gene_variant,,FBtr0100864,;mt:tRNA:Asp-GTC,downstream_gene_variant,,FBtr0100865,;mt:tRNA:Gly-TCC,downstream_gene_variant,,FBtr0100869,;mt:tRNA:Ala-TGC,upstream_gene_variant,,FBtr0100871,;mt:tRNA:Arg-TCG,upstream_gene_variant,,FBtr0100872,;mt:tRNA:Asn-GTT,upstream_gene_variant,,FBtr0100873,;mt:tRNA:Ser-GCT,upstream_gene_variant,,FBtr0100874,;mt:tRNA:Glu-TTC,upstream_gene_variant,,FBtr0100875,;mt:tRNA:Phe-GAA,downstream_gene_variant,,FBtr0100876,;mt:tRNA:His-GTG,downstream_gene_variant,,FBtr0100878,;mt:tRNA:Thr-TGT,upstream_gene_variant,,FBtr0100881,;mt:tRNA:Pro-TGG,downstream_gene_variant,,FBtr0100882,; - FBgn0013681 FBtr0100870 Transcript frameshift_variant 95/354 95/354 32/117 D/X gAc/gc 1 1 mt:ND3 FlyBaseName_gene protein_coding YES FBpp0100181 P18930,P18930 B6E0P8,J7FKZ6 UPI0000000AE1 1/1 Gene3D:1.20.58.1610,Pfam:PF00507,PANTHER:PTHR11058,PANTHER:PTHR11058:SF9 HIGH 1 deletion . CGAC . . 5701
b
diff -r 000000000000 -r 2973994fecd6 test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,7 @@
+KJ947872 chrM
+NC_024511.2 chrM
+KJ947872 mitochondrion_genome
+KJ947872.2 chrM
+KJ947872.2 mitochondrion_genome
+NC_024511.2 mitochondrion_genome
+chrM mitochondrion_genome
b
diff -r 000000000000 -r 2973994fecd6 test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,7 @@
+species drosophila_melanogaster
+assembly BDGP6.32
+sift b
+polyphen
+source_sift sift
+source_genebuild dmel_r6.32_FB2020_01
+source_assembly BDGP6.32
b
diff -r 000000000000 -r 2973994fecd6 test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz
b
Binary file test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz has changed
b
diff -r 000000000000 -r 2973994fecd6 test-data/test1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fa Tue Jun 28 21:07:04 2022 +0000
b
b'@@ -0,0 +1,168 @@\n+>21 dna:chromosome chromosome:GRCh38:21:1:10000:1 REF\n+CCACAATCATTTTAGGAGAATGGGTTTAAGAAAGGAAAAAAAAAAAAAAGATTTCTGTAT\n+GCTCTTAAGAGAAAATCTAAAAAATAATGACATGAAAAAGTTGAAAGGAATGGAAAAATA\n+TGTACCATTAAAAGGAAACCCGACGTATGAATGCCATTATCAGACAAAACAGATTTTTTT\n+CTTTTTGAGATGGAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGGCACAATCTCTGC\n+TCACAGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCCAAGTAGC\n+TGGGACTACAGGCACCCACCACCACACCAGGCTAGTATTTGTATTTTTAGTAGAGACGGG\n+GTTTCATCGTGTTAGCCAGGATGGTCTCAATCTTCTGACCTCGTGATCTGCCCACCTCAG\n+CCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCAGCCAGACAAAACAGATT\n+TTAAGACAACTAAGAAGTTAACAAGCTGACCCTACAATAAGCATGAAAATTTTGAAAAAG\n+AATAGGAAAGGAGAACTCACCATAAGAGAAATTGAAACTTGTTATAAAGCTATAGTTGTT\n+AAAACGGTGTTACTACAGTGGTACATGGACAGATAAATGGACCAATGAAGCAGACCCAGG\n+CACTGAAAGGAACCTTTTATATGACAGCATGGCACAATCAGTAAGAATAGAGAGGAAATA\n+GGCCAGGCACGGTGGCTCACGCCTGTAATCCCAGTACTTTGGGAGGCCAAGGCAGGCAGA\n+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGTCAACATGGTGAAACCGCGTCTCTACT\n+AAAAATACAAAAATTAGCTGGTCGTGGTGGCAGGCACCTGTAATCCCAGCTACTCGGGAG\n+GCTGAGGCAGGAGAATCACTTGAACCCGGCTAGCGGAGGCTGCAGTGAGCCGAGATCACG\n+CCATTGCACTCCAGCCTAGGCAACAAGAGTGAAACTCCATCACAAAAAAAAAAGAAAAAG\n+AGTAGACAGGAAATAAATGGTCCAGAATAACTGCCTATCCTTGTGGAGGAGAGGGTGATT\n+CAAAATTAGGTCCCTTTCCTCACTCTATATGCAAAAAACAAACTTCAAATAAATTATACA\n+ATTAAATGTGAAAATCAAGACTTTAAAATAAACAATGCAGTAGGCTGCTTTATAATATCA\n+AGTTAGGGAAGGCTTTCTTAAATTTCATAAACATAAATCATAGAGGAAAAGATGAACTGT\n+CTACCTTAAAATTAAAGACGATATAAACAAAATTAAAAGGTAAGCCAGACAAAAGAAATA\n+TTTGTAGTGACAACGGTTTAACTTTCTTTCTTTCTTTTTTTTTTTTTGAGACGGAATCTC\n+ACTCTGTCACCCAGGCTGGAGTACACTGGTGCAATCTCAGCTCACTGCAACCTCCACCTC\n+CCAGGTTCAAGCGATTCTTGTGCCTCAGCCTCCCAAGTAGCTGGGATGACAGGTACGCAC\n+CACCACACCCAGCTAATTTTTTGTATTTTTAGCAGAGACGGGGTTTCACCATGTTGGCCA\n+GGCTGGTCTCGAATTCCTGACCTCAGGTGATCCATCTGCCTCAGCCTCCCAAAGTGCTGG\n+GATTACAGGAGTGAGCCACTGCACCTGGCCCACAAGGGTTTCACTTTCTAAAAATATAAA\n+GAACTGGCCAGGTGCAATGGCTCACACTTGTAATCCTAGCACTTTGGGAGGGCAAGGAGA\n+GCAGATCGCTAGAGGCCAGAAGCTGGAGAACAGCCTGGCCAACATGGTGAAACCCCATCT\n+CTACTTTAAAAATATAAAAATTAGTTGGGTGT
GGTGGCACTTGCCTATAATCCCAGCTAC\n+TTGGCTACTCAGGAGGCCGAGGCAGGGGAATCGCTTGAATCCAGTAGGCAGAGGCTACAG\n+TGAGCCAAGATCATGCCACTGCACTACAGCTTGGGCAACAGAGTGAGACTTGGTCTCAAA\n+AAAAAAAAAAATTATATATATATATCTTATATATATACACTATTATATATATACACACAC\n+ACACACACACACACACACACACACACACACACAATTAATATGAGATGCCCAAAAATCCAA\n+TTGTAAAAAGGGGCAAAGGTTGTAAACTGGTAATTCATAAAAACAAATGAAGAGATGCTT\n+ATTGGTACTATATGCTCAGTATTAAGCAAATTAAATGAGATAGGATCGTGCATATTCAAC\n+CAACAAAATATCTGAATGTCTGAAAATAATAAATGTTAATGAGGGAGTGGAGAAAATGGG\n+AATGCTCATACTGCTGATAGAGAGTAAACTGGTACAACTATTGTGGCAGCCAATTAATAT\n+TTAGTAAAGCTGAAGATGCATGGTCCACTGTGGTACAGGCCCTGGAGATATTATCAAATG\n+TGTACACAAAGAAACACGCACAAGGATATTTTCTGCGATACTGTAATACTCAAAAGCCAA\n+TGACATCCTCAGTGGTCATCAATAAGAAAATGAATTAATGATGGGATTAATCATATAATG\n+AAATACTATATAGCAGTTGAAATGAATGTACTCTTTACATGTATCAACATGCTATACATA\n+AAAAACAATGATGAGCAATAAAAGCAAATTGCAAAAGGATATATATTATGAAACCAATTA\n+TGTTTAGTTTTAAAACACAGAGAATACTATGGATTGTAGTAAAAAAAAATATAAAATCAT\n+GAAGAGTAAGGACAGGTACAAACAGGATAGTGGTTCTTCTTGAGAGGAAGGAAATGACAT\n+AACAAGACCTTCAATGGATGTGCAGCTTTTCCTTTATTTAAACACAAAAGGATCTGAAGG\n+AAATAAGGAAAAAGGTTGACAGTGGTTACAATTAAATAGTGGCTGTATGTCAACACTCTT\n+GGTTACAAACAACAGGATCTACACTAGCTAGTTTAAACAAACAGAATAAAATGGCTCACA\n+AGTCACAGGCGTGGAGGTCAGGACTACGGAGCCCCCAAAAATGCCCATTTTTATACCTTG\n+GAGCAGCTGCGGGGGAAAAACTGCTAAGCAAAGCCTCCACACCTTGCACCATTACATGGG\n+ACCTCTGCCACTGCTGCCTTGAAAACCACATCACTGCTCCGTTCAACAAAATGTATCTCA\n+TACTACTCTTGTCTGCAAAGTACTTGCTTCCAGATTTCACACAGTTATGTCTAATTGGTG\n+AGCCCATGCTACCTGTCTGAGCCACAGCTGCAAGGAAGGGCAGGAGATTAAATTTCATTC\n+TTCTACTGGGTAAGGCGAGATCCACAGAGTGGGAAGTTGCCAAAAAGCAGGTGTTCAAAC\n+AGTGCTAGCTGCCCAAAAAGCACGAAAAGTGCCCACTCAAACAAGAGTTGGTGAAAATAT\n+TCTCTCTACTTTTCTGTATGCTCAAATATTTCACAATTTTTTTAAAGAAAAAATGTCGAA\n+GTATGTAAATTCACAAACAACAAAGGGAATGGAAAAAAAATCAATAAACAAGAGACGTCA\n+ACCAAATTCTAAAAGACAAAAAGCTAAGTGACTAATAAAACACTAGAGAATGTCACCACC\n+TAGAAAACATGTGGAGGAAGTTCCATCAGAGGCAGCCAACCGGCCCAGCTGGGCCTTAGC\n+TCAGAGGCAGCAAAAGTTCATAGATGATAGAAGACAGTAGAGGGATTAGTTAAATCTAAA\n+CTAATTAAAAAGAAAGGAATTAATTAAAGGTCTGTATACAAACTGGTTGAACACTACCCC\n+ATCTTCAAATATAGAA
GGGCCTAAATCCAGGAAGTGCCCCATGCAAAATATTGAAGGGAT\n+CTTTATTATAAAGTAAGTG'..b'TACAGATTTAGTCTAAATAGATATCTCTATGTCCCTATTTCTATTCTGTG\n+TCACTCAGACTCATCAAACAGACATGGCTAAATCTGAATTCAACACCCCAAAACCTGTCC\n+TACCTGCATCATCTATCTTGGTAAAGGGCTCCACCCAGATACCTAAGGACAGCATTCAAG\n+AATTCACTCTCTTCTTCCTACCTGCATATTCCATCAACAGTCCTACTAATTCAGCCTAAG\n+TTAACTCTCACAGTCATTCTCCCTATCGCGGCTGAACTTTAGGACCTCATTCTCTTCTAT\n+CCCCCTAACAGAACTCTTGGCTTAAGTCACAGCTATATCCCATGCTACCCCTAGAATGAG\n+CTTTTAAACAAGAATAGCTGATCAACAGGTCACTTCCCTACTCAAAATTCTCCAGTTACT\n+CTCTAGCATCTCAGTGGGGTGGAGGAGGAAAGTCAGGAATGCAAGTTCCACAACACGGGA\n+CACAAGCCCCTCATATTCTGTTTTTGGACTGACTCTTTACCTATCCTACATGTGACCTGT\n+ACATTCAAATTCATCTTACATTCATCATGTTCTCTATGTAGGCAAGTGACTTCCTCTGAG\n+ATAACCTCCAACCCATTTTCCAACAGATTATCTGCATGGTCCACTTTTATTCACCCTTTA\n+AAATCAGCTTAAGAGACTCCAATTCCTGGAAGCCTTCCTTAAGCAACTCCCCAGCTTGGT\n+CTGAGTCCTCTTCTTTATGCCAGAGCCCCACATGTACACCTGAACTGTCTGCTTATAAGT\n+CTAATTATCCAATCAACATTACTTCCCTGAGGCAGCAAGTAAATTTTGTGCATTTTTTTG\n+TACCTCCAGACCATAACAGTACCTGGCACACAATAAACAATCAGTAAGTCTGTGTTGCAT\n+CAATGAACCACGTTTAATCTAATCCCTTCAGAAAAGCAAAATGGACAGTAGAAAAAAGCA\n+ACTAAACCAGAATAAATGTTTGAAAAAATATGTATTGACTAACATCACTAGATATTCTAG\n+GTAAGACACTGTCTCTGCCGTTTTTCTACATTTTTCATTCCTTTAAAGTGTTCAGTTACT\n+ACTGCTTCTTTAATTTTCCAAACTATTTCTTTAATATTCTTTAGCTTTTTAAATTTTTTT\n+TAATTTTACTTAACTCCAAGTAAACATGCTGAACATTTTTTCTTTAGTTTCTTCATCTAA\n+AATGATCACTGTGACCAACTAAAAAGAGAAAATTACTCCATCCATTCAAAGGTAATTTCT\n+TCTCATGAGAAAAAAAATTATATAACCCTTACTCAAGAACATTATGGTGAAGTCCCTAAA\n+CAATAATTCCTATCTACATAGTCATTAATAAAGACTAGGTTTATATATCATTATGAAAAA\n+TTGTCAATAAATATTTCTTGGGTAACCCCTTCATGTGCTAATGTTGTGTTCAAAATTTGG\n+GCTTACACAAATATAACAAATCCTGAAAGGCTTAACAGACAGAAAATAAATGTACTGGCA\n+AAGAAAACAATCAAAAAATAGTATGTGGACATATACAATCAAGCACAAACTTATATGAAT\n+AATTATTTGTTAGAGATTACTCTGAGACTGTTCCATGGAGGAGGTACGACTCAGGCGAGG\n+TAGTGAAGAATGACTGCGTAGTAAATAGAAGGAGAATGGGAAATGGAGTCTAGAAAGAAA\n+AACTATCATGAAGAAATAAGCAAACTTGAAAATAAGTTTAATGTGTGATGCTTAATGTGC\n+CTGCAAGCTTAACCAGTGTTTGTGGCATTGTGGGAAATTAGGATAAGGAGGAAGTCCTAA\n+TGAGGACAGATTACACAGGACAGCTATCAGATAACCTAAGTAAAAGAGG
CACACTTAGCC\n+TATGAAATAGGAAACAGGAATAGTTGTGGTCAAGAAATACAATCACCCGGGCCGGGCGCG\n+GTGGCTCACGCCTGTAATCCCAGCACCTTGGGAGGCTGAGGGGGGTGGATCACGAGGTCA\n+GGAGATCGAGACCATCCTGGCTAACACGGTAAAACCCTGTCTCTACTAAAAATACAAAAA\n+AATTAGTTGGACGTAGTGGTGGGCACCCATAGTCCCAGCTACTTGGGAGGCTGAGGCACG\n+AGAATCGCTTGAGCCCGGGAGGTGGAGGTTGCAGTGAGCTGAGATCGCGCCACTGCACTC\n+CAGCCTGGGAGACAGAACAAGACTCTGTCTCAAAAAAAAAAAGAAAAGAAATACTATCAT\n+TCCTACTCATTAAGTCTAAATGCTAATGACAAAGAATAAACTACAAAAACACAAACATTC\n+AAATCTGGTTTTTCAGGACATTAGGATATTTGCACATTTACTGTTAATGGCTAAAGGCGG\n+CTTTAAAATACACCTGACAAATCTTAATCCTCTAAATATTCATTAAGTTTACAAAATAAA\n+TCCTAAGACCTAACTGATACCATTACAACTATTAAGAACTATTACTATTACATAGTAATA\n+TTCTTATTTCTCTTCAAGTTCTACAATATCATTTTCACGATATTAACAAATAAAATCAAA\n+AGAGAACTGCCAAGTTACAACTTTACCCTTACCAACACACTAGTGAACAATCACAACACT\n+GCTTTGACTATTGAGACAACTTACTTTTTTCCTTTCTCTTTGTTTCTTTCATTTTTCCCC\n+CCTTTGGTTGTTATTGGTTAATTTTGATGAATTTCTTTATAGAAGAGTACTCAATAGCTG\n+TCAAAAACACTTACCATTTTCCGAGATCTTTCCAATAGCTTATCCCATATTGTAAAATGT\n+GCCTTGAAAAGAAAAGATTTGCGATGAACTAAATGAAGCAGTAATGTCAAAGTAAAAAGG\n+ACAGAGTCTTTATATAGCTTTTAGAACAAAATAAATAAAAAGCTAGGCAGTACTAATTTA\n+GATAAGTGGTTATAAAAGTGGATCATAGTAGCAAAAAAAGGTGCTACATGGGGATAGCTG\n+CTTTTTTGCTTTCTTCAATCGCATGAACCCGGGAGGCAGAGGTTGCAGTGAGCATTTAAG\n+AAAAAAACGCTTAAGGATTGAACGTGCCCTTCTTTCACTTTTGCCCATCTTAATCATTTC\n+CATCATAGAAAGCATTTCTTCTAATAGGAAGCAAAAATCTGCCCTACTTTCCCATAGACT\n+GTGGTTCAATCCTTAAACAGCCAGTTTCAACATTCTAGAAGTCATTCTTCAAATCATAAG\n+CAATCACAAGAAGACAGTCATTAAAAACCAACTGTGACATCTTCACACTGAAATGGCAGA\n+AGCAGAGGATTCATCAAGTTACCCTAAAGAATAAGTAAAACCTAACAAAGACAATAGTAA\n+ATTTTTTTTTTTTTTGAAATGGAGTTTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGTGC\n+GATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTC\n+CCAAGACAATAGTAAAATTTAAACTCAATTTCCTTAGTCCATAACAACCTCCTGTAGCAG\n+AGGATTATCAGAATGCATTAAAGAAAACTGTGCAAAGTGTATCATGACCACATCAATTTT\n+ATTAGGTCAACTGAAAGTGGTAACAAGACATATGGGCAGGCCAGTGACTACTCCACACTG\n+AATGAGCTCATAAAATCTATAATAAAAGGTAAAATTAATAAATATCAACATACAAACCCT\n+TCCAGGGAAAGAGCTGACTGGTATGTTTAAAGGGAAAACCATGCCTGACTCAGGCGGAAT\n+GAACTGCTGGTGCAGAGACCTTAAGCTGTGGCT
GGAATATAGTGAGTGAGGAAAGGAGTG\n+GTGTTAGATAAAGTCAGAGAAGCAGGCAGGGACCAGATAATGCAGGGCTTTGTGAGACAG\n+GGTAAAGAGTTGGGATTTGTTCAAGGGAAGCCATTGGAAA\n'
b
diff -r 000000000000 -r 2973994fecd6 test-data/test2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2.fa Tue Jun 28 21:07:04 2022 +0000
b
b'@@ -0,0 +1,2 @@\n+>chrM\n+AATGAATTGCCTGATAAAAAGGATTACCTTGATAGGGTAAATCATGCAGTTTTCTGCATTCATTGACTGATTTATATATTATTTATAAAGATGATTTTATATTTAATAGAATTAAACTATTTCTAAAAGTATCAAAAACTTTTGTGCATCATACACCAAAATATATTTACAAAAAGATAAGCTAATTAAGCTACTGGGTTCATACCCCATTTATAAAGGTTATAATCCTTTTCTTTTTAATTTTTAATAATTCGTCAAAAATTTTATTTATTACAATTATAATTATTGGGACATTAATTACAGTTACATCTAATTCTTGGTTAGGAGCTTGAATAGGTTTAGAAATTAATTTATTATCTTTTATCCCCCTATTAAGAGATAATAATAATTTAATATCTACAGAAGCTTCTTTAAAATATTTTTTAACCCAAGTTTTAGCTTCAACTGTTTTATTATTTTCTTCAATTTTATTAATATTAAAAAATAATATAAATAATGAAATTAATGAATCTTTTACATCCATAATTATTATATCAGCTTTATTATTAAAAAGTGGAGCCGCTCCTTTCCATTTTTGATTTCCTAATATAATAGAAGGTTTAACATGAATAAATGCTTTAATATTAATAACTTGACAAAAAATTGCACCTTTAATATTAATTTCTTATCTTAATATTAAATATTTATTATTAATTAGAGTAATTTTATCAGTTATTATTGGAGCTATTGGAGGATTAAATCAAACTTCTTTACGAAAATTAATAGCATTTTCTTCAATTAATCATTTAGGGTGAATATTAAGATCTTTAATAATTAGAGAATCAATTTGATTAATTTATTTTTTTTTTTATTCATTTTTATCATTTGTATTAACATTTATATTTAATATTTTTAAATTATTTCATTTAAATCAATTATTTTCTTGATTTGTTAATAGAAAAATTTTGAAATTTACATTATTTATAAATTTTTTATCATTAGGAGGATTACCTCCATTTTTAGGATTTTTACCAAAATGACTTGTAATTCAACAATTAACATTATGTAATCAATATTTTATATTAACACTTATAATAATATCAACTTTAATTACATTATTTTTTTATTTACGAATTTGTTATTCCGCTTTTATAATAAATTATTTTGAAAATAACTGAATCATAAAGATAAATATAAATAGTATTAATTATAATATATATATAATTATAACttttttttcaatttttggattatttttaatttctttattttattttatattTTAAGGCTTTAAGTTAATAAAACTAATAACCTTCAAAGCTATAAATAAAGAAATTTCTTTAAGCCTTAGTAAAACTTACTCCTTCAAAATTGCAGTTTGATATCATTATTGACTATAAGACCTAATTAATTTGTCCTTATTTGATTAAGAAGAATAAATCTTATATATAGATTTACAATCTATCGCCTAAACTTCAGCCACTTAATCAATAATCGCGACAATGATTATTTTCTACAAATCATAAAGATATCGGAACTTTATATTTTATTTTTGGAGCTTGAGCTGGAATAGTTGGAACATCTTTAAGAATTTTAATTCGAGCTGAATTAGGACATCCTGGAGCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCACATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGTGGATTTGGAAATTGATTAGTGCCTTTAATATTAGGTGCTCCTGATATAGCATTCCCACGAATAAATAATATAAGATTTTGACTTCTACCTCCTGCTCTTTCTTTACTATTAGTAAGTAGAATAGTTGAAAATGGAGCTGGGACAGGATGAACTGTTTATCCACCTCTATCCGCTGGAATTGCTCATGGTGGAGCTTCAGTTGATTTAGCTATTTTTTCTCTACATTTAGCAGGAATTTCTTCAATTTTAGGAGCTGTAAATTTTATTACAACTGTAA
TTAATATACGATCAACAGGAATTTCATTAGATCGTATACCTttatttgtttgatcagtagttattactgctttattattattattatCACTTCCAGTACTAGCAGGAGCTATTACTATATTATTAACAGATCGAAATTTAAATACATCATTTTTTGACCCAGCGGGAGGAGGAGATCCTATTTTATACCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGAATAATTTCTCATATTATTAGACAAGAATCAGGAAAAAAGGAAACTTTTGGTTCTCTAGGAATAATTTATGCTATATTAGCTATTGGATTATTAGGATTTATTGTATGAGCTCATCATATATTTACCGTTGGAATAGATGTAGATACTCGAGCTTATTTTACCTCAGCTACTATAATTATTGCAGTTCCTACTGGAATTAAAATTTTTAGTTGATTAGCTACTTTACATGGAACTCAACTTTCTTATTCTCCAGCTATTTTATGAGCTTTAGGATTTGTTTTTTTATTTACAGTAGGAGGATTAACAGGAGTTGTTTTAGCTAATTCATCAGTAGATATTATTTTACATGATACTTATTATGTAGTAGCTCATTTTCATTATGTTTTATCTATAGGAGCTGTATTTGCTATTATAGCAGGTTTTATTCACTGATACCCCTTATTTACTGGATTAACGTTAAATAATAAATGATTAAAAAGTCATTTCATTATTATATTTATTGGAGTTAATTTAACATTTTTTCCTCAACATTTTTTAGGATTGGCTGGAATACCTCGACGTTATTCAGATTACCCAGATGCTTACACAACATGAAATATTGTATCAACTATTGGATCAACTATTTCATTATTAGGAATTTTATTCTTTTTTTTTATTATTTGAGAAAGTTTAGTATCACAACGACAAGTAATTTACCCAATTCAACTAAATTCATCAATTGAATGATACCAAAATACTCCACCAGCTGAACATAGATATTCTGAATTACCACTTTTAACAAATTAATTTCTAATATGGCAGATTAGTGCAATAGATTTAAGCTCTATATATAAAGTATTTTACTTTTATTAGAAAATAAATGTCTACATGAGCTAATTTAGGTTTACAAGATAGAGCTTCTCCTTTAATAGAACAATTAATTTTTTTTCATGATCATGCATTATTAATTTTAGTAATAATTACAGTATTGGTGGGATATTTAATATTTATATTATTTTTTAATAATTATGTAAATCGATTTCTTTTACATGGACAACTTATTGAAATAATTTGAACTATTTTACCAGCAATTATTTTACTATTTATTGCTCTTCCTTCTTTACGTTTACTTTATTTATTAGATGAAATTAATGAACCATCTGTAACTTTAAAAAGAATCGGCCATCAATGATATTGAAGTTACGAATATTCAGATTTTAATAATATTGAATTTGATTCATATATAATTCCAACAAATGAATTAATAACTGATGGATTTCGATTATTAGATGTTGATAACCGAGTAGTTTTACCCATAAACTCACAAATTCGAATTTTAGTAACAGCTGCTGATGTTATTCATTCTTGAACAGTACCTGCTTTAGGAGTAAAAGTTGACGGTACACCTGGACGATTAAATCAAACTAATTTTTTTATTAATCGACCGGGTTTATTTTATGGTCAATGTTCAGAAATCTGTGGAGCTAATCATAGATTTATACCGATTGTAATTGAAAGTGTTCCTGTAAATTACTTTATTAAATGAATTTCTAGAAATAACTCTTCATTAGATGACTGAAAGCAAGTACTGGTCTCTTAAACCATTTAATAGTAAATTAGCACTTACTTCTAATGATAAAAAATTAGTTAAAATCATAACATTAGTATGTCAAACTAAAATTATTAAATAATTAATATTTTTTAATTCCACAAATAGCACCtattagatgattattattatttattattttttctattacatttatttta
ttt'..b'TCAATATATATATAATAATAAATAATTTGATTATTAATTAAATTATACGAATAATAAATATAATAAATAATTTATTTTAATCAATAAATCTGAAATAATTAATTATATACATATATATATATATGTAAATAAATAAAAATAAATTTATTCCCCCTATTTATAAATTTATTATATAATTAAAACTTAAAAAATATTTTTTTTAAAAAAATAGTTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATAAATAAAATTATTTAAAATAATTAATAAAAATATTTTTATTGTAATAAAAATTAAAAATAATTTTAAAAAAATTAAATTTATATATTTATATATATATATATATAATTTTTAATTTTCAATTAAATTATATAAATATAATAAAATAATTTTATTTAATCACTAAATCTGAAATAATTAATtataaatatatatatatatatatatatatatatatatatataAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTGTATAATTAAAACTTAAAAAATATTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATAAATAAAATTATTTTAAATAATTAATAAAAATATTTTTAATATAATAAAAATTTAAAATGATTTTTTATAAAAATTAAATTCATATTTATATATATATATATATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATATATATATATATATATAAAAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTTATAAAAATATTTTTAATATAATGAAAATTTAAAATGATTTTTTATTATTAATTAAATTCatatatttacatatatatatatatatatatatatatataGATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATGTATATATATATATATATATATAAAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTAATAAAAATATTTTTAATATAATGAAAATTTAAAATGATTTTTTATTATTAATTAAATTCatatatttatatatatatatatatatatatatataGATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATGtatatatatatatatatatatatatatatataAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTAATAAAAATTATATATATATATAAATGAAAATAATTTTTAAATTTTAATAATAAATAAATTTAATAATTAATAATTAAATAAAATCTATTCATTATTAATATTTAATTAATAATAAATAAATTTAATAACTAATAATTAAATAAAATTTATTTATTATTAATATTTAATTAATAATAAAAAATCATCAttttttttttttttttttttatttAATTAATTATtatatatttataaatttatatattattcaatatttataatatatatatatatatatatatataAAAAT
TAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATTATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAATTTAAAAAAAAATAATTTTTTTTTAAAAAAAAACTATATACTAATTATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAAATTTTTTAAAAAAAAATAATTTTTTTTTTAAAAAAACTATATACTAATTATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAATTTTTTAAAAAAAAATAATTTTTTTTTAAAAAAACTATATACTAAATATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTTAAAAAAAATTTTTTAAAAAAAATAATTTTTTTTTAAAAAAACTATATACTAAATATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATATAATATATATATATATAGAAAAATAAAATTATTTAAATAATTTTACATAAAATTTTAAAAAATTTCTTAAATGTATTATTTAATAAAAAATTACTTTTTAAAAAAAATAATTTTAATTTTTTaaaaaaaatagtaaataataaaaaaaaaaaaaaaaaaaaatgaaaaTTAT
ATTATT\n'
b
diff -r 000000000000 -r 2973994fecd6 test-data/vep_versioned_annotation_cache.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vep_versioned_annotation_cache.loc Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,3 @@
+#<value> <dbkey> <version> <cachetype> <name> <species> <path>
+#
+drosophila_melanogaster_vep_106_BDGP6.32 dm6 106 default Drosophila melanogaster dm6 (V106) drosophila_melanogaster ${__HERE__}/test-cache
b
diff -r 000000000000 -r 2973994fecd6 tool-data/dbkeys.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dbkeys.loc.sample Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,1 @@
+#<dbkey> <display_name> <len_file_path>
\ No newline at end of file
b
diff -r 000000000000 -r 2973994fecd6 tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
b
diff -r 000000000000 -r 2973994fecd6 tool-data/vep_versioned_annotation_cache.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/vep_versioned_annotation_cache.loc.sample Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,11 @@
+#This file describes vep cache data and its metadata available on the server.
+#The data table has the format (white space characters are TAB characters):
+#
+#<value> <dbkey> <version> <cachetype> <name> <species> <path>
+#
+#So, vep_versioned_annotation_cache.loc tables could look like this:
+#
+#homo_sapiens_vep_105_GRCh38 hg38 105 default Homo sapiens hg38 (V105) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/default
+#homo_sapiens_refseq_vep_105_GRCh38 hg38 105 refseq Homo sapiens hg38 (V105, Refseq) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/refseq
+#homo_sapiens_merged_vep_105_GRCh38 hg38 105 merged Homo sapiens hg38 (V105, Merged) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/merged
+#
\ No newline at end of file
b
diff -r 000000000000 -r 2973994fecd6 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Table of installed versioned vep cache data -->
+    <table name="vep_versioned_annotation_cache" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, species, path</columns>
+        <file path="tool-data/vep_versioned_annotation_cache.loc" />
+    </table>
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="tool-data/dbkeys.loc" />
+    </table>
+    <!-- Location of SAMTools indexes for FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 2973994fecd6 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Jun 28 21:07:04 2022 +0000
b
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Versioned VEP annotation caches used by the tool tests; rows are read from the .loc file shipped in test-data -->
+    <table name="vep_versioned_annotation_cache" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, species, path</columns>
+        <file path="${__HERE__}/test-data/vep_versioned_annotation_cache.loc" />
+    </table>
+    <!-- SAMtools-indexed FASTA references used by the tool tests; rows are read from the .loc file shipped in test-data -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 2973994fecd6 vcf2maf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf2maf.xml Tue Jun 28 21:07:04 2022 +0000
[
b'@@ -0,0 +1,153 @@\n+<tool id="vcf2maf" name="Convert VCF to MAF" version="@TOOL_VERSION@">\n+\t<description>with vcf2maf</description>\n+\t<macros>\n+\t\t<token name="@TOOL_VERSION@">1.6.21</token>\n+\t\t<token name="@DB_VERSION@">106</token>\n+\t</macros>\n+\t<requirements>\n+\t\t<requirement type="package" version="@TOOL_VERSION@">vcf2maf</requirement>\n+\t\t<requirement type="package" version="@DB_VERSION@.1">ensembl-vep</requirement>\n+\t</requirements>\n+\t<command detect_errors="exit_code"><![CDATA[\n+\t\tln -s \'${input1}\' MainInput.vcf &&\n+\t\t#if $ref_seq.ref_source == "cached":\n+\t\t\tln -s \'${ref_seq.ref.fields.path}\' reference.fa &&\n+\t\t#elif $ref_seq.ref_source == "history":\n+\t\t\tln -s \'${ref_seq.ref}\' reference.fa &&\n+\t\t#end if\n+\t\tvcf2maf.pl --input-vcf MainInput.vcf --output-maf MainOutput.maf --ref-fasta reference.fa\n+\t\t#if $annotation_cache.source == "no_vep":\n+\t\t\t--inhibit-vep\n+\t\t#else:\n+\t\t\t--vep-path \\$(dirname \\$(which vep))\n+\t\t\t--vep-data \'${annotation_cache.cache_file.fields.path}\'\n+\t\t\t--species \'${annotation_cache.cache_file.fields.species}\'\n+\t\t\t--ncbi-build \'${annotation_cache.cache_file.fields.value.split($annotation_cache.cache_file.fields.version + "_")[-1]}\'\n+\t\t\t#if $annotation_cache.cache_file.fields.version != "@DB_VERSION@": --cache-version $annotation_cache.cache_file.fields.version\n+\t\t#end if\n+\n+\t\t#if $tumor_id:\n+\t\t\t--tumor-id \'${tumor_id}\'\n+\t\t#end if\n+\t\t#if $normal_id:\n+\t\t\t--normal-id \'${normal_id}\'\n+\t\t#end if\n+\t\t#if $vcf_tumor_id:\n+\t\t\t--vcf-tumor-id \'${vcf_tumor_id}\'\n+\t\t#end if\n+\t\t#if $vcf_normal_id:\n+\t\t\t--vcf-normal-id \'${vcf_normal_id}\'\n+\t\t#end if\n+\n+\t\t#if $adv_opt.any_allele:\n+\t\t\t--any-allele\n+\t\t#end if\n+\t\t#if $adv_opt.min_hom_vaf:\n+\t\t\t--min-hom-vaf $adv_opt.min_hom_vaf\n+\t\t#end if\n+\t\t#if $adv_opt.maf_center:\n+\t\t\t--maf-center \'${adv_opt.maf_center}\'\n+\t\t#end if\n+\t\t#if 
$adv_opt.retain_info:\n+\t\t\t--retain-info \'${adv_opt.retain_info}\'\n+\t\t#end if\n+\t\t#if $adv_opt.retain_fmt:\n+\t\t\t--retain-fmt \'${adv_opt.retain_fmt}\'\n+\t\t#end if\n+\t\t#if $adv_opt.retain_ann:\n+\t\t\t--retain-ann \'${adv_opt.retain_ann}\'\n+\t\t#end if\n+\t]]></command>\n+\t<inputs>\n+\t\t<param type="data" name="input1" label="VCF input file" format="vcf">\n+\t\t\t<validator type="unspecified_build" />\n+\t\t</param>\n+\t\t<conditional name="ref_seq">\n+\t\t\t<param name="ref_source" type="select" label="Select FASTA file as reference sequence">\n+\t\t\t\t<option value="cached">Locally cached</option>\n+\t\t\t\t<option value="history">History</option>\n+\t\t\t</param>\n+\t\t\t<when value="cached">\n+\t\t\t\t<param name="ref" type="select" label="Select reference sequence">\n+\t\t\t\t\t<options from_data_table="fasta_indexes">\n+\t\t\t\t\t\t<validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />\n+\t\t\t\t\t</options>\n+\t\t\t\t</param>\n+\t\t\t</when>\n+\t\t\t<when value="history">\n+\t\t\t\t<param name="ref" type="data" format="fasta" label="Select reference sequence" />\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<conditional name="annotation_cache">\n+\t\t\t<param name="source" type="select" label="Select the source of annotation data if you want to use VEP" help="vcf2maf can utilize Ensembl\'s VEP to select a single effect per variant. VEP can only be used if SIFT is available for the selected genome assembly. Ensembl strongly recommends to only use annotation cache files with a version number matching the VEP version. 
You can disable the corresponding filtering of available cache files at your own risk.">\n+\t\t\t\t<option value="no_vep" selected="true">Do not use VEP</option>\n+\t\t\t\t<option value="restricted">Use VEP with a cache file with matching version number</option>\n+\t\t\t\t<option value="unrestricted">Use VEP with any cache file</option>\n+\t\t\t</param>\n+\t\t\t<when value="no_vep"/>\n+\t\t\t<when value="restricted">\n+\t\t\t\t<param name="cache_file" type="select" label="Select annotation cache file" help="If the annotation data of interest is not listed, have a look at all available cache files regardless of their version number or contact your Galaxy admin.">\n+\t\t\t\t\t<options from_data_table="vep_vers'..b'rcode column of the output MAF with the tumor sample ID."/>\n+\t\t<param argument="--normal-id" type="text" optional="true" label="Enter normal sample ID (optional)" help="Used to fill the Matched_Norm_Sample_Barcode column of the output MAF with the normal sample ID."/>\n+\t\t<param argument="--vcf-tumor-id" type="text" optional="true" label="Enter name of tumor genotype column (optional)" help="VCFs from variant callers like VarScan use hardcoded sample IDs TUMOR/NORMAL to name genotype columns. Use this parameter to have vcf2maf correctly locate these columns to parse genotypes, while still printing proper sample IDs in the output MAF."/>\n+\t\t<param argument="--vcf-normal-id" type="text" optional="true" label="Enter name of normal genotype column (optional)" help="VCFs from variant callers like VarScan use hardcoded sample IDs TUMOR/NORMAL to name genotype columns. 
Use this parameter to have vcf2maf correctly locate these columns to parse genotypes, while still printing proper sample IDs in the output MAF."/>\n+\t\t\n+\t\t<section name="adv_opt" title="Advanced options">\n+\t\t\t<param argument="--any-allele" type="boolean" optional="true" checked="false" label="Allow also mismatched variant alleles when reporting co-located variants"/>\n+\t\t\t<param argument="--min-hom-vaf" type="float" optional="true" min="0" max="1" label="Enter minimum allele fraction to call a variant homozygous if GT is undefined in VCF" help="Default value is 0.7"/>\n+\t\t\t<param argument="--maf-center" type="text" optional="true" label="Enter variant calling center to report in MAF"/>\n+\t\t\t<param argument="--retain-info" type="text" optional="true" label="Enter comma-delimited names of INFO fields to retain as extra columns in MAF"/>\n+\t\t\t<param argument="--retain-fmt" type="text" optional="true" label="Enter comma-delimited names of FORMAT fields to retain as extra columns in MAF"/>\n+\t\t\t<param argument="--retain-ann" type="text" optional="true" label="Enter comma-delimited names of VEP annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF"/>\n+\t\t</section>\n+\t</inputs>\n+\t<outputs>\n+\t\t<data name="output1" format="tabular" from_work_dir="MainOutput.maf" />\n+\t</outputs>\n+\t<tests>\n+\t\t<test expect_num_outputs="1">\n+\t\t\t<param name="input1" dbkey="hg19" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="ref_source" value="history" />\n+\t\t\t<param name="ref" dbkey="hg19" value="test1.fa" ftype="fasta" />\n+\t\t\t<param name="annotation_cache.source" value="no_vep" />\n+\t\t\t<output name="output1" file="output_test1.tabular" ftype="tabular" />\n+\t\t</test>\n+\t\t<test expect_num_outputs="1">\n+\t\t\t<param name="input1" dbkey="hg19" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="ref_source" value="cached" />\n+\t\t\t<param name="ref" value="hg19test" />\n+\t\t\t<param 
name="annotation_cache.source" value="no_vep" />\n+\t\t\t<output name="output1" file="output_test1.tabular" ftype="tabular" />\n+\t\t</test>\n+\t\t<test expect_num_outputs="1">\n+\t\t\t<param name="input1" dbkey="dm6" value="input_test2.vcf" ftype="vcf" />\n+\t\t\t<param name="ref_source" value="history" />\n+\t\t\t<param name="ref" dbkey="dm6" value="test2.fa" ftype="fasta" />\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<output name="output1" file="output_test2.tabular" ftype="tabular" />\n+\t\t</test>\n+\t</tests>\n+\t<help><![CDATA[\n+\t\tThe tool vcf2maf can parse a wide range of VCF-like formats and convert these into the `Mutation Annotation Format (MAF) <https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/>`__. A central part of the conversion process is the selection of a single effect per variant. While this is often a subjective decision, vcf2maf offers a standardized way to achieve this by optionally utilizing Ensembl\'s `Variant Effect Predictor (VEP) <https://www.ensembl.org/info/docs/tools/vep/index.html>`__.\t]]></help>\n+\t<citations>\n+\t\t<citation type="doi">10.5281/zenodo.593251</citation>\n+\t</citations>\n+</tool>\n\\ No newline at end of file\n'