Repository 'ensembl_vep'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ensembl_vep

Changeset 0:7303183cdb87 (2022-05-11)
Next changeset 1:27fd1c1f00a8 (2022-06-28)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ensembl_vep commit 2db33cd5bcf5e2d7e3a43f11855c4cfc3b1b9f56
added:
ensembl_vep.xml
test-data/input_test1.vcf
test-data/input_test2.vcf
test-data/output_test1.vcf
test-data/output_test2.vcf
test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt
test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt
test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz
test-data/test.fa
test-data/test.gtf
test-data/vep_versioned_annotation_cache.loc
tool-data/dbkeys.loc.sample
tool-data/vep_versioned_annotation_cache.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 7303183cdb87 ensembl_vep.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ensembl_vep.xml Wed May 11 13:03:38 2022 +0000
[
b'@@ -0,0 +1,370 @@\n+<tool id="ensembl_vep" name="Predict variant effects" version="@TOOL_VERSION@">\n+\t<description>with VEP</description>\n+\t<macros>\n+\t\t<token name="@TOOL_VERSION@">106.1</token>\n+\t\t<token name="@DB_VERSION@">106</token>\n+\t\t<xml name="vcf_input">\n+\t\t\t<param type="data" name="input1" label="VCF input file" format="vcf">\n+\t\t\t\t<validator type="unspecified_build" />\n+\t\t\t\t<yield />\n+\t\t\t</param>\n+\t\t</xml>\n+\t\t<xml name="vcf_input_validation">\n+\t\t\t<validator type="dataset_metadata_in_data_table" table_name="vep_versioned_annotation_cache" metadata_name="dbkey" metadata_column="1" message="No annotation caches are available for the specified build" />\n+\t\t</xml>\n+\t</macros>\n+\t<requirements>\n+\t\t<requirement type="package" version="@TOOL_VERSION@">ensembl-vep</requirement>\n+\t\t<requirement type="package" version="0.1">perl-math-cdf</requirement>\n+\t\t<requirement type="package" version="3.4">grep</requirement>\n+\t</requirements>\n+\t<command detect_errors="exit_code"><![CDATA[\n+\t\t#if $annotation_cache.source == "custom":\n+\t\t\t#set $custom_ext = $annotation_cache.custom_annotation.ext[:3]\n+\t\t\tset -o pipefail &&\n+\t\t\tgrep -v "#" \'${annotation_cache.custom_annotation}\' | \n+\t\t\tLC_ALL=C sort -k1,1 -k4,4n -k5,5n -t$\'\\t\' | \n+\t\t\tbgzip -c > \'custom_annotation.${custom_ext}.gz\' &&\n+\t\t\ttabix -p gff \'custom_annotation.${custom_ext}.gz\' &&\n+\t\t#end if\n+\t\tvep -i \'${annotation_cache.input1}\' -o MainOutput.vcf --vcf\n+\t\t#if $annotation_cache.source == "custom":\n+\t\t\t--$custom_ext \'custom_annotation.${custom_ext}.gz\'\n+\t\t#else:\n+\t\t\t--cache\n+\t\t\t--species \'${annotation_cache.cache_file.fields.species}\'\n+\t\t\t--dir_cache \'${annotation_cache.cache_file.fields.path}\'\n+\t\t\t#if $annotation_cache.cache_file.fields.cachetype == "refseq": --refseq\n+\t\t\t#if $annotation_cache.cache_file.fields.cachetype == "merged": --merged\n+\t\t\t#if 
$annotation_cache.cache_file.fields.version != "@DB_VERSION@": --cache_version $annotation_cache.cache_file.fields.version\n+\t\t\t## The --offline flag automatically activates --cache. This is not wanted in our gff/gtf case but also not needed as no internet connection is required for annotating with these custom annotation sources.\n+\t\t\t--offline\n+\t\t#end if\n+\t\t#if $ref_seq.ref_source == "cached":\n+\t\t\t--fasta \'${ref_seq.ref.fields.path}\'\n+\t\t#elif $ref_seq.ref_source == "history":\n+\t\t\t--fasta \'${ref_seq.ref}\'\n+\t\t#end if\n+\t\t--stats_text\n+\t\t\n+\t\t#if $out_opt.sift != "None": --sift $out_opt.sift\n+\t\t#if $out_opt.polyphen != "None": --polyphen $out_opt.polyphen\n+\t\t#if $out_opt.nearest != "None": --nearest $out_opt.nearest\n+\t\t#if int($out_opt.distance_custom.distance_1) > -1:\n+\t\t\t--distance $out_opt.distance_custom.distance_1#if int($out_opt.distance_custom.distance_2) > -1: ,$out_opt.distance_custom.distance_2\n+\t\t#end if\n+\t\t#if $out_opt.cell_type != "": --cell_type \'$out_opt.cell_type\'\n+\t\t#if $out_opt.individual != "": --individual $out_opt.individual\n+\t\t--vcf_info_field $out_opt.vcf_info_type.vcf_info_field\n+\t\t--terms $out_opt.terms\n+\t\t#if $out_opt.out_opt_checkboxes != "None": ${\' \'.join(str($out_opt.out_opt_checkboxes).split(","))}\n+\t\t#if $out_opt.shift_var.shift_selector != "None":\n+\t\t\t$out_opt.shift_var.shift_selector\n+\t\t\t#if $out_opt.shift_var.shift_selector == "--shift_hgvs 0 --shift_3prime 1"\n+\t\t\t\t#if $out_opt.shift_var.shift_length != "None": $out_opt.shift_var.shift_length\n+\t\t\t#end if\n+\t\t#end if\n+\t\t#if $ident.input_synonyms: --synonyms \'${ident.input_synonyms}\'\n+\t\t#if $ident.ident_checkboxes != "None": ${\' \'.join(str($ident.ident_checkboxes).split(","))}\n+\t\t#if $colo_var.colo_var_checkboxes != "None": ${\' \'.join(str($colo_var.colo_var_checkboxes).split(","))}\n+\t\t#if $fil_qc.fil_qc_checkboxes != "None": ${\' 
\'.join(str($fil_qc.fil_qc_checkboxes).split(","))}\n+\t\t$plugins.carol\n+\t\t$plugins.condel\n+\t\t#if $plugins.exacpli_file: --plugin ExACpLI,\'${plugins.exacpli_file}\'\n+\t\t#if $plugins.loftool_file: --plugin LoFtool,\'${plugins.loftool_file}\'\n+\t]]></command>\n+\t<inputs>\n+\t\t<conditional name="annotation_cache">\n+\t\t\t<param name="source" type="select'..b'pe="vcf" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<output name="output1" file="output_test1.vcf" ftype="vcf" lines_diff="2" />\n+\t\t</test>\n+\t\t<test expect_num_outputs="2">\n+\t\t\t<param name="source" value="custom" />\n+\t\t\t<param name="input1" dbkey="hg19" value="input_test2.vcf" ftype="vcf" />\n+\t\t\t<param name="custom_annotation" dbkey="hg19" value="test.gtf" ftype="gtf" />\n+\t\t\t<param name="ref_source" value="history" />\n+\t\t\t<param name="ref" dbkey="hg19" value="test.fa" ftype="fasta" />\n+\t\t\t<output name="output1" file="output_test2.vcf" ftype="vcf" lines_diff="2" />\n+\t\t</test>\n+\t\t<test expect_num_outputs="2">\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="vcf_info_field" value="ANN" />\n+\t\t\t<output name="output1">\n+\t\t\t\t<assert_contents>\n+\t\t\t\t\t<has_text text="##INFO=&lt;ID=ANN" />\n+\t\t\t\t\t<has_n_lines n="5" />\n+\t\t\t\t</assert_contents>\n+\t\t\t</output>\n+\t\t</test>\n+\t\t<test expect_num_outputs="2">\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="terms" value="display" />\n+\t\t\t<output 
name="output1">\n+\t\t\t\t<assert_contents>\n+\t\t\t\t\t<has_text text="|DOWNSTREAM|" />\n+\t\t\t\t\t<not_has_text text="|downstream_gene_variant|" />\n+\t\t\t\t\t<has_text text="|UPSTREAM|" />\n+\t\t\t\t\t<not_has_text text="|upstream_gene_variant|" />\n+\t\t\t\t\t<has_n_lines n="5" />\n+\t\t\t\t</assert_contents>\n+\t\t\t</output>\n+\t\t</test>\n+\t\t<test expect_num_outputs="2">\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="out_opt_checkboxes" value="--overlaps,--allele_number" />\n+\t\t\t<output name="output1">\n+\t\t\t\t<assert_contents>\n+\t\t\t\t\t<has_text text="|OverlapBP|OverlapPC" />\n+\t\t\t\t\t<has_text text="|ALLELE_NUM" />\n+\t\t\t\t\t<has_n_lines n="5" />\n+\t\t\t\t</assert_contents>\n+\t\t\t</output>\n+\t\t</test>\n+\t\t<test expect_num_outputs="2">\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="ident_checkboxes" value="--uniprot,--biotype" />\n+\t\t\t<output name="output1">\n+\t\t\t\t<assert_contents>\n+\t\t\t\t\t<has_text text="|SWISSPROT|TREMBL|UNIPARC|UNIPROT_ISOFORM" />\n+\t\t\t\t\t<has_text text="|BIOTYPE" />\n+\t\t\t\t\t<has_n_lines n="5" />\n+\t\t\t\t</assert_contents>\n+\t\t\t</output>\n+\t\t</test>\n+\t\t<test expect_num_outputs="2">\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+\t\t\t<param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+\t\t\t<param name="source" value="restricted" />\n+\t\t\t<param name="colo_var_checkboxes" value="--max_af,--pubmed" 
/>\n+\t\t\t<output name="output1">\n+\t\t\t\t<assert_contents>\n+\t\t\t\t\t<has_text text="|MAX_AF|MAX_AF_POPS" />\n+\t\t\t\t\t<has_text text="|PUBMED" />\n+\t\t\t\t\t<has_n_lines n="5" />\n+\t\t\t\t</assert_contents>\n+\t\t\t</output>\n+\t\t</test>\n+\t</tests>\n+\t<help><![CDATA[\n+\t\tThe Ensembl Variant Effect Predictor (VEP) is able to determine the effect of variants (e.g. SNPs, insertions or deletions) on genes, transcripts, protein sequences and regulatory regions. Given the coordinates and nucleotide changes of a variant, it outputs affected genes, the exact location and consequences of the variant as well as known variants matching this one.\n+\t]]></help>\n+\t<citations>\n+\t\t<citation type="doi">10.1186/s13059-016-0974-4</citation>\n+\t</citations>\n+</tool>\n'
b
diff -r 000000000000 -r 7303183cdb87 test-data/input_test1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_test1.vcf Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chrM 5701 . GA G . . .
b
diff -r 000000000000 -r 7303183cdb87 test-data/input_test2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_test2.vcf Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.1
+##contig=<ID=21,assembly=GCF_000001405.26,length=46709983>
+##contig=<ID=22,assembly=GCF_000001405.26,length=50818468>
+##ALT=<ID=CNV,Description="Copy Number Polymorphism">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
+21 5733 rs142513484 C T . . . GT 0|0
b
diff -r 000000000000 -r 7303183cdb87 test-data/output_test1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test1.vcf Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,5 @@
+##fileformat=VCFv4.0
+##VEP="v105" time="2022-03-07 13:29:51" cache="/home/sebastian/.planemo/planemo_tmp_fhheutk7/test-data/test-cache/drosophila_melanogaster/105_BDGP6.32" db="drosophila_melanogaster_core_105_9@ensembldb.ensembl.org" ensembl-io=105.2a0a40c ensembl-variation=105.ac8178e ensembl=105.525fbcb ensembl-funcgen=105.660df8f assembly="BDGP6.32" genebuild="dmel_r6.32_FB2020_01"
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chrM 5701 . GA G . . CSQ=-|downstream_gene_variant|MODIFIER|mt:ND2|FBgn0013680|Transcript|FBtr0100857|protein_coding|||||||||||4439|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Trp-TCA|FBgn0013709|Transcript|FBtr0100858|tRNA|||||||||||4373|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Cys-GCA|FBgn0013690|Transcript|FBtr0100859|tRNA|||||||||||4319|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Tyr-GTA|FBgn0013710|Transcript|FBtr0100860|tRNA|||||||||||4234|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:CoI|FBgn0013674|Transcript|FBtr0100861|protein_coding|||||||||||2693|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Leu-TAA|FBgn0013699|Transcript|FBtr0100862|tRNA|||||||||||2625|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:CoII|FBgn0013675|Transcript|FBtr0100863|protein_coding|||||||||||1935|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Lys-CTT|FBgn0013697|Transcript|FBtr0100864|tRNA|||||||||||1864|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Asp-GTC|FBgn0013691|Transcript|FBtr0100865|tRNA|||||||||||1796|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ATPase8|FBgn0013673|Transcript|FBtr0100866|protein_coding|||||||||||1634|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:CoIII|FBgn0013676|Transcript|FBtr0100868|protein_coding|||||||||||178|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Gly-TCC|FBgn0013694|Transcript|FBtr0100869|tRNA|||||||||||95|1||FlyBaseName_gene|,-|frameshift_variant|HIGH|mt:ND3|FBgn0013681|Transcript|FBtr0100870|protein_coding|1/1||||95|95|32|D/X|gAc/gc|||1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Ala-TGC|FBgn0013689|Transcript|FBtr0100871|tRNA|||||||||||281|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Arg-TCG|FBgn0013704|Transcript|FBtr0100872|tRNA|||||||||||360|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Asn-GTT|FBgn0013701|T
ranscript|FBtr0100873|tRNA|||||||||||424|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Ser-GCT|FBgn0013705|Transcript|FBtr0100874|tRNA|||||||||||489|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Glu-TTC|FBgn0013692|Transcript|FBtr0100875|tRNA|||||||||||557|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Phe-GAA|FBgn0013693|Transcript|FBtr0100876|tRNA|||||||||||642|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:His-GTG|FBgn0013695|Transcript|FBtr0100878|tRNA|||||||||||2439|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ND4L|FBgn0013683|Transcript|FBtr0100880|protein_coding|||||||||||3843|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Thr-TGT|FBgn0013707|Transcript|FBtr0100881|tRNA|||||||||||4136|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Pro-TGG|FBgn0013702|Transcript|FBtr0100882|tRNA|||||||||||4202|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:ND6|FBgn0013685|Transcript|FBtr0100883|protein_coding|||||||||||4269|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ATPase6|FBgn0013672|Transcript|FBtr0433498|protein_coding|||||||||||966|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ND4|FBgn0262952|Transcript|FBtr0433500|protein_coding|||||||||||2505|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ND5|FBgn0013684|Transcript|FBtr0433501|protein_coding|||||||||||707|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:Cyt-b|FBgn0013678|Transcript|FBtr0433502|protein_coding|||||||||||4797|1||FlyBaseName_gene|
b
diff -r 000000000000 -r 7303183cdb87 test-data/output_test2.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test2.vcf Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##contig=<ID=21,assembly=GCF_000001405.26,length=46709983>
+##contig=<ID=22,assembly=GCF_000001405.26,length=50818468>
+##ALT=<ID=CNV,Description="Copy Number Polymorphism">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##VEP="v105" time="2022-03-30 19:24:59" ensembl=105.525fbcb ensembl-io=105.2a0a40c ensembl-funcgen=105.660df8f ensembl-variation=105.ac8178e
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|SOURCE|custom_annotation.gtf.gz">
+##INFO=<ID=custom_annotation.gtf.gz,Number=.,Type=String,Description="custom_annotation.gtf.gz (overlap)">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
+21 5733 rs142513484 C T . . CSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|3/3||||159|||||||-1||||custom_annotation.gtf.gz|,T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|2/2||||70|70|24|A/T|Gca/Aca|||-1||||custom_annotation.gtf.gz|,T|upstream_gene_variant|MODIFIER|AP000223.42|ENSG00000260583|Transcript|ENST00000567517|antisense|||||||||||2407|-1||||custom_annotation.gtf.gz| GT 0|0
b
diff -r 000000000000 -r 7303183cdb87 test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,7 @@
+KJ947872 chrM
+NC_024511.2 chrM
+KJ947872 mitochondrion_genome
+KJ947872.2 chrM
+KJ947872.2 mitochondrion_genome
+NC_024511.2 mitochondrion_genome
+chrM mitochondrion_genome
b
diff -r 000000000000 -r 7303183cdb87 test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,6 @@
+species drosophila_melanogaster
+assembly BDGP6.32
+sift
+polyphen
+source_genebuild dmel_r6.32_FB2020_01
+source_assembly BDGP6.32
b
diff -r 000000000000 -r 7303183cdb87 test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz
b
Binary file test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz has changed
b
diff -r 000000000000 -r 7303183cdb87 test-data/test.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fa Wed May 11 13:03:38 2022 +0000
b
b'@@ -0,0 +1,168 @@\n+>21 dna:chromosome chromosome:GRCh38:21:1:10000:1 REF\n+CCACAATCATTTTAGGAGAATGGGTTTAAGAAAGGAAAAAAAAAAAAAAGATTTCTGTAT\n+GCTCTTAAGAGAAAATCTAAAAAATAATGACATGAAAAAGTTGAAAGGAATGGAAAAATA\n+TGTACCATTAAAAGGAAACCCGACGTATGAATGCCATTATCAGACAAAACAGATTTTTTT\n+CTTTTTGAGATGGAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGGCACAATCTCTGC\n+TCACAGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCCAAGTAGC\n+TGGGACTACAGGCACCCACCACCACACCAGGCTAGTATTTGTATTTTTAGTAGAGACGGG\n+GTTTCATCGTGTTAGCCAGGATGGTCTCAATCTTCTGACCTCGTGATCTGCCCACCTCAG\n+CCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCAGCCAGACAAAACAGATT\n+TTAAGACAACTAAGAAGTTAACAAGCTGACCCTACAATAAGCATGAAAATTTTGAAAAAG\n+AATAGGAAAGGAGAACTCACCATAAGAGAAATTGAAACTTGTTATAAAGCTATAGTTGTT\n+AAAACGGTGTTACTACAGTGGTACATGGACAGATAAATGGACCAATGAAGCAGACCCAGG\n+CACTGAAAGGAACCTTTTATATGACAGCATGGCACAATCAGTAAGAATAGAGAGGAAATA\n+GGCCAGGCACGGTGGCTCACGCCTGTAATCCCAGTACTTTGGGAGGCCAAGGCAGGCAGA\n+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGTCAACATGGTGAAACCGCGTCTCTACT\n+AAAAATACAAAAATTAGCTGGTCGTGGTGGCAGGCACCTGTAATCCCAGCTACTCGGGAG\n+GCTGAGGCAGGAGAATCACTTGAACCCGGCTAGCGGAGGCTGCAGTGAGCCGAGATCACG\n+CCATTGCACTCCAGCCTAGGCAACAAGAGTGAAACTCCATCACAAAAAAAAAAGAAAAAG\n+AGTAGACAGGAAATAAATGGTCCAGAATAACTGCCTATCCTTGTGGAGGAGAGGGTGATT\n+CAAAATTAGGTCCCTTTCCTCACTCTATATGCAAAAAACAAACTTCAAATAAATTATACA\n+ATTAAATGTGAAAATCAAGACTTTAAAATAAACAATGCAGTAGGCTGCTTTATAATATCA\n+AGTTAGGGAAGGCTTTCTTAAATTTCATAAACATAAATCATAGAGGAAAAGATGAACTGT\n+CTACCTTAAAATTAAAGACGATATAAACAAAATTAAAAGGTAAGCCAGACAAAAGAAATA\n+TTTGTAGTGACAACGGTTTAACTTTCTTTCTTTCTTTTTTTTTTTTTGAGACGGAATCTC\n+ACTCTGTCACCCAGGCTGGAGTACACTGGTGCAATCTCAGCTCACTGCAACCTCCACCTC\n+CCAGGTTCAAGCGATTCTTGTGCCTCAGCCTCCCAAGTAGCTGGGATGACAGGTACGCAC\n+CACCACACCCAGCTAATTTTTTGTATTTTTAGCAGAGACGGGGTTTCACCATGTTGGCCA\n+GGCTGGTCTCGAATTCCTGACCTCAGGTGATCCATCTGCCTCAGCCTCCCAAAGTGCTGG\n+GATTACAGGAGTGAGCCACTGCACCTGGCCCACAAGGGTTTCACTTTCTAAAAATATAAA\n+GAACTGGCCAGGTGCAATGGCTCACACTTGTAATCCTAGCACTTTGGGAGGGCAAGGAGA\n+GCAGATCGCTAGAGGCCAGAAGCTGGAGAACAGCCTGGCCAACATGGTGAAACCCCATCT\n+CTACTTTAAAAATATAAAAATTAGTTGGGTGT
GGTGGCACTTGCCTATAATCCCAGCTAC\n+TTGGCTACTCAGGAGGCCGAGGCAGGGGAATCGCTTGAATCCAGTAGGCAGAGGCTACAG\n+TGAGCCAAGATCATGCCACTGCACTACAGCTTGGGCAACAGAGTGAGACTTGGTCTCAAA\n+AAAAAAAAAAATTATATATATATATCTTATATATATACACTATTATATATATACACACAC\n+ACACACACACACACACACACACACACACACACAATTAATATGAGATGCCCAAAAATCCAA\n+TTGTAAAAAGGGGCAAAGGTTGTAAACTGGTAATTCATAAAAACAAATGAAGAGATGCTT\n+ATTGGTACTATATGCTCAGTATTAAGCAAATTAAATGAGATAGGATCGTGCATATTCAAC\n+CAACAAAATATCTGAATGTCTGAAAATAATAAATGTTAATGAGGGAGTGGAGAAAATGGG\n+AATGCTCATACTGCTGATAGAGAGTAAACTGGTACAACTATTGTGGCAGCCAATTAATAT\n+TTAGTAAAGCTGAAGATGCATGGTCCACTGTGGTACAGGCCCTGGAGATATTATCAAATG\n+TGTACACAAAGAAACACGCACAAGGATATTTTCTGCGATACTGTAATACTCAAAAGCCAA\n+TGACATCCTCAGTGGTCATCAATAAGAAAATGAATTAATGATGGGATTAATCATATAATG\n+AAATACTATATAGCAGTTGAAATGAATGTACTCTTTACATGTATCAACATGCTATACATA\n+AAAAACAATGATGAGCAATAAAAGCAAATTGCAAAAGGATATATATTATGAAACCAATTA\n+TGTTTAGTTTTAAAACACAGAGAATACTATGGATTGTAGTAAAAAAAAATATAAAATCAT\n+GAAGAGTAAGGACAGGTACAAACAGGATAGTGGTTCTTCTTGAGAGGAAGGAAATGACAT\n+AACAAGACCTTCAATGGATGTGCAGCTTTTCCTTTATTTAAACACAAAAGGATCTGAAGG\n+AAATAAGGAAAAAGGTTGACAGTGGTTACAATTAAATAGTGGCTGTATGTCAACACTCTT\n+GGTTACAAACAACAGGATCTACACTAGCTAGTTTAAACAAACAGAATAAAATGGCTCACA\n+AGTCACAGGCGTGGAGGTCAGGACTACGGAGCCCCCAAAAATGCCCATTTTTATACCTTG\n+GAGCAGCTGCGGGGGAAAAACTGCTAAGCAAAGCCTCCACACCTTGCACCATTACATGGG\n+ACCTCTGCCACTGCTGCCTTGAAAACCACATCACTGCTCCGTTCAACAAAATGTATCTCA\n+TACTACTCTTGTCTGCAAAGTACTTGCTTCCAGATTTCACACAGTTATGTCTAATTGGTG\n+AGCCCATGCTACCTGTCTGAGCCACAGCTGCAAGGAAGGGCAGGAGATTAAATTTCATTC\n+TTCTACTGGGTAAGGCGAGATCCACAGAGTGGGAAGTTGCCAAAAAGCAGGTGTTCAAAC\n+AGTGCTAGCTGCCCAAAAAGCACGAAAAGTGCCCACTCAAACAAGAGTTGGTGAAAATAT\n+TCTCTCTACTTTTCTGTATGCTCAAATATTTCACAATTTTTTTAAAGAAAAAATGTCGAA\n+GTATGTAAATTCACAAACAACAAAGGGAATGGAAAAAAAATCAATAAACAAGAGACGTCA\n+ACCAAATTCTAAAAGACAAAAAGCTAAGTGACTAATAAAACACTAGAGAATGTCACCACC\n+TAGAAAACATGTGGAGGAAGTTCCATCAGAGGCAGCCAACCGGCCCAGCTGGGCCTTAGC\n+TCAGAGGCAGCAAAAGTTCATAGATGATAGAAGACAGTAGAGGGATTAGTTAAATCTAAA\n+CTAATTAAAAAGAAAGGAATTAATTAAAGGTCTGTATACAAACTGGTTGAACACTACCCC\n+ATCTTCAAATATAGAA
GGGCCTAAATCCAGGAAGTGCCCCATGCAAAATATTGAAGGGAT\n+CTTTATTATAAAGTAAGTG'..b'TACAGATTTAGTCTAAATAGATATCTCTATGTCCCTATTTCTATTCTGTG\n+TCACTCAGACTCATCAAACAGACATGGCTAAATCTGAATTCAACACCCCAAAACCTGTCC\n+TACCTGCATCATCTATCTTGGTAAAGGGCTCCACCCAGATACCTAAGGACAGCATTCAAG\n+AATTCACTCTCTTCTTCCTACCTGCATATTCCATCAACAGTCCTACTAATTCAGCCTAAG\n+TTAACTCTCACAGTCATTCTCCCTATCGCGGCTGAACTTTAGGACCTCATTCTCTTCTAT\n+CCCCCTAACAGAACTCTTGGCTTAAGTCACAGCTATATCCCATGCTACCCCTAGAATGAG\n+CTTTTAAACAAGAATAGCTGATCAACAGGTCACTTCCCTACTCAAAATTCTCCAGTTACT\n+CTCTAGCATCTCAGTGGGGTGGAGGAGGAAAGTCAGGAATGCAAGTTCCACAACACGGGA\n+CACAAGCCCCTCATATTCTGTTTTTGGACTGACTCTTTACCTATCCTACATGTGACCTGT\n+ACATTCAAATTCATCTTACATTCATCATGTTCTCTATGTAGGCAAGTGACTTCCTCTGAG\n+ATAACCTCCAACCCATTTTCCAACAGATTATCTGCATGGTCCACTTTTATTCACCCTTTA\n+AAATCAGCTTAAGAGACTCCAATTCCTGGAAGCCTTCCTTAAGCAACTCCCCAGCTTGGT\n+CTGAGTCCTCTTCTTTATGCCAGAGCCCCACATGTACACCTGAACTGTCTGCTTATAAGT\n+CTAATTATCCAATCAACATTACTTCCCTGAGGCAGCAAGTAAATTTTGTGCATTTTTTTG\n+TACCTCCAGACCATAACAGTACCTGGCACACAATAAACAATCAGTAAGTCTGTGTTGCAT\n+CAATGAACCACGTTTAATCTAATCCCTTCAGAAAAGCAAAATGGACAGTAGAAAAAAGCA\n+ACTAAACCAGAATAAATGTTTGAAAAAATATGTATTGACTAACATCACTAGATATTCTAG\n+GTAAGACACTGTCTCTGCCGTTTTTCTACATTTTTCATTCCTTTAAAGTGTTCAGTTACT\n+ACTGCTTCTTTAATTTTCCAAACTATTTCTTTAATATTCTTTAGCTTTTTAAATTTTTTT\n+TAATTTTACTTAACTCCAAGTAAACATGCTGAACATTTTTTCTTTAGTTTCTTCATCTAA\n+AATGATCACTGTGACCAACTAAAAAGAGAAAATTACTCCATCCATTCAAAGGTAATTTCT\n+TCTCATGAGAAAAAAAATTATATAACCCTTACTCAAGAACATTATGGTGAAGTCCCTAAA\n+CAATAATTCCTATCTACATAGTCATTAATAAAGACTAGGTTTATATATCATTATGAAAAA\n+TTGTCAATAAATATTTCTTGGGTAACCCCTTCATGTGCTAATGTTGTGTTCAAAATTTGG\n+GCTTACACAAATATAACAAATCCTGAAAGGCTTAACAGACAGAAAATAAATGTACTGGCA\n+AAGAAAACAATCAAAAAATAGTATGTGGACATATACAATCAAGCACAAACTTATATGAAT\n+AATTATTTGTTAGAGATTACTCTGAGACTGTTCCATGGAGGAGGTACGACTCAGGCGAGG\n+TAGTGAAGAATGACTGCGTAGTAAATAGAAGGAGAATGGGAAATGGAGTCTAGAAAGAAA\n+AACTATCATGAAGAAATAAGCAAACTTGAAAATAAGTTTAATGTGTGATGCTTAATGTGC\n+CTGCAAGCTTAACCAGTGTTTGTGGCATTGTGGGAAATTAGGATAAGGAGGAAGTCCTAA\n+TGAGGACAGATTACACAGGACAGCTATCAGATAACCTAAGTAAAAGAGG
CACACTTAGCC\n+TATGAAATAGGAAACAGGAATAGTTGTGGTCAAGAAATACAATCACCCGGGCCGGGCGCG\n+GTGGCTCACGCCTGTAATCCCAGCACCTTGGGAGGCTGAGGGGGGTGGATCACGAGGTCA\n+GGAGATCGAGACCATCCTGGCTAACACGGTAAAACCCTGTCTCTACTAAAAATACAAAAA\n+AATTAGTTGGACGTAGTGGTGGGCACCCATAGTCCCAGCTACTTGGGAGGCTGAGGCACG\n+AGAATCGCTTGAGCCCGGGAGGTGGAGGTTGCAGTGAGCTGAGATCGCGCCACTGCACTC\n+CAGCCTGGGAGACAGAACAAGACTCTGTCTCAAAAAAAAAAAGAAAAGAAATACTATCAT\n+TCCTACTCATTAAGTCTAAATGCTAATGACAAAGAATAAACTACAAAAACACAAACATTC\n+AAATCTGGTTTTTCAGGACATTAGGATATTTGCACATTTACTGTTAATGGCTAAAGGCGG\n+CTTTAAAATACACCTGACAAATCTTAATCCTCTAAATATTCATTAAGTTTACAAAATAAA\n+TCCTAAGACCTAACTGATACCATTACAACTATTAAGAACTATTACTATTACATAGTAATA\n+TTCTTATTTCTCTTCAAGTTCTACAATATCATTTTCACGATATTAACAAATAAAATCAAA\n+AGAGAACTGCCAAGTTACAACTTTACCCTTACCAACACACTAGTGAACAATCACAACACT\n+GCTTTGACTATTGAGACAACTTACTTTTTTCCTTTCTCTTTGTTTCTTTCATTTTTCCCC\n+CCTTTGGTTGTTATTGGTTAATTTTGATGAATTTCTTTATAGAAGAGTACTCAATAGCTG\n+TCAAAAACACTTACCATTTTCCGAGATCTTTCCAATAGCTTATCCCATATTGTAAAATGT\n+GCCTTGAAAAGAAAAGATTTGCGATGAACTAAATGAAGCAGTAATGTCAAAGTAAAAAGG\n+ACAGAGTCTTTATATAGCTTTTAGAACAAAATAAATAAAAAGCTAGGCAGTACTAATTTA\n+GATAAGTGGTTATAAAAGTGGATCATAGTAGCAAAAAAAGGTGCTACATGGGGATAGCTG\n+CTTTTTTGCTTTCTTCAATCGCATGAACCCGGGAGGCAGAGGTTGCAGTGAGCATTTAAG\n+AAAAAAACGCTTAAGGATTGAACGTGCCCTTCTTTCACTTTTGCCCATCTTAATCATTTC\n+CATCATAGAAAGCATTTCTTCTAATAGGAAGCAAAAATCTGCCCTACTTTCCCATAGACT\n+GTGGTTCAATCCTTAAACAGCCAGTTTCAACATTCTAGAAGTCATTCTTCAAATCATAAG\n+CAATCACAAGAAGACAGTCATTAAAAACCAACTGTGACATCTTCACACTGAAATGGCAGA\n+AGCAGAGGATTCATCAAGTTACCCTAAAGAATAAGTAAAACCTAACAAAGACAATAGTAA\n+ATTTTTTTTTTTTTTGAAATGGAGTTTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGTGC\n+GATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTC\n+CCAAGACAATAGTAAAATTTAAACTCAATTTCCTTAGTCCATAACAACCTCCTGTAGCAG\n+AGGATTATCAGAATGCATTAAAGAAAACTGTGCAAAGTGTATCATGACCACATCAATTTT\n+ATTAGGTCAACTGAAAGTGGTAACAAGACATATGGGCAGGCCAGTGACTACTCCACACTG\n+AATGAGCTCATAAAATCTATAATAAAAGGTAAAATTAATAAATATCAACATACAAACCCT\n+TCCAGGGAAAGAGCTGACTGGTATGTTTAAAGGGAAAACCATGCCTGACTCAGGCGGAAT\n+GAACTGCTGGTGCAGAGACCTTAAGCTGTGGCT
GGAATATAGTGAGTGAGGAAAGGAGTG\n+GTGTTAGATAAAGTCAGAGAAGCAGGCAGGGACCAGATAATGCAGGGCTTTGTGAGACAG\n+GGTAAAGAGTTGGGATTTGTTCAAGGGAAGCCATTGGAAA\n'
b
diff -r 000000000000 -r 7303183cdb87 test-data/test.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.gtf Wed May 11 13:03:38 2022 +0000
b
b'@@ -0,0 +1,20 @@\n+21\thavana\texon\t2770\t3326\t.\t-\t.\tgene_id "ENSG00000260583"; gene_version "1"; transcript_id "ENST00000567517"; transcript_version "1"; exon_number "1"; gene_name "AP000223.42"; gene_source "havana"; gene_biotype "antisense"; havana_gene "OTTHUMG00000172769"; havana_gene_version "1"; transcript_name "AP000223.42-001"; transcript_source "havana"; transcript_biotype "antisense"; havana_transcript "OTTHUMT00000420390"; havana_transcript_version "1"; exon_id "ENSE00002629631"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tgene\t2770\t3326\t.\t-\t.\tgene_id "ENSG00000260583"; gene_version "1"; gene_name "AP000223.42"; gene_source "havana"; gene_biotype "antisense"; havana_gene "OTTHUMG00000172769"; havana_gene_version "1";\n+21\thavana\ttranscript\t2770\t3326\t.\t-\t.\tgene_id "ENSG00000260583"; gene_version "1"; transcript_id "ENST00000567517"; transcript_version "1"; gene_name "AP000223.42"; gene_source "havana"; gene_biotype "antisense"; havana_gene "OTTHUMG00000172769"; havana_gene_version "1"; transcript_name "AP000223.42-001"; transcript_source "havana"; transcript_biotype "antisense"; havana_transcript "OTTHUMT00000420390"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";\n+21\tensembl_havana\tthree_prime_utr\t5656\t5706\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000352957"; transcript_version "8"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13573"; havana_transcript "OTTHUMT00000171194"; havana_transcript_version "1"; tag "basic"; transcript_support_level "1";\n+21\tensembl_havana\tthree_prime_utr\t5656\t5750\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; 
gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\texon\t5656\t5754\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; exon_number "11"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; exon_id "ENSE00003528074"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\texon\t5656\t5754\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000352957"; transcript_version "8"; exon_number "10"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13573"; havana_transcript "OTTHUMT00000171194"; havana_transcript_version "1"; exon_id "ENSE00003605259"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+21\tensembl_havana\tgene\t5656\t27517\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4";\n+21\tensembl_havana\ttranscript\t5656\t27517\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; 
gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene '..b'ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; protein_id "ENSP00000305682"; protein_version "7"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\tCDS\t7701\t7789\t.\t-\t0\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; exon_number "10"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; protein_id "ENSP00000305682"; protein_version "7"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\texon\t7701\t7789\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; exon_number "10"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; exon_id "ENSE00001149457"; exon_version "5"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\tCDS\t8835\t8882\t.\t-\t0\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; exon_number "9"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene 
"OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; protein_id "ENSP00000305682"; protein_version "7"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\tCDS\t8835\t8882\t.\t-\t0\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000352957"; transcript_version "8"; exon_number "9"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS13573"; havana_transcript "OTTHUMT00000171194"; havana_transcript_version "1"; protein_id "ENSP00000284967"; protein_version "6"; tag "basic"; transcript_support_level "1";\n+21\tensembl_havana\texon\t8835\t8882\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000307301"; transcript_version "11"; exon_number "9"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-002"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS33522"; havana_transcript "OTTHUMT00000171195"; havana_transcript_version "2"; exon_id "ENSE00001017286"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+21\tensembl_havana\texon\t8835\t8882\t.\t-\t.\tgene_id "ENSG00000154719"; gene_version "13"; transcript_id "ENST00000352957"; transcript_version "8"; exon_number "9"; gene_name "MRPL39"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000078371"; havana_gene_version "4"; transcript_name "MRPL39-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id 
"CCDS13573"; havana_transcript "OTTHUMT00000171194"; havana_transcript_version "1"; exon_id "ENSE00001017286"; exon_version "1"; tag "basic"; transcript_support_level "1";\n'
b
diff -r 000000000000 -r 7303183cdb87 test-data/vep_versioned_annotation_cache.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vep_versioned_annotation_cache.loc Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,3 @@
+#<value> <dbkey> <version> <cachetype> <name> <species> <path>
+#
+drosophila_melanogaster_vep_106_BDGP6.32 dm6 106 default Drosophila melanogaster dm6 (V106) drosophila_melanogaster ${__HERE__}/test-cache
b
diff -r 000000000000 -r 7303183cdb87 tool-data/dbkeys.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dbkeys.loc.sample Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,1 @@
+#<dbkey> <display_name> <len_file_path>
\ No newline at end of file
b
diff -r 000000000000 -r 7303183cdb87 tool-data/vep_versioned_annotation_cache.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/vep_versioned_annotation_cache.loc.sample Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,11 @@
+#This file lists the VEP annotation caches, together with their metadata, that are available on the server.
+#The data table has the following format (columns are separated by TAB characters; spaces may occur inside a column):
+#
+#<value> <dbkey> <version> <cachetype> <name> <species> <path>
+#
+#For example, a vep_versioned_annotation_cache.loc file could look like this:
+#
+#homo_sapiens_vep_105_GRCh38 hg38 105 default Homo sapiens hg38 (V105) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/default
+#homo_sapiens_refseq_vep_105_GRCh38 hg38 105 refseq Homo sapiens hg38 (V105, Refseq) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/refseq
+#homo_sapiens_merged_vep_105_GRCh38 hg38 105 merged Homo sapiens hg38 (V105, Merged) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/merged
+#
\ No newline at end of file
b
diff -r 000000000000 -r 7303183cdb87 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Table of installed versioned vep cache data -->
+    <table name="vep_versioned_annotation_cache" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, species, path</columns>
+        <file path="tool-data/vep_versioned_annotation_cache.loc" />
+    </table>
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="tool-data/dbkeys.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 7303183cdb87 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed May 11 13:03:38 2022 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Table of versioned vep cache data for testing -->
+    <table name="vep_versioned_annotation_cache" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, species, path</columns>
+        <file path="${__HERE__}/test-data/vep_versioned_annotation_cache.loc" />
+    </table>
+</tables>
\ No newline at end of file