Repository 'ensembl_vep'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ensembl_vep

Changeset 2:b43df0ce6c87 (2022-10-26)
Previous changeset 1:27fd1c1f00a8 (2022-06-28) Next changeset 3:ff0a8cda0c0b (2023-03-28)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ensembl_vep commit 2d42622b1aafec3492a8e42a15c52e4b96f55b8b
modified:
ensembl_vep.xml
test-data/output_test1.vcf
test-data/output_test2.vcf
b
diff -r 27fd1c1f00a8 -r b43df0ce6c87 ensembl_vep.xml
--- a/ensembl_vep.xml Tue Jun 28 10:41:15 2022 +0000
+++ b/ensembl_vep.xml Wed Oct 26 10:02:51 2022 +0000
[
b'@@ -1,375 +1,373 @@\n <tool id="ensembl_vep" name="Predict variant effects" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n-\t<description>with VEP</description>\n-\t<macros>\n-\t\t<token name="@TOOL_VERSION@">106.1</token>\n-\t\t<token name="@VERSION_SUFFIX@">1</token>\n-\t\t<token name="@DB_VERSION@">106</token>\n-\t\t<xml name="vcf_input">\n-\t\t\t<param type="data" name="input1" label="VCF input file" format="vcf">\n-\t\t\t\t<validator type="unspecified_build" />\n-\t\t\t\t<yield />\n-\t\t\t</param>\n-\t\t</xml>\n-\t\t<xml name="vcf_input_validation">\n-\t\t\t<validator type="dataset_metadata_in_data_table" table_name="vep_versioned_annotation_cache" metadata_name="dbkey" metadata_column="1" message="No annotation caches are available for the specified build" />\n-\t\t</xml>\n-\t</macros>\n-\t<requirements>\n-\t\t<requirement type="package" version="@TOOL_VERSION@">ensembl-vep</requirement>\n-\t\t<requirement type="package" version="0.1">perl-math-cdf</requirement>\n-\t\t<requirement type="package" version="3.4">grep</requirement>\n-\t</requirements>\n-\t<command detect_errors="exit_code"><![CDATA[\n-\t\t#if $annotation_cache.source == "custom":\n-\t\t\t#set $custom_ext = $annotation_cache.custom_annotation.ext[:3]\n-\t\t\tset -o pipefail &&\n-\t\t\tgrep -v "#" \'${annotation_cache.custom_annotation}\' | \n-\t\t\tLC_ALL=C sort -k1,1 -k4,4n -k5,5n -t$\'\\t\' | \n-\t\t\tbgzip -c > \'custom_annotation.${custom_ext}.gz\' &&\n-\t\t\ttabix -p gff \'custom_annotation.${custom_ext}.gz\' &&\n-\t\t#end if\n-\t\t#if $ref_seq.ref_source == "cached":\n-\t\t\tln -s \'$ref_seq.ref.fields.path\' reference.fa &&\n-\t\t#else if $ref_seq.ref_source == "history":\n-\t\t\tln -s \'$ref_seq.ref\' reference.fa &&\n-\t\t#end if\n-\t\t\n-\t\tvep -i \'${annotation_cache.input1}\' -o MainOutput.vcf --vcf\n-\t\t#if $annotation_cache.source == "custom":\n-\t\t\t--$custom_ext \'custom_annotation.${custom_ext}.gz\'\n-\t\t#else:\n-\t\t\t--cache\n-\t\t\t--species \'${annotation_cache.cache_file.fields.species}\'\n-\t\t\t--dir_cache \'${annotation_cache.cache_file.fields.path}\'\n-\t\t\t#if $annotation_cache.cache_file.fields.cachetype == "refseq": --refseq\n-\t\t\t#if $annotation_cache.cache_file.fields.cachetype == "merged": --merged\n-\t\t\t#if $annotation_cache.cache_file.fields.version != "@DB_VERSION@": --cache_version $annotation_cache.cache_file.fields.version\n-\t\t\t## The --offline flag automatically activates --cache. This is not wanted in our gff/gtf case but also not needed as no internet connection is required for annotating with these custom annotation sources.\n-\t\t\t--offline\n-\t\t#end if\n-\t\t#if $ref_seq.ref_source != "no_ref"\n-\t\t\t--fasta reference.fa\n-\t\t#end if\n-\t\t--stats_text\n-\t\t\n-\t\t#if $out_opt.sift != "None": --sift $out_opt.sift\n-\t\t#if $out_opt.polyphen != "None": --polyphen $out_opt.polyphen\n-\t\t#if $out_opt.nearest != "None": --nearest $out_opt.nearest\n-\t\t#if int($out_opt.distance_custom.distance_1) > -1:\n-\t\t\t--distance $out_opt.distance_custom.distance_1#if int($out_opt.distance_custom.distance_2) > -1: ,$out_opt.distance_custom.distance_2\n-\t\t#end if\n-\t\t#if $out_opt.cell_type != "": --cell_type \'$out_opt.cell_type\'\n-\t\t#if $out_opt.individual != "": --individual $out_opt.individual\n-\t\t--vcf_info_field $out_opt.vcf_info_type.vcf_info_field\n-\t\t--terms $out_opt.terms\n-\t\t#if $out_opt.out_opt_checkboxes != "None": ${\' \'.join(str($out_opt.out_opt_checkboxes).split(","))}\n-\t\t#if $out_opt.shift_var.shift_selector != "None":\n-\t\t\t$out_opt.shift_var.shift_selector\n-\t\t\t#if $out_opt.shift_var.shift_selector == "--shift_hgvs 0 --shift_3prime 1"\n-\t\t\t\t#if $out_opt.shift_var.shift_length != "None": $out_opt.shift_var.shift_length\n-\t\t\t#end if\n-\t\t#end if\n-\t\t#if $ident.input_synonyms: --synonyms \'${ident.input_synonyms}\'\n-\t\t#if $ident.ident_checkboxes != "None": ${\' \'.join(str($ident.ident_checkboxes).split(","))}\n-\t\t#if $colo_var.colo_var_checkboxes != "None": ${\' \'.join(str($colo_var.colo_var_checkboxes).split(","))}\n-\t\t#if $fil_qc.fil_qc_checkboxes != "None": ${\' \'.join(str($fil_qc.fil_qc_checkboxes).split(","))}\n-\t\t$plugins.carol\n-\t\t$plugins.condel\n-\t\t#if $plugins.exacpli_file: --plugin ExACpLI,\'${plugins.exacpli_file}\'\n-\t\t'..b'9" value="test.fa" ftype="fasta" />\n+            <output name="output1" file="output_test2.vcf" ftype="vcf" lines_diff="2" />\n+        </test>\n+        <test expect_num_outputs="2">\n+            <param name="source" value="unrestricted" />\n+            <param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+            <param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+            <param name="vcf_info_field" value="ANN" />\n+            <output name="output1">\n+                <assert_contents>\n+                    <has_text text="##INFO=&lt;ID=ANN" />\n+                    <has_n_lines n="6" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test expect_num_outputs="2">\n+            <param name="source" value="unrestricted" />\n+            <param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+            <param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+            <param name="terms" value="display" />\n+            <output name="output1">\n+                <assert_contents>\n+                    <has_text text="|DOWNSTREAM|" />\n+                    <not_has_text text="|downstream_gene_variant|" />\n+                    <has_text text="|UPSTREAM|" />\n+                    <not_has_text text="|upstream_gene_variant|" />\n+                    <has_n_lines n="6" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test expect_num_outputs="2">\n+            <param name="source" value="unrestricted" />\n+            <param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+            <param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+            <param name="out_opt_checkboxes" value="--overlaps,--allele_number" />\n+            <output name="output1">\n+                <assert_contents>\n+                    <has_text text="|OverlapBP|OverlapPC" />\n+                    <has_text text="|ALLELE_NUM" />\n+                    <has_n_lines n="6" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test expect_num_outputs="2">\n+            <param name="source" value="unrestricted" />\n+            <param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+            <param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+            <param name="ident_checkboxes" value="--uniprot,--biotype" />\n+            <output name="output1">\n+                <assert_contents>\n+                    <has_text text="|SWISSPROT|TREMBL|UNIPARC|UNIPROT_ISOFORM" />\n+                    <has_text text="|BIOTYPE" />\n+                    <has_n_lines n="6" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test expect_num_outputs="2">\n+            <param name="source" value="unrestricted" />\n+            <param name="input1" dbkey="dm6" value="input_test1.vcf" ftype="vcf" />\n+            <param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />\n+            <param name="colo_var_checkboxes" value="--max_af,--pubmed" />\n+            <output name="output1">\n+                <assert_contents>\n+                    <has_text text="|MAX_AF|MAX_AF_POPS" />\n+                    <has_text text="|PUBMED" />\n+                    <has_n_lines n="6" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+The Ensembl Variant Effect Predictor (VEP) is able to determine the effect of variants (e.g. SNPs, insertions or deletions) on genes, transcripts, protein sequences and regulatory regions. Given the coordinates and nucleotide changes of a variant, it outputs affected genes, the exact location and consequences of the variant as well as known variants matching this one.\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1186/s13059-016-0974-4</citation>\n+    </citations>\n </tool>\n'
b
diff -r 27fd1c1f00a8 -r b43df0ce6c87 test-data/output_test1.vcf
--- a/test-data/output_test1.vcf Tue Jun 28 10:41:15 2022 +0000
+++ b/test-data/output_test1.vcf Wed Oct 26 10:02:51 2022 +0000
b
@@ -1,5 +1,4 @@
 ##fileformat=VCFv4.0
-##VEP="v105" time="2022-03-07 13:29:51" cache="/home/sebastian/.planemo/planemo_tmp_fhheutk7/test-data/test-cache/drosophila_melanogaster/105_BDGP6.32" db="drosophila_melanogaster_core_105_9@ensembldb.ensembl.org" ensembl-io=105.2a0a40c ensembl-variation=105.ac8178e ensembl=105.525fbcb ensembl-funcgen=105.660df8f assembly="BDGP6.32" genebuild="dmel_r6.32_FB2020_01"
 ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID">
 #CHROM POS ID REF ALT QUAL FILTER INFO
 chrM 5701 . GA G . . CSQ=-|downstream_gene_variant|MODIFIER|mt:ND2|FBgn0013680|Transcript|FBtr0100857|protein_coding|||||||||||4439|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Trp-TCA|FBgn0013709|Transcript|FBtr0100858|tRNA|||||||||||4373|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Cys-GCA|FBgn0013690|Transcript|FBtr0100859|tRNA|||||||||||4319|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Tyr-GTA|FBgn0013710|Transcript|FBtr0100860|tRNA|||||||||||4234|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:CoI|FBgn0013674|Transcript|FBtr0100861|protein_coding|||||||||||2693|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Leu-TAA|FBgn0013699|Transcript|FBtr0100862|tRNA|||||||||||2625|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:CoII|FBgn0013675|Transcript|FBtr0100863|protein_coding|||||||||||1935|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Lys-CTT|FBgn0013697|Transcript|FBtr0100864|tRNA|||||||||||1864|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Asp-GTC|FBgn0013691|Transcript|FBtr0100865|tRNA|||||||||||1796|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ATPase8|FBgn0013673|Transcript|FBtr0100866|protein_coding|||||||||||1634|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:CoIII|FBgn0013676|Transcript|FBtr0100868|protein_coding|||||||||||178|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Gly-TCC|FBgn0013694|Transcript|FBtr0100869|tRNA|||||||||||95|1||FlyBaseName_gene|,-|frameshift_variant|HIGH|mt:ND3|FBgn0013681|Transcript|FBtr0100870|protein_coding|1/1||||95|95|32|D/X|gAc/gc|||1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Ala-TGC|FBgn0013689|Transcript|FBtr0100871|tRNA|||||||||||281|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Arg-TCG|FBgn0013704|Transcript|FBtr0100872|tRNA|||||||||||360|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Asn-GTT|FBgn0013701|Transcript|FBtr0100873|tRNA|||||||||||424|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Ser-GCT|FBgn0013705|Transcript|FBtr0100874|tRNA|||||||||||489|1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Glu-TTC|FBgn0013692|Transcript|FBtr0100875|tRNA|||||||||||557|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Phe-GAA|FBgn0013693|Transcript|FBtr0100876|tRNA|||||||||||642|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:His-GTG|FBgn0013695|Transcript|FBtr0100878|tRNA|||||||||||2439|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ND4L|FBgn0013683|Transcript|FBtr0100880|protein_coding|||||||||||3843|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:tRNA:Thr-TGT|FBgn0013707|Transcript|FBtr0100881|tRNA|||||||||||4136|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:tRNA:Pro-TGG|FBgn0013702|Transcript|FBtr0100882|tRNA|||||||||||4202|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:ND6|FBgn0013685|Transcript|FBtr0100883|protein_coding|||||||||||4269|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ATPase6|FBgn0013672|Transcript|FBtr0433498|protein_coding|||||||||||966|1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ND4|FBgn0262952|Transcript|FBtr0433500|protein_coding|||||||||||2505|-1||FlyBaseName_gene|,-|downstream_gene_variant|MODIFIER|mt:ND5|FBgn0013684|Transcript|FBtr0433501|protein_coding|||||||||||707|-1||FlyBaseName_gene|,-|upstream_gene_variant|MODIFIER|mt:Cyt-b|FBgn0013678|Transcript|FBtr0433502|protein_coding|||||||||||4797|1||FlyBaseName_gene|
b
diff -r 27fd1c1f00a8 -r b43df0ce6c87 test-data/output_test2.vcf
--- a/test-data/output_test2.vcf Tue Jun 28 10:41:15 2022 +0000
+++ b/test-data/output_test2.vcf Wed Oct 26 10:02:51 2022 +0000
b
@@ -4,8 +4,7 @@
 ##ALT=<ID=CNV,Description="Copy Number Polymorphism">
 ##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
 ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##VEP="v105" time="2022-03-30 19:24:59" ensembl=105.525fbcb ensembl-io=105.2a0a40c ensembl-funcgen=105.660df8f ensembl-variation=105.ac8178e
 ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|SOURCE|custom_annotation.gtf.gz">
-##INFO=<ID=custom_annotation.gtf.gz,Number=.,Type=String,Description="custom_annotation.gtf.gz (overlap)">
+##INFO=<ID=custom_annotation.gtf.gz,Number=.,Type=String,Description="custom_annotation.gtf.gz">
 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
 21 5733 rs142513484 C T . . CSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|3/3||||159|||||||-1||||custom_annotation.gtf.gz|,T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|2/2||||70|70|24|A/T|Gca/Aca|||-1||||custom_annotation.gtf.gz|,T|upstream_gene_variant|MODIFIER|AP000223.42|ENSG00000260583|Transcript|ENST00000567517|antisense|||||||||||2407|-1||||custom_annotation.gtf.gz| GT 0|0