Mercurial > repos > iuc > packaged_annotation_loader
changeset 0:0de428c589f3 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/packaged_annotation_loader commit 339a6c16fb6d944d4e147b5192cbeb0ebd26d18e"
author | iuc |
---|---|
date | Tue, 04 Jan 2022 18:34:48 +0000 |
parents | |
children | |
files | load_annotations.xml retrieve_meta.py test-data/anno-version.tsv test-data/packaged_annotation_data.loc test-data/test-annotation-data/civic.variants.test test-data/test-annotation-data/hotspots.data.test test-data/test-annotation-data/meta.yml tool-data/packaged_annotation_data.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 10 files changed, 255 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<tool id="packaged_annotation_loader" name="Load packaged annotation data" version="0.1" profile="20.05">
    <!-- load_annotations.xml: exposes files of an installed annotation package
         (rows of the packaged_annotation_data data table) as a list collection,
         optionally together with a metadata dataset produced by retrieve_meta.py. -->
    <description>as a collection</description>
    <requirements>
        <requirement type="package" version="3.9">python</requirement>
        <requirement type="package" version="6.0">pyyaml</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the generated collection description under the name used for
        ## provided-metadata discovery (see discover_datasets below).
        cp '$c1' galaxy.json
        ## An empty retrieve_meta value means "No metadata": skip the helper script.
        #if str($retrieve_meta):
            && python '$__tool_directory__/retrieve_meta.py' --format '$retrieve_meta' -o meta.out galaxy.json
        #end if
    ]]></command>
    <configfiles>
        <!-- c1 renders a JSON document with one element per selected annotation:
             name = selected value, ext = its "format" column, filename =
             package path + "/" + value, dbkey = dbkey of the chosen package.
             The doubled braces survive the "%" formatting step and are turned
             into literal braces by the subsequent .format() call. -->
        <configfile name="c1">
            #set $elems=",".join(['{{"name": "%s", "ext": "%s", "filename": "{0}/%s", "dbkey": "{1}"}}' % (i, f, i) for i, f in zip($annotation, $annotation.fields.format.split(','))])
            #set $elems=$elems.format($packaged_data.fields.path, $packaged_data.fields.dbkey)
{"list_output": {"elements": [$elems]}}
        </configfile>
    </configfiles>
    <inputs>
        <!-- Data table columns by index: 0 value, 1 dbkey, 2 data_name, 3 data_id,
             4 data_format, 5 package_id, 6 package_name, 7 path. -->
        <param name="packaged_data" type="select" label="Annotation data package to load">
            <options from_data_table="packaged_annotation_data">
                <column name="name" index="6" />
                <column name="value" index="5" />
                <column name="path" index="7" />
                <column name="dbkey" index="1" />
                <!-- The table has one row per annotation file; keep one entry per package. -->
                <filter type="unique_value" column="value" />
                <filter type="sort_by" column="name" />
                <validator type="no_options" message="No annotation package available" />
            </options>
        </param>
        <param name="annotation" type="select" multiple="true" optional="false" label="Packaged annotations to load into collection">
            <options from_data_table="packaged_annotation_data">
                <column name="name" index="2" />
                <column name="value" index="3" />
                <column name="format" index="4" />
                <column name="package_id" index="5" />
                <!-- Only offer the files belonging to the package selected above. -->
                <filter type="param_value" ref="packaged_data" column="package_id" />
                <validator type="no_options" message="No annotation data available" />
            </options>
        </param>
        <param name="retrieve_meta" type="select" label="Retrieve annotation sources metadata as separate dataset?">
            <option value="">No metadata</option>
            <option value="yaml">Return the metadata in original yaml format</option>
            <option value="tab">Return tabular annotation source/version info</option>
        </param>
    </inputs>
    <outputs>
        <collection name="list_output" type="list" label="Output List">
            <discover_datasets from_provided_metadata="true" visible="false" />
        </collection>
        <!-- Only created when a metadata format was requested; tabular unless yaml is chosen. -->
        <data name="meta_out" format="tabular" label="Loaded annotations metadata" from_work_dir="meta.out">
            <filter>bool(str(retrieve_meta))</filter>
            <change_format>
                <when input="retrieve_meta" value="yaml" format="yaml" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Single annotation, no metadata output. -->
        <test expect_num_outputs="1">
            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
            <param name="annotation" value="hotspots.data.test" />
            <output_collection name="list_output" type="list" count="1">
                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
            </output_collection>
        </test>
        <!-- Two annotations, no metadata output. -->
        <test expect_num_outputs="1">
            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
            <param name="annotation" value="hotspots.data.test,civic.variants.test" />
            <output_collection name="list_output" type="list" count="2">
                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
            </output_collection>
        </test>
        <!-- Two annotations plus metadata in yaml format. -->
        <test expect_num_outputs="2">
            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
            <param name="annotation" value="hotspots.data.test,civic.variants.test" />
            <param name="retrieve_meta" value="yaml" />
            <output_collection name="list_output" type="list" count="2">
                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
            </output_collection>
            <output name="meta_out" ftype="yaml" file="test-annotation-data/meta.yml" />
        </test>
        <!-- Two annotations plus metadata in tabular format. -->
        <test expect_num_outputs="2">
            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
            <param name="annotation" value="hotspots.data.test,civic.variants.test" />
            <param name="retrieve_meta" value="tab" />
            <output_collection name="list_output" type="list" count="2">
                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
            </output_collection>
            <output name="meta_out" ftype="tabular" file="anno-version.tsv" />
        </test>
        <!-- Metadata must be restricted to the selected annotation only. -->
        <test expect_num_outputs="2">
            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
            <param name="annotation" value="civic.variants.test" />
            <param name="retrieve_meta" value="tab" />
            <output_collection name="list_output" type="list" count="1">
                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
                    <metadata name="dbkey" value="hg19" />
                </element>
            </output_collection>
            <output name="meta_out" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="2" />
                    <!-- TAB is written as a character reference: a literal tab inside an
                         attribute value would be normalized to a space by the XML parser. -->
                    <has_line line="Annotation&#9;Version" />
                    <has_line line="CIViC variants&#9;01-Feb-2019" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
This tool can be used to make genome annotation data installed via the
`data manager for packaged_annotation data
<https://toolshed.g2.bx.psu.edu/view/wolma/data_manager_packaged_annotation_data>`__
accessible as a list of datasets collection.
    ]]></help>
</tool>
#!/usr/bin/env python
"""Write metadata for the annotations listed in a Galaxy collection JSON file.

The script reads a JSON file whose first top-level value holds an
``elements`` list (each element providing ``name`` and ``filename``), looks
for a ``meta.yml`` file in the directory of the first element's ``filename``,
keeps only the metadata records whose ``id`` matches one of the element
names, and writes the result either as YAML or as a tab-separated
``Annotation``/``Version`` table.
"""

import argparse
import json
import os
import sys

import yaml


def parse_args(argv=None):
    """Parse the command line.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    :returns: namespace with ``galaxy_json``, ``ofile`` and ``format``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('galaxy_json')
    parser.add_argument(
        '-o', '--ofile',
        required=True
    )
    parser.add_argument(
        '--format', choices=['yaml', 'tab'], default='yaml'
    )
    return parser.parse_args(argv)


def load_selected_meta(galaxy_json):
    """Load the package metadata restricted to the selected annotations.

    :param galaxy_json: path to the JSON file describing the collection.
    :returns: the parsed ``meta.yml`` content with its ``records`` list
        filtered down to the records named in the JSON file.
    :raises SystemExit: if the JSON file lists no collection elements, since
        the location of ``meta.yml`` is derived from the first element.
    """
    # Context managers make sure both input files are closed again.
    with open(galaxy_json) as json_in:
        galaxy_collection_info = json.load(json_in)

    # Only the first top-level entry of the JSON document is inspected.
    annotation_info = next(iter(galaxy_collection_info.values()))['elements']
    if not annotation_info:
        sys.exit(
            'No collection elements listed in {0}; cannot locate meta.yml'
            .format(galaxy_json)
        )

    selected_ids = {elem['name'] for elem in annotation_info}
    # meta.yml is expected next to the first element's data file.
    package_meta_file = os.path.join(
        os.path.dirname(annotation_info[0]['filename']),
        'meta.yml'
    )
    with open(package_meta_file) as meta_in:
        meta = yaml.safe_load(meta_in)

    meta['records'] = [
        rec for rec in meta['records'] if rec['id'] in selected_ids
    ]
    return meta


def write_meta(meta, ofile, fmt):
    """Write ``meta`` to ``ofile`` in the requested format.

    :param meta: metadata mapping with a ``records`` list.
    :param ofile: path of the output file (overwritten if it exists).
    :param fmt: ``'yaml'`` dumps the whole mapping as block-style YAML; any
        other value writes a header line plus one ``name``/``version`` line
        per record, separated by tabs.
    """
    with open(ofile, 'w') as fo:
        if fmt == 'yaml':
            yaml.dump(
                meta, fo, allow_unicode=False, default_flow_style=False
            )
        else:
            print('Annotation\tVersion', file=fo)
            for record in meta['records']:
                print(record['name'], record['version'], sep='\t', file=fo)


def main(argv=None):
    """Entry point: parse arguments, load the metadata and write it out."""
    args = parse_args(argv)
    write_meta(load_selected_meta(args.galaxy_json), args.ofile, args.format)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/anno-version.tsv Tue Jan 04 18:34:48 2022 +0000 @@ -0,0 +1,3 @@ +Annotation Version +CancerHotspots v2 +CIViC variants 01-Feb-2019
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/packaged_annotation_data.loc Tue Jan 04 18:34:48 2022 +0000 @@ -0,0 +1,4 @@ +#<value> <dbkey> <data_name> <data_id> <data_format> <package_id> <package_name> <path> +# +Cancer_variant_data__hg19__1__2021-12-16:hotspots.data.test hg19 CancerHotspots (v2, hg19; from Cancer variant data/vol:1/build2021-12-16) hotspots.data.test bed Cancer_variant_data__hg19__1__2021-12-16 Cancer variant data (hg19, vol:1/build:2021-12-16) ${__HERE__}/test-annotation-data +Cancer_variant_data__hg19__1__2021-12-16:civic.variants.test hg19 CIViC variants (01-Feb-2019, hg19; from Cancer variant data/vol:1/build2021-12-16) civic.variants.test bed Cancer_variant_data__hg19__1__2021-12-16 Cancer variant data (hg19, vol:1/build:2021-12-16) ${__HERE__}/test-annotation-data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-annotation-data/civic.variants.test Tue Jan 04 18:34:48 2022 +0000 @@ -0,0 +1,7 @@ +1 11217229 11217230 https://civic.genome.wustl.edu/links/variants/543 MTOR +1 11856377 11856378 https://civic.genome.wustl.edu/links/variants/258 MTHFR +1 26211930 26232957 https://civic.genome.wustl.edu/links/variants/357 STMN1 +1 27022523 27108595 https://civic.genome.wustl.edu/links/variants/2108 ARID1A +1 36932095 36933444 https://civic.genome.wustl.edu/links/variants/560 CSF3R +1 40361097 40367685 https://civic.genome.wustl.edu/links/variants/670 MYCL +1 57159475 57159478 https://civic.genome.wustl.edu/links/variants/385 PRKAA2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-annotation-data/hotspots.data.test Tue Jan 04 18:34:48 2022 +0000 @@ -0,0 +1,12 @@ +1 2488104 2488105 TNFRSF14 0.0100655869328098 +1 2488105 2488106 TNFRSF14 0.0100655869328098 +1 2491288 2491289 TNFRSF14 0.0888304463274096 +1 2491289 2491290 TNFRSF14 0.0888304463274096 +1 8073431 8073432 ERRFI1 0.0212231821625514 +1 8073433 8073434 ERRFI1 0.0212231821625514 +1 8074312 8074313 ERRFI1 0.0452215429932304 +1 9779981 9779982 PIK3CD 0.0177652807514415 +1 9784112 9784113 PIK3CD 0.0656422395683915 +1 9784113 9784114 PIK3CD 0.0656422395683915 +1 9787029 9787030 PIK3CD 7.49932571758753e-06 +1 11169374 11169375 MTOR 2.29333311583453e-13
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-annotation-data/meta.yml Tue Jan 04 18:34:48 2022 +0000 @@ -0,0 +1,20 @@ +build: '2021-12-16' +id: Cancer_variant_data__hg19__1__2021-12-16 +name: Cancer variant data +records: +- checksum: md5:ec8ec9afd4ae4935ac474e150e4e90aa + doi: 10.1158/2159-8290.CD-17-0321 + format: bed + id: hotspots.data.test + name: CancerHotspots + source: https://zenodo.org/api/files/a89ff3af-261e-4c24-a9fb-5050ce8807b2/hotspots.bed + version: v2 +- checksum: md5:9e42bb7492be9e0011bf29b7e4f83f41 + doi: http://dx.doi.org/10.1038/ng.3774 + format: bed + id: civic.variants.test + name: CIViC variants + source: https://zenodo.org/api/files/a89ff3af-261e-4c24-a9fb-5050ce8807b2/01-Feb-2019-CIVic.bed + version: 01-Feb-2019 +refgenome: hg19 +volume: 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/packaged_annotation_data.loc.sample Tue Jan 04 18:34:48 2022 +0000 @@ -0,0 +1,20 @@ +#This file describes genome annotation data packages and their contents +#available on the server. +#Such data can consist of any number of individual files in a variety of +#formats (e.g., bed, vcf, tabular) describing any features with respect to the +#genome with the associated dbkey. +#The directory referenced in the <path> column of the table is expected to +#contain the file listed under <data_id> and a meta.yml file with details about +#the annotation package volume and all of its contents. +#This data table has the format (white space characters are TAB characters): +# +#<value> <dbkey> <data_name> <data_id> <data_format> <package_id> <package_name> <path> +# +#So, packaged_annotation_data.loc tables could look like this: +# +#dbSNP_hg19__1__1:dbSNP.tidy hg19 dbSNP tidy (b147.20160601, hg19; from dbSNP/vol:1/build:1) dbSNP.tidy vcf_bgzip dbSNP__hg19__1__1 dbSNP (hg19, vol:1/build:1) /path/to/packaged_annotation_data/hg19/dbSNP/1/1 +#Cancer_variant_data__1__1:hotspots.data hg19 CancerHotspots (v2, hg19; from Cancer variant data/vol:1/build:1) hotspots.data bed Cancer_variant_data__hg19__1__1 Cancer variant data (hg19, vol:1/build:1) /path/to/packaged_annotation_data/hg19/Cancer_variant_data/1/1 +#Cancer_genes_data__1__1:civic.genes hg19 CIViC genes (01-Feb-2019, hg19; from Cancer gene data/vol:1/build:1) civic.genes tabular Cancer_gene_data__hg19__1__1 Cancer gene data (hg19, vol:1/build:1) /path/to/packaged_annotation_data/hg19/Cancer_variant_data/1/1 +#SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1:ARTICv3 NC_045512.2 ARTIC (v3, NC_045512.2; from SARS-CoV-2 amplicon primer sets/vol:1/build:1) ARTICv3 bed6 SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1 SARS-CoV-2 amplicon primer sets (NC_045512.2, vol:1/build:1) /path/to/packaged_annotation_data/NC_045512.2/SARS-CoV-2_amplicon_primer_sets/1/1 
+#SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1:ARTICv4 NC_045512.2 ARTIC (v4, NC_045512.2; from SARS-CoV-2 amplicon primer sets/vol:1/build:1) ARTICv4 bed6 SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1 SARS-CoV-2 amplicon primer sets (NC_045512.2, vol:1/build:1) /path/to/packaged_annotation_data/NC_045512.2/SARS-CoV-2_amplicon_primer_sets/1/1 +#
<tables>
    <!-- tool_data_table_conf.xml.sample:
         declares the packaged_annotation_data table (lines starting with "#"
         are comments) and the .loc file its rows are read from. -->
    <table name="packaged_annotation_data" comment_char="#">
        <columns>value, dbkey, data_name, data_id, data_format, package_id, package_name, path</columns>
        <file path="tool-data/packaged_annotation_data.loc"/>
    </table>
</tables>
<tables>
    <!-- tool_data_table_conf.xml.test:
         same packaged_annotation_data table definition as the sample config,
         but reading its rows from the .loc file shipped under test-data.
         NOTE(review): ${__HERE__} is presumably expanded to this file's
         directory; confirm against the Galaxy documentation. -->
    <table name="packaged_annotation_data" comment_char="#">
        <columns>value, dbkey, data_name, data_id, data_format, package_id, package_name, path</columns>
        <file path="${__HERE__}/test-data/packaged_annotation_data.loc"/>
    </table>
</tables>