changeset 0:0de428c589f3 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/packaged_annotation_loader commit 339a6c16fb6d944d4e147b5192cbeb0ebd26d18e"
author iuc
date Tue, 04 Jan 2022 18:34:48 +0000
parents
children
files load_annotations.xml retrieve_meta.py test-data/anno-version.tsv test-data/packaged_annotation_data.loc test-data/test-annotation-data/civic.variants.test test-data/test-annotation-data/hotspots.data.test test-data/test-annotation-data/meta.yml tool-data/packaged_annotation_data.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 10 files changed, 255 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/load_annotations.xml	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,133 @@
+<tool id="packaged_annotation_loader" name="Load packaged annotation data" version="0.1" profile="20.05">
+    <description>as a collection</description>
+    <requirements> <!-- runtime for retrieve_meta.py, which imports the yaml module -->
+        <requirement type="package" version="3.9">python</requirement>
+        <requirement type="package" version="6.0">pyyaml</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        cp '$c1' galaxy.json
+        #if str($retrieve_meta):
+            && python '$__tool_directory__/retrieve_meta.py' --format '$retrieve_meta' -o meta.out galaxy.json
+        #end if
+    ]]></command> <!-- stages the c1 configfile as galaxy.json; retrieve_meta.py runs only when a metadata format is selected -->
+    <configfiles> <!-- c1 renders the JSON that the command copies to galaxy.json: one element (name, ext, filename, dbkey) per selected annotation under the "list_output" key -->
+        <configfile name="c1">
+            #set $elems=",".join(['{{"name": "%s", "ext": "%s", "filename": "{0}/%s", "dbkey": "{1}"}}' % (i, f, i) for i, f in zip($annotation, $annotation.fields.format.split(','))])
+            #set $elems=$elems.format($packaged_data.fields.path, $packaged_data.fields.dbkey)
+{"list_output": {"elements": [$elems]}}
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="packaged_data" type="select" label="Annotation data package to load"> <!-- one option per distinct package_id in the data table -->
+            <options from_data_table="packaged_annotation_data">
+                <column name="name" index="6" /> <!-- package_name column -->
+                <column name="value" index="5" /> <!-- package_id column -->
+                <column name="path" index="7" />
+                <column name="dbkey" index="1" />
+                <filter type="unique_value" column="value" />
+                <filter type="sort_by" column="name" />
+                <validator type="no_options" message="No annotation package available" />
+            </options>
+        </param>
+        <param name="annotation" type="select" multiple="true" optional="false" label="Packaged annotations to load into collection"> <!-- options are filtered to rows whose package_id matches packaged_data -->
+            <options from_data_table="packaged_annotation_data">
+                <column name="name" index="2" /> <!-- data_name column -->
+                <column name="value" index="3" /> <!-- data_id column -->
+                <column name="format" index="4" /> <!-- data_format column -->
+                <column name="package_id" index="5" />
+                <filter type="param_value" ref="packaged_data" column="package_id" />
+                <validator type="no_options" message="No annotation data available" />
+            </options>
+        </param>
+        <param name="retrieve_meta" type="select" label="Retrieve annotation sources metadata as separate dataset?"> <!-- the empty value disables the meta_out output -->
+            <option value="">No metadata</option>
+            <option value="yaml">Return the metadata in original yaml format</option>
+            <option value="tab">Return tabular annotation source/version info</option>
+        </param>
+    </inputs>
+    <outputs>
+        <collection name="list_output" type="list" label="Output List"> <!-- elements are taken from provided metadata (the galaxy.json staged by the command) -->
+            <discover_datasets from_provided_metadata="true" visible="false" />
+        </collection>
+        <data name="meta_out" format="tabular" label="Loaded annotations metadata" from_work_dir="meta.out"> <!-- produced only for a non-empty retrieve_meta; format switches to yaml for the yaml option -->
+            <filter>bool(str(retrieve_meta))</filter>
+            <change_format>
+                <when input="retrieve_meta" value="yaml" format="yaml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- one annotation selected, retrieve_meta not set: only the collection output is expected -->
+            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
+            <param name="annotation" value="hotspots.data.test" />
+            <output_collection name="list_output" type="list" count="1">
+                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1"> <!-- two annotations selected, retrieve_meta not set: collection with two elements -->
+            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
+            <param name="annotation" value="hotspots.data.test,civic.variants.test" />
+            <output_collection name="list_output" type="list" count="2">
+                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="2"> <!-- two annotations plus yaml metadata, compared against the package's meta.yml -->
+            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
+            <param name="annotation" value="hotspots.data.test,civic.variants.test" />
+            <param name="retrieve_meta" value="yaml" />
+            <output_collection name="list_output" type="list" count="2">
+                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+            </output_collection>
+            <output name="meta_out" ftype="yaml" file="test-annotation-data/meta.yml" />
+        </test>
+        <test expect_num_outputs="2"> <!-- two annotations plus tabular metadata, compared against anno-version.tsv -->
+            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
+            <param name="annotation" value="hotspots.data.test,civic.variants.test" />
+            <param name="retrieve_meta" value="tab" />
+            <output_collection name="list_output" type="list" count="2">
+                <element name="hotspots.data.test" ftype="bed" file="test-annotation-data/hotspots.data.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+            </output_collection>
+            <output name="meta_out" ftype="tabular" file="anno-version.tsv"/>
+        </test>
+        <test expect_num_outputs="2"> <!-- one of the package's annotations selected: tabular metadata must hold the header plus that single record -->
+            <param name="packaged_data" value="Cancer_variant_data__hg19__1__2021-12-16" />
+            <param name="annotation" value="civic.variants.test" />
+            <param name="retrieve_meta" value="tab" />
+            <output_collection name="list_output" type="list" count="1">
+                <element name="civic.variants.test" ftype="bed" file="test-annotation-data/civic.variants.test">
+                    <metadata name="dbkey" value="hg19" />
+                </element>
+            </output_collection>
+            <output name="meta_out" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="2" />
+                    <has_line line="Annotation&#009;Version" />
+                    <has_line line="CIViC variants&#009;01-Feb-2019" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+This tool can be used to make genome annotation data installed via the
+`data manager for packaged annotation data
+<https://toolshed.g2.bx.psu.edu/view/wolma/data_manager_packaged_annotation_data>`__
+accessible as a list collection of datasets.
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/retrieve_meta.py	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+
+import yaml
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('galaxy_json')
+    parser.add_argument(
+        '-o', '--ofile',
+        required=True
+    )
+    parser.add_argument(
+        '--format', choices=['yaml', 'tab'], default='yaml'
+    )
+    args = parser.parse_args()
+
+    galaxy_collection_info = json.load(open(args.galaxy_json))
+    annotation_info = next(iter(galaxy_collection_info.values()))['elements']
+    selected_ids = {i['name'] for i in annotation_info}
+    package_meta_file = os.path.join(
+        os.path.dirname(annotation_info[0]['filename']),
+        'meta.yml'
+    )
+    meta = yaml.safe_load(open(package_meta_file))
+    meta['records'] = [
+        rec for rec in meta['records'] if rec['id'] in selected_ids
+    ]
+
+    with open(args.ofile, 'w') as fo:
+        if args.format == 'yaml':
+            yaml.dump(
+                meta, fo, allow_unicode=False, default_flow_style=False
+            )
+        else:
+            print('Annotation\tVersion', file=fo)
+            for record in meta['records']:
+                print(record['name'], record['version'], sep='\t', file=fo)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/anno-version.tsv	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,3 @@
+Annotation	Version
+CancerHotspots	v2
+CIViC variants	01-Feb-2019
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/packaged_annotation_data.loc	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,4 @@
+#<value>		<dbkey>			<data_name>				<data_id>		<data_format>	<package_id>			<package_name>		<path>
+#
+Cancer_variant_data__hg19__1__2021-12-16:hotspots.data.test	hg19	CancerHotspots (v2, hg19; from Cancer variant data/vol:1/build2021-12-16)	hotspots.data.test	bed	Cancer_variant_data__hg19__1__2021-12-16	Cancer variant data (hg19, vol:1/build:2021-12-16)	${__HERE__}/test-annotation-data
+Cancer_variant_data__hg19__1__2021-12-16:civic.variants.test	hg19	CIViC variants (01-Feb-2019, hg19; from Cancer variant data/vol:1/build2021-12-16)	civic.variants.test	bed	Cancer_variant_data__hg19__1__2021-12-16	Cancer variant data (hg19, vol:1/build:2021-12-16)	${__HERE__}/test-annotation-data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-annotation-data/civic.variants.test	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,7 @@
+1	11217229	11217230	https://civic.genome.wustl.edu/links/variants/543	MTOR
+1	11856377	11856378	https://civic.genome.wustl.edu/links/variants/258	MTHFR
+1	26211930	26232957	https://civic.genome.wustl.edu/links/variants/357	STMN1
+1	27022523	27108595	https://civic.genome.wustl.edu/links/variants/2108	ARID1A
+1	36932095	36933444	https://civic.genome.wustl.edu/links/variants/560	CSF3R
+1	40361097	40367685	https://civic.genome.wustl.edu/links/variants/670	MYCL
+1	57159475	57159478	https://civic.genome.wustl.edu/links/variants/385	PRKAA2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-annotation-data/hotspots.data.test	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,12 @@
+1	2488104	2488105	TNFRSF14	0.0100655869328098
+1	2488105	2488106	TNFRSF14	0.0100655869328098
+1	2491288	2491289	TNFRSF14	0.0888304463274096
+1	2491289	2491290	TNFRSF14	0.0888304463274096
+1	8073431	8073432	ERRFI1	0.0212231821625514
+1	8073433	8073434	ERRFI1	0.0212231821625514
+1	8074312	8074313	ERRFI1	0.0452215429932304
+1	9779981	9779982	PIK3CD	0.0177652807514415
+1	9784112	9784113	PIK3CD	0.0656422395683915
+1	9784113	9784114	PIK3CD	0.0656422395683915
+1	9787029	9787030	PIK3CD	7.49932571758753e-06
+1	11169374	11169375	MTOR	2.29333311583453e-13
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-annotation-data/meta.yml	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,20 @@
+build: '2021-12-16'
+id: Cancer_variant_data__hg19__1__2021-12-16
+name: Cancer variant data
+records:
+- checksum: md5:ec8ec9afd4ae4935ac474e150e4e90aa
+  doi: 10.1158/2159-8290.CD-17-0321
+  format: bed
+  id: hotspots.data.test
+  name: CancerHotspots
+  source: https://zenodo.org/api/files/a89ff3af-261e-4c24-a9fb-5050ce8807b2/hotspots.bed
+  version: v2
+- checksum: md5:9e42bb7492be9e0011bf29b7e4f83f41
+  doi: http://dx.doi.org/10.1038/ng.3774
+  format: bed
+  id: civic.variants.test
+  name: CIViC variants
+  source: https://zenodo.org/api/files/a89ff3af-261e-4c24-a9fb-5050ce8807b2/01-Feb-2019-CIVic.bed
+  version: 01-Feb-2019
+refgenome: hg19
+volume: 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/packaged_annotation_data.loc.sample	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,20 @@
+#This file describes genome annotation data packages and their contents
+#available on the server.
+#Such data can consist of any number of individual files in a variety of
+#formats (e.g., bed, vcf, tabular) describing any features with respect to the
+#genome with the associated dbkey.
+#The directory referenced in the <path> column of the table is expected to
+#contain the file listed under <data_id> and a meta.yml file with details about
+#the annotation package volume and all of its contents.
+#This data table has the format (white space characters are TAB characters):
+#
+#<value>														<dbkey>			<data_name>																	<data_id>		<data_format>	<package_id>								<package_name>					<path>
+#
+#So, packaged_annotation_data.loc tables could look like this:
+#
+#dbSNP__hg19__1__1:dbSNP.tidy									hg19			dbSNP tidy (b147.20160601, hg19; from dbSNP/vol:1/build:1)					dbSNP.tidy		vcf_bgzip		dbSNP__hg19__1__1					dbSNP (hg19, vol:1/build:1)							/path/to/packaged_annotation_data/hg19/dbSNP/1/1
+#Cancer_variant_data__hg19__1__1:hotspots.data					hg19			CancerHotspots (v2, hg19; from Cancer variant data/vol:1/build:1)				hotspots.data	bed				Cancer_variant_data__hg19__1__1		Cancer variant data (hg19, vol:1/build:1)			/path/to/packaged_annotation_data/hg19/Cancer_variant_data/1/1
+#Cancer_gene_data__hg19__1__1:civic.genes						hg19			CIViC genes (01-Feb-2019, hg19; from Cancer gene data/vol:1/build:1)			civic.genes		tabular			Cancer_gene_data__hg19__1__1		Cancer gene data (hg19, vol:1/build:1)					/path/to/packaged_annotation_data/hg19/Cancer_gene_data/1/1
+#SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1:ARTICv3		NC_045512.2		ARTIC (v3, NC_045512.2; from SARS-CoV-2 amplicon primer sets/vol:1/build:1)	ARTICv3			bed6			SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1	SARS-CoV-2 amplicon primer sets (NC_045512.2, vol:1/build:1)	/path/to/packaged_annotation_data/NC_045512.2/SARS-CoV-2_amplicon_primer_sets/1/1
+#SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1:ARTICv4		NC_045512.2		ARTIC (v4, NC_045512.2; from SARS-CoV-2 amplicon primer sets/vol:1/build:1)	ARTICv4			bed6			SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1	SARS-CoV-2 amplicon primer sets (NC_045512.2, vol:1/build:1)	/path/to/packaged_annotation_data/NC_045512.2/SARS-CoV-2_amplicon_primer_sets/1/1
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Table of installed packaged annotation data: one row per annotation file, with the eight columns listed below -->
+    <table name="packaged_annotation_data" comment_char="#">
+        <columns>value, dbkey, data_name, data_id, data_format, package_id, package_name, path</columns>
+        <file path="tool-data/packaged_annotation_data.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Jan 04 18:34:48 2022 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Test variant of the packaged annotation data table; the loc file path is given relative to ${__HERE__} -->
+    <table name="packaged_annotation_data" comment_char="#">
+        <columns>value, dbkey, data_name, data_id, data_format, package_id, package_name, path</columns>
+        <file path="${__HERE__}/test-data/packaged_annotation_data.loc" />
+    </table>
+</tables>