Mercurial > repos > iuc > ega_download_client
changeset 1:1162bfd9f782 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit f91c99780efc91e89950ed9494ef9e6f83d198c7"
author | iuc |
---|---|
date | Tue, 12 Apr 2022 11:36:51 +0000 |
parents | caf213d51d6c |
children | e82175804eb1 |
files | pyega3.xml test-data/filelist.tabular test-data/filelist2.tabular test-data/filelist_EGAD00001003338.tabular |
diffstat | 4 files changed, 143 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/pyega3.xml Fri Oct 30 22:18:41 2020 +0000 +++ b/pyega3.xml Tue Apr 12 11:36:51 2022 +0000 @@ -1,9 +1,9 @@ -<tool id="pyega3" name="EGA Download Client" version="@VERSION@+galaxy0" profile="19.09" > +<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy0" profile="21.01" > <macros> - <token name="@VERSION@">3.4.0</token> + <token name="@TOOL_VERSION@">4.0.0</token> </macros> <requirements> - <requirement type="package" version="@VERSION@">pyega3</requirement> + <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ #set $username = $__user__.extra_preferences.get('ega_account|username', "") @@ -18,9 +18,18 @@ #elif $action.action_type == "list_dataset_files" pyega3 -cf '$credentials' files '$action.dataset_id' + && + + ## create file header + echo -e 'File ID\tStatus\tBytes\tCheck sum\tFile name' > '$dataset_file_list' && + + ## remove timestamps and convert spaces to tabs + grep EGAF pyega3_output.log | sed -e 's/^\[.*\]\s\+//g' | sed 's/\s\+/\t/g' >> '$dataset_file_list' + #elif $action.action_type == "download_file" - pyega3 -cf '$credentials' + pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials' fetch '$action.file_id' + --max-retries 10 #if $action.range.reference_name --reference-name '$action.range.reference_name' #if $action.range.start @@ -30,7 +39,30 @@ --end $action.range.end #end if #end if - --saveto '$downloaded_file' + && mv ${action.file_id} downloads + && rm -f downloads/*.md5 ## checksum validation already performed by pyEGA, cleanup downloads folder + +#elif $action.action_type == "download_files" + #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split('\t')[int(str($action.id_column))-1].startswith('EGAF') ] + mkdir downloads + #for f in $file_ids + && + pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials' + fetch '$f' + --max-retries 10 + #if $action.range.reference_name + --reference-name '$action.range.reference_name' + #if $action.range.start + --start $action.range.start + #end if + #if $action.range.end + --end $action.range.end + #end if + #end if + --output-dir downloads + #end for + && rm -f downloads/**/*.md5 ## checksum validation already performed by pyEGA, clean up downloads folder + #end if ]]></command> <configfiles> @@ -53,6 +85,7 @@ <option value="list_datasets"> List my authorized datasets </option> <option value="list_dataset_files"> List files in a datasets </option> <option value="download_file"> Download a file </option> + <option value="download_files"> Download multiple files (based on a file with IDs) </option> </param> <when value="list_dataset_files"> <param name="dataset_id" type="text" optional="false" label="EGA Dataset Accession ID" help="Identifier starting with 'EGAD'. For example: EGAD00001003338"> @@ -65,40 +98,59 @@ <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGAF[0-9]+</validator> </param> <section name="range" title="Request a specific Genomic range?" expanded="false"> - <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> - <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> - <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> + <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> + <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> + <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> + </section> + </when> + <when value="download_files"> + <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/> + <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" /> + <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false"> + <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> + <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> + <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> </section> </when> </conditional> + <param name="output_log" type="boolean" checked="false" label="Output the log file?"/> </inputs> <outputs> <data name="authorized_datasets" format="txt" from_work_dir="pyega3_output.log" label="${tool.name}: authorized datasets"> <filter> action['action_type'] == 'list_datasets' </filter> </data> - <data name="dataset_file_list" format="txt" from_work_dir="pyega3_output.log" label="${tool.name}: dataset file list"> + <data name="dataset_file_list" format="tabular" label="${tool.name}: ${action.dataset_id} - file list"> <filter> action['action_type'] == 'list_dataset_files' </filter> </data> - <data name="downloaded_file" auto_format="true" label="${tool.name}: ${action.file_id} ${action.range.reference_name} ${action.range.start} ${action.range.end}"> + <data name="downloaded_file" auto_format="true" from_work_dir="downloads/*" label="${tool.name}: ${action.file_id} ${action.range.reference_name} ${action.range.start} ${action.range.end}"> <filter> action['action_type'] == 'download_file' </filter> </data> + <data name="logfile" format="txt" from_work_dir="pyega3_output.log" label="${tool.name}: log"> + <filter> output_log </filter> + </data> + <collection name="downloaded_file_collection" type="list" label="${tool.name} on ${on_string}: Downloaded datasets"> + <filter> action['action_type'] == 'download_files' </filter> + <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" /> + </collection> </outputs> <tests> <test expect_num_outputs="1"><!-- list datasets with default credentials --> <param name="action_type" value="list_datasets"/> <output name="authorized_datasets" ftype="txt"> <assert_contents> - <has_text text="pyEGA3 - EGA python client version @VERSION@"/> + <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/> <has_text text="EGAD00001003338"/> </assert_contents> </output> </test> - <test expect_num_outputs="1"><!-- list dataset files with default credentials --> + <test expect_num_outputs="2"><!-- list dataset files with default credentials, and request a log output file --> <param name="action_type" value="list_dataset_files"/> <param name="dataset_id" value="EGAD00001003338"/> - <output name="dataset_file_list" ftype="txt"> + <param name="output_log" value="true"/> + <output name="dataset_file_list" file="filelist_EGAD00001003338.tabular"/> + <output name="logfile" ftype="txt"> <assert_contents> - <has_text text="pyEGA3 - EGA python client version @VERSION@"/> + <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/> <has_line_matching expression="^\[.*\]\s+File ID\s+Status\s+Bytes\s+Check sum\s+File name$"/> <has_text text="EGAF00001753734"/> </assert_contents> @@ -109,7 +161,7 @@ <param name="file_id" value="EGAF00001775036"/> <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/> </test> - <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified --> + <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified --> <param name="action_type" value="download_file"/> <param name="file_id" value="EGAF00001753756"/> <param name="reference_name" value="1"/> @@ -117,6 +169,27 @@ <param name="end" value="10000"/> <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/> </test> + <test expect_num_outputs="1"> <!-- download multiple files --> + <param name="action_type" value="download_files"/> + <param name="id_table" value="filelist.tabular"/> + <param name="id_column" value="1"/> + <output_collection name="downloaded_file_collection" type="list" count="2"> + <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" /> + <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" /> + </output_collection> + </test> + <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range --> + <param name="action_type" value="download_files"/> + <param name="id_table" value="filelist2.tabular"/> + <param name="id_column" value="1"/> + <param name="reference_name" value="1"/> + <param name="start" value="100"/> + <param name="end" value="10000"/> + <output_collection name="downloaded_file_collection" count="2"> + <element name="NA19239_genomic_range_1_100_10000" md5="bcdcf18846233cbe5cc8afd95168552c" /> + <element name="NA19240_genomic_range_1_100_10000" md5="e576a38748feec45aa45191f6e902ce2" /> + </output_collection> + </test> </tests> <help><![CDATA[ The pyEGA3 download client is a python-based tool for viewing and downloading files from authorized EGA datasets.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filelist.tabular Tue Apr 12 11:36:51 2022 +0000 @@ -0,0 +1,3 @@ +File ID Status Bytes Check sum File name +EGAF00001775034 1 5991400 b8ae14d5d1f717ab17d45e8fc36946a0 ENCFF000VWO.bam.bai +EGAF00001775036 1 4804928 3b89b96387db5199fef6ba613f70e27c ENCFF284YOU.bam.bai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filelist2.tabular Tue Apr 12 11:36:51 2022 +0000 @@ -0,0 +1,3 @@ +File ID Status Bytes Check sum File name +EGAF00001753756 1 140445765831 2413ce93a4b2b50fa0c2ff5bdf97695f NA19240.bam +EGAF00001753754 1 136016115737 59fbc3828fb878d8e637557ce707d445 NA19239.bam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filelist_EGAD00001003338.tabular Tue Apr 12 11:36:51 2022 +0000 @@ -0,0 +1,49 @@ +File ID Status Bytes Check sum File name +EGAF00005000662 1 25444204 274de4071bca5354ff16a1de0116c455 NA19238.chr22.vcf.gz +EGAF00005000663 1 18596 02fdb6fc68b854f98fef710ff4dee0c1 NA19238.chr22.vcf.gz.tbi +EGAF00005000664 1 26957200 62b16cc9ce6ceb3ef97b98c99aa6fec5 NA19238.chr22.bcf +EGAF00005000665 1 14509 7cf0f467fd44dd783ff05cb4662642b6 NA19238.chr22.bcf.csi +EGAF00005001623 1 214453766 ad7d6e0c05edafd7faed7601f7f3eaba ALL_chr22_20130502_2504Individuals.vcf.gz +EGAF00005001624 1 36094 4202e9a481aa8103b471531a96665047 ALL_chr22_20130502_2504Individuals.vcf.gz.tbi +EGAF00005001625 1 186424665 c65ca1a4abd55351598ccbc65ebfa9a6 ALL_chr22_20130502_2504Individuals.bcf +EGAF00005001626 1 27620 09e3b4724404fc7bb5f9948f80016757 ALL_chr22_20130502_2504Individuals.bcf.csi +EGAF00005007180 1 1837578063 74d3b803823d3f8b73bd592941f23726 HG01775.GRCh38DH.exome.cram +EGAF00005007181 1 2938941932 910141b9f4ccbfbf57813dee1a7a3f1d NA18534.GRCh38DH.exome.cram +EGAF00005007323 1 5719142 388fb466c983d4bec2082941647409f3 ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz +EGAF00005007324 1 8074 fa37e14805cce3221f6f9d3a4cd749a4 ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz.tbi +EGAF00005007325 1 5527171 395c0d3d454d7c7d61c4f771fbab02fc ALL.chrY.phase3_integrated_v2a.20130502.genotypes.bcf +EGAF00005007326 1 6251 ae2d2097a8744877d9d20907200cbdcf ALL.chrY.phase3_integrated_v2a.20130502.genotypes.bcf.csi +EGAF00005007327 1 850737 f3dee64b466efe334b2cac77f5c2f710 HG01775.chrY.vcf.gz +EGAF00005007328 1 4981 d0e71e5dd7f5279e113c4f0dfd37fc23 HG01775.chrY.vcf.gz.tbi +EGAF00005007329 1 876313 aaca702e347ae6caa734d44527a49212 HG01775.chrY.bcf +EGAF00005007330 1 4722 110b493c17210ff3484ed2561a2fe21f HG01775.chrY.bcf.csi +EGAF00005007331 1 137465 fcf1cc38cd404ea1cdba3975d26f4a8b HG01775.GRCh38DH.exome.cram.crai +EGAF00005007332 1 229305 56e8de04466aba23ab5acbaf1c087045 NA18534.GRCh38DH.exome.cram.crai +EGAF00001753734 1 45030910198 040ef7533533a3db67a35b9f454b9269 NA12878.cram +EGAF00001753735 1 1575103 41fd8741e91924eae19c6baa7893eeb8 NA12878.crai +EGAF00001753736 1 38215425935 bbc03793c9534a22f77e751d2723cb10 NA12891.cram +EGAF00001753737 1 1310034 0ab7a2d110740561871ccdca7f15f13b NA12891.crai +EGAF00001753738 1 38370156211 a7503d228d0851b999b826b736b8dd32 NA12892.cram +EGAF00001753739 1 1331384 bb569235226b5b9f0578d34d1b52482e NA12892.crai +EGAF00001753740 1 34823972801 492780f603da2f5f3306c41011e0acd2 NA19238.cram +EGAF00001753741 1 1195785 3b862e018b0b85db7954cbed2e17b6ba NA19238.crai +EGAF00001753742 1 44113571936 d963539652de2ea20005d98e934d59c2 NA19239.cram +EGAF00001753743 1 1514700 be2024ccbf5b3bd9132f6d270a37118c NA19239.crai +EGAF00001753744 1 48309446909 728bea9317cbab1c98429e43e48f9a83 NA19240.cram +EGAF00001753745 1 1622405 18e0e7070b6cf4d042c7f9bee15d56bd NA19240.crai +EGAF00001753746 1 143427187111 11395de33f28ed867170d2dc723cc700 NA12878.bam +EGAF00001753747 1 8949984 a23a84c89d338796f78e68804c8d2c6c NA12878.bam.bai +EGAF00001753748 1 4317237247 71a78dfb5258939abab2257a2abd1126 NA12891.bam +EGAF00001753749 1 9212704 e04dbb7ccbc24ccd853d89b8b066166c NA12891.bai +EGAF00001753750 1 66145394874 201bded705401615fe5e90988d509656 NA12892.bam +EGAF00001753751 1 9204720 c1eadd98469fcd3ced4c51a84b3ce307 NA12892.bai +EGAF00001753752 1 229774247950 0751106bbe1c4c83ec934a5972a4efdf NA19238.bam +EGAF00001753753 1 9379032 028ab5c73fea03c349e0d73943913141 NA19238.bai +EGAF00001753754 1 136016115737 59fbc3828fb878d8e637557ce707d445 NA19239.bam +EGAF00001753755 1 9005792 767fc92be753de8cf570690bd7fbe629 NA19239.bai +EGAF00001753756 1 140445765831 2413ce93a4b2b50fa0c2ff5bdf97695f NA19240.bam +EGAF00001753757 1 9018288 351130149989cca43fe8c7382e9d326a NA19240.bai +EGAF00001770106 1 462139278 ce073afcbc07afa343f2d4e4d07efeda ENCFF000VWO.bam +EGAF00001770107 1 3551031027 dfef3f355230915418a78da460665d56 ENCFF284YOU.bam +EGAF00001775034 1 5991400 b8ae14d5d1f717ab17d45e8fc36946a0 ENCFF000VWO.bam.bai +EGAF00001775036 1 4804928 3b89b96387db5199fef6ba613f70e27c ENCFF284YOU.bam.bai