Mercurial > repos > iuc > ega_download_client
diff pyega3.xml @ 3:8bbe09a52904 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit c472a64dc5e68fb058b71e7404f180847ba9f6d4
author | iuc |
---|---|
date | Thu, 27 Oct 2022 15:12:30 +0000 |
parents | e82175804eb1 |
children | 9564758e8638 |
line wrap: on
line diff
--- a/pyega3.xml Tue Jun 14 17:06:07 2022 +0000 +++ b/pyega3.xml Thu Oct 27 15:12:30 2022 +0000 @@ -1,11 +1,15 @@ -<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy1" profile="21.01" > +<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.01" > <macros> - <token name="@TOOL_VERSION@">4.0.0</token> + <token name="@TOOL_VERSION@">4.0.5</token> + <token name="@VERSION_SUFFIX@">1</token> </macros> <requirements> <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement> </requirements> - <command detect_errors="exit_code"><![CDATA[ + <version_command><![CDATA[ + pyega3 -v |& grep version | cut -d" " -f 10 + ]]></version_command> + <command detect_errors="exit_code"><![CDATA[ #set $username = $__user__.extra_preferences.get('ega_account|username', "") #if $username == "": #set $username = "ega-test-data@ebi.ac.uk (default user)" @@ -43,9 +47,17 @@ && rm -f downloads/*.md5 ## checksum validation already performed by pyEGA, cleanup downloads folder #elif $action.action_type == "download_files" - #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split('\t')[int(str($action.id_column))-1].startswith('EGAF') ] + #import re + #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines()] + #set outfiles=[x.split('\t')[int(str($action.file_column))-1].replace('\n', '') for x in open(str($id_table)).readlines()] mkdir downloads - #for f in $file_ids + #for f, o in zip($file_ids, $outfiles) + #if not f.startswith("EGAF") + && >&2 echo "Ignoring \"$f\": no EGA file ID" + #continue + #end if + && + echo 'Downloading $f: $o' && pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials' fetch '$f' @@ -60,6 +72,16 @@ #end if #end if --output-dir downloads + #if re.match(".*vcf(_genomic_range_.*|).gz$", o) + #if $action.range.reference_name or ($action.range.start or $action.range.end) + && mv 
'downloads/$f/'$o[:-3]'_genomic_range_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.start" ] && printf "None" || printf "$action.range.start")'_'\$([ -z "$action.range.end" ] && printf "None" || printf "$action.range.end")'.gz' 'downloads/$f/'$o'.vcf_bgzip' + #else + && mv 'downloads/$f/$o' 'downloads/$f/'$o'.vcf_bgzip' + #end if + #end if + #if re.match(".*ped$", o) + && mv 'downloads/$f/$o' 'downloads/$f/'$o'.tabular' + #end if #end for && rm -f downloads/**/*.md5 ## checksum validation already performed by pyEGA, clean up downloads folder @@ -95,7 +117,7 @@ <when value="list_datasets"/> <when value="download_file"> <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with 'EGAF'. For example: EGAF00001753735"> - <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGAF[0-9]+</validator> + <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGA[DF][0-9]+</validator> </param> <section name="range" title="Request a specific Genomic range?" expanded="false"> <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> @@ -106,6 +128,7 @@ <when value="download_files"> <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/> <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. 
For example: EGAF00001753735" /> + <param name="file_column" type="data_column" data_ref="id_table" label="Column containing the file names" /> <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false"> <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> @@ -134,7 +157,8 @@ </collection> </outputs> <tests> - <test expect_num_outputs="1"><!-- list datasets with default credentials --> + <!-- list datasets with default credentials --> + <test expect_num_outputs="1"> <param name="action_type" value="list_datasets"/> <output name="authorized_datasets" ftype="txt"> <assert_contents> @@ -143,7 +167,8 @@ </assert_contents> </output> </test> - <test expect_num_outputs="2"><!-- list dataset files with default credentials, and request a log output file --> + <!-- list dataset files with default credentials, and request a log output file --> + <test expect_num_outputs="2"> <param name="action_type" value="list_dataset_files"/> <param name="dataset_id" value="EGAD00001003338"/> <param name="output_log" value="true"/> @@ -156,12 +181,14 @@ </assert_contents> </output> </test> - <test expect_num_outputs="1"> <!-- download a single file --> + <!-- download a single file --> + <test expect_num_outputs="1"> <param name="action_type" value="download_file"/> <param name="file_id" value="EGAF00001775036"/> <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/> </test> - <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified --> + <!-- download a single file, with genomic range specified --> + <test expect_num_outputs="1"> <param name="action_type" value="download_file"/> <param 
name="file_id" value="EGAF00001753756"/> <param name="reference_name" value="1"/> @@ -169,19 +196,26 @@ <param name="end" value="10000"/> <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/> </test> - <test expect_num_outputs="1"> <!-- download multiple files --> + <!-- download multiple files --> + <test expect_num_outputs="1"> <param name="action_type" value="download_files"/> <param name="id_table" value="filelist.tabular"/> <param name="id_column" value="1"/> + <param name="file_column" value="5"/> <output_collection name="downloaded_file_collection" type="list" count="2"> <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" /> <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" /> </output_collection> + <assert_stderr> + <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/> + </assert_stderr> </test> - <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range --> + <!-- download multiple files, in combination with a genomic range --> + <test expect_num_outputs="1"> <param name="action_type" value="download_files"/> <param name="id_table" value="filelist2.tabular"/> <param name="id_column" value="1"/> + <param name="file_column" value="5"/> <param name="reference_name" value="1"/> <param name="start" value="0"/> <param name="end" value="10000"/> @@ -189,6 +223,20 @@ <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" /> <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" /> </output_collection> + <assert_stderr> + <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/> + </assert_stderr> + </test> + <!-- download multiple vcf.gz files --> + <test expect_num_outputs="1"> + <param name="action_type" value="download_files"/> + <param name="id_table" value="filelist3.tabular"/> + <param name="id_column" value="1"/> + <param name="file_column" value="5"/> + <output_collection 
name="downloaded_file_collection" type="list" count="2"> + <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz" md5="51cfb69bf3b9416ff425381a58c18a2b" /> + <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz" md5="ebad4425191a89d3e970c02190a87175" /> + </output_collection> </test> </tests> <help><![CDATA[