comparison pyega3.xml @ 3:8bbe09a52904 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit c472a64dc5e68fb058b71e7404f180847ba9f6d4
author iuc
date Thu, 27 Oct 2022 15:12:30 +0000
parents e82175804eb1
children 9564758e8638
comparison
equal deleted inserted replaced
2:e82175804eb1 3:8bbe09a52904
1 <tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy1" profile="21.01" > 1 <tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.01" >
2 <macros> 2 <macros>
3 <token name="@TOOL_VERSION@">4.0.0</token> 3 <token name="@TOOL_VERSION@">4.0.5</token>
4 <token name="@VERSION_SUFFIX@">1</token>
4 </macros> 5 </macros>
5 <requirements> 6 <requirements>
6 <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement> 7 <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement>
7 </requirements> 8 </requirements>
8 <command detect_errors="exit_code"><![CDATA[ 9 <version_command><![CDATA[
10 pyega3 -v |& grep version | cut -d" " -f 10
11 ]]></version_command>
12 <command detect_errors="exit_code"><![CDATA[
9 #set $username = $__user__.extra_preferences.get('ega_account|username', "") 13 #set $username = $__user__.extra_preferences.get('ega_account|username', "")
10 #if $username == "": 14 #if $username == "":
11 #set $username = "ega-test-data@ebi.ac.uk (default user)" 15 #set $username = "ega-test-data@ebi.ac.uk (default user)"
12 #end if 16 #end if
13 echo "Running as user: $username. Set your credentials via: User -> Preferences -> Manage Information" && 17 echo "Running as user: $username. Set your credentials via: User -> Preferences -> Manage Information" &&
41 #end if 45 #end if
42 && mv ${action.file_id} downloads 46 && mv ${action.file_id} downloads
43 && rm -f downloads/*.md5 ## checksum validation already performed by pyEGA, cleanup downloads folder 47 && rm -f downloads/*.md5 ## checksum validation already performed by pyEGA, cleanup downloads folder
44 48
45 #elif $action.action_type == "download_files" 49 #elif $action.action_type == "download_files"
46 #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split('\t')[int(str($action.id_column))-1].startswith('EGAF') ] 50 #import re
51 #set file_ids=[x.split('\t')[int(str($action.id_column))-1].replace('\n', '') for x in open(str($id_table)).readlines()]
52 #set outfiles=[x.split('\t')[int(str($action.file_column))-1].replace('\n', '') for x in open(str($id_table)).readlines()]
47 mkdir downloads 53 mkdir downloads
48 #for f in $file_ids 54 #for f, o in zip($file_ids, $outfiles)
55 #if not f.startswith("EGAF")
56 && >&2 echo "Ignoring \"$f\": no EGA file ID"
57 #continue
58 #end if
59 &&
60 echo 'Downloading $f: $o'
49 && 61 &&
50 pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials' 62 pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials'
51 fetch '$f' 63 fetch '$f'
52 --max-retries 10 64 --max-retries 10
53 #if $action.range.reference_name 65 #if $action.range.reference_name
58 #if $action.range.end 70 #if $action.range.end
59 --end $action.range.end 71 --end $action.range.end
60 #end if 72 #end if
61 #end if 73 #end if
62 --output-dir downloads 74 --output-dir downloads
75 #if re.match(".*vcf(_genomic_range_.*|).gz$", o)
76 #if $action.range.reference_name or ($action.range.start or $action.range.end)
77 && mv 'downloads/$f/'$o[:-3]'_genomic_range_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.start" ] && printf "None" || printf "$action.range.start")'_'\$([ -z "$action.range.end" ] && printf "None" || printf "$action.range.end")'.gz' 'downloads/$f/'$o'.vcf_bgzip' ## source name is name_genomic_range_REF_START_END.gz; each field is tested and printed from its own parameter, None stands in for an unset one
78 #else
79 && mv 'downloads/$f/$o' 'downloads/$f/'$o'.vcf_bgzip'
80 #end if
81 #end if
82 #if re.match(".*ped$", o)
83 && mv 'downloads/$f/$o' 'downloads/$f/'$o'.tabular'
84 #end if
63 #end for 85 #end for
64 && rm -f downloads/**/*.md5 ## checksum validation already performed by pyEGA, clean up downloads folder 86 && rm -f downloads/**/*.md5 ## checksum validation already performed by pyEGA, clean up downloads folder
65 87
66 #end if 88 #end if
67 ]]></command> 89 ]]></command>
93 </param> 115 </param>
94 </when> 116 </when>
95 <when value="list_datasets"/> 117 <when value="list_datasets"/>
96 <when value="download_file"> 118 <when value="download_file">
97 <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with 'EGAF'. For example: EGAF00001753735"> 119 <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with 'EGAF'. For example: EGAF00001753735">
98 <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGAF[0-9]+</validator> 120 <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGA[DF][0-9]+</validator>
99 </param> 121 </param>
100 <section name="range" title="Request a specific Genomic range?" expanded="false"> 122 <section name="range" title="Request a specific Genomic range?" expanded="false">
101 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> 123 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." />
102 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> 124 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/>
103 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> 125 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/>
104 </section> 126 </section>
105 </when> 127 </when>
106 <when value="download_files"> 128 <when value="download_files">
107 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/> 129 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/>
108 <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" /> 130 <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" />
131 <param name="file_column" type="data_column" data_ref="id_table" label="Column containing the file names" />
109 <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false"> 132 <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false">
110 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> 133 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." />
111 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> 134 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/>
112 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> 135 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/>
113 </section> 136 </section>
132 <filter> action['action_type'] == 'download_files' </filter> 155 <filter> action['action_type'] == 'download_files' </filter>
133 <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" /> 156 <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" />
134 </collection> 157 </collection>
135 </outputs> 158 </outputs>
136 <tests> 159 <tests>
137 <test expect_num_outputs="1"><!-- list datasets with default credentials --> 160 <!-- list datasets with default credentials -->
161 <test expect_num_outputs="1">
138 <param name="action_type" value="list_datasets"/> 162 <param name="action_type" value="list_datasets"/>
139 <output name="authorized_datasets" ftype="txt"> 163 <output name="authorized_datasets" ftype="txt">
140 <assert_contents> 164 <assert_contents>
141 <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/> 165 <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/>
142 <has_text text="EGAD00001003338"/> 166 <has_text text="EGAD00001003338"/>
143 </assert_contents> 167 </assert_contents>
144 </output> 168 </output>
145 </test> 169 </test>
146 <test expect_num_outputs="2"><!-- list dataset files with default credentials, and request a log output file --> 170 <!-- list dataset files with default credentials, and request a log output file -->
171 <test expect_num_outputs="2">
147 <param name="action_type" value="list_dataset_files"/> 172 <param name="action_type" value="list_dataset_files"/>
148 <param name="dataset_id" value="EGAD00001003338"/> 173 <param name="dataset_id" value="EGAD00001003338"/>
149 <param name="output_log" value="true"/> 174 <param name="output_log" value="true"/>
150 <output name="dataset_file_list" file="filelist_EGAD00001003338.tabular"/> 175 <output name="dataset_file_list" file="filelist_EGAD00001003338.tabular"/>
151 <output name="logfile" ftype="txt"> 176 <output name="logfile" ftype="txt">
154 <has_line_matching expression="^\[.*\]\s+File ID\s+Status\s+Bytes\s+Check sum\s+File name$"/> 179 <has_line_matching expression="^\[.*\]\s+File ID\s+Status\s+Bytes\s+Check sum\s+File name$"/>
155 <has_text text="EGAF00001753734"/> 180 <has_text text="EGAF00001753734"/>
156 </assert_contents> 181 </assert_contents>
157 </output> 182 </output>
158 </test> 183 </test>
159 <test expect_num_outputs="1"> <!-- download a single file --> 184 <!-- download a single file -->
185 <test expect_num_outputs="1">
160 <param name="action_type" value="download_file"/> 186 <param name="action_type" value="download_file"/>
161 <param name="file_id" value="EGAF00001775036"/> 187 <param name="file_id" value="EGAF00001775036"/>
162 <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/> 188 <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/>
163 </test> 189 </test>
164 <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified --> 190 <!-- download a single file, with genomic range specified -->
191 <test expect_num_outputs="1">
165 <param name="action_type" value="download_file"/> 192 <param name="action_type" value="download_file"/>
166 <param name="file_id" value="EGAF00001753756"/> 193 <param name="file_id" value="EGAF00001753756"/>
167 <param name="reference_name" value="1"/> 194 <param name="reference_name" value="1"/>
168 <param name="start" value="0"/> 195 <param name="start" value="0"/>
169 <param name="end" value="10000"/> 196 <param name="end" value="10000"/>
170 <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/> 197 <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/>
171 </test> 198 </test>
172 <test expect_num_outputs="1"> <!-- download multiple files --> 199 <!-- download multiple files -->
200 <test expect_num_outputs="1">
173 <param name="action_type" value="download_files"/> 201 <param name="action_type" value="download_files"/>
174 <param name="id_table" value="filelist.tabular"/> 202 <param name="id_table" value="filelist.tabular"/>
175 <param name="id_column" value="1"/> 203 <param name="id_column" value="1"/>
204 <param name="file_column" value="5"/>
176 <output_collection name="downloaded_file_collection" type="list" count="2"> 205 <output_collection name="downloaded_file_collection" type="list" count="2">
177 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" /> 206 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" />
178 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" /> 207 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" />
179 </output_collection> 208 </output_collection>
180 </test> 209 <assert_stderr>
181 <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range --> 210 <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/>
211 </assert_stderr>
212 </test>
213 <!-- download multiple files, in combination with a genomic range -->
214 <test expect_num_outputs="1">
182 <param name="action_type" value="download_files"/> 215 <param name="action_type" value="download_files"/>
183 <param name="id_table" value="filelist2.tabular"/> 216 <param name="id_table" value="filelist2.tabular"/>
184 <param name="id_column" value="1"/> 217 <param name="id_column" value="1"/>
218 <param name="file_column" value="5"/>
185 <param name="reference_name" value="1"/> 219 <param name="reference_name" value="1"/>
186 <param name="start" value="0"/> 220 <param name="start" value="0"/>
187 <param name="end" value="10000"/> 221 <param name="end" value="10000"/>
188 <output_collection name="downloaded_file_collection" count="2"> 222 <output_collection name="downloaded_file_collection" count="2">
189 <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" /> 223 <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" />
190 <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" /> 224 <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" />
191 </output_collection> 225 </output_collection>
226 <assert_stderr>
227 <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/>
228 </assert_stderr>
229 </test>
230 <!-- download multiple vcf.gz files -->
231 <test expect_num_outputs="1">
232 <param name="action_type" value="download_files"/>
233 <param name="id_table" value="filelist3.tabular"/>
234 <param name="id_column" value="1"/>
235 <param name="file_column" value="5"/>
236 <output_collection name="downloaded_file_collection" type="list" count="2">
237 <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz" md5="51cfb69bf3b9416ff425381a58c18a2b" />
238 <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz" md5="ebad4425191a89d3e970c02190a87175" />
239 </output_collection>
192 </test> 240 </test>
193 </tests> 241 </tests>
194 <help><![CDATA[ 242 <help><![CDATA[
195 The pyEGA3 download client is a python-based tool for viewing and downloading files from authorized EGA datasets. 243 The pyEGA3 download client is a python-based tool for viewing and downloading files from authorized EGA datasets.
196 244