Mercurial > repos > iuc > ega_download_client
comparison pyega3.xml @ 3:8bbe09a52904 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit c472a64dc5e68fb058b71e7404f180847ba9f6d4
author | iuc |
---|---|
date | Thu, 27 Oct 2022 15:12:30 +0000 |
parents | e82175804eb1 |
children | 9564758e8638 |
comparison
equal
deleted
inserted
replaced
2:e82175804eb1 | 3:8bbe09a52904 |
---|---|
1 <tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy1" profile="21.01" > | 1 <tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.01" > |
2 <macros> | 2 <macros> |
3 <token name="@TOOL_VERSION@">4.0.0</token> | 3 <token name="@TOOL_VERSION@">4.0.5</token> |
4 <token name="@VERSION_SUFFIX@">1</token> | |
4 </macros> | 5 </macros> |
5 <requirements> | 6 <requirements> |
6 <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement> | 7 <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement> |
7 </requirements> | 8 </requirements> |
8 <command detect_errors="exit_code"><![CDATA[ | 9 <version_command><![CDATA[ |
10 pyega3 -v |& grep version | cut -d" " -f 10 | |
11 ]]></version_command> | |
12 <command detect_errors="exit_code"><![CDATA[ | |
9 #set $username = $__user__.extra_preferences.get('ega_account|username', "") | 13 #set $username = $__user__.extra_preferences.get('ega_account|username', "") |
10 #if $username == "": | 14 #if $username == "": |
11 #set $username = "ega-test-data@ebi.ac.uk (default user)" | 15 #set $username = "ega-test-data@ebi.ac.uk (default user)" |
12 #end if | 16 #end if |
13 echo "Running as user: $username. Set your credentials via: User -> Preferences -> Manage Information" && | 17 echo "Running as user: $username. Set your credentials via: User -> Preferences -> Manage Information" && |
41 #end if | 45 #end if |
42 && mv ${action.file_id} downloads | 46 && mv ${action.file_id} downloads |
43 && rm -f downloads/*.md5 ## checksum validation already performed by pyEGA, cleanup downloads folder | 47 && rm -f downloads/*.md5 ## checksum validation already performed by pyEGA, cleanup downloads folder |
44 | 48 |
45 #elif $action.action_type == "download_files" | 49 #elif $action.action_type == "download_files" |
46 #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split('\t')[int(str($action.id_column))-1].startswith('EGAF') ] | 50 #import re |
51 #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines()] | |
52 #set outfiles=[x.split('\t')[int(str($action.file_column))-1].replace('\n', '') for x in open(str($id_table)).readlines()] | |
47 mkdir downloads | 53 mkdir downloads |
48 #for f in $file_ids | 54 #for f, o in zip($file_ids, $outfiles) |
55 #if not f.startswith("EGAF") | |
56 && >&2 echo "Ignoring \"$f\": no EGA file ID" | |
57 #continue | |
58 #end if | |
59 && | |
60 echo 'Downloading $f: $o' | |
49 && | 61 && |
50 pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials' | 62 pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials' |
51 fetch '$f' | 63 fetch '$f' |
52 --max-retries 10 | 64 --max-retries 10 |
53 #if $action.range.reference_name | 65 #if $action.range.reference_name |
58 #if $action.range.end | 70 #if $action.range.end |
59 --end $action.range.end | 71 --end $action.range.end |
60 #end if | 72 #end if |
61 #end if | 73 #end if |
62 --output-dir downloads | 74 --output-dir downloads |
75 #if re.match(".*vcf(_genomic_range_.*|).gz$", o) | |
76 #if $action.range.reference_name or ($action.range.start or $action.range.end) | |
77 && mv 'downloads/$f/'$o[:-3]'_genomic_range_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.start" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.end")'.gz' 'downloads/$f/'$o'.vcf_bgzip' | |
78 #else | |
79 && mv 'downloads/$f/$o' 'downloads/$f/'$o'.vcf_bgzip' | |
80 #end if | |
81 #end if | |
82 #if re.match(".*ped$", o) | |
83 && mv 'downloads/$f/$o' 'downloads/$f/'$o'.tabular' | |
84 #end if | |
63 #end for | 85 #end for |
64 && rm -f downloads/**/*.md5 ## checksum validation already performed by pyEGA, clean up downloads folder | 86 && rm -f downloads/**/*.md5 ## checksum validation already performed by pyEGA, clean up downloads folder |
65 | 87 |
66 #end if | 88 #end if |
67 ]]></command> | 89 ]]></command> |
93 </param> | 115 </param> |
94 </when> | 116 </when> |
95 <when value="list_datasets"/> | 117 <when value="list_datasets"/> |
96 <when value="download_file"> | 118 <when value="download_file"> |
97 <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with 'EGAF'. For example: EGAF00001753735"> | 119 <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with 'EGAF'. For example: EGAF00001753735"> |
98 <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGAF[0-9]+</validator> | 120 <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGA[DF][0-9]+</validator> |
99 </param> | 121 </param> |
100 <section name="range" title="Request a specific Genomic range?" expanded="false"> | 122 <section name="range" title="Request a specific Genomic range?" expanded="false"> |
101 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> | 123 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> |
102 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> | 124 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> |
103 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> | 125 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> |
104 </section> | 126 </section> |
105 </when> | 127 </when> |
106 <when value="download_files"> | 128 <when value="download_files"> |
107 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/> | 129 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/> |
108 <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" /> | 130 <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" /> |
131 <param name="file_column" type="data_column" data_ref="id_table" label="Column containing the file names" /> | |
109 <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false"> | 132 <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false"> |
110 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> | 133 <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." /> |
111 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> | 134 <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/> |
112 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> | 135 <param argument="--end" type="integer" optional="true" min="0" label="End Position" help="0-based, exclusive. Only used if a reference sequence name was specified"/> |
113 </section> | 136 </section> |
132 <filter> action['action_type'] == 'download_files' </filter> | 155 <filter> action['action_type'] == 'download_files' </filter> |
133 <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" /> | 156 <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" /> |
134 </collection> | 157 </collection> |
135 </outputs> | 158 </outputs> |
136 <tests> | 159 <tests> |
137 <test expect_num_outputs="1"><!-- list datasets with default credentials --> | 160 <!-- list datasets with default credentials --> |
161 <test expect_num_outputs="1"> | |
138 <param name="action_type" value="list_datasets"/> | 162 <param name="action_type" value="list_datasets"/> |
139 <output name="authorized_datasets" ftype="txt"> | 163 <output name="authorized_datasets" ftype="txt"> |
140 <assert_contents> | 164 <assert_contents> |
141 <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/> | 165 <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/> |
142 <has_text text="EGAD00001003338"/> | 166 <has_text text="EGAD00001003338"/> |
143 </assert_contents> | 167 </assert_contents> |
144 </output> | 168 </output> |
145 </test> | 169 </test> |
146 <test expect_num_outputs="2"><!-- list dataset files with default credentials, and request a log output file --> | 170 <!-- list dataset files with default credentials, and request a log output file --> |
171 <test expect_num_outputs="2"> | |
147 <param name="action_type" value="list_dataset_files"/> | 172 <param name="action_type" value="list_dataset_files"/> |
148 <param name="dataset_id" value="EGAD00001003338"/> | 173 <param name="dataset_id" value="EGAD00001003338"/> |
149 <param name="output_log" value="true"/> | 174 <param name="output_log" value="true"/> |
150 <output name="dataset_file_list" file="filelist_EGAD00001003338.tabular"/> | 175 <output name="dataset_file_list" file="filelist_EGAD00001003338.tabular"/> |
151 <output name="logfile" ftype="txt"> | 176 <output name="logfile" ftype="txt"> |
154 <has_line_matching expression="^\[.*\]\s+File ID\s+Status\s+Bytes\s+Check sum\s+File name$"/> | 179 <has_line_matching expression="^\[.*\]\s+File ID\s+Status\s+Bytes\s+Check sum\s+File name$"/> |
155 <has_text text="EGAF00001753734"/> | 180 <has_text text="EGAF00001753734"/> |
156 </assert_contents> | 181 </assert_contents> |
157 </output> | 182 </output> |
158 </test> | 183 </test> |
159 <test expect_num_outputs="1"> <!-- download a single file --> | 184 <!-- download a single file --> |
185 <test expect_num_outputs="1"> | |
160 <param name="action_type" value="download_file"/> | 186 <param name="action_type" value="download_file"/> |
161 <param name="file_id" value="EGAF00001775036"/> | 187 <param name="file_id" value="EGAF00001775036"/> |
162 <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/> | 188 <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/> |
163 </test> | 189 </test> |
164 <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified --> | 190 <!-- download a single file, with genomic range specified --> |
191 <test expect_num_outputs="1"> | |
165 <param name="action_type" value="download_file"/> | 192 <param name="action_type" value="download_file"/> |
166 <param name="file_id" value="EGAF00001753756"/> | 193 <param name="file_id" value="EGAF00001753756"/> |
167 <param name="reference_name" value="1"/> | 194 <param name="reference_name" value="1"/> |
168 <param name="start" value="0"/> | 195 <param name="start" value="0"/> |
169 <param name="end" value="10000"/> | 196 <param name="end" value="10000"/> |
170 <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/> | 197 <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/> |
171 </test> | 198 </test> |
172 <test expect_num_outputs="1"> <!-- download multiple files --> | 199 <!-- download multiple files --> |
200 <test expect_num_outputs="1"> | |
173 <param name="action_type" value="download_files"/> | 201 <param name="action_type" value="download_files"/> |
174 <param name="id_table" value="filelist.tabular"/> | 202 <param name="id_table" value="filelist.tabular"/> |
175 <param name="id_column" value="1"/> | 203 <param name="id_column" value="1"/> |
204 <param name="file_column" value="5"/> | |
176 <output_collection name="downloaded_file_collection" type="list" count="2"> | 205 <output_collection name="downloaded_file_collection" type="list" count="2"> |
177 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" /> | 206 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" /> |
178 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" /> | 207 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" /> |
179 </output_collection> | 208 </output_collection> |
180 </test> | 209 <assert_stderr> |
181 <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range --> | 210 <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/> |
211 </assert_stderr> | |
212 </test> | |
213 <!-- download multiple files, in combination with a genomic range --> | |
214 <test expect_num_outputs="1"> | |
182 <param name="action_type" value="download_files"/> | 215 <param name="action_type" value="download_files"/> |
183 <param name="id_table" value="filelist2.tabular"/> | 216 <param name="id_table" value="filelist2.tabular"/> |
184 <param name="id_column" value="1"/> | 217 <param name="id_column" value="1"/> |
218 <param name="file_column" value="5"/> | |
185 <param name="reference_name" value="1"/> | 219 <param name="reference_name" value="1"/> |
186 <param name="start" value="0"/> | 220 <param name="start" value="0"/> |
187 <param name="end" value="10000"/> | 221 <param name="end" value="10000"/> |
188 <output_collection name="downloaded_file_collection" count="2"> | 222 <output_collection name="downloaded_file_collection" count="2"> |
189 <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" /> | 223 <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" /> |
190 <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" /> | 224 <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" /> |
191 </output_collection> | 225 </output_collection> |
226 <assert_stderr> | |
227 <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/> | |
228 </assert_stderr> | |
229 </test> | |
230 <!-- download multiple vcf.gz files --> | |
231 <test expect_num_outputs="1"> | |
232 <param name="action_type" value="download_files"/> | |
233 <param name="id_table" value="filelist3.tabular"/> | |
234 <param name="id_column" value="1"/> | |
235 <param name="file_column" value="5"/> | |
236 <output_collection name="downloaded_file_collection" type="list" count="2"> | |
237 <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz" md5="51cfb69bf3b9416ff425381a58c18a2b" /> | |
238 <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz" md5="ebad4425191a89d3e970c02190a87175" /> | |
239 </output_collection> | |
192 </test> | 240 </test> |
193 </tests> | 241 </tests> |
194 <help><![CDATA[ | 242 <help><![CDATA[ |
195 The pyEGA3 download client is a python-based tool for viewing and downloading files from authorized EGA datasets. | 243 The pyEGA3 download client is a python-based tool for viewing and downloading files from authorized EGA datasets. |
196 | 244 |