Mercurial > repos > prog > mtblsdwnld
comparison mtbls-dwnld @ 0:8dab200e02cb draft
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
author | prog |
---|---|
date | Tue, 07 Jan 2020 09:05:21 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8dab200e02cb |
---|---|
1 #!/bin/bash | |
2 # vi: fdm=marker | |
3 | |
4 # Constants {{{1 | |
5 ################################################################ | |
6 | |
# Script identity. "$0" is quoted so that an installation path containing
# spaces is not word-split before reaching basename/dirname.
PROG_NAME=$(basename "$0")
PROG_PATH=$(dirname "$0")

# Helper script expected to sit next to this one.
ISASLICER="$PROG_PATH/isaslicer.py"

# Values used for the yes/no option flags.
YES=yes
NO=no

# Token used for public aspera downloads when the user gives none.
ASPERA_PUBLIC_TOKEN=Xz68YfDe

# Names of the two download programs.
ASCP=ascp
WGET=wget

# Platform detection: PLATFORM is the output of uname (when available) and
# DISTRIBUTION is extracted from /proc/version on Linux.
PLATFORM=
DISTRIBUTION=
if command -v uname >/dev/null 2>&1; then
  PLATFORM=$(uname)
fi
if [[ $PLATFORM == Linux && -e /proc/version ]]; then
  DISTRIBUTION=$(sed 's/^.*(\([^ ]*\).*$/\1/' /proc/version)
fi

# The dot progress style is requested everywhere except on Alpine.
[[ $DISTRIBUTION == Alpine ]] || WGET_FLAGS="--progress=dot"
20 | |
21 # Global variables {{{1 | |
22 ################################################################ | |
23 | |
# Option state filled in by read_args.

# Counters, incremented once per occurrence of the option.
DEBUG=0
QUIET=0

# Yes/no flags, empty until the matching option is seen.
ASPERA=''
COMPRESSED=''
METADATA_ONLY=''
PRIVATE=''
TMP_IN_OUTPUT=''

# Options that carry a value.
FACTOR_VALUE=''
HTML=''
OUTPUT=''
TOKEN=''
35 | |
36 # Print help {{{1 | |
37 ################################################################ | |
38 | |
function print_help {
  # Print the usage text on stdout. The here-document is left unquoted so
  # that $PROG_NAME and $ASPERA_PUBLIC_TOKEN are expanded in the text.
  cat <<EOF
Usage: $PROG_NAME [options] study

Retrieves a study from Metabolights database.
By default it uses the ftp server, but with -a option you can ask for using aspera server (you will need the aspera client ascp to be installed).

Options:
 -a, --aspera Use aspera server for downloading. You need the ascp client to be installed, it is freely available for linux 64. See http://downloads.asperasoft.com.
 -c, --compressed Output in compressed format. If unset, the output will be a directory.
 -f, --factor-value Filter study on a factor value. Example: "-f myfactor=myvalue". Only available with wget downloader.
 -g, --debug Debug mode.
 -h, --help Print this help message.
 -H, --html FILE Write HTML file that list files contained in output directory. Not compatible with compressed output.
 -M, --metadata-only Download only the metadata (ISA-Tab files i_*, m_*, s_* and a_*) files. This option has no effet if aspera is selected (option -a).
 -o, --output NAME Set the name of the output (both for directory output or compressed file output). By default the name of the output is determined by the download tool.
 -p, --private Indicate the study to download is not public. This is meant for aspera download, since the URL will be different for a public or a private study.
 -q, --quiet Does not print any output. Can be specified twice in order to be real quiet.
 -t, --token TOKEN Set the token or password to use. For aspera public download, if you don't specify a token, the default token '$ASPERA_PUBLIC_TOKEN' will be used.
 -T, --tmp-in-output If an output is specified and it is a directory (-c option must not be set), then use it for writing intermediate files.
EOF
}
59 | |
60 # Error {{{1 | |
61 ################################################################ | |
62 | |
function error {
  # Write the message given as $1 on stderr, prefixed with "ERROR: ",
  # then terminate the current shell with status 1.
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}
71 | |
72 # Debug {{{1 | |
73 ################################################################ | |
74 | |
# Print a debug message on stderr when debug mode is on.
# Globals:   DEBUG (read) - message is printed when it is 1 or more
# Arguments: $1 - message text
# Returns:   always 0, so that a disabled debug trace never looks like a
#            failure to the caller
function debug {

  local dbgmsg="$1"

  if [[ $DEBUG -ge 1 ]]; then
    echo "[DEBUG] $dbgmsg" >&2
  fi
  return 0
}
81 | |
82 | |
83 # Read args {{{1 | |
84 ################################################################ | |
85 | |
# Parse the command line and fill in the option globals.
# Globals:   ASPERA, COMPRESSED, DEBUG, FACTOR_VALUE, HTML, METADATA_ONLY,
#            OUTPUT, PRIVATE, QUIET, TMP_IN_OUTPUT, TOKEN, STUDY (written);
#            YES, NO, ASPERA_PUBLIC_TOKEN (read)
# Arguments: the script's command line ("$@")
# Exits:     with status 1 (through error) on an illegal or unknown option,
#            a value option without value, a wrong number of studies or a
#            missing token; with status 0 after printing help for -h.
function read_args {

  local args="$*" # saved only for the debug trace below
  local shift_count
  local bundle
  local i
  local -a expanded

  # Read options
  while true; do
    shift_count=1
    case $1 in
      -a|--aspera) ASPERA=$YES ;;
      -c|--compressed) COMPRESSED=$YES ;;
      -g|--debug) DEBUG=$((DEBUG + 1)) ;;
      -h|--help) print_help ; exit 0 ;;
      -M|--metadata-only) METADATA_ONLY=$YES ;;
      -p|--private) PRIVATE=$YES ;;
      -q|--quiet) QUIET=$((QUIET + 1)) ;;
      -T|--tmp-in-output) TMP_IN_OUTPUT=$YES ;;
      -f|--factor-value|-H|--html|-o|--output|-t|--token)
        # Without this check "shift 2" fails when the value is missing,
        # nothing is shifted and the loop would spin forever.
        [[ $# -ge 2 ]] || error "Option $1 requires an argument."
        case $1 in
          -f|--factor-value) FACTOR_VALUE="$2" ;;
          -H|--html) HTML="$2" ;;
          -o|--output) OUTPUT="$2" ;;
          -t|--token) TOKEN="$2" ;;
        esac
        shift_count=2
        ;;
      -|--|--*) error "Illegal option $1." ;;
      -?) error "Unknown option $1." ;;
      -[^-]*)
        # Bundled short options (e.g. -ac): re-inject them one by one right
        # after the bundle, which is then dropped by the shift below.
        # Only letters are accepted; anything else cannot be split and
        # would otherwise be re-examined endlessly.
        bundle=${1#-}
        [[ $bundle =~ ^[a-zA-Z]+$ ]] || error "Illegal option $1."
        expanded=()
        for ((i = 0; i < ${#bundle}; i++)); do
          expanded+=("-${bundle:i:1}")
        done
        set -- "$1" "${expanded[@]}" "${@:2}"
        ;;
      *) break ;;
    esac
    shift "$shift_count"
  done

  # Read remaining arguments
  [[ $# -eq 1 ]] || error "You must specify one, and only one, study to retrieve."
  STUDY="$1"

  # Check token
  if [[ -n $ASPERA && -z $TOKEN ]]; then
    if [[ -z $PRIVATE ]]; then
      TOKEN=$ASPERA_PUBLIC_TOKEN
    else
      error "You need to specify a token for retrieving private studies with aspera."
    fi
  fi
  [[ -z $PRIVATE || -n $TOKEN ]] || error "You need to set a token for retrieving private studies."

  # Turn off --tmp-in-output if --compressed is set or no output was given
  if [[ $TMP_IN_OUTPUT == "$YES" && ( $COMPRESSED == "$YES" || -z $OUTPUT ) ]]; then
    TMP_IN_OUTPUT=$NO
  fi

  # Debug
  debug "Arguments are : $args"
  debug "Study to retrieve is : $STUDY"
  debug "ASPERA=$ASPERA"
  debug "COMPRESSED=$COMPRESSED"
  debug "DEBUG=$DEBUG"
  debug "FACTOR_VALUE=$FACTOR_VALUE"
  debug "HTML=$HTML"
  debug "METADATA_ONLY=$METADATA_ONLY"
  debug "OUTPUT=$OUTPUT"
  debug "PRIVATE=$PRIVATE"
  debug "QUIET=$QUIET"
  debug "TMP_IN_OUTPUT=$TMP_IN_OUTPUT"
  debug "TOKEN=$TOKEN"
  if [[ -n $ASPERA ]]; then
    debug "Aspera will be used."
  fi
  if [[ -n $TOKEN ]]; then
    debug "Token/Password is \"$TOKEN\"."
  fi
}
151 | |
152 # Get download output path {{{1 | |
153 ################################################################ | |
154 | |
# Compute the path at which a downloader leaves its output.
# Globals:   WGET (read)
# Arguments: $1 - downloader name
#            $2 - study name
#            $3 - output directory (optional)
# Outputs:   the path on stdout: "<dir>/<study>" when a directory is given,
#            "<study>" otherwise, with ".zip" appended for the wget
#            downloader.
get_download_output_path() {

  local downloader="$1"
  local study_name="$2"
  local output_dir="$3"
  local output_path="$study_name"

  if [[ -n $output_dir ]]; then
    output_path="$output_dir/$output_path"
  fi

  # The right-hand side is quoted so that it is compared literally and not
  # used as a glob pattern.
  if [[ $downloader == "$WGET" ]]; then
    output_path+=".zip"
  fi

  echo "$output_path"
}
167 | |
168 # Download with ascp {{{1 | |
169 ################################################################ | |
170 | |
# Download a study with the aspera client.
# Globals:   ASCP, QUIET, PRIVATE, STUDY, TOKEN, YES (read);
#            ASPERA_SCP_PASS (exported, set to TOKEN)
# Arguments: $1 - study name; not used here, the remote path is built from
#                 the global STUDY
#            $2 - when equal to YES, restrict the transfer with the
#                 -N '?_*.t*' -E '*.*' -E 'p*' filters
#            $3 - directory in which to run the download (optional, created
#                 when missing)
# Exits:     with status 1 (through error) when the client is missing, the
#            directory cannot be used or the transfer fails.
download_with_ascp() {

  local metadata_only="$2"
  local output_dir="$3"
  local -a dwnld_flags=()
  local dwnld_link=
  local study_path=
  local curdir=
  local status=0

  # Check ascp
  command -v "$ASCP" >/dev/null 2>&1 || error "ascp command not found. Please install Aspera client, version 3.7.4 or greater. See http://downloads.asperasoft.com/en/downloads/62."
  debug "$ASCP: $(command -v "$ASCP")"
  debug "QUIET: $QUIET"

  # Silence downloader output
  [[ $QUIET -eq 0 ]] || dwnld_flags+=(-q)

  # Set download flags
  dwnld_flags+=(--policy=fair -T -l 1g -P33001)
  if [[ $metadata_only == "$YES" ]]; then
    dwnld_flags+=(-N '?_*.t*' -E '*.*' -E 'p*')
  fi

  # Set download link
  if [[ -z $PRIVATE ]]; then

    # Make full path for public study
    study_path=$STUDY
    if [[ -z ${study_path##MTBLS*} ]]; then
      study_path="/studies/public/$study_path"
    fi
    dwnld_link="fasp-ml@fasp.ebi.ac.uk:$study_path"

  # Private study
  else
    dwnld_link="mtblight@hx-fasp-1.ebi.ac.uk:$STUDY"
  fi

  # Export token
  debug "export ASPERA_SCP_PASS=\"$TOKEN\""
  export ASPERA_SCP_PASS="$TOKEN"

  # Run download command from inside the output directory, if any
  if [[ -n $output_dir ]]; then
    mkdir -p "$output_dir" || error "Cannot create directory \"$output_dir\"."
    curdir=$(pwd)
    cd "$output_dir" || error "Cannot enter directory \"$output_dir\"."
  fi
  debug "Download command: $ASCP ${dwnld_flags[*]} $dwnld_link ."
  # The status is captured right away: testing $? first and printing it
  # afterwards would report the status of the test, not of the transfer.
  "$ASCP" "${dwnld_flags[@]}" "$dwnld_link" . || status=$?
  if [[ -n $output_dir ]]; then
    cd "$curdir" || error "Cannot go back to directory \"$curdir\"."
  fi
  [[ $status -eq 0 ]] || error "Downloading of study $STUDY has failed. Error code returned is $status."
}
229 | |
230 # Download with wget {{{1 | |
231 ################################################################ | |
232 | |
# Download a study archive with wget.
# Globals:   WGET, WGET_FLAGS, QUIET, TOKEN, STUDY, PROG_NAME, YES (read)
# Arguments: $1 - study name
#            $2 - when equal to YES, fetch the "metadata" file instead of
#                 the file named after the study
#            $3 - directory receiving the archive (optional, created when
#                 missing)
# Result:    the archive is written to "<dir>/<study>.zip", or
#            "<study>.zip" when no directory is given.
# Exits:     with status 1 (through error) when wget is missing or fails.
download_with_wget() {

  local study_name="$1"
  local metadata_only="$2"
  local output_dir="$3"
  local -a dwnld_flags=()
  local file="$study_name"
  local dwnld_link=
  local output_path="$study_name.zip"
  local wget_log_file=
  local log_content=

  # Check wget
  command -v "$WGET" >/dev/null 2>&1 || error "wget command not found."
  debug "$WGET: $(command -v "$WGET")"

  # Set download link
  [[ $metadata_only != "$YES" ]] || file=metadata
  dwnld_link="https://www.ebi.ac.uk/metabolights/$study_name/files/$file"
  [[ -z $TOKEN ]] || dwnld_link+="?token=$TOKEN"

  # Set download output
  if [[ -n $output_dir ]]; then
    mkdir -p "$output_dir" || error "Cannot create directory \"$output_dir\"."
    output_path="$output_dir/$output_path"
  fi
  # Flags are kept in an array so that a path with spaces stays one word.
  dwnld_flags=(-O "$output_path")

  # Silence downloader output
  wget_log_file=$(mktemp -t "$PROG_NAME.XXXXXX") || error "Cannot create temporary log file."
  [[ $QUIET -eq 0 ]] || dwnld_flags+=(-q -o "$wget_log_file")

  # Run download command
  debug "Download command: $WGET $WGET_FLAGS ${dwnld_flags[*]} $dwnld_link"
  # WGET_FLAGS is deliberately unquoted: it may be empty or hold several
  # space-separated flags.
  # shellcheck disable=SC2086
  if ! "$WGET" $WGET_FLAGS "${dwnld_flags[@]}" "$dwnld_link"; then
    # Read the log before removing it, so the temp file is not left behind.
    log_content=$(cat "$wget_log_file")
    rm -f "$wget_log_file"
    error "Downloading of study $STUDY has failed. wget log file: $log_content"
  fi
  rm -f "$wget_log_file"
}
268 | |
269 # Download {{{1 | |
270 ################################################################ | |
271 | |
# Hand the download over to the function matching the chosen downloader:
# download_with_wget when $1 matches $WGET, download_with_ascp otherwise.
# $2 (study name), $3 (metadata-only flag) and $4 (output directory) are
# passed through unchanged.
download() {

  local downloader="$1"
  local output_dir="$4"

  case $downloader in
    $WGET) download_with_wget "$2" "$3" "$output_dir" ;;
    *) download_with_ascp "$2" "$3" "$output_dir" ;;
  esac
}
285 | |
286 # Check download {{{1 | |
287 ################################################################ | |
288 | |
# Verify that a download produced something usable.
# Arguments: $1 - path of the downloaded file or directory
# Exits:     with status 1 (through error) when the path is neither a
#            directory nor a regular file, or is an empty regular file.
check_download() {

  local download_path="$1"

  # The messages name the path that was actually checked.
  if [[ ! -d $download_path && ! -f $download_path ]]; then
    error "The downloading of $download_path failed. The output file doesn't exist."
  fi
  if [[ -f $download_path && ! -s $download_path ]]; then
    error "The downloading of $download_path failed. The output file is empty."
  fi
}
296 | |
297 # Unzip study {{{1 | |
298 ################################################################ | |
299 | |
# Extract a zip archive into a directory.
# Globals:   QUIET (read) - any non-zero value adds -qq to unzip
# Arguments: $1 - path of the zip archive
#            $2 - destination directory (optional); when empty, the archive
#                 path minus its last extension is used, and an existing
#                 directory at that path is removed first
# Outputs:   the destination directory on stdout; unzip's own output is
#            sent to stderr
# Exits:     with status 1 (through error) when unzip is missing or the
#            extraction cannot be carried out.
unzip_study() {

  local tmp_output="$1"
  local output="$2"
  local -a unzip_flags=()
  local zip="$tmp_output"
  local zip_abs_path=
  local curdir=

  # Silence unzip program
  [[ $QUIET -eq 0 ]] || unzip_flags=(-qq)

  debug "Unzipping file \"$tmp_output\"."
  [[ -z $output ]] || debug "Unzipping into \"$output\"."

  command -v unzip >/dev/null 2>&1 || error "unzip command not found. Please install zip package."

  debug "Zipped file is \"$zip\"."
  if [[ -z $output ]]; then
    output=${zip%.*}
    [[ ! -d $output ]] || rm -r "${output:?}"
  fi
  debug "Output directory will be \"$output\"."

  # The absolute path is needed because unzip runs from inside $output.
  zip_abs_path=$(realpath "$zip") || error "Cannot resolve path of \"$zip\"."
  curdir=$(pwd)
  debug "Current directory is \"$curdir\"."
  mkdir -p "$output" || error "Cannot create directory \"$output\"."

  # The subshell confines the directory change, so the caller's working
  # directory is untouched whatever happens.
  (cd "$output" && unzip "${unzip_flags[@]}" "$zip_abs_path" >&2) \
    || error "Unable to unzip archive $zip_abs_path."

  echo "$output"
}
331 | |
332 # As zip {{{1 | |
333 ################################################################ | |
334 | |
# Turn a folder into a zip archive; any other path is returned untouched.
# Arguments: $1 - path to convert
# Outputs:   the resulting path on stdout ("<path>.zip" for a folder).
#            Callers capture stdout to get that path, so zip's progress
#            listing is redirected to stderr.
# Exits:     with status 1 (through error) when zip is missing or fails; the
#            source folder is only removed after a successful compression.
as_zip() {

  local path="$1"
  local zip_file=

  if [[ -d $path ]]; then

    command -v zip >/dev/null 2>&1 || error "zip command not found. Please install zip package."

    zip_file="$path.zip"
    # -r so that the content of sub-folders is archived too, since the
    # folder is deleted right after.
    zip -r "$zip_file" "$path"/* >&2 || error "Unable to compress folder \"$path\"."
    rm -r "${path:?}"
    path="$zip_file"
  fi

  echo "$path"
}
349 | |
350 # As folder {{{1 | |
351 ################################################################ | |
352 | |
# Make sure a download ends up as a folder.
# Arguments: $1 - path of a zip archive or of a folder
#            $2 - wanted folder path (optional)
# Behavior:  a regular file whose extension is "zip" is extracted with
#            unzip_study and then deleted; a folder is moved into $2 when
#            $2 is set and differs from it; anything else is left alone.
# Outputs:   the resulting path on stdout
# Exits:     with status 1 (through error) when extraction or moving fails.
as_folder() {

  local path="$1"
  local output="$2"
  local folder=
  local -a entries=()

  debug "as_folder($path, $output)"
  if [[ -f $path && ${path##*.} == 'zip' ]]; then
    # unzip_study runs in a command substitution: its own exit only ends
    # that subshell, so the status has to be checked here before the
    # archive is removed.
    folder=$(unzip_study "$path" "$output") || error "Unable to extract \"$path\"."
    rm "$path"
    path="$folder"
  elif [[ -d $path && -n $output && $path != "$output" ]]; then
    mkdir -p "$output" || error "Cannot create directory \"$output\"."
    entries=("$path"/*)
    # An empty folder leaves the pattern unexpanded; there is nothing to
    # move in that case.
    if [[ -e ${entries[0]} ]]; then
      mv "${entries[@]}" "$output"/. || error "Unable to move content of \"$path\" into \"$output\"."
    fi
    rm -r "${path:?}"
    path="$output"
  fi

  echo "$path"
}
372 | |
373 # Make folder or zip {{{1 | |
374 ################################################################ | |
375 | |
# Convert a download into the requested final form.
# Globals:   YES (read)
# Arguments: $1 - path of the download (file or folder)
#            $2 - when equal to YES the result is a zip archive (as_zip),
#                 otherwise a folder (as_folder)
#            $3 - wanted folder path, only passed to as_folder (optional)
# Outputs:   the resulting path on stdout
# Exits:     with status 1 (through error) when the path does not exist or
#            the conversion fails.
make_folder_or_zip() {

  local path="$1"
  local compressed="$2"
  local output="$3"
  local result=

  debug "make_folder_or_zip($path, $compressed, $output)"
  [[ -e $path ]] || error "No file or folder at path \"$path\"."

  # The helpers run inside command substitutions, so a failure in them only
  # ends their subshell; the status is checked here to stop for real.
  if [[ $compressed == "$YES" ]]; then
    result=$(as_zip "$path") || error "Unable to compress \"$path\"."
  else
    result=$(as_folder "$path" "$output") || error "Unable to turn \"$path\" into a folder."
  fi

  echo "$result"
}
394 | |
395 # Write HTML file {{{1 | |
396 ################################################################ | |
397 | |
# Write an HTML page listing the ISA-Tab files of a study folder.
# Arguments: $1 - path of the HTML file to write (overwritten)
#            $2 - study folder to list
# Content:   a link to i_Investigation.txt, then one section each for the
#            s_* ("Study files"), a_* ("Assay files") and m_* ("Data files")
#            entries of the folder. Links are bare file names.
# Exits:     with status 1 (through error) when the file cannot be written.
# NOTE(review): the page uses <header> where HTML expects <head>; left
# untouched since consumers of the page are not visible here - confirm.
function write_html_file {

  local HTML=$1 # deliberately shadows the global of the same name
  local tmp_output=$2
  local section=
  local prefix=
  local title=
  local f=
  local filename=

  # One redirection for the whole page; quoting keeps a path containing
  # spaces usable.
  {
    cat <<'EOF'
<html>
<header>
<title>Metabolights study</title>
</header>
<body>
<a href="i_Investigation.txt">Investigation file</a><br/>
EOF

    for section in "s_:Study files" "a_:Assay files" "m_:Data files"; do
      prefix=${section%%:*}
      title=${section#*:}
      echo "<br/>"
      echo " $title:<br/>"
      for f in "$tmp_output/$prefix"*; do
        # When nothing matches, the pattern itself is returned: skip it
        # rather than emit a link to a file that does not exist.
        [[ -e $f ]] || continue
        filename=$(basename "$f")
        echo " <a href=\"$filename\">$filename</a><br/>"
      done
    done

    cat <<'EOF'
</body>
</html>
EOF
  } >"$HTML" || error "Cannot write HTML file \"$HTML\"."
}
438 | |
439 # Get data files {{{1 | |
440 ################################################################ | |
441 | |
# List the data files referenced in a JSON file.
# Arguments: $1 - path of a JSON file holding a list of objects, each with a
#                 'data_files' list of strings (as read by the code below)
# Outputs:   for each object, its 'data_files' entries, one per line, on
#            stdout
# Requires:  python3
get_data_files() {

  local json_file="$1"

  # The path is handed over as an argument and the here-document is quoted,
  # so the path is never pasted into the Python source: a quote or a
  # backslash in it can neither break nor alter the program.
  python3 - "$json_file" <<'EOF'
# @@@BEGIN_PYTHON@@@
import json
import sys
with open(sys.argv[1]) as f:
    data_list = json.load(f)
for elem in data_list:
    print("\n".join(elem['data_files']))
# @@@END_PYTHON@@@
EOF
}
457 | |
458 # MAIN {{{1 | |
459 ################################################################ | |
460 | |
# Main flow: parse options, download the study (whole, metadata only, or
# filtered on a factor value), check the result, convert it to the requested
# form (folder or zip), optionally write the HTML index, and finally move
# the result to the requested output name.

read_args "$@"

study_name=$(basename "$STUDY")
downloader=$WGET
output_dir=
[[ $ASPERA != "$YES" ]] || downloader=$ASCP
[[ $TMP_IN_OUTPUT != "$YES" ]] || output_dir="$OUTPUT"

# Download only part of the study using factor value
if [[ -n $FACTOR_VALUE ]]; then

  # Get factor name and value
  factor_name=${FACTOR_VALUE%%=*}
  factor_value=${FACTOR_VALUE#*=}

  # Download only metadata
  download "$downloader" "$study_name" "$YES" # XXX Download output must be written into $OUTPUT if TMP_IN_OUTPUT is set
  dwnld_output=$(get_download_output_path "$downloader" "$study_name")
  # as_folder runs in a subshell: check its status to stop on failure.
  dwnld_output=$(as_folder "$dwnld_output") || error "Unable to turn the downloaded metadata into a folder."

  # Temporary files are removed on every exit path, including errors.
  data_files=
  wget_log_file=
  trap 'rm -f -- "$data_files" "$wget_log_file"' EXIT

  # Get data files to download
  data_files=$(mktemp -t "$PROG_NAME.XXXXXX") || error "Cannot create temporary file." # XXX must be created into $OUTPUT if TMP_IN_OUTPUT is set
  [[ -x "$ISASLICER" ]] || error "Cannot find or run isaslicer.py script."
  # Resolved before the debug trace so that the trace shows the real value.
  abs_dwnld_output=$(realpath "$dwnld_output")
  debug "Run ISA slicer: \"$ISASLICER\" 'isa-tab-get-data-list' \"$abs_dwnld_output\" \"$data_files\" --json-query \"{ \\\"$factor_name\\\": \\\"$factor_value\\\" }\""
  "$ISASLICER" --log-level DEBUG 'isa-tab-get-data-list' "$abs_dwnld_output" "$data_files" --json-query "{ \"$factor_name\": \"$factor_value\" }" 2>&1 || error "Call to isaslicer failed."

  # Download data files
  wget_log_file=$(mktemp -t "$PROG_NAME.XXXXXX") || error "Cannot create temporary log file."
  dwnld_flags=()
  [[ $QUIET -eq 0 ]] || dwnld_flags=(-q -o "$wget_log_file")
  # The list is fed through process substitution, not a pipe, so the loop
  # runs in this shell and a failure really stops the script.
  while IFS= read -r file; do
    [[ -n $file ]] || continue
    dwnld_link="https://www.ebi.ac.uk/metabolights/$study_name/files/$file"
    debug "Download command: $WGET $WGET_FLAGS ${dwnld_flags[*]} $dwnld_link"
    # WGET_FLAGS is deliberately unquoted: it may be empty or hold several flags.
    # shellcheck disable=SC2086
    "$WGET" $WGET_FLAGS "${dwnld_flags[@]}" -O "$dwnld_output/$file" "$dwnld_link" \
      || error "Downlad of study file \"$file\" has failed. wget log file: $(cat "$wget_log_file")" # XXX Download output must be written into $OUTPUT if TMP_IN_OUTPUT is set
  done < <(get_data_files "$data_files" | sort | uniq)
  rm -f "$wget_log_file"

  # Remove data files list
  rm -f "$data_files"

# Download whole study
else

  # Download whole study
  download "$downloader" "$study_name" "$METADATA_ONLY" "$output_dir" # XXX Download output must be written into $OUTPUT if TMP_IN_OUTPUT is set

  # Get output path
  dwnld_output=$(get_download_output_path "$downloader" "$study_name" "$output_dir") # XXX Correct output path when TMP_IN_OUTPUT is set
fi

# Check output
check_download "$dwnld_output"

# Output in right format (zipped or folder)
# make_folder_or_zip runs in a subshell: check its status to stop on failure.
dwnld_output=$(make_folder_or_zip "$dwnld_output" "$COMPRESSED" "$output_dir") || error "Unable to produce the final output." # XXX zip must be written into $OUTPUT and unzipped into $OUTPUT if TMP_IN_OUTPUT is set

# Output HTML
[[ -z $HTML || ! -d $dwnld_output ]] || write_html_file "$HTML" "$dwnld_output"

# Rename output
[[ -z $OUTPUT || $TMP_IN_OUTPUT == "$YES" ]] || mv "$dwnld_output" "$OUTPUT"