Mercurial > repos > kellrott > nosql_interface
diff bulk_download.xml @ 3:4d03df88688d draft default tip
Uploaded
author | kellrott |
---|---|
date | Tue, 24 Jul 2012 17:42:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bulk_download.xml Tue Jul 24 17:42:17 2012 -0400
@@ -0,0 +1,88 @@
+<tool id="bulk_download" name="Bulk Download" version="1.0">
+    <description>Bulk Downloader</description>
+    <!-- Runs the generated script below. Its arguments are, in order: the
+         output path, the output dataset id and the new-file directory. -->
+    <command interpreter="python">$script_file $output $output.id $__new_file_path__</command>
+    <!-- URLs come from the text box or from a dataset, one per line; the
+         dataset is used when both are supplied. -->
+    <inputs>
+        <param name="urls_txt" type="text" area="True" size="5x35" label="URL Text" optional="True"/>
+        <param name="urls_file" type="data" label="URL File" optional="True"/>
+        <param name="decompress" type="boolean" label="Decompress" value="true"/>
+
+    </inputs>
+    <outputs>
+        <data name="output"/>
+    </outputs>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[#!/usr/bin/env python
+"""Download each listed URL into the tool output, optionally gunzipping it.
+
+Arguments: OUTPUT OUTPUT_ID NEW_FILE_DIR
+
+The URL list and the decompress flag are substituted into this script by the
+tool template before it runs. Python 2 only (urllib.urlretrieve, StringIO).
+"""
+import gzip
+import os
+import shutil
+import sys
+import tempfile
+import urllib
+from StringIO import StringIO
+
+urls_txt = """${urls_txt}"""
+urls_file = """${urls_file}"""
+decompress = "${decompress}"
+
+output = sys.argv[1]
+# Read for completeness; neither value is used below (see the note on opath).
+output_id = sys.argv[2]
+output_dir = sys.argv[3]
+
+
+# "None" is presumably what an unset optional dataset renders as - TODO confirm.
+if urls_file and urls_file != "None":
+    handle = open(urls_file)
+else:
+    handle = StringIO(urls_txt)
+
+# NOTE(review): every URL is written to the same path, so with several URLs
+# only the last download survives. output_id and output_dir look like they
+# were meant for per-URL outputs - confirm the intended naming.
+opath = output
+try:
+    for line in handle:
+        url = line.strip()
+        if not url:
+            # Blank lines (such as a trailing newline) are not URLs.
+            continue
+        base = os.path.basename(url)
+        # Download into the working directory first, then unpack or move.
+        h, path = tempfile.mkstemp(dir="./")
+        os.close(h)
+        try:
+            urllib.urlretrieve(url, path)
+            if decompress == "true" and base.endswith(".gz"):
+                src = gzip.GzipFile(path, "rb")
+                try:
+                    dst = open(opath, "wb")
+                    try:
+                        shutil.copyfileobj(src, dst)
+                    finally:
+                        dst.close()
+                finally:
+                    src.close()
+            else:
+                # Nothing to unpack: the download itself becomes the output.
+                shutil.move(path, opath)
+        finally:
+            # Remove the temp file unless it was moved, also on failure.
+            if os.path.exists(path):
+                os.unlink(path)
+finally:
+    handle.close()
+
+]]></configfile>
+    </configfiles>
+</tool>