view data_manager_gene_annotation/data_manager/data_manager.py @ 10:6c874bd23a6d draft

Uploaded
author scottx611x
date Fri, 17 Jun 2016 16:36:28 -0400
parents 57ce598b7737
children 967e0c774dd8
line wrap: on
line source

import os
import sys
import uuid
import json
import argparse
import datetime
import requests
from requests.exceptions import ContentDecodingError

# Command-line interface of the script. Three optional flags are accepted:
#   --out   file name of the JSON document to write (stored as ``args.output``)
#   --name  identifier of the data table entry; when omitted, a random UUID
#           generated while the parser is being set up is used instead
#   --url   location the file is downloaded from (no default, so it is
#           ``None`` when the flag is not given)
parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument(
    '--out',
    dest='output',
    default="gene_annotation.json",
    help='JSON filename',
)
parser.add_argument(
    '--name',
    default=uuid.uuid4(),
    help='Data table entry unique ID',
)
parser.add_argument(
    '--url',
    help='Download URL',
)

# The command line is parsed as soon as this module is executed; the result is
# kept in the module-level ``args`` that the entry point hands to ``main``.
args = parser.parse_args()


def url_download(url, name, workdir):
    """Download ``url`` into ``workdir`` and return the path of the saved file.

    The payload is streamed in 10 MB chunks and written, as raw bytes, to
    ``<workdir>/download_<name>.dat``.

    :param url: address to fetch with an HTTP GET request.
    :param name: identifier embedded in the file name of the download.
    :param workdir: directory the file is written to; created when missing.
    :returns: path of the downloaded file.
    :raises IOError: when the server answers with an error status, the
        transfer fails or the file cannot be written (the exceptions raised
        by ``requests`` derive from ``IOError``). A partially written file
        is removed before the exception is propagated.
    """
    # Create the directory without a separate existence check, so a directory
    # that appears between "check" and "create" is not treated as an error.
    try:
        os.makedirs(workdir)
    except OSError:
        if not os.path.isdir(workdir):
            raise

    # Create path that we will write the file to
    file_path = os.path.join(workdir, 'download_{}.dat'.format(name))

    block_size = 10 * 1024 * 1024  # 10MB chunked download

    response = requests.get(url=url, stream=True)
    try:
        # Refuse to store an HTTP error page (404, 500, ...) as if it were
        # the requested data.
        response.raise_for_status()

        # iter_content() yields bytes, so the file has to be opened in binary
        # mode; text mode fails on Python 3 and may rewrite line endings.
        with open(file_path, 'wb') as f:
            for buf in response.iter_content(block_size):
                f.write(buf)
    except (ContentDecodingError, IOError) as e:
        # Trailing newline keeps the message apart from the traceback that
        # follows once the exception is re-raised.
        sys.stderr.write("Error occured downloading reference file: %s\n"
                         % e)
        # The file is already closed here, so removing it also works on
        # platforms that refuse to delete open files.
        if os.path.exists(file_path):
            os.remove(file_path)
        # Propagate the failure instead of returning the path of a file that
        # has just been deleted.
        raise
    finally:
        # Release the streamed connection in every case.
        response.close()

    return file_path


def main(args):
    """Download the gene annotation file and write its data manager JSON.

    The file behind ``args.url`` is stored in a ``gene_annotation`` directory
    below the current working directory. A JSON document describing the new
    ``gene_annotation`` data table entry is then written to ``args.output``,
    joined onto that same directory.

    :param args: parsed command-line arguments providing ``url``, ``name``
        and ``output``.
    """
    target_dir = os.path.join(os.getcwd(), 'gene_annotation')

    # Fetch the annotation file first; its location becomes part of the entry.
    downloaded_path = url_download(args.url, args.name, target_dir)

    # The entry identifier is used both as dbkey and as display name.
    entry_id = str(args.name)
    table_row = {
        'date': str(datetime.datetime.now()),
        'dbkey': entry_id,
        'name': entry_id,
        'path': downloaded_path,
    }
    payload = {'data_tables': {'gene_annotation': table_row}}

    json_path = os.path.join(target_dir, args.output)
    with open(json_path, "w+") as handle:
        json.dump(payload, handle)


# Run the download only when executed as a script, using the arguments
# parsed at module level.
if __name__ == '__main__':
    main(args)