changeset 0:8dab200e02cb draft

"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
author prog
date Tue, 07 Jan 2020 09:05:21 -0500
parents
children 1fd8547867be
files README.md isaslicer.py mtbls-dwnld mtbls-dwnld.xml test-data/MTBLS2.html
diffstat 5 files changed, 2144 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Tue Jan 07 09:05:21 2020 -0500
@@ -0,0 +1,40 @@
+Metabolights Downloader
+=======================
+
+[![Build Status](https://travis-ci.org/workflow4metabolomics/mtbls-dwnld.svg?branch=master)](https://travis-ci.org/workflow4metabolomics/mtbls-dwnld)
+
+A [Metabolights](http://www.ebi.ac.uk/metabolights/) study downloader for [Galaxy](https://galaxyproject.org/), part of the [Workflow4Metabolomics](http://workflow4metabolomics.org/) project, and developed during the [PhenoMeNal](http://phenomenal-h2020.eu/home/) project.
+
+For more information, see the help section of the Galaxy tool page, defined inside `mtbls-dwnld.xml`.
+
+## Requirements
+
+ * Python 3.8
+ * isatools 0.10.3
+ * `unzip` program.
+ * `wget` program.
+ * `ascp` (Aspera secure copy) program, version 3.7.4 or greater, which you can download [here](http://downloads.asperasoft.com/en/downloads/62).
+ 
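+## Example
+
+A minimal command-line sketch (the study ID is illustrative; see the help message of `mtbls-dwnld` for the full set of options):
+
+    # Download the public study MTBLS2 with the default downloader (wget):
+    mtbls-dwnld MTBLS2
+
+    # Download only the ISA-Tab metadata files:
+    mtbls-dwnld -M MTBLS2
+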
+## Updates
+
+### 4.1.4
+
+ * Use MTBLS29 instead of MTBLS30 for testing, since the latter has been removed from the public repository.
+ * Make planemo tests pass on Travis-CI.
+ * Retry ascp downloads several times in tests on Travis-CI, due to connection issues.
+
+### 2.0.4
+
+ * Test for the presence of the downloader program.
+ * Fix the invocation of `wget`.
+
+### 2.0.3
+
+ * Allow choosing the download method (`wget` or `aspera`).
+ * Ask the user to specify whether the study to download is public or private.
+ * Allow downloading all assays at once and converting them to W4M format.
+ * Allow downloading *mzML* and *mzData* files as collections.
+
+### 1.3.0
+
+ * Allow choosing the assay to extract and convert to W4M format.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/isaslicer.py	Tue Jan 07 09:05:21 2020 -0500
@@ -0,0 +1,1362 @@
+#!/usr/bin/env python3
+
+import argparse
+import glob
+import json
+import logging
+import os
+import re
+import shutil
+import sys
+import tempfile
+import zipfile
+
+import pandas as pd
+from isatools import isatab
+from isatools.model import OntologyAnnotation
+from isatools.net import mtbls as MTBLS
+
+logger = None
+
+#    isaslicer.py <command> <study_id> [ command-specific options ]
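+#
+# For example (study ID and file names are illustrative):
+#
+#    isaslicer.py mtbls-get-study MTBLS1 ./MTBLS1 --isa-format isa-tab
+#    isaslicer.py mtbls-get-factors MTBLS1 factors.json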
+
+
+def make_parser():
+    parser = argparse.ArgumentParser(description="ISA slicer")
+
+    parser.add_argument('--log-level', choices=[
+        'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'],
+        default='INFO', help="Set the desired logging level")
+
+    subparsers = parser.add_subparsers(
+        title='Actions',
+        dest='command')  # specified subcommand will be available in attribute 'command'
+    subparsers.required = True
+
+    # mtblisa commands
+
+    subparser = subparsers.add_parser(
+        'mtbls-get-study-archive', aliases=['gsa'],
+        help="Get ISA study from MetaboLights as zip archive")
+    subparser.set_defaults(func=get_study_archive_command)
+    subparser.add_argument('study_id')
+    subparser.add_argument(
+        'output', metavar="OUTPUT",
+        help="Name of output archive (extension will be added)")
+    subparser.add_argument('--format', metavar="FMT", choices=[
+        'zip', 'tar', 'gztar', 'bztar', 'xztar'], default='zip',
+        help="Type of archive to create")
+
+    subparser = subparsers.add_parser('mtbls-get-study', aliases=['gs'],
+                                      help="Get ISA study from MetaboLights")
+    subparser.set_defaults(func=get_study_command)
+    subparser.add_argument('study_id')
+    subparser.add_argument('output', metavar="PATH", help="Name of output")
+    subparser.add_argument(
+        '-f', '--isa-format', choices=['isa-tab', 'isa-json'],
+        metavar="FORMAT", default='isa-tab', help="Desired ISA format")
+
+    subparser = subparsers.add_parser(
+        'mtbls-get-factors', aliases=['gf'],
+        help="Get factor names from a study in json format")
+    subparser.set_defaults(func=get_factors_command)
+    subparser.add_argument('study_id')
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser(
+        'mtbls-get-factor-values', aliases=['gfv'],
+        help="Get factor values from a study in json format")
+    subparser.set_defaults(func=get_factor_values_command)
+    subparser.add_argument('study_id')
+    subparser.add_argument(
+        'factor', help="The desired factor. Use `get-factors` to get the list "
+                       "of available factors")
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser('mtbls-get-data-list', aliases=['gd'],
+                                      help="Get data files list in json format")
+    subparser.set_defaults(func=get_data_files_command)
+    subparser.add_argument('study_id')
+    subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+                           help="Output file")
+    subparser.add_argument(
+        '--json-query',
+        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
+    subparser.add_argument(
+        '--galaxy_parameters_file',
+        help="Path to JSON file containing input Galaxy JSON")
+
+    subparser = subparsers.add_parser(
+        'mtbls-get-factors-summary', aliases=['gsum'],
+        help="Get the variables summary from a study, in json format")
+    subparser.set_defaults(func=get_summary_command)
+    subparser.add_argument('study_id')
+    subparser.add_argument(
+        'json_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output JSON file")
+    subparser.add_argument(
+        'html_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output HTML file")
+
+    # isaslicer commands on path to unpacked ISA-Tab as input
+
+    subparser = subparsers.add_parser(
+        'isa-tab-get-factors', aliases=['isagf'],
+        help="Get factor names from a study in json format")
+    subparser.set_defaults(func=isatab_get_factor_names_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser(
+        'zip-get-factors', aliases=['zipgf'],
+        help="Get factor names from a study in json format")
+    subparser.set_defaults(func=zip_get_factor_names_command)
+    subparser.add_argument('input_path', type=str,
+                           help="Input ISA-Tab zip path")
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser(
+        'isa-tab-get-factor-values', aliases=['isagfv'],
+        help="Get factor values from a study in json format")
+    subparser.set_defaults(func=isatab_get_factor_values_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
+    subparser.add_argument(
+        'factor', help="The desired factor. Use `get-factors` to get the list "
+                       "of available factors")
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser(
+        'zip-get-factor-values', aliases=['zipgfv'],
+        help="Get factor values from a study in json format")
+    subparser.set_defaults(func=zip_get_factor_values_command)
+    subparser.add_argument('input_path', type=str,
+                           help="Input ISA-Tab zip path")
+    subparser.add_argument(
+        'factor', help="The desired factor. Use `get-factors` to get the list "
+                       "of available factors")
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser('isa-tab-get-data-list', aliases=['isagdl'],
+                                      help="Get data files list in json format")
+    subparser.set_defaults(func=isatab_get_data_files_list_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
+    subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+                           help="Output file")
+    subparser.add_argument(
+        '--json-query',
+        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
+    subparser.add_argument(
+        '--galaxy_parameters_file',
+        help="Path to JSON file containing input Galaxy JSON")
+
+    subparser = subparsers.add_parser('zip-get-data-list', aliases=['zipgdl'],
+                                      help="Get data files list in json format")
+    subparser.set_defaults(func=zip_get_data_files_list_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path")
+    subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+                           help="Output file")
+    subparser.add_argument(
+        '--json-query',
+        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
+    subparser.add_argument(
+        '--galaxy_parameters_file',
+        help="Path to JSON file containing input Galaxy JSON")
+
+    subparser = subparsers.add_parser('isa-tab-get-data-collection', aliases=['isagdc'],
+                                      help="Get data files collection")
+    subparser.set_defaults(func=isatab_get_data_files_collection_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
+    subparser.add_argument('output_path', type=str, help="Output data files path")
+    subparser.add_argument(
+        '--json-query',
+        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
+    subparser.add_argument(
+        '--galaxy_parameters_file',
+        help="Path to JSON file containing input Galaxy JSON")
+
+    subparser = subparsers.add_parser('zip-get-data-collection', aliases=['zipgdc'],
+                                      help="Get data files collection")
+    subparser.set_defaults(func=zip_get_data_files_collection_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path")
+    subparser.add_argument('output_path', type=str, help="Output data files path")
+    subparser.add_argument(
+        '--json-query',
+        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
+
+    subparser = subparsers.add_parser(
+        'isa-tab-get-factors-summary', aliases=['isasum'],
+        help="Get the variables summary from a study, in json format")
+    subparser.set_defaults(func=isatab_get_factors_summary_command)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
+    subparser.add_argument(
+        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
+        help="Output file")
+
+    subparser = subparsers.add_parser(
+        'zip-get-factors-summary', aliases=['zipsum'],
+        help="Get the variables summary from a study, in json format")
+    subparser.set_defaults(func=zip_get_factors_summary_command)
+    subparser.add_argument('input_path', type=str,
+                           help="Input ISA-Tab zip path")
+    subparser.add_argument(
+        'json_output', nargs='?', type=argparse.FileType('w'),
+        default=sys.stdout,
+        help="Output JSON file")
+    subparser.add_argument(
+        'html_output', nargs='?', type=argparse.FileType('w'),
+        default=sys.stdout,
+        help="Output HTML file")
+
+    subparser = subparsers.add_parser(
+        'isaslicer2-slice', aliases=['slice2'],
+        help="Slice ISA-Tabs version 2")
+    subparser.set_defaults(func=query_isatab)
+    subparser.add_argument('--source_dir', type=str,
+                           help="Input ISA-Tab directory path")
+    subparser.add_argument(
+        '--galaxy_parameters_file', type=argparse.FileType(mode='r'),
+        help="Path to JSON file containing input Galaxy JSON")
+    subparser.add_argument('--output', type=argparse.FileType(mode='w'),
+                           help="Output JSON file with the slice results")
+
+    subparser = subparsers.add_parser(
+        'filter-data', aliases=['filter'],
+        help="Filter out data based on slicer2")
+    subparser.set_defaults(func=filter_data)
+    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
+    subparser.add_argument('output_path', type=str, help="Output data files path")
+    subparser.add_argument('--slice', type=argparse.FileType(mode='r'),
+                           help="slice")
+    subparser.add_argument('--filename_filter', type=str, help="shell-like wildcard to filter files")
+
+    return parser
+
+
+def filter_data(options):
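+    """Symlink into output_path the files from input_path that match the
+    shell-style filename_filter and appear in the data_files lists of the
+    given slice JSON (as produced by the isaslicer2-slice command). A log
+    of the linked files is written to cli.log."""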
+    loglines = []
+    source_dir = options.input_path if options.input_path else ""
+    output_path = options.output_path
+    filename_filter = options.filename_filter
+    if source_dir:
+        if not os.path.exists(source_dir):
+            raise IOError('Source path does not exist!')
+    data_files = []
+    slice_json = options.slice
+    for result in json.load(slice_json)['results']:
+        data_files.extend(result.get('data_files', []))
+    reduced_data_files = list(set(data_files))
+    filtered_files = glob.glob(os.path.join(source_dir, filename_filter))
+    to_copy = []
+    for filepath in filtered_files:
+        if os.path.basename(filepath) in reduced_data_files:
+            to_copy.append(filepath)
+    loglines.append("Using slice results from {}\n".format(slice_json.name))
+    for filepath in to_copy:
+        loglines.append("Copying {}\n".format(os.path.basename(filepath)))
+        # try:
+        #     shutil.copyfile(
+        #         filepath, os.path.join(output_path, os.path.basename(filepath)))
+        # except Exception as e:
+        #     print(e)
+        #     exit(1)
+        try:
+            os.symlink(
+                filepath, os.path.join(output_path, os.path.basename(filepath)))
+        except Exception as e:
+            print(e)
+            exit(1)
+    with open('cli.log', 'w') as fp:
+        fp.writelines(loglines)
+
+
+def query_isatab(options):
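+    """Slice an ISA-Tab according to the query in the Galaxy parameters
+    file: select assays by measurement and technology type, then samples
+    by factor values and characteristics, and finally collect the matching
+    data file names (optionally restricted by process parameter values)
+    into a JSON document written to output."""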
+    source_dir = options.source_dir if options.source_dir else ""
+    galaxy_parameters_file = options.galaxy_parameters_file
+    output = options.output
+
+    debug = True
+    if galaxy_parameters_file:
+        galaxy_parameters = json.load(galaxy_parameters_file)
+        print('Galaxy parameters:')
+        print(json.dumps(galaxy_parameters, indent=4))
+    else:
+        raise IOError('Could not load Galaxy parameters file!')
+    if source_dir:
+        if not os.path.exists(source_dir):
+            raise IOError('Source path does not exist!')
+    query = galaxy_parameters['query']
+    if debug:
+        print('Query is:')
+        print(json.dumps(query, indent=4))  # for debugging only
+    if source_dir:
+        investigation = isatab.load(source_dir)
+    else:
+        tmp = tempfile.mkdtemp()
+        _ = MTBLS.get(galaxy_parameters['input']['mtbls_id'], tmp)
+        investigation = isatab.load(tmp)
+    # filter assays by mt/tt
+    matching_assays = []
+    mt = (query.get('measurement_type') or '').strip()
+    tt = (query.get('technology_type') or '').strip()
+    if mt and tt:
+        for study in investigation.studies:
+            matching_assays.extend(
+                [x for x in study.assays if x.measurement_type.term == mt
+                 and x.technology_type.term == tt])
+    elif mt and not tt:
+        for study in investigation.studies:
+            matching_assays.extend(
+                [x for x in study.assays if x.measurement_type.term == mt])
+    elif not mt and tt:
+        for study in investigation.studies:
+            matching_assays.extend(
+                [x for x in study.assays if x.technology_type.term == tt])
+    else:
+        for study in investigation.studies:
+            matching_assays.extend(study.assays)
+    assay_samples = []
+    for assay in matching_assays:
+        assay_samples.extend(assay.samples)
+    if debug:
+        print('Total samples: {}'.format(len(assay_samples)))
+
+    # filter samples by fv
+    factor_selection = {
+        x.get('factor_name').strip(): x.get('factor_value').strip() for x in
+        query.get('factor_selection', [])}
+
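+    # Two-pass AND filter: first collect every sample matching any selected
+    # factor value, then remove samples that mismatch any other selection,
+    # so only samples matching all selected factor values remain.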
+    fv_samples = set()
+    if factor_selection:
+        samples_to_remove = set()
+        for f, v in factor_selection.items():
+            for sample in assay_samples:
+                for fv in [x for x in sample.factor_values if
+                           x.factor_name.name == f]:
+                    if isinstance(fv.value, OntologyAnnotation):
+                        if fv.value.term == v:
+                            fv_samples.add(sample)
+                    elif fv.value == v:
+                        fv_samples.add(sample)
+        for f, v in factor_selection.items():
+            for sample in fv_samples:
+                for fv in [x for x in sample.factor_values if
+                           x.factor_name.name == f]:
+                    if isinstance(fv.value, OntologyAnnotation):
+                        if fv.value.term != v:
+                            samples_to_remove.add(sample)
+                    elif fv.value != v:
+                        samples_to_remove.add(sample)
+        final_fv_samples = fv_samples.difference(samples_to_remove)
+    else:
+        final_fv_samples = assay_samples
+
+    # filter samples by characteristic
+    characteristics_selection = {
+        x.get('characteristic_name').strip():
+            x.get('characteristic_value').strip() for x in
+            query.get('characteristics_selection', [])}
+
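+    # Same two-pass AND logic as for factor values, additionally checking
+    # the characteristics of each sample's source materials.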
+    cv_samples = set()
+    if characteristics_selection:
+        first_pass = True
+        samples_to_remove = set()
+        for c, v in characteristics_selection.items():
+            if first_pass:
+                for sample in final_fv_samples:
+                    for cv in [x for x in sample.characteristics if
+                               x.category.term == c]:
+                        if isinstance(cv.value, OntologyAnnotation):
+                            if cv.value.term == v:
+                                cv_samples.add(sample)
+                        elif cv.value == v:
+                            cv_samples.add(sample)
+                    for source in sample.derives_from:
+                        for cv in [x for x in source.characteristics if
+                                   x.category.term == c]:
+                            if isinstance(cv.value, OntologyAnnotation):
+                                if cv.value.term == v:
+                                    cv_samples.add(sample)
+                            elif cv.value == v:
+                                cv_samples.add(sample)
+                first_pass = False
+            else:
+                for sample in cv_samples:
+                    for cv in [x for x in sample.characteristics if
+                               x.category.term == c]:
+                        if isinstance(cv.value, OntologyAnnotation):
+                            if cv.value.term != v:
+                                samples_to_remove.add(sample)
+                        elif cv.value != v:
+                            samples_to_remove.add(sample)
+                    for source in sample.derives_from:
+                        for cv in [x for x in source.characteristics if
+                                   x.category.term == c]:
+                            if isinstance(cv.value, OntologyAnnotation):
+                                if cv.value.term != v:
+                                    samples_to_remove.add(sample)
+                            elif cv.value != v:
+                                samples_to_remove.add(sample)
+        final_cv_samples = cv_samples.difference(samples_to_remove)
+    else:
+        final_cv_samples = final_fv_samples
+
+    # filter samples by process parameter
+    parameters_selection = {
+        x.get('parameter_name').strip():
+            x.get('parameter_value').strip() for x in
+        query.get('parameter_selection', [])}
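+    # the parameter selection is applied later, per assay table row, when
+    # the data file names are collected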
+
+    final_samples = final_cv_samples
+
+    if debug:
+        print('Final number of samples: {}'.format(len(final_samples)))
+    results = []
+    for sample in final_samples:
+        results.append({
+            'sample_name': sample.name,
+            'data_files': []
+        })
+    for result in results:
+        sample_name = result['sample_name']
+        if source_dir:
+            table_files = glob.iglob(os.path.join(source_dir, 'a_*'))
+        else:
+            table_files = glob.iglob(os.path.join(tmp, 'a_*'))
+        for table_file in table_files:
+            with open(table_file) as fp:
+                df = isatab.load_table(fp)
+                data_files = []
+                table_headers = list(df.columns.values)
+                sample_rows = df.loc[df['Sample Name'] == sample_name]
+                data_node_labels = [
+                    'Raw Data File', 'Raw Spectral Data File',
+                    'Derived Spectral Data File',
+                    'Derived Array Data File', 'Array Data File',
+                    'Protein Assignment File', 'Peptide Assignment File',
+                    'Post Translational Modification Assignment File',
+                    'Acquisition Parameter Data File',
+                    'Free Induction Decay Data File',
+                    'Derived Array Data Matrix File', 'Image File',
+                    'Derived Data File', 'Metabolite Assignment File']
+                if parameters_selection:
+                    for p, v in parameters_selection.items():
+                        sample_pv_rows = sample_rows.loc[
+                            sample_rows['Parameter Value[{}]'.format(p)] == v]
+                        for node_label in data_node_labels:
+                            if node_label in table_headers:
+                                data_files.extend(
+                                    list(sample_pv_rows[node_label]))
+                    result['data_files'].extend(list(set(
+                        i for i in list(data_files) if
+                        str(i) not in ('nan', ''))))
+                else:
+                    for node_label in data_node_labels:
+                        if node_label in table_headers:
+                            data_files.extend(list(sample_rows[node_label]))
+                    result['data_files'].extend(
+                        list(set(i for i in list(data_files) if
+                                 str(i) not in ('nan', ''))))
+    results_json = {
+        'query': query,
+        'results': results
+    }
+    json.dump(results_json, output, indent=4)
+
+    # if galaxy_parameters['input']['collection_output']:
+    #     logger = logging.getLogger()
+    #     logger.debug("copying data files to %s", os.path.dirname(output))
+    #     for result in results:
+    #         for data_file_name in result['data_files']:
+    #             logging.info("Copying {}".format(data_file_name))
+    #             shutil.copy(os.path.join(source_dir, data_file_name),
+    #                         os.path.dirname(output))
+    #     logger.info(
+    #       "Finished writing data files to {}".format(os.path.dirname(output)))
+
+
+def get_study_archive_command(options):
+    study_id = options.study_id
+
+    logger.info("Downloading study %s into archive at path %s.%s",
+                study_id, options.output, options.format)
+
+    tmpdir = MTBLS.get(study_id)
+    logger.debug("MTBLS.get returned '%s'", tmpdir)
+    if tmpdir is not None:
+        try:
+            shutil.make_archive(
+                options.output, options.format, tmpdir, logger=logger)
+            logger.info("ISA archive written")
+        finally:
+            logger.debug("Trying to clean up tmp dir %s", tmpdir)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+    else:
+        raise RuntimeError("Error downloading ISA study")
+
+# mtblisa commands
+
+
+def get_study_command(options):
+    if os.path.exists(options.output):
+        raise RuntimeError("Selected output path {} already exists!".format(
+            options.output))
+
+    if options.isa_format == "isa-tab":
+        tmp_data = None
+        try:
+            logger.info("Downloading study %s", options.study_id)
+            tmp_data = MTBLS.get(options.study_id)
+            if tmp_data is None:
+                raise RuntimeError("Error downloading ISA study")
+
+            logger.debug(
+                "Finished downloading data. Moving to final location %s",
+                options.output)
+            shutil.move(tmp_data, options.output)
+            logger.info("ISA archive written to %s", options.output)
+        finally:
+            if tmp_data:
+                # try to clean up any temporary files left behind
+                logger.debug("Deleting %s, if there's anything there", tmp_data)
+                shutil.rmtree(tmp_data, ignore_errors=True)
+    elif options.isa_format == "isa-json":
+        isajson = MTBLS.getj(options.study_id)
+        if isajson is None:
+            raise RuntimeError("Error downloading ISA study")
+
+        logger.debug(
+            "Finished downloading data. Dumping json to final location %s",
+            options.output)
+        os.makedirs(options.output)
+        json_file = os.path.join(options.output, "{}.json".format(
+            isajson['identifier']))
+        with open(json_file, 'w') as fd:
+            json.dump(isajson, fd)
+        logger.info("ISA-JSON written to %s", options.output)
+    else:
+        raise ValueError("BUG! Got an invalid isa format '{}'".format(
+            options.isa_format))
+
+
+def get_factors_command(options):
+    logger.info("Getting factors for study %s. Writing to %s.",
+                options.study_id, options.output.name)
+    factor_names = MTBLS.get_factor_names(options.study_id)
+    if factor_names is not None:
+        json.dump(list(factor_names), options.output, indent=4)
+        logger.debug("Factor names written")
+    else:
+        raise RuntimeError("Error downloading factors.")
+
+
+def get_factor_values_command(options):
+    logger.info("Getting values for factor {factor} in study {study_id}. Writing to {output_file}."
+                .format(factor=options.factor, study_id=options.study_id, output_file=options.output.name))
+    fvs = MTBLS.get_factor_values(options.study_id, options.factor)
+    if fvs is not None:
+        json.dump(list(fvs), options.output, indent=4)
+        logger.debug("Factor values written to {}".format(options.output))
+    else:
+        raise RuntimeError("Error getting factor values")
+
+
+def get_data_files_command(options):
+    logger.info("Getting data files for study %s. Writing to %s.",
+                options.study_id, options.output.name)
+    if options.json_query:
+        logger.debug("This is the specified query:\n%s", options.json_query)
+        json_struct = json.loads(options.json_query)
+        data_files = MTBLS.get_data_files(options.study_id, json_struct)
+    elif options.galaxy_parameters_file:
+        logger.debug("Using input Galaxy JSON parameters from:\n%s",
+                     options.galaxy_parameters_file)
+        with open(options.galaxy_parameters_file) as json_fp:
+            galaxy_json = json.load(json_fp)
+            json_struct = {}
+            for fv_item in galaxy_json['factor_value_series']:
+                json_struct[fv_item['factor_name']] = fv_item['factor_value']
+            data_files = MTBLS.get_data_files(options.study_id, json_struct)
+    else:
+        logger.debug("No query was specified")
+        data_files = MTBLS.get_data_files(options.study_id)
+
+    logger.debug("Result data files list: %s", data_files)
+    if data_files is None:
+        raise RuntimeError("Error getting data files with isatools")
+
+    logger.debug("dumping data files to %s", options.output.name)
+    json.dump(list(data_files), options.output, indent=4)
+    logger.info("Finished writing data files to {}".format(options.output))
+
+
+def build_html_data_files_list(data_files_list):
+    data_files_table = '<table>'
+    data_files_table += '<tr><th>Sample Name</th><th>Data File Names</th></tr>'
+    for data_file in data_files_list:
+        sample_name = data_file['sample']
+        data_files = ', '.join(data_file['data_files'])
+        data_files_table += '<tr><td>{sample_name}</td><td>{data_files}</td>' \
+            .format(sample_name=sample_name, data_files=data_files)
+    html_data_files_list = """
+    <html>
+    <head>
+    <title>ISA-Tab Factors Summary</title>
+    </head>
+    <body>
+    {summary_table}
+    </body>
+    </html>
+""".format(summary_table=data_files_table)
+    return html_data_files_list
+
+
+def build_html_summary(summary):
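+    """Group samples by their combination of factor values and render an
+    HTML table of study groups and their sample counts."""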
+    study_groups = {}
+    for item in summary:
+        sample_name = item['sample_name']
+        study_factors = []
+        for factor_name, factor_value in [
+                x for x in item.items() if x[0] != "sample_name"]:
+            study_factors.append(': '.join([factor_name, str(factor_value)]))
+        study_group = ', '.join(study_factors)
+        if study_group not in study_groups.keys():
+            study_groups[study_group] = []
+        study_groups[study_group].append(sample_name)
+    summary_table = '<table>'
+    summary_table += '<tr><th>Study group</th><th>Number of samples</th></tr>'
+    for item in study_groups.items():
+        study_group = item[0]
+        num_samples = len(item[1])
+        summary_table += '<tr><td>{study_group}</td><td>{num_samples}</td>' \
+            .format(study_group=study_group, num_samples=num_samples)
+    summary_table += '</table>'
+    html_summary = """
+<html>
+<head>
+<title>ISA-Tab Factors Summary</title>
+</head>
+<body>
+{summary_table}
+</body>
+</html>
+""".format(summary_table=summary_table)
+    return html_summary
+
+
+def get_summary_command(options):
+    logger.info("Getting summary for study %s. Writing to %s.",
+                options.study_id, options.json_output.name)
+
+    summary = MTBLS.get_study_variable_summary(options.study_id)
+    # new_summary = []
+    # for item in summary:
+    #     new_summary.append(
+    #         {k: v for k, v in item.items() if k is not "sample_name"})
+    # summary = new_summary
+    if summary is not None:
+        json.dump(summary, options.json_output, indent=4)
+        logger.debug("Summary dumped to JSON")
+        html_summary = build_html_summary(summary)
+        with options.html_output as html_fp:
+            html_fp.write(html_summary)
+    else:
+        raise RuntimeError("Error getting study summary")
+
+
+# isaslicer commands
+
+def isatab_get_data_files_list_command(options):
+    logger.info("Getting data files for study %s. Writing to %s.",
+                options.input_path, options.output.name)
+    if options.json_query:
+        logger.debug("This is the specified query:\n%s", options.json_query)
+        json_struct = json.loads(options.json_query)
+    elif options.galaxy_parameters_file:
+        logger.debug("Using input Galaxy JSON parameters from:\n%s",
+                     options.galaxy_parameters_file)
+        with open(options.galaxy_parameters_file) as json_fp:
+            galaxy_json = json.load(json_fp)
+            json_struct = {}
+            for fv_item in galaxy_json['factor_value_series']:
+                json_struct[fv_item['factor_name']] = fv_item['factor_value']
+    else:
+        logger.debug("No query was specified")
+        json_struct = None
+    factor_selection = json_struct
+    input_path = options.input_path
+    result = slice_data_files(input_path, factor_selection=factor_selection)
+    data_files = result
+    logger.debug("Result data files list: %s", data_files)
+    if data_files is None:
+        raise RuntimeError("Error getting data files with isatools")
+
+    logger.debug("dumping data files to %s", options.output.name)
+    json.dump(list(data_files), options.output, indent=4)
+    logger.info("Finished writing data files to {}".format(options.output))
+
+
+def zip_get_data_files_list_command(options):
+    logger.info("Getting data files for study %s. Writing to %s.",
+                options.input_path, options.output.name)
+    if options.json_query:
+        logger.debug("This is the specified query:\n%s", options.json_query)
+        json_struct = json.loads(options.json_query)
+    elif options.galaxy_parameters_file:
+        logger.debug("Using input Galaxy JSON parameters from:\n%s",
+                     options.galaxy_parameters_file)
+        with open(options.galaxy_parameters_file) as json_fp:
+            galaxy_json = json.load(json_fp)
+            json_struct = {}
+            for fv_item in galaxy_json['factor_value_series']:
+                json_struct[fv_item['factor_name']] = fv_item['factor_value']
+    else:
+        logger.debug("No query was specified")
+        json_struct = None
+    factor_selection = json_struct
+    input_path = options.input_path
+    with zipfile.ZipFile(input_path) as zfp:
+        tmpdir = tempfile.mkdtemp()
+        zfp.extractall(path=tmpdir)
+        result = slice_data_files(tmpdir, factor_selection=factor_selection)
+        data_files = result
+        logger.debug("Result data files list: %s", data_files)
+    if data_files is None:
+        raise RuntimeError("Error getting data files with isatools")
+    logger.debug("dumping data files to %s", options.output.name)
+    json.dump(list(data_files), options.output, indent=4)
+    logger.info("Finished writing data files to {}".format(options.output))
+    shutil.rmtree(tmpdir)
+
+
+def isatab_get_data_files_collection_command(options):
+    logger.info("Getting data files for study %s. Writing to %s.",
+                options.input_path, options.output_path)
+    if options.json_query:
+        logger.debug("This is the specified query:\n%s", options.json_query)
+    else:
+        logger.debug("No query was specified")
+    input_path = options.input_path
+    if options.json_query is not None:
+        json_struct = json.loads(options.json_query)
+    elif options.galaxy_parameters_file:
+        logger.debug("Using input Galaxy JSON parameters from:\n%s",
+                     options.galaxy_parameters_file)
+        with open(options.galaxy_parameters_file) as json_fp:
+            galaxy_json = json.load(json_fp)
+            json_struct = {}
+            for fv_item in galaxy_json['factor_value_series']:
+                json_struct[fv_item['factor_name']] = fv_item['factor_value']
+    else:
+        logger.debug("No query was specified")
+        json_struct = None
+    factor_selection = json_struct
+    result = slice_data_files(input_path, factor_selection=factor_selection)
+    data_files = result
+    logger.debug("Result data files list: %s", data_files)
+    if data_files is None:
+        raise RuntimeError("Error getting data files with isatools")
+    output_path = options.output_path
+    logger.debug("copying data files to %s", output_path)
+    for result in data_files:
+        for data_file_name in result['data_files']:
+            logging.info("Copying {}".format(data_file_name))
+            shutil.copy(os.path.join(input_path, data_file_name), output_path)
+    logger.info("Finished writing data files to {}".format(output_path))
+
+
+def zip_get_data_files_collection_command(options):
+    logger.info("Getting data files for study %s. Writing to %s.",
+                options.input_path, options.output_path)
+    if options.json_query:
+        logger.debug("This is the specified query:\n%s", options.json_query)
+    else:
+        logger.debug("No query was specified")
+    input_path = options.input_path
+    output_path = options.output_path
+    if options.json_query is not None:
+        json_struct = json.loads(options.json_query)
+        factor_selection = json_struct
+    else:
+        factor_selection = None
+    with zipfile.ZipFile(input_path) as zfp:
+        tmpdir = tempfile.mkdtemp()
+        zfp.extractall(path=tmpdir)
+        result = slice_data_files(tmpdir, factor_selection=factor_selection)
+        data_files = result
+        logger.debug("Result data files list: %s", data_files)
+        if data_files is None:
+            raise RuntimeError("Error getting data files with isatools")
+        logger.debug("copying data files to %s", output_path)
+        for result in data_files:
+            for data_file_name in result['data_files']:
+                logging.info("Copying {}".format(data_file_name))
+                shutil.copy(os.path.join(tmpdir, data_file_name), output_path)
+    logger.info("Finished writing data files to {}".format(output_path))
+    shutil.rmtree(tmpdir)
+
+
+def slice_data_files(dir, factor_selection=None):
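+    """Scan the study (s_*) and assay (a_*) tables under dir: collect the
+    samples matching factor_selection (or all samples when it is None),
+    then gather the data file names associated with each sample."""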
+    results = []
+    # first collect matching samples
+    for table_file in glob.iglob(os.path.join(dir, '[as]_*')):
+        logger.info('Loading {table_file}'.format(table_file=table_file))
+
+        with open(table_file) as fp:
+            df = isatab.load_table(fp)
+
+            if factor_selection is None:
+                matches = df['Sample Name'].items()
+
+                for indx, match in matches:
+                    sample_name = match
+                    if len([r for r in results if r['sample'] ==
+                            sample_name]) == 1:
+                        continue
+                    else:
+                        results.append(
+                            {
+                                'sample': sample_name,
+                                'data_files': []
+                            }
+                        )
+
+            else:
+                for factor_name, factor_value in factor_selection.items():
+                    if 'Factor Value[{}]'.format(factor_name) in list(
+                            df.columns.values):
+                        matches = df.loc[df['Factor Value[{factor}]'.format(
+                            factor=factor_name)] == factor_value][
+                            'Sample Name'].items()
+
+                        for indx, match in matches:
+                            sample_name = match
+                            if len([r for r in results if r['sample'] ==
+                                    sample_name]) == 1:
+                                continue
+                            else:
+                                results.append(
+                                    {
+                                        'sample': sample_name,
+                                        'data_files': [],
+                                        'query_used': factor_selection
+                                    }
+                                )
+
+    # now collect the data files relating to the samples
+    for result in results:
+        sample_name = result['sample']
+
+        for table_file in glob.iglob(os.path.join(dir, 'a_*')):
+            with open(table_file) as fp:
+                df = isatab.load_table(fp)
+
+                data_files = []
+
+                table_headers = list(df.columns.values)
+                sample_rows = df.loc[df['Sample Name'] == sample_name]
+
+                data_node_labels = [
+                    'Raw Data File',
+                    'Raw Spectral Data File',
+                    'Derived Spectral Data File',
+                    'Derived Array Data File',
+                    'Array Data File',
+                    'Protein Assignment File',
+                    'Peptide Assignment File',
+                    'Post Translational Modification Assignment File',
+                    'Acquisition Parameter Data File',
+                    'Free Induction Decay Data File',
+                    'Derived Array Data Matrix File',
+                    'Image File',
+                    'Derived Data File',
+                    'Metabolite Assignment File']
+                for node_label in data_node_labels:
+                    if node_label in table_headers:
+                        data_files.extend(list(sample_rows[node_label]))
+
+                result['data_files'] = [i for i in list(data_files) if
+                                        str(i) != 'nan']
+    return results
+
+
+def isatab_get_factor_names_command(options):
+    input_path = options.input_path
+    logger.info("Getting factors for study %s. Writing to %s.",
+                input_path, options.output.name)
+    _RX_FACTOR_VALUE = re.compile(r'Factor Value\[(.*?)\]')
+    factors = set()
+    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
+        with open(table_file) as fp:
+            df = isatab.load_table(fp)
+
+            factors_headers = [header for header in list(df.columns.values)
+                               if _RX_FACTOR_VALUE.match(header)]
+
+            for header in factors_headers:
+                factors.add(_RX_FACTOR_VALUE.match(header).group(1))
+    if factors is not None:
+        json.dump(list(factors), options.output, indent=4)
+        logger.debug("Factor names written")
+    else:
+        raise RuntimeError("Error reading factors.")
+
+
+def zip_get_factor_names_command(options):
+    input_path = options.input_path
+    logger.info("Getting factors for study %s. Writing to %s.",
+                input_path, options.output.name)
+    # unpack input_path
+    with zipfile.ZipFile(input_path) as zfp:
+        tmpdir = tempfile.mkdtemp()
+        zfp.extractall(path=tmpdir)
+        _RX_FACTOR_VALUE = re.compile(r'Factor Value\[(.*?)\]')
+        factors = set()
+        for table_file in glob.iglob(os.path.join(tmpdir, '[as]_*')):
+            logging.info('Searching {}'.format(table_file))
+            with open(table_file) as fp:
+                df = isatab.load_table(fp)
+
+                factors_headers = [header for header in list(df.columns.values)
+                                   if _RX_FACTOR_VALUE.match(header)]
+
+                for header in factors_headers:
+                    factors.add(_RX_FACTOR_VALUE.match(header).group(1))
+    if factors is not None:
+        json.dump(list(factors), options.output, indent=4)
+        logger.debug("Factor names written")
+    else:
+        raise RuntimeError("Error reading factors.")
+    shutil.rmtree(tmpdir)
+
+
+def isatab_get_factor_values_command(options):
+    logger.info("Getting values for factor {factor} in study {input_path}. Writing to {output_file}."
+                .format(factor=options.factor, input_path=options.input_path, output_file=options.output.name))
+    fvs = set()
+
+    input_path = options.input_path
+    factor_name = options.factor
+
+    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
+        with open(table_file) as fp:
+            df = isatab.load_table(fp)
+
+            if 'Factor Value[{factor}]'.format(factor=factor_name) in \
+                    list(df.columns.values):
+                for _, match in df[
+                    'Factor Value[{factor}]'.format(
+                        factor=factor_name)].items():
+                    try:
+                        match = match.item()
+                    except AttributeError:
+                        pass
+
+                    if isinstance(match, (str, int, float)):
+                        if str(match) != 'nan':
+                            fvs.add(match)
+    if fvs is not None:
+        json.dump(list(fvs), options.output, indent=4)
+        logger.debug("Factor values written to {}".format(options.output))
+    else:
+        raise RuntimeError("Error getting factor values")
+
+
+def zip_get_factor_values_command(options):
+    input_path = options.input_path
+    logger.info("Getting factors for study %s. Writing to %s.",
+                input_path, options.output.name)
+    logger.info("Getting values for factor {factor} in study {input_path}. "
+                "Writing to {output_file}.".format(
+                    factor=options.factor, input_path=options.input_path,
+                    output_file=options.output.name))
+    fvs = set()
+    factor_name = options.factor
+
+    # unpack input_path
+    with zipfile.ZipFile(input_path) as zfp:
+        tmpdir = tempfile.mkdtemp()
+        zfp.extractall(path=tmpdir)
+        for table_file in glob.glob(os.path.join(tmpdir, '[as]_*')):
+            logging.info('Searching {}'.format(table_file))
+            with open(table_file) as fp:
+                df = isatab.load_table(fp)
+                if 'Factor Value[{factor}]'.format(factor=factor_name) in \
+                        list(df.columns.values):
+                    for _, match in df[
+                        'Factor Value[{factor}]'.format(
+                            factor=factor_name)].items():
+                        try:
+                            match = match.item()
+                        except AttributeError:
+                            pass
+
+                        if isinstance(match, (str, int, float)):
+                            if str(match) != 'nan':
+                                fvs.add(match)
+    if fvs is not None:
+        json.dump(list(fvs), options.output, indent=4)
+        logger.debug("Factor values written to {}".format(options.output))
+    else:
+        raise RuntimeError("Error getting factor values")
+    shutil.rmtree(tmpdir)
+
+
+def isatab_get_factors_summary_command(options):
+    logger.info("Getting summary for study %s. Writing to %s.",
+                options.input_path, options.output.name)
+    input_path = options.input_path
+    ISA = isatab.load(input_path)
+
+    all_samples = []
+    for study in ISA.studies:
+        all_samples.extend(study.samples)
+
+    samples_and_fvs = []
+
+    for sample in all_samples:
+        sample_and_fvs = {
+            'sample_name': sample.name,
+        }
+
+        for fv in sample.factor_values:
+            if isinstance(fv.value, (str, int, float)):
+                fv_value = fv.value
+                sample_and_fvs[fv.factor_name.name] = fv_value
+            elif isinstance(fv.value, OntologyAnnotation):
+                fv_value = fv.value.term
+                sample_and_fvs[fv.factor_name.name] = fv_value
+
+        samples_and_fvs.append(sample_and_fvs)
+
+    df = pd.DataFrame(samples_and_fvs)
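+    # keep only the columns that vary across samples; constant-valued
+    # factors carry no information for the summary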
+    nunique = df.apply(pd.Series.nunique)
+    cols_to_drop = nunique[nunique == 1].index
+
+    df = df.drop(cols_to_drop, axis=1)
+    summary = df.to_dict(orient='records')
+    if summary is not None:
+        json.dump(summary, options.output, indent=4)
+        logger.debug("Summary dumped to JSON")
+        # html_summary = build_html_summary(summary)
+        # with options.html_output as html_fp:
+        #     html_fp.write(html_summary)
+    else:
+        raise RuntimeError("Error getting study summary")
+
+
+def zip_get_factors_summary_command(options):
+    logger.info("Getting summary for study %s. Writing to %s.",
+                options.input_path, options.json_output.name)
+    input_path = options.input_path
+    with zipfile.ZipFile(input_path) as zfp:
+        tmpdir = tempfile.mkdtemp()
+        zfp.extractall(path=tmpdir)
+        ISA = isatab.load(tmpdir)
+        all_samples = []
+        for study in ISA.studies:
+            all_samples.extend(study.samples)
+        samples_and_fvs = []
+        for sample in all_samples:
+            sample_and_fvs = {
+                'sample_name': sample.name,
+            }
+            for fv in sample.factor_values:
+                if isinstance(fv.value, (str, int, float)):
+                    fv_value = fv.value
+                    sample_and_fvs[fv.factor_name.name] = fv_value
+                elif isinstance(fv.value, OntologyAnnotation):
+                    fv_value = fv.value.term
+                    sample_and_fvs[fv.factor_name.name] = fv_value
+            samples_and_fvs.append(sample_and_fvs)
+        df = pd.DataFrame(samples_and_fvs)
+        nunique = df.apply(pd.Series.nunique)
+        cols_to_drop = nunique[nunique == 1].index
+        df = df.drop(cols_to_drop, axis=1)
+        summary = df.to_dict(orient='records')
+    if summary is not None:
+        json.dump(summary, options.json_output, indent=4)
+        logger.debug("Summary dumped to JSON")
+        print(json.dumps(summary, indent=4))
+        html_summary = build_html_summary(summary)
+        with options.html_output as html_fp:
+            html_fp.write(html_summary)
+    else:
+        raise RuntimeError("Error getting study summary")
+    shutil.rmtree(tmpdir)
+
+
+def get_study_groups(input_path):
+    # expects one dict per sample, each carrying a 'name' key, as returned
+    # by get_characteristics_summary
+    factors_summary = get_characteristics_summary(input_path=input_path)
+    study_groups = {}
+
+    for factors_item in factors_summary:
+        fvs = tuple(factors_item[k] for k in factors_item.keys() if k != 'name')
+
+        if fvs in study_groups.keys():
+            study_groups[fvs].append(factors_item['name'])
+        else:
+            study_groups[fvs] = [factors_item['name']]
+    return study_groups
+
+
+def get_study_groups_samples_sizes(input_path):
+    study_groups = get_study_groups(input_path=input_path)
+    return list(map(lambda x: (x[0], len(x[1])), study_groups.items()))
+
+
+def get_sources_for_sample(input_path, sample_name):
+    ISA = isatab.load(input_path)
+    hits = []
+
+    for study in ISA.studies:
+        for sample in study.samples:
+            if sample.name == sample_name:
+                print('found a hit: {sample_name}'.format(
+                    sample_name=sample.name))
+
+                for source in sample.derives_from:
+                    hits.append(source.name)
+    return hits
+
+
+def get_data_for_sample(input_path, sample_name):
+    ISA = isatab.load(input_path)
+    hits = []
+    for study in ISA.studies:
+        for assay in study.assays:
+            for data in assay.data_files:
+                if sample_name in [x.name for x in data.generated_from]:
+                    logger.info('found a hit: {filename}'.format(
+                        filename=data.filename))
+                    hits.append(data)
+    return hits
+
+
+def get_study_groups_data_sizes(input_path):
+    study_groups = get_study_groups(input_path=input_path)
+    return list(map(lambda x: (x[0], len(x[1])), study_groups.items()))
+
+
+def get_characteristics_summary(input_path):
+    """
+        This function generates a characteristics summary for a MetaboLights
+        study
+
+        :param input_path: Input path to ISA-tab
+        :return: A list of dicts summarising the set of characteristic names
+        and values associated with each sample
+
+        Note: it only returns a summary of characteristics with variable values.
+
+        Example usage:
+            characteristics_summary = get_characteristics_summary('/path/to/my/study/')
+            [
+                {
+                    "name": "6089if_9",
+                    "Variant": "Synechocystis sp. PCC 6803.sll0171.ko"
+                },
+                {
+                    "name": "6089if_43",
+                    "Variant": "Synechocystis sp. PCC 6803.WT.none"
+                },
+            ]
+
+
+        """
+    ISA = isatab.load(input_path)
+
+    all_samples = []
+    for study in ISA.studies:
+        all_samples.extend(study.samples)
+
+    samples_and_characs = []
+    for sample in all_samples:
+        sample_and_characs = {
+            'name': sample.name
+        }
+
+        for source in sample.derives_from:
+            for c in source.characteristics:
+                if isinstance(c.value, (str, int, float)):
+                    c_value = c.value
+                    sample_and_characs[c.category.term] = c_value
+                elif isinstance(c.value, OntologyAnnotation):
+                    c_value = c.value.term
+                    sample_and_characs[c.category.term] = c_value
+
+        samples_and_characs.append(sample_and_characs)
+
+    df = pd.DataFrame(samples_and_characs)
+    nunique = df.apply(pd.Series.nunique)
+    cols_to_drop = nunique[nunique == 1].index
+
+    df = df.drop(cols_to_drop, axis=1)
+    return df.to_dict(orient='records')
+
+
+def get_study_variable_summary(input_path):
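+    """Summarise, per sample, the factor values and source characteristics,
+    keeping only the variables that vary across samples. Returns a list of
+    dicts, one per sample."""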
+    ISA = isatab.load(input_path)
+
+    all_samples = []
+    for study in ISA.studies:
+        all_samples.extend(study.samples)
+
+    samples_and_variables = []
+    for sample in all_samples:
+        sample_and_vars = {
+            'sample_name': sample.name
+        }
+
+        for fv in sample.factor_values:
+            if isinstance(fv.value, (str, int, float)):
+                fv_value = fv.value
+                sample_and_vars[fv.factor_name.name] = fv_value
+            elif isinstance(fv.value, OntologyAnnotation):
+                fv_value = fv.value.term
+                sample_and_vars[fv.factor_name.name] = fv_value
+
+        for source in sample.derives_from:
+            sample_and_vars['source_name'] = source.name
+            for c in source.characteristics:
+                if isinstance(c.value, (str, int, float)):
+                    c_value = c.value
+                    sample_and_vars[c.category.term] = c_value
+                elif isinstance(c.value, OntologyAnnotation):
+                    c_value = c.value.term
+                    sample_and_vars[c.category.term] = c_value
+
+        samples_and_variables.append(sample_and_vars)
+
+    df = pd.DataFrame(samples_and_variables)
+    nunique = df.apply(pd.Series.nunique)
+    cols_to_drop = nunique[nunique == 1].index
+
+    df = df.drop(cols_to_drop, axis=1)
+    return df.to_dict(orient='records')
+
+
+def get_study_group_factors(input_path):
+    factors_list = []
+
+    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
+        with open(table_file) as fp:
+            df = isatab.load_table(fp)
+
+            factor_columns = [x for x in df.columns if x.startswith(
+                'Factor Value')]
+            if len(factor_columns) > 0:
+                factors_list = df[factor_columns].drop_duplicates()\
+                    .to_dict(orient='records')
+    return factors_list
+
+
+def get_filtered_df_on_factors_list(input_path):
+    factors_list = get_study_group_factors(input_path=input_path)
+    queries = []
+
+    for item in factors_list:
+        query_str = []
+
+        for k, v in item.items():
+            k = k.replace(' ', '_').replace('[', '_').replace(']', '_')
+            if isinstance(v, str):
+                v = v.replace(' ', '_').replace('[', '_').replace(']', '_')
+                query_str.append("{k} == '{v}' and ".format(k=k, v=v))
+
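+        # strip the trailing "and " left after the last clause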
+        query_str = ''.join(query_str)[:-4]
+        queries.append(query_str)
+
+    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
+        with open(table_file) as fp:
+            df = isatab.load_table(fp)
+
+            # make the column names valid pandas.query identifiers
+            cols = df.columns
+            cols = cols.map(
+                lambda x: x.replace(' ', '_').replace('[', '_').replace(
+                    ']', '_') if isinstance(x, str) else x)
+            df.columns = cols
+
+        for query in queries:
+            # query uses pandas.eval, which evaluates queries like pure Python
+            # notation
+            df2 = df.query(query)
+            if 'Sample_Name' in df.columns:
+                print('Group: {query} / Sample_Name: {sample_name}'.format(
+                    query=query, sample_name=list(df2['Sample_Name'])))
+
+            if 'Source_Name' in df.columns:
+                print('Group: {} / Sources_Name: {}'.format(
+                    query, list(df2['Source_Name'])))
+
+            if 'Raw_Spectral_Data_File' in df.columns:
+                print('Group: {query} / Raw_Spectral_Data_File: {filename}'
+                      .format(query=query,
+                              filename=list(df2['Raw_Spectral_Data_File'])))
+    return queries
+
+
+def datatype_get_summary_command(options):
+    logger.info("Getting summary for study %s. Writing to %s.",
+                options.study_id, options.output.name)
+
+    summary = get_study_variable_summary(options.study_id)
+    if summary is not None:
+        logger.debug("summary: %s", summary)
+        json.dump(summary, options.output, indent=4)
+        logger.debug("Summary dumped")
+    else:
+        raise RuntimeError("Error getting study summary")
+
+
+# logging and argument parsing
+
+def _configure_logger(options):
+    logging_level = getattr(logging, options.log_level, logging.INFO)
+    logging.basicConfig(level=logging_level)
+
+    global logger
+    logger = logging.getLogger()
+    logger.setLevel(logging_level)  # ensure the level is applied; the level set through basicConfig does not always take effect
+
+
+def _parse_args(args):
+    parser = make_parser()
+    options = parser.parse_args(args)
+    return options
+
+
+def main(args):
+    options = _parse_args(args)
+    _configure_logger(options)
+    # run subcommand
+    options.func(options)
+
+
+if __name__ == '__main__':
+    try:
+        main(sys.argv[1:])
+        sys.exit(0)
+    except Exception as e:
+        # the logger may not be configured yet if the failure happened early
+        (logger or logging).exception(e)
+        sys.exit(e.code if hasattr(e, "code") else 99)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mtbls-dwnld	Tue Jan 07 09:05:21 2020 -0500
@@ -0,0 +1,525 @@
+#!/bin/bash
+# vi: fdm=marker
+
+# Constants {{{1
+################################################################
+
+PROG_NAME=$(basename "$0")
+PROG_PATH=$(dirname "$0")
+ISASLICER="$PROG_PATH/isaslicer.py"
+YES=yes
+NO=no
+ASPERA_PUBLIC_TOKEN=Xz68YfDe
+ASCP=ascp
+WGET=wget
+PLATFORM=
+DISTRIBUTION=
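+# Detect the platform: on Alpine the busybox wget does not support
+# --progress=dot, so the flag is only added on other systems.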
+[[ -z $(which uname) ]] || PLATFORM=$(uname)
+[[ $PLATFORM == Linux && -e /proc/version ]] && DISTRIBUTION=$(sed 's/^.*(\([^ ]*\).*$/\1/' /proc/version)
+[[ $DISTRIBUTION == Alpine ]] || WGET_FLAGS="--progress=dot"
+
+# Global variables {{{1
+################################################################
+
+ASPERA=
+COMPRESSED=
+DEBUG=0
+FACTOR_VALUE=
+HTML=
+METADATA_ONLY=
+OUTPUT=
+PRIVATE=
+QUIET=0
+TMP_IN_OUTPUT=
+TOKEN=
+
+# Print help {{{1
+################################################################
+
+function print_help {
+	echo "Usage: $PROG_NAME [options] study"
+	echo
+	echo "Retrieves a study from Metabolights database."
+	echo "By default it uses the ftp server, but with -a option you can ask for using aspera server (you will need the aspera client ascp to be installed)."
+	echo
+	echo "Options:"
+	echo "   -a, --aspera                Use aspera server for downloading. You need the ascp client to be installed, it is freely available for linux 64. See http://downloads.asperasoft.com."
+	echo "   -c, --compressed            Output in compressed format. If unset, the output will be a directory."
+	echo "   -f, --factor-value          Filter study on a factor value. Example: \"-f myfactor=myvalue\". Only available with wget downloader."
+	echo "   -g, --debug                 Debug mode."
+	echo "   -h, --help                  Print this help message."
+	echo "   -H, --html           FILE   Write HTML file that list files contained in output directory. Not compatible with compressed output."
+	echo "   -M, --metadata-only         Download only the metadata (ISA-Tab files i_*, m_*, s_* and a_*) files. This option has no effet if aspera is selected (option -a)."
+	echo "   -o, --output         NAME   Set the name of the output (both for directory output or compressed file output). By default the name of the output is determined by the download tool."
+	echo "   -p, --private               Indicate the study to download is not public. This is meant for aspera download, since the URL will be different for a public or a private study."
+	echo "   -q, --quiet                 Does not print any output. Can be specified twice in order to be real quiet."
+	echo "   -t, --token          TOKEN  Set the token or password to use. For aspera public download, if you don't specify a token, the default token '$ASPERA_PUBLIC_TOKEN' will be used."
+	echo "   -T, --tmp-in-output         If an output is specified and it is a directory (-c option must not be set), then use it for writing intermediate files."
+}
+
+# Error {{{1
+################################################################
+
+function error {
+
+	local msg=$1
+
+	echo "ERROR: $msg" >&2
+
+	exit 1
+}
+
+# Debug {{{1
+################################################################
+
+function debug {
+
+	local dbgmsg="$1"
+
+	[[ $DEBUG -ge 1 ]] && echo "[DEBUG] $dbgmsg" >&2
+}
+
+
+# Read args {{{1
+################################################################
+
+function read_args {
+
+	local args="$*" # save arguments for debugging purpose
+	
+	# Read options
+	while true ; do
+		shift_count=1
+		case $1 in
+			-a|--aspera)            ASPERA=$YES ;;
+			-c|--compressed)        COMPRESSED=$YES ;;
+			-f|--factor-value)      FACTOR_VALUE="$2" ; shift_count=2 ;;
+			-g|--debug)             DEBUG=$((DEBUG + 1)) ;;
+			-h|--help)              print_help ; exit 0 ;;
+			-M|--metadata-only)     METADATA_ONLY=$YES ;;
+			-H|--html)              HTML="$2" ; shift_count=2 ;;
+			-o|--output)            OUTPUT="$2" ; shift_count=2 ;;
+			-p|--private)           PRIVATE=$YES ;;
+			-q|--quiet)             QUIET=$((QUIET + 1)) ;;
+			-t|--token)             TOKEN="$2" ; shift_count=2 ;;
+			-T|--tmp-in-output)     TMP_IN_OUTPUT=$YES ;;
+			-) error "Illegal option $1." ;;
+			--) error "Illegal option $1." ;;
+			--*) error "Illegal option $1." ;;
+			-?) error "Unknown option $1." ;;
+			-[^-]*) split_opt=$(echo $1 | sed 's/^-//' | sed 's/\([a-zA-Z]\)/ -\1/g') ; set -- $1$split_opt "${@:2}" ;;
+			*) break
+		esac
+		shift $shift_count
+	done
+
+	# Read remaining arguments
+	[ $# -eq 1 ] || error "You must specify one, and only one, study to retrieve."
+	STUDY="$1"
+
+	# Check token
+	if [[ -n $ASPERA && -z $TOKEN ]] ; then
+		if [[ -z $PRIVATE ]] ; then
+			TOKEN=$ASPERA_PUBLIC_TOKEN
+		else
+			error "You need to specify a token for retrieving private studies with aspera."
+		fi
+	fi
+	[[ -z $PRIVATE || -n $TOKEN ]] || error "You need to set a token for retrieving private studies."
+
+	# Turn off --tmp-in-output if --compressed is set or no output is specified
+	[[ $TMP_IN_OUTPUT == $YES && ( $COMPRESSED == $YES || -z $OUTPUT ) ]] && TMP_IN_OUTPUT=$NO
+
+	# Debug
+	debug "Arguments are : $args"
+	debug "Study to retrieve is : $STUDY"
+	debug "ASPERA=$ASPERA"
+	debug "COMPRESSED=$COMPRESSED"
+	debug "DEBUG=$DEBUG"
+	debug "FACTOR_VALUE=$FACTOR_VALUE"
+	debug "HTML=$HTML"
+	debug "METADATA_ONLY=$METADATA_ONLY"
+	debug "OUTPUT=$OUTPUT"
+	debug "PRIVATE=$PRIVATE"
+	debug "QUIET=$QUIET"
+	debug "TMP_IN_OUTPUT=$TMP_IN_OUTPUT"
+	debug "TOKEN=$TOKEN"
+	[[ -n $ASPERA ]] && debug "Aspera will be used."
+	[[ -n $TOKEN ]] && debug "Token/Password is \"$TOKEN\"."
+}
+
+# Get download output path {{{1
+################################################################
+
+get_download_output_path() {
+
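+	# wget downloads a zip archive while ascp downloads a plain directory,
+	# hence the .zip extension appended for wget.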
+	local downloader="$1"
+	local study_name="$2"
+	local output_dir="$3"
+	local output_path="$study_name"
+
+	[[ -z $output_dir ]] || output_path="$output_dir/$output_path"
+	[[ $downloader == $WGET ]] && output_path+=".zip"
+
+	echo "$output_path"
+}
+
+# Download with ascp {{{1
+################################################################
+
+download_with_ascp() {
+
+	local study_name="$1"
+	local metadata_only="$2"
+	local output_dir="$3"
+	local dwnld_flags=
+	local dwnld_link=
+
+	# Check ascp
+	[ -n "$(which ascp)" ] || error "ascp command not found. Please install Aspera client, version 3.7.4 or greater. See http://downloads.asperasoft.com/en/downloads/62."
+	debug "$ASCP: $(which $ASCP)"
+	debug "QUIET: $QUIET"
+
+	# Silence downloader output
+	[[ $QUIET -eq 0 ]] || dwnld_flags=-q
+
+	# Set download flags
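+	# (--policy=fair shares bandwidth fairly, -T disables transfer
+	# encryption, -l 1g caps the rate at 1 Gb/s, -P33001 is the SSH port
+	# used by the EBI Aspera servers)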
+	[[ -z $dwnld_flags ]] || dwnld_flags+=" "
+	dwnld_flags+="--policy=fair -T -l 1g"
+
+	dwnld_flags+=" -P33001"
+
+	# Set download link
+	if [[ -z $PRIVATE ]] ; then
+
+		# Make full path for public study
+		study_path=$STUDY
+		if [[ -z ${study_path##MTBLS*} ]] ; then
+			study_path="/studies/public/$study_path"
+		fi
+		dwnld_link="fasp-ml@fasp.ebi.ac.uk:$study_path"
+
+	# Private study
+	else
+		dwnld_link="mtblight@hx-fasp-1.ebi.ac.uk:$STUDY"
+	fi
+
+	# Export token
+	debug "export ASPERA_SCP_PASS=\"$TOKEN\""
+	export ASPERA_SCP_PASS="$TOKEN"
+
+	# Run download command
+	if [[ -n $output_dir ]] ; then
+		mkdir -p "$output_dir"
+		curdir=$(pwd)
+		cd "$output_dir"
+	fi
+	if [[ $metadata_only == $YES ]] ; then
+		debug "Download command: $ASCP $dwnld_flags -N '?_*.t*' -E '*.*' -E 'p*' $dwnld_link ."
+		$ASCP $dwnld_flags -N '?_*.t*' -E '*.*' -E 'p*' $dwnld_link .
+		rc=$? ; [[ $rc == 0 ]] || error "Downloading of study $STUDY has failed. Error code returned is $rc."
+	else
+		debug "Download command: $ASCP $dwnld_flags $dwnld_link ."
+		$ASCP $dwnld_flags $dwnld_link .
+		rc=$? ; [[ $rc == 0 ]] || error "Downloading of study $STUDY has failed. Error code returned is $rc."
+	fi
+	[[ -z $output_dir ]] || cd "$curdir"
+}
+
+# Download with wget {{{1
+################################################################
+
+download_with_wget() {
+
+	local study_name="$1"
+	local metadata_only="$2"
+	local output_dir="$3"
+	local dwnld_flags=
+
+	# Check wget
+	[ -n "$(which wget)" ] || error "wget command not found."
+	debug "$WGET: $(which $WGET)"
+
+	# Set download link
+	file="$study_name"
+	[[ $metadata_only == $YES ]] && file=metadata
+	dwnld_link="https://www.ebi.ac.uk/metabolights/$study_name/files/$file"
+	[[ -n $TOKEN ]] && dwnld_link+="?token=$TOKEN"
+
+	# Set download output
+	output_path="$study_name.zip"
+	if [[ -n $output_dir ]] ; then
+		mkdir -p "$output_dir"
+		output_path="$output_dir/$output_path"
+	fi
+	dwnld_flags="-O $output_path"
+
+	# Silence downloader output
+	wget_log_file=$(mktemp -t $PROG_NAME.XXXXXX)
+	[[ $QUIET -eq 0 ]] || dwnld_flags+=" -q -o $wget_log_file"
+
+	# Run download command
+	debug "Download command: $WGET $WGET_FLAGS $dwnld_flags $dwnld_link"
+	$WGET $WGET_FLAGS $dwnld_flags $dwnld_link
+	[[ $? == 0 ]] || error "Downloading of study $STUDY has failed. wget log file: $(cat $wget_log_file)"
+	rm -f $wget_log_file
+}
+
+# Download {{{1
+################################################################
+
+download() {
+
+	local downloader="$1"
+	local study_name="$2"
+	local metadata_only="$3"
+	local output_dir="$4"
+
+	if [[ $downloader == $WGET ]] ; then
+		download_with_wget "$2" "$3" "$output_dir"
+	else
+		download_with_ascp "$2" "$3" "$output_dir"
+	fi
+}
+
+# Check download {{{1
+################################################################
+
+check_download() {
+
+	local download_path="$1"
+
+	[[ -d $download_path || -f $download_path ]] || error "The download of $download_path failed. The output file doesn't exist."
+	[[ ! -f $download_path || -s $download_path ]] || error "The download of $download_path failed. The output file is empty."
+}
+
+# Unzip study {{{1
+################################################################
+
+unzip_study() {
+
+	local tmp_output="$1"
+	local output="$2"
+	local unzip_flags=
+
+	# Silence unzip program
+	[[ $QUIET -eq 0 ]] || unzip_flags=-qq
+
+	debug "Unzipping file \"$tmp_output\"."
+	[[ -z $output ]] || debug "Unzipping into \"$output\"."
+
+	[[ -n $(which unzip) ]] || error "unzip command not found. Please install the unzip package."
+
+	zip=$tmp_output
+	debug "Zipped file is \"$zip\"."
+	if [[ -z $output ]] ; then
+		output=${zip%.*}
+		[[ -d "$output" ]] && rm -r "$output"
+	fi
+	debug "Output directory will be \"$output\"."
+	zip_abs_path=$(realpath "$zip")
+	curdir=$(pwd)
+	debug "Current directory is \"$curdir\"."
+	mkdir -p "$output"
+	cd "$output"
+	unzip $unzip_flags "$zip_abs_path" >&2 || error "Unable to unzip archive $zip_abs_path."
+	cd "$curdir"
+
+	echo "$output"
+}
+
+# As zip {{{1
+################################################################
+
+as_zip() {
+
+	local path="$1"
+
+	if [[ -d $path ]] ; then
+
+		zip_file="$path.zip"
+		zip "$zip_file" $path/*
+		rm -r "$path"
+		path="$zip_file"
+	fi
+
+	echo "$path"
+}
+
+# As folder {{{1
+################################################################
+
+as_folder() {
+
+	local path="$1"
+	local output="$2"
+
+	debug "as_folder($path, $output)"
+	if [[ -f $path && ${path##*.} == 'zip' ]] ; then
+		folder=$(unzip_study "$path" "$output")
+		rm "$path"
+		path="$folder"
+	elif [[ -d $path && -n $output && $path != $output ]] ; then
+		mkdir -p "$output"
+		mv "$path"/* "$output"/.
+		rm -r "$path"
+		path="$output"
+	fi
+
+	echo "$path"
+}
+
+# Make folder or zip {{{1
+################################################################
+
+make_folder_or_zip() {
+
+	local path="$1"
+	local compressed="$2"
+	local output="$3"
+
+	debug "make_folder_or_zip($path, $compressed, $output)"
+	[[ -e $path ]] || error "No file or folder at path \"$path\"."
+
+	# Compress folder
+	if [[ $compressed == $YES ]] ; then
+		path=$(as_zip "$path")
+	else
+		path=$(as_folder "$path" "$output")
+	fi
+
+	echo "$path"
+}
+
+# Write HTML file {{{1
+################################################################
+
+function write_html_file {
+
+	local HTML=$1
+	local tmp_output=$2
+
+	cat >$HTML <<EOF
+<html>
+	<head>
+		<title>Metabolights study</title>
+	</head>
+	<body>
+		<a href="i_Investigation.txt">Investigation file</a><br/>
+EOF
+
+	echo "<br/>" >>$HTML
+	echo "          Study files:<br/>" >>$HTML
+	for f in $tmp_output/s_* ; do
+		filename=$(basename "$f")
+		echo "          <a href=\"$filename\">$filename</a><br/>" >>$HTML
+	done
+
+	echo "<br/>" >>$HTML
+	echo "          Assay files:<br/>" >>$HTML
+	for f in $tmp_output/a_* ; do
+		filename=$(basename "$f")
+		echo "          <a href=\"$filename\">$filename</a><br/>" >>$HTML
+	done
+
+	echo "<br/>" >>$HTML
+	echo "          Data files:<br/>" >>$HTML
+	for f in $tmp_output/m_* ; do
+		filename=$(basename "$f")
+		echo "          <a href=\"$filename\">$filename</a><br/>" >>$HTML
+	done
+
+	cat >>$HTML <<EOF
+	</body>
+</html>
+EOF
+}
+
+# Get data files {{{1
+################################################################
+
+get_data_files() {
+
+	local json_file="$1"
+
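+	# The JSON file, as written by isaslicer's isa-tab-get-data-list
+	# command, is a list of records, each holding a 'data_files' array.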
+	python3 <<EOF
+# @@@BEGIN_PYTHON@@@
+import json
+with open('$json_file') as f:
+    data_list = json.load(f)
+    for elem in data_list:
+        print("\n".join(elem['data_files']))
+# @@@END_PYTHON@@@
+EOF
+}
+
+# MAIN {{{1
+################################################################
+
+read_args "$@"
+
+study_name=$(basename "$STUDY")
+downloader=$WGET
+output_dir=
+[[ $ASPERA == $YES ]] && downloader=$ASCP
+[[ $TMP_IN_OUTPUT != $YES ]] || output_dir="$OUTPUT"
+
+# Download only part of the study using factor value
+if [[ -n $FACTOR_VALUE ]] ; then
+
+	# Get factor name and value
+	factor_name=${FACTOR_VALUE%%=*}
+	factor_value=${FACTOR_VALUE#*=}
+
+	# Download only metadata
+	download "$downloader" "$study_name" "$YES" # XXX Download output must be written into $OUTPUT if TMP_IN_OUTPUT is set
+	dwnld_output=$(get_download_output_path "$downloader" "$study_name")
+	dwnld_output=$(as_folder "$dwnld_output")
+
+	# Get data files to download
+	data_files=$(mktemp -t $PROG_NAME.XXXXXX) # XXX must be created into $OUTPUT if TMP_IN_OUTPUT is set
+	[[ -x "$ISASLICER" ]] || error "Cannot find or run isaslicer.py script."
+	debug "Run ISA slicer: \"$ISASLICER\" 'isa-tab-get-data-list' \"$abs_dwnld_output\" \"$data_files\" --json-query \"{ \\\"$factor_name\\\": \\\"$factor_value\\\" }\""
+	abs_dwnld_output=$(realpath "$dwnld_output")
+	"$ISASLICER" --log-level DEBUG 'isa-tab-get-data-list' "$abs_dwnld_output" "$data_files" --json-query "{ \"$factor_name\": \"$factor_value\" }" 2>&1 || error "Call to isaslicer failed."
+
+	# Download data files
+	wget_log_file=$(mktemp -t $PROG_NAME.XXXXXX)
+	get_data_files "$data_files" | sort | uniq | while read file ; do
+		if [[ -n $file ]] ; then
+			dwnld_flags=
+			[[ $QUIET -eq 0 ]] || dwnld_flags+=" -q -o $wget_log_file"
+			dwnld_link="https://www.ebi.ac.uk/metabolights/$study_name/files/$file"
+			debug "Download command: $WGET $WGET_FLAGS $dwnld_flags $dwnld_link"
+			$WGET $WGET_FLAGS $dwnld_flags -O "$dwnld_output/$file" "$dwnld_link" # XXX Download output must be written into $OUTPUT if TMP_IN_OUTPUT is set
+			[[ $? == 0 ]] || error "Downlad of study file \"$file\" has failed. wget log file: $(cat $wget_log_file)"
+		fi
+	done
+	rm -f $wget_log_file
+
+	# Remove data files list
+	rm "$data_files"
+
+# Download whole study
+else
+
+	download "$downloader" "$study_name" "$METADATA_ONLY" "$output_dir" # XXX Download output must be written into $OUTPUT if TMP_IN_OUTPUT is set
+
+	# Get output path
+	dwnld_output=$(get_download_output_path "$downloader" "$study_name" "$output_dir") # XXX Correct output path when TMP_IN_OUTPUT is set
+fi
+
+# Check output
+check_download "$dwnld_output"
+
+# Output in right format (zipped or folder)
+dwnld_output=$(make_folder_or_zip "$dwnld_output" "$COMPRESSED" "$output_dir") # XXX zip must be written into $OUTPUT and unzipped into $OUTPUT if TMP_IN_OUTPUT is set
+
+# Output HTML
+[[ -z $HTML || ! -d $dwnld_output ]] || write_html_file "$HTML" "$dwnld_output"
+
+# Rename output
+[[ -z $OUTPUT || $TMP_IN_OUTPUT == $YES ]] || mv "$dwnld_output" "$OUTPUT"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mtbls-dwnld.xml	Tue Jan 07 09:05:21 2020 -0500
@@ -0,0 +1,200 @@
+<!-- vi: se fdm=marker : -->
+<tool id="mtbls-dwnld" name="Metabolights downloader" version="4.1.4">
+
+	<description>Import public and private MetaboLights studies</description>
+
+	<requirements>
+		<requirement type="package" version="0.10.3">isatools</requirement>
+	</requirements>
+
+	<!-- Command {{{1 -->
+
+	<command><![CDATA[
+		## @@@BEGIN_CHEETAH@@@
+
+		$__tool_directory__/mtbls-dwnld
+
+		-q -T
+		#if $token:
+			-t "$token"
+		#end if
+		#if $type == 'private':
+			-p
+		#end if
+		#if $downloader == 'aspera':
+			-a
+		#end if
+
+		## Download only the metadata
+		#if $only_metadata == 'yes':
+			-M
+		#end if
+
+		## Set output file
+		-H "$isastudy"
+		-o "$isastudy.files_path"
+
+		## Factor slicing
+		#if $factor_slicing.factor_name:
+			-f "$factor_slicing.factor_name=$factor_slicing.factor_value"
+		#end if
+
+		## Study to output
+		"$study"
+
+		## @@@END_CHEETAH@@@
+	]]></command>
+
+	<!-- Inputs {{{1 -->
+
+	<inputs>
+
+		<!-- Study name -->
+		<param name="study" type="text" label="Study name" help="Study name in the form MTBLSXXXX. For downloading a private study with Aspera client, set to the full path of the study. See details below." refresh_on_change="true"/>
+
+		<!-- Public or private -->
+		<param name="type" label="Study type" type="select" help="Indicate here if the study you want to download is public or private.">
+			<option value="public">Public</option>
+			<option value="private">Private</option>
+		</param>
+
+		<!-- Downloader -->
+		<param name="downloader" label="Downloader" type="select" help="If downloading only the metadata prefer wget, otherwise you can choose aspera.">
+			<option value="wget">wget</option>
+			<option value="aspera">Aspera</option>
+		</param>
+
+		<!-- Token -->
+		<param name="token" type="text" optional="true" format="txt" label="Token"  help="A token is required for private studies. A default token is provided for downloading public studies with aspera, but you can set your own if you wish."/>
+
+		<!-- Metadata -->
+		<param name="only_metadata" label="Content to download" type="select" display="radio" help="Specify here what files you want to download from the study.">
+			<option value="yes">ISA-Tab metadata only.</option>
+			<option value="no">Full study including raw data and ISA-Tab metadata.</option>
+		</param>
+
+		<!-- Factor slicing -->
+		<section name="factor_slicing" title="Slicing by factor value" expanded="true" help="With this option, you can restrict the set of files you download from the study.">
+			<param name="factor_name" type="text" size="128" label="Factor title"/>
+			<param name="factor_value" type="text" size="128" label="Factor value"/>
+		</section>
+	</inputs>
+
+	<!-- Outputs {{{1 -->
+
+    <outputs>
+        <data name="isastudy" label="${study}" format="isa-tab"/>
+    </outputs>
+
+	<!-- Tests {{{1 -->
+	<tests>
+		<!-- Simple retrieving with wget -->
+		<test>
+			<param name="study" value="MTBLS2"/>
+			<param name="type" value="public"/>
+			<param name="downloader" value="wget"/>
+			<param name="metadata" value="yes"/>
+			<param name="token" value=""/>
+			<output name="isastudy" file="MTBLS2.html"/>
+		</test>
+
+		<!-- Slicing -->
+		<test>
+			<param name="study" value="MTBLS2"/>
+			<param name="type" value="public"/>
+			<param name="downloader" value="wget"/>
+			<param name="metadata" value="yes"/>
+			<param name="token" value=""/>
+			<param name="factor_name" value="Factor Value[genotype]"/>
+			<param name="factor_value" value="cyp79"/>
+			<output name="isastudy" file="MTBLS2.html"/>
+		</test>
+	</tests>
+
+	<!-- Help {{{1 -->
+    <help>
+<!-- @@@BEGIN_RST@@@ -->
+
+=======================
+Metabolights Downloader
+=======================
+
+A tool to download data from MetaboLights studies. You can download either a public or a private study, using either the *wget* or the *aspera* download method.
+The output is an ISA dataset.
+
+-------------
+Input setting
+-------------
+
+Study name
+==========
+
+Fill this field with the name of the study you want to download.
+
+.. class:: warningmark
+
+When downloading a private study with the *aspera* downloader, you must provide the full path of the study, not just its name.
+You must first have requested an "FTP folder" for the private study from the metabolights-curation@ebi.ac.uk team.
+
+Study type
+==========
+
+Select here the type of the study, which can be either *public* or *private*. For a *private* study, you will have to provide a security token.
+
+Downloader
+==========
+
+This field controls the program used for downloading the study: either the standard *wget* or *aspera*. *wget* downloads the data over *HTTP*, while *aspera* uses Aspera's *FASP* transfer protocol. If you are only interested in the metadata, choose *wget*, since it can download just those files. If you are interested in *mzML* or *mzData* files, choose *aspera*, which is faster.
+
+Metadata
+========
+
+If you are only interested in the metadata, choose *Yes*, and make sure to also select *wget* in the **Downloader** field.
+If you set this field to *No*, the tool will also try to extract *mzData* and *mzML* files and output them as collections.
+
+Token
+=====
+
+You need to provide a token when downloading with *aspera* (for both private and public studies), or when downloading a private study with *wget*.
+
+------
+Output
+------
+
+The output is an ISA-Tab dataset that can be used by subsequent tools like isa2w4m.
+The ISA-Tab dataset stores all the files contained in the downloaded Metabolights study.
+
+---------------------
+Developer information
+---------------------
+
+The Metabolights downloader downloads a compressed archive from the Metabolights database and extracts it inside the Galaxy dataset folder. It creates an HTML file (used as the primary file) and extracts all the files from the archive into the subfolder ``&lt;name_of_the_HTML_file&gt;_files``.
+
+Note that neither the uploader nor the ISA Galaxy datatype are involved in this process. This is why the downloader itself has to create an HTML file that serves as the primary file, and to extract the files into the ``*_files`` subfolder.
+
+A subsequent tool that uses the output of the Metabolights downloader has to declare its input as the ISA type::
+
+  &lt;param name="isa" label="ISA" type="data" format="isa"/&gt;
+
+Then, to use it inside the command tag, use the variable ``$isa.extra_files_path`` to get the path of the folder where all the ISA archive files have been extracted::
+
+  &lt;command&gt;&lt;![CDATA[some-program-to-run --the-isa-folder "$isa.extra_files_path" ]]&gt;&lt;/command&gt;
+
+The easiest way to use the ISA archive inside the wrapped program is to load it with the Python 3 library isatools. See https://github.com/ISA-tools/isa-api for more information.
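+
+As a minimal sketch (the ``isa_folder`` path below is a placeholder for the value received through ``$isa.extra_files_path``), loading the dataset could look like this::
+
+  import os
+  from isatools import isatab
+
+  isa_folder = '/path/to/extra_files_path'  # placeholder, passed by Galaxy
+  with open(os.path.join(isa_folder, 'i_Investigation.txt')) as fp:
+      investigation = isatab.load(fp)
+  for study in investigation.studies:
+      print(study.identifier, [a.filename for a in study.assays])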
+
+----------------------
+Network considerations
+----------------------
+
+The networking of the Galaxy instance needs to allow outbound UDP connections on port 33001 for the fast Aspera download. This is the case for most commercial providers, including Amazon AWS and Google GCP, but some local installations might have additional firewall rules in place. See https://test-connect.asperasoft.com/ for more information. If these connections are not allowed, falling back to the wget download over HTTP is still possible.
+
+<!-- @@@END_RST@@@ -->
+    </help>
+
+	<!-- Citations {{{1 -->
+    <citations>
+        <citation type="doi">10.1002/0471250953.bi1413s53</citation> <!-- Metabolights -->
+        <citation type="doi">10.1007/s11306-015-0879-3</citation> <!-- Metabolights -->
+        <citation type="doi">10.1038/ng.1054</citation> <!-- ISA -->
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/MTBLS2.html	Tue Jan 07 09:05:21 2020 -0500
@@ -0,0 +1,17 @@
+<html>
+	<head>
+		<title>Metabolights study</title>
+	</head>
+	<body>
+		<a href="i_Investigation.txt">Investigation file</a><br/>
+<br/>
+          Study files:<br/>
+          <a href="s_MTBL2.txt">s_MTBL2.txt</a><br/>
+<br/>
+          Assay files:<br/>
+          <a href="a_mtbl2_metabolite profiling_mass spectrometry.txt">a_mtbl2_metabolite profiling_mass spectrometry.txt</a><br/>
+<br/>
+          Data files:<br/>
+          <a href="m_mtbl2_metabolite profiling_mass spectrometry_v2_maf.tsv">m_mtbl2_metabolite profiling_mass spectrometry_v2_maf.tsv</a><br/>
+	</body>
+</html>