Mercurial > repos > prog > mtblsdwnld
annotate isaslicer.py @ 0:8dab200e02cb draft
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
| author | prog | 
|---|---|
| date | Tue, 07 Jan 2020 09:05:21 -0500 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1 #!/usr/bin/env python3 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 2 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 3 import argparse | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 4 import glob | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 5 import json | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 6 import logging | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 7 import os | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 8 import re | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 9 import shutil | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 10 import sys | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 11 import tempfile | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 12 import zipfile | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 13 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 14 import pandas as pd | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 15 from isatools import isatab | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 16 from isatools.model import OntologyAnnotation | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 17 from isatools.net import mtbls as MTBLS | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 18 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 19 logger = None | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 20 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 21 # Usage: isaslicer.py [--log-level LEVEL] <command> <study_id or input_path> [ command-specific options ] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 22 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 23 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 24 def make_parser(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 25 parser = argparse.ArgumentParser( description="ISA slicer") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 26 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 27 parser.add_argument('--log-level', choices=[ | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 28 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 29 default='INFO', help="Set the desired logging level") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 30 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 31 subparsers = parser.add_subparsers( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 32 title='Actions', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 33 dest='command') # specified subcommand will be available in attribute 'command' | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 34 subparsers.required = True | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 35 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 36 # mtblisa commands | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 37 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 38 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 39 'mtbls-get-study-archive', aliases=['gsa'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 40 help="Get ISA study from MetaboLights as zip archive") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 41 subparser.set_defaults(func=get_study_archive_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 42 subparser.add_argument('study_id') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 43 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 44 'output', metavar="OUTPUT", | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 45 help="Name of output archive (extension will be added)") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 46 subparser.add_argument('--format', metavar="FMT", choices=[ | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 47 'zip', 'tar', 'gztar', 'bztar', 'xztar'], default='zip', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 48 help="Type of archive to create") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 49 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 50 subparser = subparsers.add_parser('mtbls-get-study', aliases=['gs'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 51 help="Get ISA study from MetaboLights") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 52 subparser.set_defaults(func=get_study_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 53 subparser.add_argument('study_id') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 54 subparser.add_argument('output', metavar="PATH", help="Name of output") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 55 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 56 '-f', '--isa-format', choices=['isa-tab', 'isa-json'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 57 metavar="FORMAT", default='isa-tab', help="Desired ISA format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 58 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 59 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 60 'mtbls-get-factors', aliases=['gf'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 61 help="Get factor names from a study in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 62 subparser.set_defaults(func=get_factors_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 63 subparser.add_argument('study_id') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 64 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 65 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 66 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 67 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 68 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 69 'mtbls-get-factor-values', aliases=['gfv'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 70 help="Get factor values from a study in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 71 subparser.set_defaults(func=get_factor_values_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 72 subparser.add_argument('study_id') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 73 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 74 'factor', help="The desired factor. Use `get-factors` to get the list " | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 75 "of available factors") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 76 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 77 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 78 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 79 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 80 subparser = subparsers.add_parser('mtbls-get-data-list', aliases=['gd'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 81 help="Get data files list in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 82 subparser.set_defaults(func=get_data_files_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 83 subparser.add_argument('study_id') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 84 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 85 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 86 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 87 '--json-query', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 88 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 89 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 90 '--galaxy_parameters_file', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 91 help="Path to JSON file containing input Galaxy JSON") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 92 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 93 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 94 'mtbls-get-factors-summary', aliases=['gsum'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 95 help="Get the variables summary from a study, in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 96 subparser.set_defaults(func=get_summary_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 97 subparser.add_argument('study_id') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 98 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 99 'json_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 100 help="Output JSON file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 101 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 102 'html_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 103 help="Output HTML file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 104 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 105 # isaslicer commands on path to unpacked ISA-Tab as input | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 106 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 107 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 108 'isa-tab-get-factors', aliases=['isagf'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 109 help="Get factor names from a study in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 110 subparser.set_defaults(func=isatab_get_factor_names_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 111 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 112 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 113 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 114 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 115 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 116 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 117 'zip-get-factors', aliases=['zipgf'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 118 help="Get factor names from a study in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 119 subparser.set_defaults(func=zip_get_factor_names_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 120 subparser.add_argument('input_path', type=str, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 121 help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 122 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 123 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 124 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 125 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 126 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 127 'isa-tab-get-factor-values', aliases=['isagfv'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 128 help="Get factor values from a study in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 129 subparser.set_defaults(func=isatab_get_factor_values_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 130 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 131 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 132 'factor', help="The desired factor. Use `get-factors` to get the list " | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 133 "of available factors") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 134 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 135 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 136 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 137 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 138 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 139 'zip-get-factor-values', aliases=['zipgfv'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 140 help="Get factor values from a study in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 141 subparser.set_defaults(func=zip_get_factor_values_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 142 subparser.add_argument('input_path', type=str, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 143 help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 144 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 145 'factor', help="The desired factor. Use `get-factors` to get the list " | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 146 "of available factors") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 147 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 148 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 149 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 150 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 151 subparser = subparsers.add_parser('isa-tab-get-data-list', aliases=['isagdl'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 152 help="Get data files list in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 153 subparser.set_defaults(func=isatab_get_data_files_list_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 154 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 155 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 156 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 157 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 158 '--json-query', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 159 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 160 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 161 '--galaxy_parameters_file', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 162 help="Path to JSON file containing input Galaxy JSON") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 163 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 164 subparser = subparsers.add_parser('zip-get-data-list', aliases=['zipgdl'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 165 help="Get data files list in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 166 subparser.set_defaults(func=zip_get_data_files_list_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 167 subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 168 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 169 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 170 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 171 '--json-query', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 172 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 173 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 174 '--galaxy_parameters_file', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 175 help="Path to JSON file containing input Galaxy JSON") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 176 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 177 subparser = subparsers.add_parser('isa-tab-get-data-collection', aliases=['isagdc'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 178 help="Get data files collection") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 179 subparser.set_defaults(func=isatab_get_data_files_collection_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 180 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 181 subparser.add_argument('output_path', type=str, help="Output data files path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 182 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 183 '--json-query', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 184 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 185 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 186 '--galaxy_parameters_file', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 187 help="Path to JSON file containing input Galaxy JSON") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 188 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 189 subparser = subparsers.add_parser('zip-get-data-collection', aliases=['zipgdc'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 190 help="Get data files collection") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 191 subparser.set_defaults(func=zip_get_data_files_collection_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 192 subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 193 subparser.add_argument('output_path', type=str, help="Output data files path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 194 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 195 '--json-query', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 196 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 197 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 198 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 199 'isa-tab-get-factors-summary', aliases=['isasum'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 200 help="Get the variables summary from a study, in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 201 subparser.set_defaults(func=isatab_get_factors_summary_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 202 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 203 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 204 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 205 help="Output file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 206 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 207 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 208 'zip-get-factors-summary', aliases=['zipsum'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 209 help="Get the variables summary from a study, in json format") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 210 subparser.set_defaults(func=zip_get_factors_summary_command) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 211 subparser.add_argument('input_path', type=str, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 212 help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 213 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 214 'json_output', nargs='?', type=argparse.FileType('w'), | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 215 default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 216 help="Output JSON file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 217 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 218 'html_output', nargs='?', type=argparse.FileType('w'), | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 219 default=sys.stdout, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 220 help="Output HTML file") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 221 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 222 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 223 'isaslicer2-slice', aliases=['slice2'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 224 help="Slice ISA-Tabs version 2") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 225 subparser.set_defaults(func=query_isatab) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 226 subparser.add_argument('--source_dir', type=str, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 227 help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 228 subparser.add_argument( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 229 '--galaxy_parameters_file', type=argparse.FileType(mode='r'), | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 230 help="Path to JSON file containing input Galaxy JSON") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 231 subparser.add_argument('--output', type=argparse.FileType(mode='w'), | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 232 help="Input ISA-Tab zip path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 233 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 234 subparser = subparsers.add_parser( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 235 'filter-data', aliases=['filter'], | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 236 help="Filter out data based on slicer2") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 237 subparser.set_defaults(func=filter_data) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 238 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 239 subparser.add_argument('output_path', type=str, help="Output data files path") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 240 subparser.add_argument('--slice', type=argparse.FileType(mode='r'), | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 241 help="slice") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 242 subparser.add_argument('--filename_filter', type=str, help="shell-like wildcard to filter files") | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 243 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 244 return parser | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 245 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 246 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def filter_data(options):
    """Symlink the data files listed in a slice result into an output directory.

    Reads the JSON document from ``options.slice``, collects every file name
    found under ``results[*].data_files``, and creates a symbolic link in
    ``options.output_path`` for each file in ``options.input_path`` whose base
    name is in that collection and which matches ``options.filename_filter``.
    A summary of what was linked is written to ``cli.log`` in the current
    working directory once all links have been created.

    Args:
        options: Parsed command-line namespace providing ``input_path``,
            ``output_path``, ``slice`` (an open, readable JSON file object)
            and ``filename_filter`` (a shell-like wildcard, or ``None``).

    Raises:
        IOError: If ``input_path`` is given but does not exist, or if no
            slice file was supplied.
        SystemExit: With status 1 if a symbolic link cannot be created.
    """
    source_dir = options.input_path or ""
    output_path = options.output_path
    # --filename_filter is optional; without a pattern every (non-hidden)
    # file in the source directory is a candidate instead of crashing in
    # os.path.join() on a None component.
    filename_filter = options.filename_filter or "*"

    if source_dir and not os.path.exists(source_dir):
        raise IOError('Source path does not exist!')

    slice_json = options.slice
    if slice_json is None:
        # --slice is optional on the command line but nothing can be
        # selected without it; fail with a clear message.
        raise IOError('Slice file is required (--slice)!')

    # A set both removes duplicates across results and gives O(1) lookups.
    wanted_names = set()
    for result in json.load(slice_json)['results']:
        wanted_names.update(result.get('data_files', []))

    candidates = glob.glob(os.path.join(source_dir, filename_filter))
    to_link = [path for path in candidates
               if os.path.basename(path) in wanted_names]

    loglines = ["Using slice results from {}\n".format(slice_json.name)]
    for filepath in to_link:
        name = os.path.basename(filepath)
        loglines.append("Copying {}\n".format(name))
        try:
            os.symlink(filepath, os.path.join(output_path, name))
        except OSError as e:
            # Report on stderr and stop at the first failure; sys.exit is
            # used because the bare exit() helper is only injected by the
            # site module.
            print(e, file=sys.stderr)
            sys.exit(1)

    with open('cli.log', 'w') as fp:
        fp.writelines(loglines)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 282 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 283 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 284 def query_isatab(options): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 285 source_dir = options.source_dir if options.source_dir else "" | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 286 galaxy_parameters_file = options.galaxy_parameters_file | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 287 output = options.output | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 288 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 289 debug = True | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 290 if galaxy_parameters_file: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 291 galaxy_parameters = json.load(galaxy_parameters_file) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 292 print('Galaxy parameters:') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 293 print(json.dumps(galaxy_parameters, indent=4)) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 294 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 295 raise IOError('Could not load Galaxy parameters file!') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 296 if source_dir: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 297 if not os.path.exists(source_dir): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 298 raise IOError('Source path does not exist!') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 299 query = galaxy_parameters['query'] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 300 if debug: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 301 print('Query is:') | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 302 print(json.dumps(query, indent=4)) # for debugging only | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 303 if source_dir: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 304 investigation = isatab.load(source_dir) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 305 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 306 tmp = tempfile.mkdtemp() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 307 _ = MTBLS.get(galaxy_parameters['input']['mtbls_id'], tmp) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 308 investigation = isatab.load(tmp) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 309 # filter assays by mt/tt | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 310 matching_assays = [] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 311 mt = query.get('measurement_type').strip() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 312 tt = query.get('technology_type').strip() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 313 if mt and tt: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 314 for study in investigation.studies: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 315 matching_assays.extend( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 316 [x for x in study.assays if x.measurement_type.term == mt | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 317 and x.technology_type.term == tt]) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 318 elif mt and not tt: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 319 for study in investigation.studies: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 320 matching_assays.extend( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 321 [x for x in study.assays if x.measurement_type.term == mt]) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 322 elif not mt and tt: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 323 for study in investigation.studies: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 324 matching_assays.extend( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 325 [x for x in study.assays if x.technology_type.term == tt]) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 326 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 327 for study in investigation.studies: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 328 matching_assays.extend(study.assays) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 329 assay_samples = [] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 330 for assay in matching_assays: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 331 assay_samples.extend(assay.samples) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 332 if debug: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 333 print('Total samples: {}'.format(len(assay_samples))) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 334 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 335 # filter samples by fv | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 336 factor_selection = { | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 337 x.get('factor_name').strip(): x.get('factor_value').strip() for x in | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 338 query.get('factor_selection', [])} | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 339 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 340 fv_samples = set() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 341 if factor_selection: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 342 samples_to_remove = set() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 343 for f, v in factor_selection.items(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 344 for sample in assay_samples: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 345 for fv in [x for x in sample.factor_values if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 346 x.factor_name.name == f]: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 347 if isinstance(fv.value, OntologyAnnotation): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 348 if fv.value.term == v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 349 fv_samples.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 350 elif fv.value == v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 351 fv_samples.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 352 for f, v in factor_selection.items(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 353 for sample in fv_samples: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 354 for fv in [x for x in sample.factor_values if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 355 x.factor_name.name == f]: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 356 if isinstance(fv.value, OntologyAnnotation): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 357 if fv.value.term != v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 358 samples_to_remove.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 359 elif fv.value != v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 360 samples_to_remove.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 361 final_fv_samples = fv_samples.difference(samples_to_remove) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 362 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 363 final_fv_samples = assay_samples | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 364 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 365 # filter samples by characteristic | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 366 characteristics_selection = { | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 367 x.get('characteristic_name').strip(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 368 x.get('characteristic_value').strip() for x in | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 369 query.get('characteristics_selection', [])} | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 370 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 371 cv_samples = set() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 372 if characteristics_selection: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 373 first_pass = True | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 374 samples_to_remove = set() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 375 for c, v in characteristics_selection.items(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 376 if first_pass: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 377 for sample in final_fv_samples: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 378 for cv in [x for x in sample.characteristics if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 379 x.category.term == c]: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 380 if isinstance(cv.value, OntologyAnnotation): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 381 if cv.value.term == v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 382 cv_samples.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 383 elif cv.value == v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 384 cv_samples.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 385 for source in sample.derives_from: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 386 for cv in [x for x in source.characteristics if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 387 x.category.term == c]: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 388 if isinstance(cv.value, OntologyAnnotation): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 389 if cv.value.term == v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 390 cv_samples.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 391 elif cv.value == v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 392 cv_samples.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 393 first_pass = False | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 394 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 395 for sample in cv_samples: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 396 for cv in [x for x in sample.characteristics if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 397 x.category.term == c]: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 398 if isinstance(cv.value, OntologyAnnotation): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 399 if cv.value.term != v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 400 samples_to_remove.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 401 elif cv.value != v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 402 samples_to_remove.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 403 for source in sample.derives_from: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 404 for cv in [x for x in source.characteristics if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 405 x.category.term == c]: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 406 if isinstance(cv.value, OntologyAnnotation): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 407 if cv.value.term != v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 408 samples_to_remove.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 409 elif cv.value != v: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 410 samples_to_remove.add(sample) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 411 final_cv_samples = cv_samples.difference(samples_to_remove) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 412 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 413 final_cv_samples = final_fv_samples | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 414 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 415 # filter samples by process parameter | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 416 parameters_selection = { | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 417 x.get('parameter_name').strip(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 418 x.get('parameter_value').strip() for x in | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 419 query.get('parameter_selection', [])} | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 420 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 421 final_samples = final_cv_samples | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 422 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 423 if debug: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 424 print('Final number of samples: {}'.format(len(final_samples))) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 425 results = [] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 426 for sample in final_samples: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 427 results.append({ | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 428 'sample_name': sample.name, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 429 'data_files': [] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 430 }) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 431 for result in results: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 432 sample_name = result['sample_name'] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 433 if source_dir: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 434 table_files = glob.iglob(os.path.join(source_dir, 'a_*')) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 435 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 436 table_files = glob.iglob(os.path.join(tmp, 'a_*')) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 437 for table_file in table_files: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 438 with open(table_file) as fp: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 439 df = isatab.load_table(fp) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 440 data_files = [] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 441 table_headers = list(df.columns.values) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 442 sample_rows = df.loc[df['Sample Name'] == sample_name] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 443 data_node_labels = [ | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 444 'Raw Data File', 'Raw Spectral Data File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 445 'Derived Spectral Data File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 446 'Derived Array Data File', 'Array Data File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 447 'Protein Assignment File', 'Peptide Assignment File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 448 'Post Translational Modification Assignment File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 449 'Acquisition Parameter Data File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 450 'Free Induction Decay Data File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 451 'Derived Array Data Matrix File', 'Image File', | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 452 'Derived Data File', 'Metabolite Assignment File'] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 453 if parameters_selection: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 454 for p, v in parameters_selection.items(): | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 455 sample_pv_rows = sample_rows.loc[ | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 456 sample_rows['Parameter Value[{}]'.format(p)] == v] | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 457 for node_label in data_node_labels: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 458 if node_label in table_headers: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 459 data_files.extend( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 460 list(sample_pv_rows[node_label])) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 461 result['data_files'].extend(list(set( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 462 i for i in list(data_files) if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 463 str(i) not in ('nan', '')))) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 464 else: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 465 for node_label in data_node_labels: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 466 if node_label in table_headers: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 467 data_files.extend(list(sample_rows[node_label])) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 468 result['data_files'].extend( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 469 list(set(i for i in list(data_files) if | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 470 str(i) not in ('nan', '')))) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 471 results_json = { | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 472 'query': query, | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 473 'results': results | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 474 } | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 475 json.dump(results_json, output, indent=4) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 476 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 477 # if galaxy_parameters['input']['collection_output']: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 478 # logger = logging.getLogger() | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 479 # logger.debug("copying data files to %s", os.path.dirname(output)) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 480 # for result in results: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 481 # for data_file_name in result['data_files']: | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 482 # logging.info("Copying {}".format(data_file_name)) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 483 # shutil.copy(os.path.join(source_dir, data_file_name), | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 484 # os.path.dirname(output)) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 485 # logger.info( | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 486 # "Finished writing data files to {}".format(os.path.dirname(output))) | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 487 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 488 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_study_archive_command(options):
    """Download a study and pack the downloaded directory into an archive.

    Reads ``options.study_id``, ``options.output`` (archive base name, handed
    to ``shutil.make_archive``) and ``options.format`` (archive format name).

    Raises:
        RuntimeError: if ``MTBLS.get`` returns ``None``.
    """
    study_id = options.study_id
    logger.info("Downloading study %s into archive at path %s.%s",
                study_id, options.output, options.format)

    download_dir = MTBLS.get(study_id)
    logger.debug("MTBLS.get returned '%s'", download_dir)

    # Nothing was downloaded, so there is nothing to archive or clean up.
    if download_dir is None:
        raise RuntimeError("Error downloading ISA study")

    try:
        shutil.make_archive(
            base_name=options.output,
            format=options.format,
            root_dir=download_dir,
            logger=logger)
        logger.info("ISA archive written")
    finally:
        # Best-effort removal of the download directory, whether or not the
        # archive could be written.
        logger.debug("Trying to clean up tmp dir %s", download_dir)
        shutil.rmtree(download_dir, ignore_errors=True)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 507 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 508 # mtblisa commands | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 509 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 510 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_study_command(options):
    """Download a study into ``options.output`` as ISA-Tab or ISA-JSON.

    Reads ``options.output`` (destination path, must not exist yet),
    ``options.isa_format`` (``"isa-tab"`` or ``"isa-json"``) and
    ``options.study_id``.

    Raises:
        RuntimeError: if the destination already exists or the download
            returned ``None``.
        ValueError: if ``options.isa_format`` is neither supported value.
    """
    destination = options.output
    if os.path.exists(destination):
        raise RuntimeError(
            "Selected output path {} already exists!".format(destination))

    isa_format = options.isa_format
    if isa_format not in ("isa-tab", "isa-json"):
        raise ValueError(
            "BUG! Got an invalid isa format '{}'".format(isa_format))

    if isa_format == "isa-json":
        document = MTBLS.getj(options.study_id)
        if document is None:
            raise RuntimeError("Error downloading ISA study")

        logger.debug(
            "Finished downloading data. Dumping json to final location %s",
            destination)
        os.makedirs(destination)
        # The JSON file is named after the study identifier in the document.
        file_name = "{}.json".format(document['identifier'])
        with open(os.path.join(destination, file_name), 'w') as handle:
            json.dump(document, handle)
        logger.info("ISA-JSON written to %s", destination)
        return

    # Remaining case: isa-tab. The download is moved to its final location.
    staging_dir = None
    try:
        logger.info("Downloading study %s", options.study_id)
        staging_dir = MTBLS.get(options.study_id)
        if staging_dir is None:
            raise RuntimeError("Error downloading ISA study")

        logger.debug(
            "Finished downloading data. Moving to final location %s",
            destination)
        shutil.move(staging_dir, destination)
        logger.info("ISA archive written to %s", destination)
    finally:
        if staging_dir:
            # Remove whatever may be left at the download location; errors
            # are ignored, so this is a no-op after a successful move.
            logger.debug("Deleting %s, if there's anything there", staging_dir)
            shutil.rmtree(staging_dir, ignore_errors=True)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 551 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 552 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_factors_command(options):
    """Write the factor names of a study to ``options.output`` as JSON.

    Reads ``options.study_id`` and ``options.output`` (an open, writable
    file object whose ``name`` attribute is used for logging).

    Raises:
        RuntimeError: if ``MTBLS.get_factor_names`` returns ``None``.
    """
    study_id = options.study_id
    sink = options.output
    logger.info("Getting factors for study %s. Writing to %s.",
                study_id, sink.name)

    names = MTBLS.get_factor_names(study_id)
    if names is None:
        raise RuntimeError("Error downloading factors.")

    # Materialise as a list so the result is JSON-serialisable.
    json.dump(list(names), sink, indent=4)
    logger.debug("Factor names written")
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 562 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 563 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_factor_values_command(options):
    """Write the values of one factor of a study to ``options.output`` as JSON.

    Reads ``options.study_id``, ``options.factor`` and ``options.output``
    (an open, writable file object whose ``name`` attribute is used for
    logging).

    Raises:
        RuntimeError: if ``MTBLS.get_factor_values`` returns ``None``.
    """
    # Lazy %-style arguments: the message is only formatted when the record
    # is actually emitted, matching the other commands in this module.
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        options.factor, options.study_id, options.output.name)

    fvs = MTBLS.get_factor_values(options.study_id, options.factor)
    if fvs is None:
        raise RuntimeError("Error getting factor values")

    # Materialise as a list so the result is JSON-serialisable.
    json.dump(list(fvs), options.output, indent=4)
    # Log the file name rather than the file object's repr.
    logger.debug("Factor values written to %s", options.output.name)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 573 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 574 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_data_files_command(options):
    """Write the data files of a study to ``options.output`` as JSON.

    The optional filter is taken from, in order of precedence:

    * ``options.json_query`` -- a JSON string, parsed and passed through;
    * ``options.galaxy_parameters_file`` -- path to a JSON file whose
      ``factor_value_series`` entries are turned into a
      ``{factor_name: factor_value}`` mapping;
    * neither -- ``MTBLS.get_data_files`` is called without a filter.

    ``options.output`` is an open, writable file object whose ``name``
    attribute is used for logging.

    Raises:
        RuntimeError: if ``MTBLS.get_data_files`` returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.study_id, options.output.name)

    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        json_struct = json.loads(options.json_query)
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        # Only keep the parameters file open while it is being parsed.
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        json_struct = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    else:
        logger.debug("No query was specified")
        data_files = MTBLS.get_data_files(options.study_id)

    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # Log the file name (not the file object's repr) with lazy formatting,
    # consistent with the debug message above.
    logger.info("Finished writing data files to %s", options.output.name)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 602 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 603 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def build_html_data_files_list(data_files_list):
    """Render a list of sample/data-file records as a small HTML page.

    :param data_files_list: iterable of mappings, each with a ``'sample'``
        entry (the sample name) and a ``'data_files'`` entry (an iterable of
        file-name strings).
    :return: a string holding a complete HTML document with one table row
        per record.
    :raises KeyError: if a record lacks ``'sample'`` or ``'data_files'``.
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    from html import escape

    rows = ['<tr><th>Sample Name</th><th>Data File Names</th></tr>']
    for data_file in data_files_list:
        # Escape the values: sample and file names come from study metadata
        # and may contain '<', '>' or '&'.
        sample_name = escape(str(data_file['sample']), quote=False)
        data_files = escape(', '.join(data_file['data_files']), quote=False)
        rows.append(
            '<tr><td>{sample_name}</td><td>{data_files}</td></tr>'
            .format(sample_name=sample_name, data_files=data_files))
    # Every row is closed with </tr> and the table itself with </table>.
    data_files_table = '<table>' + ''.join(rows) + '</table>'

    html_data_files_list = """
<html>
<head>
<title>ISA-Tab Factors Summary</title>
</head>
<body>
{summary_table}
</body>
</html>
""".format(summary_table=data_files_table)
    return html_data_files_list
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 623 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 624 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def build_html_summary(summary):
    """Render a study-variable summary as a small HTML page.

    Samples are grouped by their combination of factor values (every entry
    of a record other than ``'sample_name'``), and the page lists each
    group together with the number of samples in it.

    :param summary: iterable of mappings, each with a ``'sample_name'``
        entry plus one entry per study factor.
    :return: a string holding a complete HTML document.
    :raises KeyError: if a record has no ``'sample_name'`` entry.
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    from html import escape

    study_groups = {}
    for item in summary:
        # format() rather than str.join so non-string factor values
        # (numbers, for instance) do not raise TypeError.
        study_factors = [
            '{}: {}'.format(factor_name, factor_value)
            for factor_name, factor_value in item.items()
            if factor_name != "sample_name"
        ]
        study_group = ', '.join(study_factors)
        study_groups.setdefault(study_group, []).append(item['sample_name'])

    rows = ['<tr><th>Study group</th><th>Number of samples</th></tr>']
    for study_group, sample_names in study_groups.items():
        rows.append(
            '<tr><td>{study_group}</td><td>{num_samples}</td></tr>'
            .format(study_group=escape(study_group, quote=False),
                    num_samples=len(sample_names)))
    summary_table = '<table>' + ''.join(rows) + '</table>'

    html_summary = """
<html>
<head>
<title>ISA-Tab Factors Summary</title>
</head>
<body>
{summary_table}
</body>
</html>
""".format(summary_table=summary_table)
    return html_summary
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 655 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 656 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_summary_command(options):
    """Fetch the variable summary of a study and write it as JSON and HTML.

    :param options: parsed command-line options; reads ``study_id``,
        ``json_output`` (an open writable file object) and ``html_output``
        (an open writable file object, closed once the page is written).
    :raises RuntimeError: if no summary could be obtained for the study.
    """
    study_id = options.study_id
    logger.info("Getting summary for study %s. Writing to %s.",
                study_id, options.json_output.name)

    summary = MTBLS.get_study_variable_summary(study_id)
    if summary is None:
        raise RuntimeError("Error getting study summary")

    json.dump(summary, options.json_output, indent=4)
    logger.debug("Summary dumped to JSON")

    page = build_html_summary(summary)
    with options.html_output as html_fp:
        html_fp.write(page)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 675 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 676 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 677 # isaslicer commands | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 678 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def isatab_get_data_files_list_command(options):
    """List the data files of an ISA-Tab directory as JSON.

    The factor selection comes from ``options.json_query`` (a JSON string)
    when set, otherwise from the ``factor_value_series`` entries of the
    JSON file named by ``options.galaxy_parameters_file``, otherwise no
    selection is applied.

    :param options: parsed command-line options; reads ``input_path``,
        ``output`` (an open writable file object), ``json_query`` and
        ``galaxy_parameters_file``.
    :raises RuntimeError: if slicing returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)

    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as params_fp:
            series = json.load(params_fp)['factor_value_series']
            factor_selection = {
                entry['factor_name']: entry['factor_value']
                for entry in series
            }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    data_files = slice_data_files(options.input_path,
                                  factor_selection=factor_selection)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    logger.info("Finished writing data files to {}".format(options.output))
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 707 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 708 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def zip_get_data_files_list_command(options):
    """List the data files of a zipped ISA-Tab archive as JSON.

    The archive is extracted to a temporary directory, sliced with
    ``slice_data_files`` and the result is dumped to ``options.output``.
    The temporary directory is removed even when an error occurs.

    The factor selection comes from ``options.json_query`` (a JSON string)
    when set, otherwise from the ``factor_value_series`` entries of the
    JSON file named by ``options.galaxy_parameters_file``, otherwise no
    selection is applied.

    :param options: parsed command-line options; reads ``input_path`` (the
        zip file), ``output`` (an open writable file object),
        ``json_query`` and ``galaxy_parameters_file``.
    :raises RuntimeError: if slicing returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    # Open the archive first so an invalid zip fails before any temporary
    # directory is created; TemporaryDirectory then guarantees cleanup on
    # every exit path, including the RuntimeError raised below.
    with zipfile.ZipFile(options.input_path) as zfp, \
            tempfile.TemporaryDirectory() as tmpdir:
        zfp.extractall(path=tmpdir)
        data_files = slice_data_files(tmpdir,
                                      factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")
        logger.debug("dumping data files to %s", options.output.name)
        # Materialise the result while the extracted files still exist, in
        # case slice_data_files returns a lazy iterable.
        json.dump(list(data_files), options.output, indent=4)
    # Log the file name, not the file object's repr.
    logger.info("Finished writing data files to %s", options.output.name)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 740 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 741 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def isatab_get_data_files_collection_command(options):
    """Copy the selected data files of an ISA-Tab directory to a folder.

    The factor selection comes from ``options.json_query`` (a JSON string)
    when set, otherwise from the ``factor_value_series`` entries of the
    JSON file named by ``options.galaxy_parameters_file``, otherwise no
    selection is applied.

    :param options: parsed command-line options; reads ``input_path``,
        ``output_path``, ``json_query`` and ``galaxy_parameters_file``.
    :raises RuntimeError: if slicing returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output_path)
    input_path = options.input_path
    output_path = options.output_path

    # Truthiness check, as in the list commands: an empty query string is
    # treated as "no query" rather than being handed to json.loads.
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        # Logged only when neither source provided a query.
        logger.debug("No query was specified")
        factor_selection = None

    data_files = slice_data_files(input_path,
                                  factor_selection=factor_selection)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("copying data files to %s", output_path)
    for entry in data_files:
        for data_file_name in entry['data_files']:
            # Use the module logger, not the root logger.
            logger.info("Copying %s", data_file_name)
            shutil.copy(os.path.join(input_path, data_file_name), output_path)
    logger.info("Finished writing data files to %s", output_path)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 776 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 777 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def zip_get_data_files_collection_command(options):
    """Copy the selected data files of a zipped ISA-Tab archive to a folder.

    The archive is extracted to a temporary directory, sliced with
    ``slice_data_files`` and every listed data file is copied to
    ``options.output_path``. The temporary directory is removed even when
    an error occurs.

    :param options: parsed command-line options; reads ``input_path`` (the
        zip file), ``output_path`` and ``json_query`` (a JSON string).
    :raises RuntimeError: if slicing returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output_path)
    input_path = options.input_path
    output_path = options.output_path

    # NOTE(review): unlike the other commands, this one does not read
    # options.galaxy_parameters_file -- confirm whether that is intended.
    # Truthiness check, as in the list commands: an empty query string is
    # treated as "no query" rather than being handed to json.loads.
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    else:
        logger.debug("No query was specified")
        factor_selection = None

    # Open the archive first so an invalid zip fails before any temporary
    # directory is created; TemporaryDirectory then guarantees cleanup on
    # every exit path, including the RuntimeError raised below.
    with zipfile.ZipFile(input_path) as zfp, \
            tempfile.TemporaryDirectory() as tmpdir:
        zfp.extractall(path=tmpdir)
        data_files = slice_data_files(tmpdir,
                                      factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")
        logger.debug("copying data files to %s", output_path)
        for entry in data_files:
            for data_file_name in entry['data_files']:
                # Use the module logger, not the root logger.
                logger.info("Copying %s", data_file_name)
                shutil.copy(os.path.join(tmpdir, data_file_name), output_path)
    logger.info("Finished writing data files to %s", output_path)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 807 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 808 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def slice_data_files(dir, factor_selection=None):
    """Collect, per sample, the data files referenced by the ISA-Tab tables.

    The function works in two passes over the table files found directly
    inside ``dir``:

    1. every ``s_*`` and ``a_*`` table is scanned for sample names (all of
       them, or only those matching ``factor_selection``);
    2. every ``a_*`` table is scanned for the data-file columns of the rows
       belonging to each selected sample.

    Args:
        dir: Directory containing the ``s_*`` / ``a_*`` table files. (The
            name shadows the builtin; it is kept so keyword callers still
            work.)
        factor_selection: Optional mapping of factor name to factor value.
            A sample is selected as soon as it matches *any* one of the
            pairs in *any* table that has the corresponding
            ``Factor Value[<name>]`` column. ``None`` selects every sample.

    Returns:
        A list with one dict per distinct sample name, in discovery order.
        Each dict has the keys ``'sample'`` and ``'data_files'`` and, only
        when ``factor_selection`` was given, ``'query_used'``.

    Raises:
        KeyError: If a scanned table has no ``'Sample Name'`` column.
    """
    data_node_labels = (
        'Raw Data File',
        'Raw Spectral Data File',
        'Derived Spectral Data File',
        'Derived Array Data File',
        'Array Data File',
        'Protein Assignment File',
        'Peptide Assignment File',
        'Post Translational Modification Assignment File',
        'Acquisition Parameter Data File',
        'Free Induction Decay Data File',
        'Derived Array Data Matrix File',
        'Image File',
        'Derived Data File',
        'Metabolite Assignment File',
    )
    results = []

    def add_sample(sample_name):
        # Equality (not hashing) is used on purpose so that the treatment of
        # NaN sample names stays exactly as before (nan != nan).
        if any(entry['sample'] == sample_name for entry in results):
            return
        entry = {'sample': sample_name, 'data_files': []}
        if factor_selection is not None:
            entry['query_used'] = factor_selection
        results.append(entry)

    # First pass: collect the matching samples.
    # '[as]_*' rather than '[a|s]_*': inside a glob character class the '|'
    # is a literal character, not an alternation.
    for table_file in glob.iglob(os.path.join(dir, '[as]_*')):
        logger.info('Loading %s', table_file)

        # glob already returns the path prefixed with ``dir``; joining it
        # onto ``dir`` again breaks when ``dir`` is a relative path.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

            if factor_selection is None:
                selections = [df['Sample Name']]
            else:
                headers = list(df.columns.values)
                selections = []
                for factor_name, factor_value in factor_selection.items():
                    column = 'Factor Value[{}]'.format(factor_name)
                    if column in headers:
                        selections.append(
                            df.loc[df[column] == factor_value]['Sample Name'])

            for sample_names in selections:
                for sample_name in sample_names:
                    add_sample(sample_name)

    if not results:
        return results

    # Second pass: collect the data files relating to the samples. Each
    # assay table is read once and its files are *appended* to every sample,
    # so files from several assay tables accumulate instead of the last
    # table overwriting what the earlier ones contributed.
    for table_file in glob.iglob(os.path.join(dir, 'a_*')):
        with open(table_file) as fp:
            df = isatab.load_table(fp)

            headers = list(df.columns.values)
            file_columns = [label for label in data_node_labels
                            if label in headers]

            for result in results:
                sample_rows = df.loc[df['Sample Name'] == result['sample']]
                for label in file_columns:
                    # Empty cells come back as NaN; drop them.
                    result['data_files'].extend(
                        name for name in sample_rows[label]
                        if str(name) != 'nan')
    return results
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 891 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 892 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def isatab_get_factor_names_command(options):
    """Write the factor names used in an ISA-Tab directory as a JSON list.

    Every ``s_*`` / ``a_*`` table directly inside ``options.input_path`` is
    loaded and the names inside its ``Factor Value[<name>]`` column headers
    are gathered. The distinct names are written, sorted, to
    ``options.output``; an empty list is written when none are found.

    Args:
        options: Parsed command-line options. Reads ``input_path`` (the
            directory to scan) and ``output`` (an open, writable text file
            object whose ``name`` is only used for logging).
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    factor_header = re.compile(r'Factor Value\[(.*?)\]')
    prefix_len = len('Factor Value[')  # == 13
    factors = set()

    # '[as]_*' rather than '[a|s]_*': '|' is a literal inside a glob class.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already returns the path prefixed with ``input_path``;
        # joining again breaks when ``input_path`` is relative.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

            for header in list(df.columns.values):
                if factor_header.match(header):
                    # Strip the 'Factor Value[' prefix and the closing ']'.
                    factors.add(header[prefix_len:-1])

    # ``factors`` is always a set, so there is no failure branch here.
    # Sorting makes the output independent of set iteration order.
    json.dump(sorted(factors), options.output, indent=4)
    logger.debug("Factor names written")
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 913 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 914 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def zip_get_factor_names_command(options):
    """Write the factor names used in a zipped ISA-Tab study as a JSON list.

    The archive at ``options.input_path`` is extracted into a temporary
    directory, every ``s_*`` / ``a_*`` table at its top level is loaded, and
    the names inside the ``Factor Value[<name>]`` column headers are
    gathered. The distinct names are written, sorted, to ``options.output``;
    an empty list is written when none are found.

    Args:
        options: Parsed command-line options. Reads ``input_path`` (path of
            the zip archive) and ``output`` (an open, writable text file
            object whose ``name`` is only used for logging).
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    factor_header = re.compile(r'Factor Value\[(.*?)\]')
    prefix_len = len('Factor Value[')  # == 13
    factors = set()

    # The context manager removes the extraction directory even when
    # unpacking or table loading raises.
    with tempfile.TemporaryDirectory() as tmpdir:
        # unpack input_path
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)

        # '[as]_*' rather than '[a|s]_*': '|' is a literal inside a glob
        # character class.
        for table_file in glob.iglob(os.path.join(tmpdir, '[as]_*')):
            logger.info('Searching %s', table_file)
            # glob already returns the full path below ``tmpdir``.
            with open(table_file) as fp:
                df = isatab.load_table(fp)

                for header in list(df.columns.values):
                    if factor_header.match(header):
                        # Strip 'Factor Value[' and the closing ']'.
                        factors.add(header[prefix_len:-1])

    # ``factors`` is always a set, so there is no failure branch here.
    # Sorting makes the output independent of set iteration order.
    json.dump(sorted(factors), options.output, indent=4)
    logger.debug("Factor names written")
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 941 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 942 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def isatab_get_factor_values_command(options):
    """Write the distinct values of one factor in an ISA-Tab directory as JSON.

    Every ``s_*`` / ``a_*`` table directly inside ``options.input_path`` that
    has a ``Factor Value[<options.factor>]`` column contributes the values of
    that column. Empty (NaN) cells and values that are not ``str``, ``int``
    or ``float`` are skipped. The distinct values are written to
    ``options.output`` as a JSON list (numbers first, then strings, each
    group sorted); an empty list is written when nothing is found.

    Args:
        options: Parsed command-line options. Reads ``input_path`` (the
            directory to scan), ``factor`` (the factor name) and ``output``
            (an open, writable text file object).
    """
    input_path = options.input_path
    factor_name = options.factor
    logger.info("Getting values for factor {factor} in study {input_path}. Writing to {output_file}."
                .format(factor=factor_name, input_path=input_path, output_file=options.output.name))

    column = 'Factor Value[{factor}]'.format(factor=factor_name)
    fvs = set()

    # '[as]_*' rather than '[a|s]_*': '|' is a literal inside a glob class.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already returns the path prefixed with ``input_path``;
        # joining again breaks when ``input_path`` is relative.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

            if column not in list(df.columns.values):
                continue

            # Series.items() replaces iteritems(), which pandas 2.0 removed.
            for _, value in df[column].items():
                try:
                    # Convert numpy scalars to plain Python values so they
                    # can be serialised; str/int/float have no .item().
                    value = value.item()
                except AttributeError:
                    pass

                if isinstance(value, (str, int, float)) and str(value) != 'nan':
                    fvs.add(value)

    # ``fvs`` is always a set, so there is no failure branch here. The sort
    # key never compares a number with a string and makes the output
    # independent of set iteration order.
    json.dump(sorted(fvs, key=lambda v: (isinstance(v, str), v)),
              options.output, indent=4)
    logger.debug("Factor values written to {}".format(options.output))
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 973 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 974 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def zip_get_factor_values_command(options):
    """Write the distinct values of one factor in a zipped ISA-Tab study as JSON.

    The archive at ``options.input_path`` is extracted into a temporary
    directory. Every ``s_*`` / ``a_*`` table at its top level that has a
    ``Factor Value[<options.factor>]`` column contributes the values of that
    column. Empty (NaN) cells and values that are not ``str``, ``int`` or
    ``float`` are skipped. The distinct values are written to
    ``options.output`` as a JSON list (numbers first, then strings, each
    group sorted); an empty list is written when nothing is found.

    Args:
        options: Parsed command-line options. Reads ``input_path`` (path of
            the zip archive), ``factor`` (the factor name) and ``output``
            (an open, writable text file object).
    """
    input_path = options.input_path
    factor_name = options.factor
    logger.info("Getting values for factor {factor} in study {input_path}. "
                "Writing to {output_file}.".format(
                    factor=factor_name, input_path=input_path,
                    output_file=options.output.name))

    column = 'Factor Value[{factor}]'.format(factor=factor_name)
    fvs = set()

    # The context manager removes the extraction directory even when
    # unpacking or table loading raises.
    with tempfile.TemporaryDirectory() as tmpdir:
        # unpack input_path
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)

        # '[as]_*' rather than '[a|s]_*': '|' is a literal inside a glob
        # character class.
        for table_file in glob.glob(os.path.join(tmpdir, '[as]_*')):
            logger.info('Searching %s', table_file)
            # Open the extracted table directly: ``input_path`` is the zip
            # file itself, so it must not be used as a parent directory.
            with open(table_file) as fp:
                df = isatab.load_table(fp)

                if column not in list(df.columns.values):
                    continue

                # Series.items() replaces iteritems(), removed in pandas 2.0.
                for _, value in df[column].items():
                    try:
                        # Convert numpy scalars to plain Python values so
                        # they can be serialised; str/int/float have no
                        # .item().
                        value = value.item()
                    except AttributeError:
                        pass

                    if (isinstance(value, (str, int, float))
                            and str(value) != 'nan'):
                        fvs.add(value)

    # ``fvs`` is always a set, so there is no failure branch here. The sort
    # key never compares a number with a string and makes the output
    # independent of set iteration order.
    json.dump(sorted(fvs, key=lambda v: (isinstance(v, str), v)),
              options.output, indent=4)
    logger.debug("Factor values written to {}".format(options.output))
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1013 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1014 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def isatab_get_factors_summary_command(options):
    """Write a per-sample factor-value summary of an ISA-Tab study as JSON.

    The study is loaded with ``isatab.load`` from ``options.input_path``.
    One record is built per sample, holding ``'sample_name'`` plus one entry
    per factor value whose value is a ``str``/``int``/``float`` or an
    ``OntologyAnnotation`` (in which case its ``term`` is used).  Columns
    that hold exactly one distinct value across all records are dropped
    before the records are serialised.

    :param options: object providing ``input_path`` (location of the
        ISA-Tab) and ``output`` (a writable file object with a ``name``
        attribute) that receives the JSON
    :raises RuntimeError: if no summary could be produced
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.output.name)
    investigation = isatab.load(options.input_path)

    # Gather the samples of every study before building the records.
    samples = [
        sample
        for study in investigation.studies
        for sample in study.samples
    ]

    records = []
    for sample in samples:
        record = {'sample_name': sample.name}
        for fv in sample.factor_values:
            raw = fv.value
            if isinstance(raw, (str, int, float)):
                resolved = raw
            elif isinstance(raw, OntologyAnnotation):
                resolved = raw.term
            else:
                # Values of any other type are left out of the summary.
                continue
            record[fv.factor_name.name] = resolved
        records.append(record)

    table = pd.DataFrame(records)
    distinct_counts = table.apply(pd.Series.nunique)
    # Columns with a single distinct value carry no information.
    constant_columns = distinct_counts[distinct_counts == 1].index
    summary = table.drop(constant_columns, axis=1).to_dict(orient='records')

    if summary is None:
        raise RuntimeError("Error getting study summary")

    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped to JSON")
    # HTML summary generation is disabled for this command.
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1056 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1057 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def zip_get_factors_summary_command(options):
    """Write a factor-value summary of a zipped ISA-Tab study as JSON and HTML.

    The archive at ``options.input_path`` is extracted into a temporary
    directory and loaded with ``isatab.load``.  One record is built per
    sample, holding ``'sample_name'`` plus one entry per factor value whose
    value is a ``str``/``int``/``float`` or an ``OntologyAnnotation`` (its
    ``term`` is used).  Columns with exactly one distinct value across all
    records are dropped.  The resulting records are dumped as JSON to
    ``options.json_output``, printed to stdout, and rendered through
    ``build_html_summary`` into ``options.html_output`` (which is closed
    afterwards).

    The temporary directory is always removed, including when extraction,
    loading or writing fails.

    :param options: object providing ``input_path`` (path to the zip
        archive), ``json_output`` (writable file object with a ``name``
        attribute) and ``html_output`` (writable file object usable as a
        context manager)
    :raises RuntimeError: if no summary could be produced
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.json_output.name)
    input_path = options.input_path
    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)
        ISA = isatab.load(tmpdir)

        all_samples = []
        for study in ISA.studies:
            all_samples.extend(study.samples)

        samples_and_fvs = []
        for sample in all_samples:
            sample_and_fvs = {
                'sample_name': sample.name,
            }
            for fv in sample.factor_values:
                if isinstance(fv.value, (str, int, float)):
                    sample_and_fvs[fv.factor_name.name] = fv.value
                elif isinstance(fv.value, OntologyAnnotation):
                    sample_and_fvs[fv.factor_name.name] = fv.value.term
            samples_and_fvs.append(sample_and_fvs)

        df = pd.DataFrame(samples_and_fvs)
        nunique = df.apply(pd.Series.nunique)
        # Columns with a single distinct value carry no information.
        cols_to_drop = nunique[nunique == 1].index
        df = df.drop(cols_to_drop, axis=1)
        summary = df.to_dict(orient='records')

        if summary is None:
            raise RuntimeError("Error getting study summary")

        json.dump(summary, options.json_output, indent=4)
        logger.debug("Summary dumped to JSON")
        print(json.dumps(summary, indent=4))
        html_summary = build_html_summary(summary)
        with options.html_output as html_fp:
            html_fp.write(html_summary)
    finally:
        # Clean up the extracted study even when an error occurred above;
        # previously the directory leaked on any exception.
        shutil.rmtree(tmpdir)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1097 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1098 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def _sample_factor_table(input_path):
    """Return ``(rows, varying)`` for the ISA-Tab at *input_path*.

    ``rows`` is a list of ``(sample_name, {factor_name: value})`` pairs, one
    per sample; ``varying`` lists, in order of first appearance, the factor
    names that do not hold exactly one distinct value across the samples.
    """
    ISA = isatab.load(input_path)
    rows = []
    factor_order = []
    distinct = {}
    for study in ISA.studies:
        for sample in study.samples:
            values = {}
            for fv in sample.factor_values:
                if isinstance(fv.value, (str, int, float)):
                    value = fv.value
                elif isinstance(fv.value, OntologyAnnotation):
                    value = fv.value.term
                else:
                    continue
                factor = fv.factor_name.name
                values[factor] = value
                if factor not in distinct:
                    distinct[factor] = set()
                    factor_order.append(factor)
                # NaN never equals itself; do not count it as a value.
                if not (isinstance(value, float) and value != value):
                    distinct[factor].add(value)
            rows.append((sample.name, values))
    varying = [f for f in factor_order if len(distinct[f]) != 1]
    return rows, varying


def get_study_groups(input_path):
    """Group the samples of an ISA-Tab study by their factor values.

    Previously this called ``isatab_get_factors_summary_command`` with an
    ``input_path`` keyword, but that command only accepts an ``options``
    object, writes its result to a file and returns nothing, so the call
    could not succeed.  The per-sample factor values are now collected
    directly from the loaded ISA-Tab.

    Factors holding exactly one distinct value across all samples are
    ignored, mirroring the factors summary.  A sample lacking a value for a
    varying factor gets ``None`` in that position.

    :param input_path: Input path to ISA-tab
    :return: dict mapping a tuple of factor values (one per varying factor,
        in order of first appearance) to the list of sample names sharing
        those values
    """
    rows, varying = _sample_factor_table(input_path)
    study_groups = {}
    for sample_name, values in rows:
        fvs = tuple(values.get(factor) for factor in varying)
        study_groups.setdefault(fvs, []).append(sample_name)
    return study_groups
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1111 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1112 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_study_groups_samples_sizes(input_path):
    """Return the number of samples in each study group.

    :param input_path: Input path to ISA-tab
    :return: list of ``(group_key, sample_count)`` tuples, one per entry of
        the mapping returned by ``get_study_groups``
    """
    groups = get_study_groups(input_path=input_path)
    return [(key, len(members)) for key, members in groups.items()]
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1116 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1117 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_sources_for_sample(input_path, sample_name):
    """Return the names of the sources a named sample derives from.

    Every study of the ISA-Tab loaded from *input_path* is searched; each
    sample whose name equals *sample_name* is reported on stdout and the
    names of its ``derives_from`` entries are collected.

    :param input_path: Input path to ISA-tab
    :param sample_name: name of the sample to look up
    :return: list of source names, in the order encountered
    """
    investigation = isatab.load(input_path)
    source_names = []

    for study in investigation.studies:
        matching = (s for s in study.samples if s.name == sample_name)
        for sample in matching:
            print('found a hit: {sample_name}'.format(
                sample_name=sample.name))
            source_names.extend(src.name for src in sample.derives_from)
    return source_names
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1131 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1132 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_data_for_sample(input_path, sample_name):
    """Return the data files generated from a named sample.

    Every data file of every assay of every study in the ISA-Tab loaded
    from *input_path* is inspected; a data file is a hit when *sample_name*
    is the name of one of its ``generated_from`` entries.  Each hit is
    logged at INFO level.

    :param input_path: Input path to ISA-tab
    :param sample_name: name of the sample to look up
    :return: list of matching data file objects, in the order encountered
    """
    investigation = isatab.load(input_path)
    matches = []
    for study in investigation.studies:
        for assay in study.assays:
            for data_file in assay.data_files:
                origin_names = [x.name for x in data_file.generated_from]
                if sample_name not in origin_names:
                    continue
                logger.info('found a hit: {filename}'.format(
                    filename=data_file.filename))
                matches.append(data_file)
    return matches
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1144 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1145 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_study_groups_data_sizes(input_path):
    """Return the size of each study group.

    The size reported is the length of each group's member list as returned
    by ``get_study_groups``.
    NOTE(review): this is computed exactly like
    ``get_study_groups_samples_sizes``; the name suggests data-file counts
    were intended -- confirm.

    :param input_path: Input path to ISA-tab
    :return: list of ``(group_key, size)`` tuples
    """
    groups = get_study_groups(input_path=input_path)
    return [(key, len(members)) for key, members in groups.items()]
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1149 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1150 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_characteristics_summary(input_path):
    """
    Summarise the source characteristics attached to each sample of a
    study.

    For every sample, the characteristics of each source it derives from
    are collected under the characteristic's category term.  Plain
    ``str``/``int``/``float`` values are used as they are; for
    ``OntologyAnnotation`` values the ``term`` is used.  Values of any
    other type are left out.

    :param input_path: Input path to ISA-tab
    :return: A list of dicts, one per sample, each holding the sample's
        ``name`` and its characteristic names and values

    Note: columns holding exactly one distinct value across all samples
    are removed, so only characteristics with variable values are reported.

    Example usage:
        characteristics_summary = get_characteristics_summary('/path/to/my/study/')
        [
            {
                "name": "6089if_9",
                "Variant": "Synechocystis sp. PCC 6803.sll0171.ko"
            },
            {
                "name": "6089if_43",
                "Variant": "Synechocystis sp. PCC 6803.WT.none"
            },
        ]
    """
    investigation = isatab.load(input_path)

    # Gather the samples of every study before building the records.
    samples = [
        sample
        for study in investigation.studies
        for sample in study.samples
    ]

    records = []
    for sample in samples:
        record = {'name': sample.name}
        for source in sample.derives_from:
            for charac in source.characteristics:
                raw = charac.value
                if isinstance(raw, (str, int, float)):
                    resolved = raw
                elif isinstance(raw, OntologyAnnotation):
                    resolved = raw.term
                else:
                    continue
                record[charac.category.term] = resolved
        records.append(record)

    table = pd.DataFrame(records)
    distinct_counts = table.apply(pd.Series.nunique)
    # Columns with a single distinct value carry no information.
    constant_columns = distinct_counts[distinct_counts == 1].index
    return table.drop(constant_columns, axis=1).to_dict(orient='records')
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1206 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1207 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_study_variable_summary(input_path):
    """Summarise the variables that differ between the samples of a study.

    The investigation is loaded with ``isatab.load(input_path)``. One record
    is built per sample, holding:

    * the sample name under ``'sample_name'``;
    * every factor value, keyed by the factor name;
    * for each source the sample derives from, the source name under
      ``'source_name'`` and every source characteristic keyed by its
      category term (a later source overwrites keys set by an earlier one).

    Values are kept only when they are ``str``/``int``/``float`` (stored
    as-is) or ``OntologyAnnotation`` (its ``term`` is stored); values of any
    other type are left out. Columns for which ``pandas.Series.nunique``
    reports exactly one distinct value are dropped before returning.

    :param input_path: passed unchanged to ``isatab.load``.
    :return: list of dicts, one per sample, as produced by
        ``DataFrame.to_dict(orient='records')``.
    """
    investigation = isatab.load(input_path)

    # Flatten the samples of every study into a single list.
    samples = [
        sample
        for study in investigation.studies
        for sample in study.samples
    ]

    records = []
    for sample in samples:
        record = {'sample_name': sample.name}

        for factor_value in sample.factor_values:
            value = factor_value.value
            if isinstance(value, (str, int, float)):
                record[factor_value.factor_name.name] = value
            elif isinstance(value, OntologyAnnotation):
                record[factor_value.factor_name.name] = value.term

        for source in sample.derives_from:
            record['source_name'] = source.name
            for characteristic in source.characteristics:
                value = characteristic.value
                if isinstance(value, (str, int, float)):
                    record[characteristic.category.term] = value
                elif isinstance(value, OntologyAnnotation):
                    record[characteristic.category.term] = value.term

        records.append(record)

    table = pd.DataFrame(records)
    distinct_counts = table.apply(pd.Series.nunique)
    # A column with a single distinct value does not discriminate samples.
    constant_columns = distinct_counts[distinct_counts == 1].index
    return table.drop(constant_columns, axis=1).to_dict(orient='records')
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1247 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1248 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_study_group_factors(input_path):
    """Return the distinct factor-value combinations found in a study.

    Every file in ``input_path`` whose name starts with ``a_`` or ``s_`` is
    loaded with ``isatab.load_table``. For a table that has columns starting
    with ``'Factor Value'``, the de-duplicated rows of those columns become
    the result.

    NOTE(review): the result is replaced, not extended, by each table that
    has factor columns, so only the last such table visited contributes to
    the returned list. This mirrors the previous behaviour; confirm it is
    intended.

    :param input_path: directory holding the ISA-Tab table files.
    :return: list of dicts mapping factor column name to value; empty when no
        table has factor columns.
    """
    factors_list = []

    # '[as]_*' selects the a_* and s_* tables. The former '[a|s]' character
    # class also matched a literal '|' as first character.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already yields paths that include input_path. Joining it a
        # second time produced 'dir/dir/file' for relative directories.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        factor_columns = [
            x for x in df.columns if x.startswith('Factor Value')]
        if factor_columns:
            factors_list = (
                df[factor_columns]
                .drop_duplicates()
                .to_dict(orient='records')
            )
    return factors_list
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1262 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1263 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def get_filtered_df_on_factors_list(input_path):
    """Build one pandas query per study group and print each group's members.

    A query string is built for every factor combination returned by
    ``get_study_group_factors``: spaces and square brackets in column names
    and in string values are replaced by underscores, and the
    ``column == 'value'`` clauses are chained with ``and``. Factor values that
    are not strings are left out of the query.

    Every ``a_*``/``s_*`` table in ``input_path`` is then loaded, its column
    names are sanitised the same way, and each query is run against it. The
    matching ``Sample_Name``, ``Source_Name`` and ``Raw_Spectral_Data_File``
    values are printed when the table has those columns.

    NOTE(review): only the values inside the query are sanitised, not the
    table cells, so a factor value containing a space or bracket will
    presumably never match. Confirm whether that is intended.

    :param input_path: directory holding the ISA-Tab table files.
    :return: list of query strings, one per factor combination.
    """
    def _sanitise(text):
        # Make a name usable as an identifier inside DataFrame.query().
        return text.replace(' ', '_').replace('[', '_').replace(']', '_')

    factors_list = get_study_group_factors(input_path=input_path)
    queries = []

    for item in factors_list:
        query_str = []

        for k, v in item.items():
            if isinstance(v, str):
                query_str.append("{k} == '{v}' and ".format(
                    k=_sanitise(k), v=_sanitise(v)))

        # Drop the dangling "and " left by the last clause.
        queries.append(''.join(query_str)[:-4])

    # '[as]_*' selects the a_* and s_* tables. The former '[a|s]' character
    # class also matched a literal '|' as first character.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already yields paths that include input_path. Joining it a
        # second time produced 'dir/dir/file' for relative directories.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        df.columns = df.columns.map(
            lambda x: _sanitise(x) if isinstance(x, str) else x)

        for query in queries:
            # query uses pandas.eval, which evaluates queries like pure Python
            # notation
            df2 = df.query(query)
            if 'Sample_Name' in df.columns:
                print('Group: {query} / Sample_Name: {sample_name}'.format(
                    query=query, sample_name=list(df2['Sample_Name'])))

            if 'Source_Name' in df.columns:
                print('Group: {} / Sources_Name: {}'.format(
                    query, list(df2['Source_Name'])))

            if 'Raw_Spectral_Data_File' in df.columns:
                # NOTE(review): [13:-2] presumably trims a leading
                # 'Factor_Value_' and the trailing "' " - confirm.
                print('Group: {query} / Raw_Spectral_Data_File: {filename}'
                      .format(query=query[13:-2],
                              filename=list(df2['Raw_Spectral_Data_File'])))
    return queries
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1316 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1317 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def datatype_get_summary_command(options):
    """Write the study variable summary as JSON to ``options.output``.

    ``options.study_id`` is handed to ``get_study_variable_summary``. The
    resulting records are echoed with ``print`` and then dumped as indented
    JSON to ``options.output``.

    :param options: parsed command-line options; ``study_id`` and ``output``
        (a writable file object with a ``name``) are read.
    :raises RuntimeError: if no summary could be obtained.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.output.name)

    summary = get_study_variable_summary(options.study_id)
    # Check before touching the value: list(None) would raise TypeError and
    # hide the intended error below.
    if summary is None:
        raise RuntimeError("Error getting study summary")

    print('summary: ', list(summary))
    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped")
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1329 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1330 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1331 # logging and argument parsing | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1332 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def _configure_logger(options):
    """Configure root logging from ``options.log_level`` and set ``logger``.

    The level name is looked up case-insensitively on the ``logging`` module.
    Anything that does not resolve to an integer level falls back to
    ``logging.INFO``. The module-level ``logger`` global is bound to the root
    logger.

    :param options: object exposing a ``log_level`` attribute.
    """
    level_name = str(options.log_level).upper()
    logging_level = getattr(logging, level_name, None)
    if not isinstance(logging_level, int):
        # A plain getattr could return a non-level attribute (for example
        # 'info' resolves to the logging.info function), which setLevel
        # rejects.
        logging_level = logging.INFO
    logging.basicConfig(level=logging_level)

    global logger
    logger = logging.getLogger()
    # basicConfig() does nothing when the root logger already has handlers,
    # so the level is also applied explicitly.
    logger.setLevel(logging_level)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1340 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1341 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def _parse_args(args):
    """Parse ``args`` with the parser built by ``make_parser``.

    :param args: list of command-line argument strings.
    :return: the namespace returned by ``parse_args``.
    """
    return make_parser().parse_args(args)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1346 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1347 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
def main(args):
    """Parse the command line, set up logging and run the chosen subcommand.

    :param args: list of command-line argument strings.
    """
    parsed = _parse_args(args)
    _configure_logger(parsed)
    # The parsed options carry the subcommand handler in ``func``.
    handler = parsed.func
    handler(parsed)
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1353 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
changeset | 1354 | 
| 
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
 prog parents: diff
if __name__ == '__main__':
    # Script entry point: exit 0 on success. Any Exception is logged (with
    # traceback, then as a plain error) and the exit status is taken from
    # the exception's ``code`` attribute when it has one, otherwise 99.
    try:
        main(sys.argv[1:])
    except Exception as err:
        logger.exception(err)
        logger.error(err)
        sys.exit(getattr(err, "code", 99))
    else:
        sys.exit(0)
