comparison isaslicer.py @ 0:8dab200e02cb draft

"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
author prog
date Tue, 07 Jan 2020 09:05:21 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8dab200e02cb
1 #!/usr/bin/env python3
2
3 import argparse
4 import glob
5 import json
6 import logging
7 import os
8 import re
9 import shutil
10 import sys
11 import tempfile
12 import zipfile
13
14 import pandas as pd
15 from isatools import isatab
16 from isatools.model import OntologyAnnotation
17 from isatools.net import mtbls as MTBLS
18
# Module-level logger shared by every command handler. It is None here and
# is expected to be configured by the program entry point before any
# command function runs (the handlers call logger.info/debug directly).
logger = None

# CLI shape: isaslicer.py <command> <study_id> [ command-specific options ]
23
def make_parser():
    """Build the argument parser: one subcommand per slicer action.

    Each subcommand binds its handler function via ``set_defaults(func=...)``
    so the entry point can dispatch with ``args.func(args)``. Commands fall
    into three families: ``mtbls-*`` (query MetaboLights online), ``isa-tab-*``
    (operate on an unpacked ISA-Tab directory) and ``zip-*`` (operate on a
    zipped ISA-Tab).
    """
    parser = argparse.ArgumentParser( description="ISA slicer")

    parser.add_argument('--log-level', choices=[
        'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'],
        default='INFO', help="Set the desired logging level")

    subparsers = parser.add_subparsers(
        title='Actions',
        dest='command') # specified subcommand will be available in attribute 'command'
    subparsers.required = True

    # mtblisa commands

    subparser = subparsers.add_parser(
        'mtbls-get-study-archive', aliases=['gsa'],
        help="Get ISA study from MetaboLights as zip archive")
    subparser.set_defaults(func=get_study_archive_command)
    subparser.add_argument('study_id')
    subparser.add_argument(
        'output', metavar="OUTPUT",
        help="Name of output archive (extension will be added)")
    subparser.add_argument('--format', metavar="FMT", choices=[
        'zip', 'tar', 'gztar', 'bztar', 'xztar'], default='zip',
        help="Type of archive to create")

    subparser = subparsers.add_parser('mtbls-get-study', aliases=['gs'],
                                      help="Get ISA study from MetaboLights")
    subparser.set_defaults(func=get_study_command)
    subparser.add_argument('study_id')
    subparser.add_argument('output', metavar="PATH", help="Name of output")
    subparser.add_argument(
        '-f', '--isa-format', choices=['isa-tab', 'isa-json'],
        metavar="FORMAT", default='isa-tab', help="Desired ISA format")

    subparser = subparsers.add_parser(
        'mtbls-get-factors', aliases=['gf'],
        help="Get factor names from a study in json format")
    subparser.set_defaults(func=get_factors_command)
    subparser.add_argument('study_id')
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser(
        'mtbls-get-factor-values', aliases=['gfv'],
        help="Get factor values from a study in json format")
    subparser.set_defaults(func=get_factor_values_command)
    subparser.add_argument('study_id')
    subparser.add_argument(
        'factor', help="The desired factor. Use `get-factors` to get the list "
        "of available factors")
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser('mtbls-get-data-list', aliases=['gd'],
                                      help="Get data files list in json format")
    subparser.set_defaults(func=get_data_files_command)
    subparser.add_argument('study_id')
    subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
                           help="Output file")
    subparser.add_argument(
        '--json-query',
        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
    subparser.add_argument(
        '--galaxy_parameters_file',
        help="Path to JSON file containing input Galaxy JSON")

    subparser = subparsers.add_parser(
        'mtbls-get-factors-summary', aliases=['gsum'],
        help="Get the variables summary from a study, in json format")
    subparser.set_defaults(func=get_summary_command)
    subparser.add_argument('study_id')
    # NOTE(review): both outputs default to stdout; when neither is given
    # the JSON and HTML are interleaved on the same stream.
    subparser.add_argument(
        'json_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output JSON file")
    subparser.add_argument(
        'html_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output HTML file")

    # isaslicer commands on path to unpacked ISA-Tab as input

    subparser = subparsers.add_parser(
        'isa-tab-get-factors', aliases=['isagf'],
        help="Get factor names from a study in json format")
    subparser.set_defaults(func=isatab_get_factor_names_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser(
        'zip-get-factors', aliases=['zipgf'],
        help="Get factor names from a study in json format")
    subparser.set_defaults(func=zip_get_factor_names_command)
    subparser.add_argument('input_path', type=str,
                           help="Input ISA-Tab zip path")
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser(
        'isa-tab-get-factor-values', aliases=['isagfv'],
        help="Get factor values from a study in json format")
    subparser.set_defaults(func=isatab_get_factor_values_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
    subparser.add_argument(
        'factor', help="The desired factor. Use `get-factors` to get the list "
        "of available factors")
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser(
        'zip-get-factor-values', aliases=['zipgfv'],
        help="Get factor values from a study in json format")
    subparser.set_defaults(func=zip_get_factor_values_command)
    subparser.add_argument('input_path', type=str,
                           help="Input ISA-Tab zip path")
    subparser.add_argument(
        'factor', help="The desired factor. Use `get-factors` to get the list "
        "of available factors")
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser('isa-tab-get-data-list', aliases=['isagdl'],
                                      help="Get data files list in json format")
    subparser.set_defaults(func=isatab_get_data_files_list_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
    subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
                           help="Output file")
    subparser.add_argument(
        '--json-query',
        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
    subparser.add_argument(
        '--galaxy_parameters_file',
        help="Path to JSON file containing input Galaxy JSON")

    subparser = subparsers.add_parser('zip-get-data-list', aliases=['zipgdl'],
                                      help="Get data files list in json format")
    subparser.set_defaults(func=zip_get_data_files_list_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path")
    subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
                           help="Output file")
    subparser.add_argument(
        '--json-query',
        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
    subparser.add_argument(
        '--galaxy_parameters_file',
        help="Path to JSON file containing input Galaxy JSON")

    subparser = subparsers.add_parser('isa-tab-get-data-collection', aliases=['isagdc'],
                                      help="Get data files collection")
    subparser.set_defaults(func=isatab_get_data_files_collection_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
    subparser.add_argument('output_path', type=str, help="Output data files path")
    subparser.add_argument(
        '--json-query',
        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
    subparser.add_argument(
        '--galaxy_parameters_file',
        help="Path to JSON file containing input Galaxy JSON")

    subparser = subparsers.add_parser('zip-get-data-collection', aliases=['zipgdc'],
                                      help="Get data files collection")
    subparser.set_defaults(func=zip_get_data_files_collection_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path")
    subparser.add_argument('output_path', type=str, help="Output data files path")
    subparser.add_argument(
        '--json-query',
        help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")

    subparser = subparsers.add_parser(
        'isa-tab-get-factors-summary', aliases=['isasum'],
        help="Get the variables summary from a study, in json format")
    subparser.set_defaults(func=isatab_get_factors_summary_command)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
    subparser.add_argument(
        'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
        help="Output file")

    subparser = subparsers.add_parser(
        'zip-get-factors-summary', aliases=['zipsum'],
        help="Get the variables summary from a study, in json format")
    subparser.set_defaults(func=zip_get_factors_summary_command)
    subparser.add_argument('input_path', type=str,
                           help="Input ISA-Tab zip path")
    subparser.add_argument(
        'json_output', nargs='?', type=argparse.FileType('w'),
        default=sys.stdout,
        help="Output JSON file")
    subparser.add_argument(
        'html_output', nargs='?', type=argparse.FileType('w'),
        default=sys.stdout,
        help="Output HTML file")

    subparser = subparsers.add_parser(
        'isaslicer2-slice', aliases=['slice2'],
        help="Slice ISA-Tabs version 2")
    subparser.set_defaults(func=query_isatab)
    subparser.add_argument('--source_dir', type=str,
                           help="Input ISA-Tab zip path")
    subparser.add_argument(
        '--galaxy_parameters_file', type=argparse.FileType(mode='r'),
        help="Path to JSON file containing input Galaxy JSON")
    subparser.add_argument('--output', type=argparse.FileType(mode='w'),
                           help="Input ISA-Tab zip path")

    subparser = subparsers.add_parser(
        'filter-data', aliases=['filter'],
        help="Filter out data based on slicer2")
    subparser.set_defaults(func=filter_data)
    subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
    subparser.add_argument('output_path', type=str, help="Output data files path")
    subparser.add_argument('--slice', type=argparse.FileType(mode='r'),
                           help="slice")
    subparser.add_argument('--filename_filter', type=str, help="shell-like wildcard to filter files")

    return parser

246
def filter_data(options):
    """Link selected data files from a slicer2 result into an output dir.

    Files under ``options.input_path`` that match the shell wildcard
    ``options.filename_filter`` AND are named in the slice JSON
    (``options.slice``, an open file whose ``results`` entries carry
    ``data_files`` lists) are symlinked into ``options.output_path``.
    A report of what was linked is written to ``cli.log`` in the current
    working directory.

    Raises IOError if the source path does not exist; exits with status 1
    if a symlink cannot be created.
    """
    loglines = []
    source_dir = options.input_path if options.input_path else ""
    output_path = options.output_path
    filename_filter = options.filename_filter
    if source_dir and not os.path.exists(source_dir):
        raise IOError('Source path does not exist!')
    slice_json = options.slice
    # Collect every data-file name referenced by any sample in the slice.
    data_files = []
    for result in json.load(slice_json)['results']:
        data_files.extend(result.get('data_files', []))
    # Set membership: O(1) per lookup instead of scanning a list.
    wanted = set(data_files)
    filtered_files = glob.glob(os.path.join(source_dir, filename_filter))
    to_copy = [path for path in filtered_files
               if os.path.basename(path) in wanted]
    loglines.append("Using slice results from {}\n".format(slice_json.name))
    for filepath in to_copy:
        loglines.append("Copying {}\n".format(os.path.basename(filepath)))
        # Symlink rather than copy to avoid duplicating large data files.
        try:
            os.symlink(
                filepath, os.path.join(output_path, os.path.basename(filepath)))
        except OSError as e:  # narrowed from bare Exception
            print(e)
            sys.exit(1)  # exit() is meant for interactive use only
    with open('cli.log', 'w') as fp:
        fp.writelines(loglines)
283
def query_isatab(options):
    """Slice an ISA-Tab study by assay type, factor values, characteristics
    and process parameters, writing matching samples + data files as JSON.

    Input comes either from ``options.source_dir`` (unpacked ISA-Tab) or,
    failing that, by downloading the study named in the Galaxy parameters
    from MetaboLights into a temp dir. The query itself is read from the
    required ``options.galaxy_parameters_file`` JSON under the 'query' key.
    The result written to ``options.output`` is
    ``{'query': ..., 'results': [{'sample_name', 'data_files'}, ...]}``.
    """
    source_dir = options.source_dir if options.source_dir else ""
    galaxy_parameters_file = options.galaxy_parameters_file
    output = options.output

    debug = True
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        print('Galaxy parameters:')
        print(json.dumps(galaxy_parameters, indent=4))
    else:
        raise IOError('Could not load Galaxy parameters file!')
    if source_dir:
        if not os.path.exists(source_dir):
            raise IOError('Source path does not exist!')
    query = galaxy_parameters['query']
    if debug:
        print('Query is:')
        print(json.dumps(query, indent=4))  # for debugging only
    if source_dir:
        investigation = isatab.load(source_dir)
    else:
        # NOTE(review): this temp dir is never removed — confirm whether
        # cleanup is handled by the caller/Galaxy job dir.
        tmp = tempfile.mkdtemp()
        _ = MTBLS.get(galaxy_parameters['input']['mtbls_id'], tmp)
        investigation = isatab.load(tmp)
    # filter assays by mt/tt
    # assumes the query always carries 'measurement_type' and
    # 'technology_type' string entries — TODO confirm with tool XML
    matching_assays = []
    mt = query.get('measurement_type').strip()
    tt = query.get('technology_type').strip()
    if mt and tt:
        for study in investigation.studies:
            matching_assays.extend(
                [x for x in study.assays if x.measurement_type.term == mt
                 and x.technology_type.term == tt])
    elif mt and not tt:
        for study in investigation.studies:
            matching_assays.extend(
                [x for x in study.assays if x.measurement_type.term == mt])
    elif not mt and tt:
        for study in investigation.studies:
            matching_assays.extend(
                [x for x in study.assays if x.technology_type.term == tt])
    else:
        # no assay-type constraint: keep every assay
        for study in investigation.studies:
            matching_assays.extend(study.assays)
    assay_samples = []
    for assay in matching_assays:
        assay_samples.extend(assay.samples)
    if debug:
        print('Total samples: {}'.format(len(assay_samples)))

    # filter samples by fv
    factor_selection = {
        x.get('factor_name').strip(): x.get('factor_value').strip() for x in
        query.get('factor_selection', [])}

    fv_samples = set()
    if factor_selection:
        samples_to_remove = set()
        # first pass: collect every sample matching ANY selected factor value
        for f, v in factor_selection.items():
            for sample in assay_samples:
                for fv in [x for x in sample.factor_values if
                           x.factor_name.name == f]:
                    if isinstance(fv.value, OntologyAnnotation):
                        if fv.value.term == v:
                            fv_samples.add(sample)
                    elif fv.value == v:
                        fv_samples.add(sample)
        # second pass: drop samples that mismatch any selected factor,
        # i.e. the surviving set matches ALL selected factor values
        for f, v in factor_selection.items():
            for sample in fv_samples:
                for fv in [x for x in sample.factor_values if
                           x.factor_name.name == f]:
                    if isinstance(fv.value, OntologyAnnotation):
                        if fv.value.term != v:
                            samples_to_remove.add(sample)
                    elif fv.value != v:
                        samples_to_remove.add(sample)
        final_fv_samples = fv_samples.difference(samples_to_remove)
    else:
        final_fv_samples = assay_samples

    # filter samples by characteristic
    characteristics_selection = {
        x.get('characteristic_name').strip():
        x.get('characteristic_value').strip() for x in
        query.get('characteristics_selection', [])}

    cv_samples = set()
    if characteristics_selection:
        first_pass = True
        samples_to_remove = set()
        for c, v in characteristics_selection.items():
            if first_pass:
                # seed the candidate set from the factor-filtered samples;
                # characteristics are checked on the sample AND its sources
                for sample in final_fv_samples:
                    for cv in [x for x in sample.characteristics if
                               x.category.term == c]:
                        if isinstance(cv.value, OntologyAnnotation):
                            if cv.value.term == v:
                                cv_samples.add(sample)
                        elif cv.value == v:
                            cv_samples.add(sample)
                    for source in sample.derives_from:
                        for cv in [x for x in source.characteristics if
                                   x.category.term == c]:
                            if isinstance(cv.value, OntologyAnnotation):
                                if cv.value.term == v:
                                    cv_samples.add(sample)
                            elif cv.value == v:
                                cv_samples.add(sample)
                first_pass = False
            else:
                # subsequent characteristics narrow the candidate set
                for sample in cv_samples:
                    for cv in [x for x in sample.characteristics if
                               x.category.term == c]:
                        if isinstance(cv.value, OntologyAnnotation):
                            if cv.value.term != v:
                                samples_to_remove.add(sample)
                        elif cv.value != v:
                            samples_to_remove.add(sample)
                    for source in sample.derives_from:
                        for cv in [x for x in source.characteristics if
                                   x.category.term == c]:
                            if isinstance(cv.value, OntologyAnnotation):
                                if cv.value.term != v:
                                    samples_to_remove.add(sample)
                            elif cv.value != v:
                                samples_to_remove.add(sample)
        final_cv_samples = cv_samples.difference(samples_to_remove)
    else:
        final_cv_samples = final_fv_samples

    # filter samples by process parameter
    # NOTE: parameter selection does not narrow the sample set here; it is
    # applied below, row-by-row, when data files are pulled from the assay
    # tables ('Parameter Value[...]' columns).
    parameters_selection = {
        x.get('parameter_name').strip():
        x.get('parameter_value').strip() for x in
        query.get('parameter_selection', [])}

    final_samples = final_cv_samples

    if debug:
        print('Final number of samples: {}'.format(len(final_samples)))
    results = []
    for sample in final_samples:
        results.append({
            'sample_name': sample.name,
            'data_files': []
        })
    # second stage: for each matching sample, scan every assay table (a_*)
    # and collect the values of any data-file column on that sample's rows
    for result in results:
        sample_name = result['sample_name']
        if source_dir:
            table_files = glob.iglob(os.path.join(source_dir, 'a_*'))
        else:
            table_files = glob.iglob(os.path.join(tmp, 'a_*'))
        for table_file in table_files:
            with open(table_file) as fp:
                df = isatab.load_table(fp)
                data_files = []
                table_headers = list(df.columns.values)
                sample_rows = df.loc[df['Sample Name'] == sample_name]
                data_node_labels = [
                    'Raw Data File', 'Raw Spectral Data File',
                    'Derived Spectral Data File',
                    'Derived Array Data File', 'Array Data File',
                    'Protein Assignment File', 'Peptide Assignment File',
                    'Post Translational Modification Assignment File',
                    'Acquisition Parameter Data File',
                    'Free Induction Decay Data File',
                    'Derived Array Data Matrix File', 'Image File',
                    'Derived Data File', 'Metabolite Assignment File']
                if parameters_selection:
                    # keep only rows whose Parameter Value[...] columns
                    # match every selected parameter
                    for p, v in parameters_selection.items():
                        sample_pv_rows = sample_rows.loc[
                            sample_rows['Parameter Value[{}]'.format(p)] == v]
                        for node_label in data_node_labels:
                            if node_label in table_headers:
                                data_files.extend(
                                    list(sample_pv_rows[node_label]))
                        # drop NaN cells and empty strings
                        result['data_files'].extend(list(set(
                            i for i in list(data_files) if
                            str(i) not in ('nan', ''))))
                else:
                    for node_label in data_node_labels:
                        if node_label in table_headers:
                            data_files.extend(list(sample_rows[node_label]))
                    result['data_files'].extend(
                        list(set(i for i in list(data_files) if
                                 str(i) not in ('nan', ''))))
    results_json = {
        'query': query,
        'results': results
    }
    json.dump(results_json, output, indent=4)

488
def get_study_archive_command(options):
    """Download a MetaboLights study and pack it into an archive.

    The study named by ``options.study_id`` is fetched into a temporary
    directory, archived at ``options.output`` in the ``options.format``
    format, and the temporary directory is removed afterwards.

    Raises RuntimeError when the download fails.
    """
    study_id = options.study_id

    logger.info("Downloading study %s into archive at path %s.%s",
                study_id, options.output, options.format)

    download_dir = MTBLS.get(study_id)
    logger.debug("MTBLS.get returned '%s'", download_dir)
    # Guard clause: a failed download gives us nothing to archive.
    if download_dir is None:
        raise RuntimeError("Error downloading ISA study")
    try:
        shutil.make_archive(
            options.output, options.format, download_dir, logger=logger)
        logger.info("ISA archive written")
    finally:
        logger.debug("Trying to clean up tmp dir %s", download_dir)
        shutil.rmtree(download_dir, ignore_errors=True)
508 # mtblisa commands
509
510
def get_study_command(options):
    """Fetch a MetaboLights study as ISA-Tab or ISA-JSON.

    For 'isa-tab' the downloaded directory is moved to ``options.output``;
    for 'isa-json' the JSON document is written into a new directory at
    ``options.output``. Refuses to overwrite an existing output path.

    Raises RuntimeError on download failure or pre-existing output, and
    ValueError on an unexpected format value.
    """
    if os.path.exists(options.output):
        raise RuntimeError("Selected output path {} already exists!".format(
            options.output))

    if options.isa_format == "isa-tab":
        logger.info("Downloading study %s", options.study_id)
        tmp_dir = MTBLS.get(options.study_id)
        try:
            if tmp_dir is None:
                raise RuntimeError("Error downloading ISA study")
            logger.debug(
                "Finished downloading data. Moving to final location %s",
                options.output)
            shutil.move(tmp_dir, options.output)
            logger.info("ISA archive written to %s", options.output)
        finally:
            # best-effort removal of whatever the download left behind
            if tmp_dir:
                logger.debug("Deleting %s, if there's anything there", tmp_dir)
                shutil.rmtree(tmp_dir, ignore_errors=True)
    elif options.isa_format == "isa-json":
        study_json = MTBLS.getj(options.study_id)
        if study_json is None:
            raise RuntimeError("Error downloading ISA study")
        logger.debug(
            "Finished downloading data. Dumping json to final location %s",
            options.output)
        os.makedirs(options.output)
        json_file = os.path.join(
            options.output, "{}.json".format(study_json['identifier']))
        with open(json_file, 'w') as fd:
            json.dump(study_json, fd)
        logger.info("ISA-JSON written to %s", options.output)
    else:
        raise ValueError("BUG! Got an invalid isa format '{}'".format(
            options.isa_format))
552
def get_factors_command(options):
    """Write the factor names of a MetaboLights study as a JSON list.

    Raises RuntimeError when the factor names cannot be retrieved.
    """
    logger.info("Getting factors for study %s. Writing to %s.",
                options.study_id, options.output.name)
    factor_names = MTBLS.get_factor_names(options.study_id)
    if factor_names is None:
        raise RuntimeError("Error downloading factors.")
    json.dump(list(factor_names), options.output, indent=4)
    logger.debug("Factor names written")
563
def get_factor_values_command(options):
    """Write the values of one factor of a MetaboLights study as JSON.

    Raises RuntimeError when the factor values cannot be retrieved.
    """
    # lazy %-style args match the logging convention used elsewhere here
    logger.info("Getting values for factor %s in study %s. Writing to %s.",
                options.factor, options.study_id, options.output.name)
    fvs = MTBLS.get_factor_values(options.study_id, options.factor)
    if fvs is None:
        raise RuntimeError("Error getting factor values")
    json.dump(list(fvs), options.output, indent=4)
    # log the destination file's name (previously logged the file object)
    logger.debug("Factor values written to %s", options.output.name)
574
def get_data_files_command(options):
    """Query MetaboLights for a study's data files and dump them as JSON.

    The factor query comes from ``--json-query``, else from a Galaxy
    parameters file (factor_value_series entries), else no filtering.

    Raises RuntimeError when the data-file list cannot be retrieved.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.study_id, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        json_struct = json.loads(options.json_query)
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
            # flatten the Galaxy factor series into {factor_name: value}
            json_struct = {}
            for fv_item in galaxy_json['factor_value_series']:
                json_struct[fv_item['factor_name']] = fv_item['factor_value']
            data_files = MTBLS.get_data_files(options.study_id, json_struct)
    else:
        logger.debug("No query was specified")
        data_files = MTBLS.get_data_files(options.study_id)

    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # log the output file's name rather than the file object's repr
    logger.info("Finished writing data files to %s", options.output.name)
603
def build_html_data_files_list(data_files_list):
    """Render sample/data-file records as an HTML page with one table row
    per sample.

    *data_files_list* is a list of dicts with keys 'sample' (str) and
    'data_files' (list of str). Returns the HTML document as a string.

    Fixes previously malformed markup: each row now gets a closing
    ``</tr>`` and the table is terminated with ``</table>``.
    """
    data_files_table = '<table>'
    data_files_table += '<tr><th>Sample Name</th><th>Data File Names</th></tr>'
    for data_file in data_files_list:
        sample_name = data_file['sample']
        data_files = ', '.join(data_file['data_files'])
        data_files_table += (
            '<tr><td>{sample_name}</td><td>{data_files}</td></tr>'
            .format(sample_name=sample_name, data_files=data_files))
    data_files_table += '</table>'
    html_data_files_list = """
<html>
<head>
<title>ISA-Tab Factors Summary</title>
</head>
<body>
{summary_table}
</body>
</html>
""".format(summary_table=data_files_table)
    return html_data_files_list
624
def build_html_summary(summary):
    """Render a study-variable summary as an HTML table of study groups.

    *summary* is a list of dicts, each holding 'sample_name' plus
    factor-name -> factor-value string pairs. Samples with an identical
    factor combination form one study group; the table shows each group
    and its sample count. Returns the HTML document as a string.

    Fixes: the inner loop no longer shadows the outer ``item`` variable,
    and each row is closed with ``</tr>``.
    """
    study_groups = {}
    for item in summary:
        sample_name = item['sample_name']
        study_factors = []
        for key, value in item.items():
            if key != "sample_name":
                study_factors.append(': '.join([key, value]))
        study_group = ', '.join(study_factors)
        study_groups.setdefault(study_group, []).append(sample_name)
    summary_table = '<table>'
    summary_table += '<tr><th>Study group</th><th>Number of samples</th></tr>'
    for study_group, samples in study_groups.items():
        summary_table += (
            '<tr><td>{study_group}</td><td>{num_samples}</td></tr>'
            .format(study_group=study_group, num_samples=len(samples)))
    summary_table += '</table>'
    html_summary = """
<html>
<head>
<title>ISA-Tab Factors Summary</title>
</head>
<body>
{summary_table}
</body>
</html>
""".format(summary_table=summary_table)
    return html_summary
656
def get_summary_command(options):
    """Write a MetaboLights study's variable summary as JSON and HTML.

    The summary is dumped to ``options.json_output`` and an HTML rendering
    of the same data to ``options.html_output``.

    Raises RuntimeError when the summary cannot be retrieved.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.json_output.name)

    summary = MTBLS.get_study_variable_summary(options.study_id)
    if summary is None:
        raise RuntimeError("Error getting study summary")
    json.dump(summary, options.json_output, indent=4)
    logger.debug("Summary dumped to JSON")
    with options.html_output as html_fp:
        html_fp.write(build_html_summary(summary))
676
677 # isaslicer commands
678
def isatab_get_data_files_list_command(options):
    """List data files of an unpacked ISA-Tab study as JSON.

    The factor query comes from ``--json-query``, else from a Galaxy
    parameters file (factor_value_series entries), else no filtering.

    Raises RuntimeError when slicing fails.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        json_struct = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
            # flatten the Galaxy factor series into {factor_name: value}
            json_struct = {}
            for fv_item in galaxy_json['factor_value_series']:
                json_struct[fv_item['factor_name']] = fv_item['factor_value']
    else:
        logger.debug("No query was specified")
        json_struct = None
    data_files = slice_data_files(
        options.input_path, factor_selection=json_struct)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # log the output file's name rather than the file object's repr
    logger.info("Finished writing data files to %s", options.output.name)
708
def zip_get_data_files_list_command(options):
    """List data files of a zipped ISA-Tab study as JSON.

    The archive is unpacked into a temporary directory, sliced, and the
    temporary directory is removed again — now also when slicing fails
    (previously the temp dir leaked on any error).

    Raises RuntimeError when slicing fails.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        json_struct = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
            # flatten the Galaxy factor series into {factor_name: value}
            json_struct = {}
            for fv_item in galaxy_json['factor_value_series']:
                json_struct[fv_item['factor_name']] = fv_item['factor_value']
    else:
        logger.debug("No query was specified")
        json_struct = None
    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(options.input_path) as zfp:
            zfp.extractall(path=tmpdir)
        data_files = slice_data_files(tmpdir, factor_selection=json_struct)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")
        logger.debug("dumping data files to %s", options.output.name)
        json.dump(list(data_files), options.output, indent=4)
        # log the output file's name rather than the file object's repr
        logger.info("Finished writing data files to %s", options.output.name)
    finally:
        # always clean up the extraction dir, even on failure
        shutil.rmtree(tmpdir)
741
def isatab_get_data_files_collection_command(options):
    """Copy the data files of matching samples out of an unpacked ISA-Tab.

    The factor query comes from ``--json-query``, else from a Galaxy
    parameters file, else no filtering. Matching files are copied from
    ``options.input_path`` into ``options.output_path``.

    Raises RuntimeError when slicing fails.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output_path)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
    else:
        logger.debug("No query was specified")
    input_path = options.input_path
    if options.json_query is not None:
        json_struct = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
            # flatten the Galaxy factor series into {factor_name: value}
            json_struct = {}
            for fv_item in galaxy_json['factor_value_series']:
                json_struct[fv_item['factor_name']] = fv_item['factor_value']
    else:
        logger.debug("No query was specified")
        json_struct = None
    data_files = slice_data_files(input_path, factor_selection=json_struct)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")
    output_path = options.output_path
    logger.debug("copying data files to %s", output_path)
    for result in data_files:
        for data_file_name in result['data_files']:
            # use the module logger consistently (was root 'logging.info')
            logger.info("Copying {}".format(data_file_name))
            shutil.copy(os.path.join(input_path, data_file_name), output_path)
    logger.info("Finished writing data files to {}".format(output_path))
777
def zip_get_data_files_collection_command(options):
    """Copy the data files of matching samples out of a zipped ISA-Tab.

    The archive is unpacked into a temporary directory, sliced, and the
    matching files are copied to ``options.output_path``. The temporary
    directory is now removed even when an error occurs (previously it
    leaked on failure).

    Raises RuntimeError when slicing fails.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output_path)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
    else:
        logger.debug("No query was specified")
    input_path = options.input_path
    output_path = options.output_path
    if options.json_query is not None:
        factor_selection = json.loads(options.json_query)
    else:
        factor_selection = None
    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)
        data_files = slice_data_files(tmpdir, factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")
        logger.debug("copying data files to %s", output_path)
        for result in data_files:
            for data_file_name in result['data_files']:
                # use the module logger consistently (was root 'logging.info')
                logger.info("Copying {}".format(data_file_name))
                shutil.copy(os.path.join(tmpdir, data_file_name), output_path)
        logger.info("Finished writing data files to {}".format(output_path))
    finally:
        # always clean up the extraction dir, even on failure
        shutil.rmtree(tmpdir)
808
def slice_data_files(dir, factor_selection=None):
    """Collect samples (optionally filtered by factor values) and their
    data files from the ISA-Tab tables under *dir*.

    *factor_selection* maps factor names to required values; matching is
    done against the 'Factor Value[<name>]' columns of the study/assay
    tables. Returns a list of dicts with 'sample' and 'data_files' keys.
    """
    results = []
    # first collect matching samples.
    # BUG FIX: '[as]_*' matches assay (a_) and study (s_) tables; the old
    # pattern '[a|s]_*' was a character class that also matched files
    # starting with a literal '|'.
    for table_file in glob.iglob(os.path.join(dir, '[as]_*')):
        logger.info('Loading {table_file}'.format(table_file=table_file))

        # BUG FIX: glob already yields dir-prefixed paths; re-joining with
        # dir produced 'dir/dir/...' for relative input paths.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        if factor_selection is None:
            # no filter: every sample in the table matches
            for indx, sample_name in df['Sample Name'].items():
                # de-duplicate samples appearing in several tables/rows
                if not any(r['sample'] == sample_name for r in results):
                    results.append(
                        {
                            'sample': sample_name,
                            'data_files': []
                        }
                    )
        else:
            for factor_name, factor_value in factor_selection.items():
                if 'Factor Value[{}]'.format(factor_name) in list(
                        df.columns.values):
                    matches = df.loc[df['Factor Value[{factor}]'.format(
                        factor=factor_name)] == factor_value][
                        'Sample Name'].items()

                    for indx, sample_name in matches:
                        if not any(r['sample'] == sample_name
                                   for r in results):
                            results.append(
                                {
                                    'sample': sample_name,
                                    'data_files': [],
                                    'query_used': factor_selection
                                }
                            )

    # now collect the data files relating to the samples
    for result in results:
        sample_name = result['sample']

        for table_file in glob.iglob(os.path.join(dir, 'a_*')):
            with open(table_file) as fp:
                df = isatab.load_table(fp)

            data_files = []

            table_headers = list(df.columns.values)
            sample_rows = df.loc[df['Sample Name'] == sample_name]

            data_node_labels = [
                'Raw Data File',
                'Raw Spectral Data File',
                'Derived Spectral Data File',
                'Derived Array Data File',
                'Array Data File',
                'Protein Assignment File',
                'Peptide Assignment File',
                'Post Translational Modification Assignment File',
                'Acquisition Parameter Data File',
                'Free Induction Decay Data File',
                'Derived Array Data Matrix File',
                'Image File',
                'Derived Data File',
                'Metabolite Assignment File']
            for node_label in data_node_labels:
                if node_label in table_headers:
                    data_files.extend(list(sample_rows[node_label]))

            # drop NaN cells left by empty data-file columns
            result['data_files'] = [i for i in list(data_files) if
                                    str(i) != 'nan']
    return results
892
def isatab_get_factor_names_command(options):
    """Scan the ISA-tab tables under ``options.input_path`` for
    'Factor Value[...]' columns and write the distinct factor names to
    ``options.output`` as a JSON list.
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    _RX_FACTOR_VALUE = re.compile(r'Factor Value\[(.*?)\]')
    factors = set()
    for table_file in glob.iglob(os.path.join(input_path, '[a|s]_*')):
        # glob already prefixes table_file with input_path; re-joining
        # (as the original did) broke relative input paths
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        for header in df.columns.values:
            match = _RX_FACTOR_VALUE.match(header)
            if match:
                # take the name from the regex group instead of the magic
                # slice header[13:-1] tied to the literal prefix length
                factors.add(match.group(1))
    # 'factors' is always a set (never None), so the original
    # 'if factors is not None' guard was unreachable; dump unconditionally
    # -- an empty study yields an empty JSON list.
    json.dump(list(factors), options.output, indent=4)
    logger.debug("Factor names written")
913
914
def zip_get_factor_names_command(options):
    """Unpack a zipped ISA-tab study and write the distinct factor names
    found in its ``a_*``/``s_*`` tables to ``options.output`` as JSON.
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    _RX_FACTOR_VALUE = re.compile(r'Factor Value\[(.*?)\]')
    factors = set()
    # unpack input_path into a scratch directory
    with zipfile.ZipFile(input_path) as zfp:
        tmpdir = tempfile.mkdtemp()
        try:
            zfp.extractall(path=tmpdir)
            for table_file in glob.iglob(os.path.join(tmpdir, '[a|s]_*')):
                # use the module logger (original mixed in logging.info)
                logger.info('Searching {}'.format(table_file))
                # glob already prefixes table_file with tmpdir; open directly
                with open(table_file) as fp:
                    df = isatab.load_table(fp)

                for header in df.columns.values:
                    match = _RX_FACTOR_VALUE.match(header)
                    if match:
                        # regex group instead of magic slice header[13:-1]
                        factors.add(match.group(1))
            # 'factors' is always a set (never None): dump unconditionally
            json.dump(list(factors), options.output, indent=4)
            logger.debug("Factor names written")
        finally:
            # remove the scratch directory even if parsing fails
            shutil.rmtree(tmpdir)
941
942
def isatab_get_factor_values_command(options):
    """Collect the distinct values of factor ``options.factor`` across the
    ISA-tab tables in ``options.input_path`` and write them to
    ``options.output`` as a JSON list.
    """
    logger.info("Getting values for factor {factor} in study {input_path}. Writing to {output_file}."
                .format(factor=options.factor, input_path=options.input_path, output_file=options.output.name))
    fvs = set()

    input_path = options.input_path
    factor_name = options.factor
    column = 'Factor Value[{factor}]'.format(factor=factor_name)

    for table_file in glob.iglob(os.path.join(input_path, '[a|s]_*')):
        # glob already prefixes table_file with input_path; re-joining
        # (as the original did) broke relative input paths
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        if column not in list(df.columns.values):
            continue
        # Series.items() replaces Series.iteritems(), which pandas 2.0
        # removed (the file already uses .items() in slice_data_files)
        for _, match in df[column].items():
            try:
                # unwrap numpy scalars to plain Python values
                match = match.item()
            except AttributeError:
                pass

            # keep scalar values, skipping pandas NaN placeholders
            if isinstance(match, (str, int, float)) and str(match) != 'nan':
                fvs.add(match)
    # 'fvs' is always a set (never None), so the original guard was
    # unreachable; dump unconditionally
    json.dump(list(fvs), options.output, indent=4)
    logger.debug("Factor values written to {}".format(options.output))
973
974
def zip_get_factor_values_command(options):
    """Unpack a zipped ISA-tab study and write the distinct values of
    factor ``options.factor`` to ``options.output`` as a JSON list.
    """
    input_path = options.input_path
    # (removed a copy-pasted "Getting factors" log line that duplicated
    # the factor-names command's message)
    logger.info("Getting values for factor {factor} in study {input_path}. "
                "Writing to {output_file}.".format(
                    factor=options.factor, input_path=options.input_path,
                    output_file=options.output.name))
    fvs = set()
    factor_name = options.factor
    column = 'Factor Value[{factor}]'.format(factor=factor_name)

    # unpack input_path into a scratch directory
    with zipfile.ZipFile(input_path) as zfp:
        tmpdir = tempfile.mkdtemp()
        try:
            zfp.extractall(path=tmpdir)
            for table_file in glob.glob(os.path.join(tmpdir, '[a|s]_*')):
                logger.info('Searching {}'.format(table_file))
                # BUG FIX: the original opened
                # os.path.join(input_path, table_file) -- input_path is the
                # zip archive, and this only worked because tmpdir paths are
                # absolute; open the globbed path directly
                with open(table_file) as fp:
                    df = isatab.load_table(fp)
                if column not in list(df.columns.values):
                    continue
                # Series.items() replaces .iteritems(), removed in pandas 2.0
                for _, match in df[column].items():
                    try:
                        # unwrap numpy scalars to plain Python values
                        match = match.item()
                    except AttributeError:
                        pass

                    if isinstance(match, (str, int, float)) and \
                            str(match) != 'nan':
                        fvs.add(match)
            # 'fvs' is always a set (never None): dump unconditionally
            json.dump(list(fvs), options.output, indent=4)
            logger.debug("Factor values written to {}".format(options.output))
        finally:
            # remove the scratch directory even if parsing fails
            shutil.rmtree(tmpdir)
1013
1014
def isatab_get_factors_summary_command(options):
    """Summarise each sample's factor values for the ISA-tab study in
    ``options.input_path``, dropping factors whose value is identical for
    every sample, and write the result to ``options.output`` as JSON.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.output.name)
    ISA = isatab.load(options.input_path)

    # flatten samples across all studies in the investigation
    all_samples = []
    for study in ISA.studies:
        all_samples.extend(study.samples)

    samples_and_fvs = []
    for sample in all_samples:
        sample_and_fvs = {'sample_name': sample.name}
        for fv in sample.factor_values:
            # plain values are kept as-is; ontology annotations are
            # flattened to their term string
            if isinstance(fv.value, (str, int, float)):
                sample_and_fvs[fv.factor_name.name] = fv.value
            elif isinstance(fv.value, OntologyAnnotation):
                sample_and_fvs[fv.factor_name.name] = fv.value.term
        samples_and_fvs.append(sample_and_fvs)

    df = pd.DataFrame(samples_and_fvs)
    # drop columns whose value never varies -- they carry no grouping info
    nunique = df.apply(pd.Series.nunique)
    df = df.drop(nunique[nunique == 1].index, axis=1)
    summary = df.to_dict(orient='records')
    # DataFrame.to_dict always returns a list, so the original
    # 'if summary is not None' guard (and its raise) was unreachable;
    # dump unconditionally (commented-out HTML rendering removed)
    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped to JSON")
1056
1057
def zip_get_factors_summary_command(options):
    """Unpack a zipped ISA-tab study, summarise each sample's factor values
    (dropping factors whose value is identical for every sample), write the
    summary to ``options.json_output`` as JSON, and render it as HTML to
    ``options.html_output``.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.json_output.name)
    input_path = options.input_path
    with zipfile.ZipFile(input_path) as zfp:
        # extract the archive into a scratch directory for isatab.load
        tmpdir = tempfile.mkdtemp()
        zfp.extractall(path=tmpdir)
        ISA = isatab.load(tmpdir)
        # flatten samples across all studies in the investigation
        all_samples = []
        for study in ISA.studies:
            all_samples.extend(study.samples)
        samples_and_fvs = []
        for sample in all_samples:
            sample_and_fvs = {
                'sample_name': sample.name,
            }
            for fv in sample.factor_values:
                # plain values are stored as-is; ontology annotations are
                # flattened to their term string
                if isinstance(fv.value, (str, int, float)):
                    fv_value = fv.value
                    sample_and_fvs[fv.factor_name.name] = fv_value
                elif isinstance(fv.value, OntologyAnnotation):
                    fv_value = fv.value.term
                    sample_and_fvs[fv.factor_name.name] = fv_value
            samples_and_fvs.append(sample_and_fvs)
        df = pd.DataFrame(samples_and_fvs)
        # drop factor columns that never vary across samples
        nunique = df.apply(pd.Series.nunique)
        cols_to_drop = nunique[nunique == 1].index
        df = df.drop(cols_to_drop, axis=1)
        summary = df.to_dict(orient='records')
        # NOTE(review): to_dict always returns a list, so the else branch
        # below is unreachable
        if summary is not None:
            json.dump(summary, options.json_output, indent=4)
            logger.debug("Summary dumped to JSON")
            # NOTE(review): prints the whole summary to stdout -- looks
            # like a debugging leftover; confirm it is intentional
            print(json.dumps(summary, indent=4))
            # NOTE(review): build_html_summary is not defined anywhere in
            # the visible part of this file -- confirm it exists elsewhere,
            # otherwise this line raises NameError (the non-zip variant has
            # the same call commented out)
            html_summary = build_html_summary(summary)
            with options.html_output as html_fp:
                html_fp.write(html_summary)
        else:
            raise RuntimeError("Error getting study summary")
        shutil.rmtree(tmpdir)
1097
1098
def get_study_groups(input_path):
    """Group sample names by their combination of factor values.

    NOTE(review): ``isatab_get_factors_summary_command`` takes an
    ``options`` namespace (not ``input_path=``) and returns ``None``, so
    this call raises and the loop below can never run as written --
    presumably a helper that *returns* the summary list (one using the
    ``'name'`` key, cf. ``get_characteristics_summary``) was intended.
    Confirm before relying on this function.
    """
    factors_summary = isatab_get_factors_summary_command(input_path=input_path)
    study_groups = {}

    # key: tuple of factor values (every entry except 'name');
    # value: list of sample names sharing that combination
    for factors_item in factors_summary:
        fvs = tuple(factors_item[k] for k in factors_item.keys() if k != 'name')

        if fvs in study_groups.keys():
            study_groups[fvs].append(factors_item['name'])
        else:
            study_groups[fvs] = [factors_item['name']]
    return study_groups
1111
1112
def get_study_groups_samples_sizes(input_path):
    """Return (factor-value-tuple, sample count) pairs, one per study group."""
    groups = get_study_groups(input_path=input_path)
    return [(fvs, len(members)) for fvs, members in groups.items()]
1116
1117
def get_sources_for_sample(input_path, sample_name):
    """Return the names of the sources that samples named *sample_name*
    derive from, across all studies in the ISA-tab at *input_path*."""
    investigation = isatab.load(input_path)
    hits = []

    for study in investigation.studies:
        for sample in study.samples:
            if sample.name != sample_name:
                continue
            print('found a hit: {sample_name}'.format(
                sample_name=sample.name))
            hits.extend(source.name for source in sample.derives_from)
    return hits
1131
1132
def get_data_for_sample(input_path, sample_name):
    """Return the data-file objects in the ISA-tab at *input_path* that
    were generated from the sample named *sample_name*."""
    ISA = isatab.load(input_path)
    hits = []
    for study in ISA.studies:
        for assay in study.assays:
            for data in assay.data_files:
                if sample_name in [x.name for x in data.generated_from]:
                    # BUG FIX: the original format string had lost its
                    # {filename} placeholder, so the filename was never
                    # shown; use lazy %-style logging instead
                    logger.info('found a hit: %s', data.filename)
                    hits.append(data)
    return hits
1144
1145
def get_study_groups_data_sizes(input_path):
    """Return (factor-value-tuple, member count) pairs, one per study group.

    NOTE(review): currently identical to get_study_groups_samples_sizes;
    presumably intended to count data files instead -- confirm.
    """
    groups = get_study_groups(input_path=input_path)
    return [(fvs, len(members)) for fvs, members in groups.items()]
1149
1150
def get_characteristics_summary(input_path):
    """Summarise source characteristics per sample for an ISA-tab study.

    :param input_path: Input path to ISA-tab
    :return: A list of dicts, one per sample, mapping characteristic
        category names to values.

    Note: it only returns a summary of characteristics with variable
    values -- categories whose value is identical for every sample are
    dropped.

    Example usage:
    characteristics_summary = get_characteristics_summary('/path/to/my/study/')
    [
        {
            "name": "6089if_9",
            "Variant": "Synechocystis sp. PCC 6803.sll0171.ko"
        },
        {
            "name": "6089if_43",
            "Variant": "Synechocystis sp. PCC 6803.WT.none"
        },
    ]
    """
    investigation = isatab.load(input_path)

    samples = []
    for study in investigation.studies:
        samples.extend(study.samples)

    records = []
    for sample in samples:
        record = {'name': sample.name}

        for source in sample.derives_from:
            for characteristic in source.characteristics:
                value = characteristic.value
                # ontology annotations are flattened to their term string;
                # plain scalar values are kept as-is
                if isinstance(value, OntologyAnnotation):
                    record[characteristic.category.term] = value.term
                elif isinstance(value, (str, int, float)):
                    record[characteristic.category.term] = value

        records.append(record)

    frame = pd.DataFrame(records)
    # keep only characteristics that vary between samples
    uniques = frame.apply(pd.Series.nunique)
    frame = frame.drop(uniques[uniques == 1].index, axis=1)
    return frame.to_dict(orient='records')
1206
1207
def get_study_variable_summary(input_path):
    """Summarise factor values and source characteristics per sample for
    the ISA-tab at *input_path*, dropping variables whose value is
    identical across every sample.

    :return: List of dicts, one per sample, keyed by variable name.
    """
    investigation = isatab.load(input_path)

    samples = []
    for study in investigation.studies:
        samples.extend(study.samples)

    records = []
    for sample in samples:
        record = {'sample_name': sample.name}

        # factor values: ontology terms flattened, scalars kept as-is
        for fv in sample.factor_values:
            if isinstance(fv.value, OntologyAnnotation):
                record[fv.factor_name.name] = fv.value.term
            elif isinstance(fv.value, (str, int, float)):
                record[fv.factor_name.name] = fv.value

        # source characteristics, same flattening rule
        for source in sample.derives_from:
            record['source_name'] = source.name
            for characteristic in source.characteristics:
                value = characteristic.value
                if isinstance(value, OntologyAnnotation):
                    record[characteristic.category.term] = value.term
                elif isinstance(value, (str, int, float)):
                    record[characteristic.category.term] = value

        records.append(record)

    frame = pd.DataFrame(records)
    # keep only variables that actually vary between samples
    uniques = frame.apply(pd.Series.nunique)
    frame = frame.drop(uniques[uniques == 1].index, axis=1)
    return frame.to_dict(orient='records')
1247
1248
def get_study_group_factors(input_path):
    """Return the distinct combinations of factor values found in the
    ISA-tab tables under *input_path*.

    :param input_path: Directory containing ISA-tab ``a_*``/``s_*`` files.
    :return: List of dicts mapping 'Factor Value[...]' column names to
        values, de-duplicated.
    """
    factors_list = []

    # NOTE(review): '[a|s]_*' also matches files starting with '|_';
    # '[as]_*' was presumably intended.
    for table_file in glob.iglob(os.path.join(input_path, '[a|s]_*')):
        # NOTE(review): table_file is already prefixed with input_path by
        # glob; re-joining only works when input_path is absolute -- verify
        # against callers.
        with open(os.path.join(input_path, table_file)) as fp:
            df = isatab.load_table(fp)

        factor_columns = [x for x in df.columns if x.startswith(
            'Factor Value')]
        if len(factor_columns) > 0:
            # NOTE(review): this assignment overwrites rather than extends,
            # so only the last table with factor columns contributes --
            # confirm whether accumulating across tables was intended.
            factors_list = df[factor_columns].drop_duplicates()\
                .to_dict(orient='records')
    return factors_list
1262
1263
def get_filtered_df_on_factors_list(input_path):
    """Build pandas ``query`` strings from the study-group factor
    combinations and print, for each group, the matching sample, source,
    and raw-spectral-data columns found in the ISA-tab tables under
    *input_path*.

    :return: The list of query strings that were applied.
    """
    factors_list = get_study_group_factors(input_path=input_path)
    queries = []

    for item in factors_list:
        query_str = []

        for k, v in item.items():
            # normalise names/values into legal pandas.query identifiers
            k = k.replace(' ', '_').replace('[', '_').replace(']', '_')
            if isinstance(v, str):
                v = v.replace(' ', '_').replace('[', '_').replace(']', '_')
                query_str.append("{k} == '{v}' and ".format(k=k, v=v))

        query_str = ''.join(query_str)[:-4]  # drop the dangling "and "
        queries.append(query_str)

    for table_file in glob.iglob(os.path.join(input_path, '[a|s]_*')):
        # open the globbed path directly: glob already prefixes it with
        # input_path, so the original re-join broke relative paths
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        # one pass instead of three: make headers legal query identifiers
        df.columns = df.columns.map(
            lambda x: x.replace(' ', '_').replace('[', '_').replace(']', '_')
            if isinstance(x, str) else x)

        for query in queries:
            # query uses pandas.eval, which evaluates queries like pure Python
            # notation
            df2 = df.query(query)
            if 'Sample_Name' in df.columns:
                print('Group: {query} / Sample_Name: {sample_name}'.format(
                    query=query, sample_name=list(df2['Sample_Name'])))

            if 'Source_Name' in df.columns:
                print('Group: {} / Sources_Name: {}'.format(
                    query, list(df2['Source_Name'])))

            if 'Raw_Spectral_Data_File' in df.columns:
                # BUG FIX: restored the {filename} placeholder the original
                # format string had lost (it printed a constant instead)
                print('Group: {query} / Raw_Spectral_Data_File: {filename}'
                      .format(query=query[13:-2],
                              filename=list(df2['Raw_Spectral_Data_File'])))
    return queries
1316
1317
def datatype_get_summary_command(options):
    """Write the study-variable summary for ``options.study_id`` to
    ``options.output`` as JSON."""
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.output.name)

    summary = get_study_variable_summary(options.study_id)
    print('summary: ', list(summary))
    # guard clause instead of if/else: bail out before writing
    if summary is None:
        raise RuntimeError("Error getting study summary")
    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped")
1329
1330
1331 # logging and argument parsing
1332
def _configure_logger(options):
    """Initialise the module-level logger at the level named by
    ``options.log_level`` (falling back to INFO for unknown names)."""
    level = getattr(logging, options.log_level, logging.INFO)
    logging.basicConfig(level=level)

    global logger
    logger = logging.getLogger()
    # basicConfig's level does not always take effect (pre-existing
    # handlers), so set it explicitly on the root logger as well
    logger.setLevel(level)
1340
1341
def _parse_args(args):
    """Parse command-line *args* with the module's argument parser."""
    return make_parser().parse_args(args)
1346
1347
def main(args):
    """Parse *args*, configure logging, then dispatch to the selected
    subcommand handler."""
    opts = _parse_args(args)
    _configure_logger(opts)
    # each subparser stored its handler in the 'func' attribute
    opts.func(opts)
1353
1354
if __name__ == '__main__':
    try:
        main(sys.argv[1:])
        sys.exit(0)
    except Exception as e:
        # 'logger' is only set once _configure_logger has run; fall back to
        # the root logger if the failure happened before that (the original
        # would raise AttributeError on None, masking the real error).
        # logger.exception already records the message plus traceback, so
        # the original duplicate logger.error(e) call is dropped.
        (logger or logging.getLogger()).exception(e)
        sys.exit(e.code if hasattr(e, "code") else 99)