Mercurial > repos > prog > mtblsdwnld
annotate isaslicer.py @ 1:1fd8547867be draft default tip
"planemo upload commit 76293bd47447c171c939b4f3c194fd0cfbd7f69c-dirty"
author | prog |
---|---|
date | Thu, 04 Mar 2021 11:21:03 +0000 |
parents | 8dab200e02cb |
children |
rev | line source |
---|---|
0
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
2 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
3 import argparse |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
4 import glob |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
5 import json |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
6 import logging |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
7 import os |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
8 import re |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
9 import shutil |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
10 import sys |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
11 import tempfile |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
12 import zipfile |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
13 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
14 import pandas as pd |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
15 from isatools import isatab |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
16 from isatools.model import OntologyAnnotation |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
17 from isatools.net import mtbls as MTBLS |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
18 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
19 logger = None |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
20 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
21 # Usage: isaslicer.py [--log-level LEVEL] <command> <study_id | input_path> [ command-specific options ] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
22 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
23 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
24 def make_parser(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
25 parser = argparse.ArgumentParser( description="ISA slicer") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
26 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
27 parser.add_argument('--log-level', choices=[ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
28 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
29 default='INFO', help="Set the desired logging level") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
30 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
31 subparsers = parser.add_subparsers( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
32 title='Actions', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
33 dest='command') # specified subcommand will be available in attribute 'command' |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
34 subparsers.required = True |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
35 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
36 # mtblisa commands |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
37 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
38 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
39 'mtbls-get-study-archive', aliases=['gsa'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
40 help="Get ISA study from MetaboLights as zip archive") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
41 subparser.set_defaults(func=get_study_archive_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
42 subparser.add_argument('study_id') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
43 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
44 'output', metavar="OUTPUT", |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
45 help="Name of output archive (extension will be added)") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
46 subparser.add_argument('--format', metavar="FMT", choices=[ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
47 'zip', 'tar', 'gztar', 'bztar', 'xztar'], default='zip', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
48 help="Type of archive to create") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
49 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
50 subparser = subparsers.add_parser('mtbls-get-study', aliases=['gs'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
51 help="Get ISA study from MetaboLights") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
52 subparser.set_defaults(func=get_study_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
53 subparser.add_argument('study_id') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
54 subparser.add_argument('output', metavar="PATH", help="Name of output") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
55 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
56 '-f', '--isa-format', choices=['isa-tab', 'isa-json'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
57 metavar="FORMAT", default='isa-tab', help="Desired ISA format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
58 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
59 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
60 'mtbls-get-factors', aliases=['gf'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
61 help="Get factor names from a study in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
62 subparser.set_defaults(func=get_factors_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
63 subparser.add_argument('study_id') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
64 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
65 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
66 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
67 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
68 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
69 'mtbls-get-factor-values', aliases=['gfv'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
70 help="Get factor values from a study in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
71 subparser.set_defaults(func=get_factor_values_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
72 subparser.add_argument('study_id') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
73 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
74 'factor', help="The desired factor. Use `get-factors` to get the list " |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
75 "of available factors") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
76 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
77 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
78 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
79 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
80 subparser = subparsers.add_parser('mtbls-get-data-list', aliases=['gd'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
81 help="Get data files list in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
82 subparser.set_defaults(func=get_data_files_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
83 subparser.add_argument('study_id') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
84 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
85 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
86 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
87 '--json-query', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
88 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
89 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
90 '--galaxy_parameters_file', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
91 help="Path to JSON file containing input Galaxy JSON") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
92 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
93 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
94 'mtbls-get-factors-summary', aliases=['gsum'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
95 help="Get the variables summary from a study, in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
96 subparser.set_defaults(func=get_summary_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
97 subparser.add_argument('study_id') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
98 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
99 'json_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
100 help="Output JSON file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
101 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
102 'html_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
103 help="Output HTML file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
104 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
105 # isaslicer commands on path to unpacked ISA-Tab as input |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
106 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
107 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
108 'isa-tab-get-factors', aliases=['isagf'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
109 help="Get factor names from a study in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
110 subparser.set_defaults(func=isatab_get_factor_names_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
111 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
112 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
113 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
114 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
115 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
116 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
117 'zip-get-factors', aliases=['zipgf'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
118 help="Get factor names from a study in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
119 subparser.set_defaults(func=zip_get_factor_names_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
120 subparser.add_argument('input_path', type=str, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
121 help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
122 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
123 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
124 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
125 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
126 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
127 'isa-tab-get-factor-values', aliases=['isagfv'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
128 help="Get factor values from a study in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
129 subparser.set_defaults(func=isatab_get_factor_values_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
130 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
131 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
132 'factor', help="The desired factor. Use `get-factors` to get the list " |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
133 "of available factors") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
134 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
135 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
136 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
137 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
138 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
139 'zip-get-factor-values', aliases=['zipgfv'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
140 help="Get factor values from a study in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
141 subparser.set_defaults(func=zip_get_factor_values_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
142 subparser.add_argument('input_path', type=str, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
143 help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
144 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
145 'factor', help="The desired factor. Use `get-factors` to get the list " |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
146 "of available factors") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
147 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
148 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
149 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
150 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
151 subparser = subparsers.add_parser('isa-tab-get-data-list', aliases=['isagdl'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
152 help="Get data files list in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
153 subparser.set_defaults(func=isatab_get_data_files_list_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
154 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
155 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
156 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
157 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
158 '--json-query', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
159 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
160 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
161 '--galaxy_parameters_file', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
162 help="Path to JSON file containing input Galaxy JSON") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
163 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
164 subparser = subparsers.add_parser('zip-get-data-list', aliases=['zipgdl'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
165 help="Get data files list in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
166 subparser.set_defaults(func=zip_get_data_files_list_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
167 subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
168 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
169 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
170 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
171 '--json-query', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
172 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
173 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
174 '--galaxy_parameters_file', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
175 help="Path to JSON file containing input Galaxy JSON") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
176 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
177 subparser = subparsers.add_parser('isa-tab-get-data-collection', aliases=['isagdc'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
178 help="Get data files collection") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
179 subparser.set_defaults(func=isatab_get_data_files_collection_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
180 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
181 subparser.add_argument('output_path', type=str, help="Output data files path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
182 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
183 '--json-query', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
184 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
185 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
186 '--galaxy_parameters_file', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
187 help="Path to JSON file containing input Galaxy JSON") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
188 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
189 subparser = subparsers.add_parser('zip-get-data-collection', aliases=['zipgdc'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
190 help="Get data files collection") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
191 subparser.set_defaults(func=zip_get_data_files_collection_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
192 subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
193 subparser.add_argument('output_path', type=str, help="Output data files path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
194 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
195 '--json-query', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
196 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
197 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
198 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
199 'isa-tab-get-factors-summary', aliases=['isasum'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
200 help="Get the variables summary from a study, in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
201 subparser.set_defaults(func=isatab_get_factors_summary_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
202 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
203 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
204 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
205 help="Output file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
206 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
207 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
208 'zip-get-factors-summary', aliases=['zipsum'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
209 help="Get the variables summary from a study, in json format") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
210 subparser.set_defaults(func=zip_get_factors_summary_command) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
211 subparser.add_argument('input_path', type=str, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
212 help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
213 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
214 'json_output', nargs='?', type=argparse.FileType('w'), |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
215 default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
216 help="Output JSON file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
217 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
218 'html_output', nargs='?', type=argparse.FileType('w'), |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
219 default=sys.stdout, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
220 help="Output HTML file") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
221 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
222 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
223 'isaslicer2-slice', aliases=['slice2'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
224 help="Slice ISA-Tabs version 2") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
225 subparser.set_defaults(func=query_isatab) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
226 subparser.add_argument('--source_dir', type=str, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
227 help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
228 subparser.add_argument( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
229 '--galaxy_parameters_file', type=argparse.FileType(mode='r'), |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
230 help="Path to JSON file containing input Galaxy JSON") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
231 subparser.add_argument('--output', type=argparse.FileType(mode='w'), |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
232 help="Input ISA-Tab zip path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
233 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
234 subparser = subparsers.add_parser( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
235 'filter-data', aliases=['filter'], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
236 help="Filter out data based on slicer2") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
237 subparser.set_defaults(func=filter_data) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
238 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
239 subparser.add_argument('output_path', type=str, help="Output data files path") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
240 subparser.add_argument('--slice', type=argparse.FileType(mode='r'), |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
241 help="slice") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
242 subparser.add_argument('--filename_filter', type=str, help="shell-like wildcard to filter files") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
243 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
244 return parser |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
245 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
246 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def filter_data(options):
    """Symlink the data files selected by a slice into the output directory.

    Loads the slice JSON from ``options.slice``, gathers every file name
    listed under ``results[*]['data_files']``, and creates a symlink in
    ``options.output_path`` for each file found under ``options.input_path``
    that matches ``options.filename_filter`` and whose basename is one of the
    gathered names.  A summary of the run is written to ``cli.log`` in the
    current working directory.

    :param options: parsed command-line namespace providing ``input_path``,
        ``output_path``, ``slice`` (an open, readable file object) and
        ``filename_filter`` (shell-style wildcard; ``None`` or empty means
        every file in the input directory).
    :raises IOError: if ``input_path`` is set but does not exist.
    :raises SystemExit: with status 1 if a symlink cannot be created.
    """
    loglines = []
    source_dir = options.input_path if options.input_path else ""
    output_path = options.output_path
    # --filename_filter is optional on the command line; without a fallback
    # os.path.join() would be handed None and raise TypeError.
    filename_filter = options.filename_filter or '*'
    if source_dir and not os.path.exists(source_dir):
        raise IOError('Source path does not exist!')

    slice_json = options.slice
    # A set both removes duplicate names and gives O(1) membership tests.
    wanted_names = set()
    for result in json.load(slice_json)['results']:
        wanted_names.update(result.get('data_files', []))

    # Sorted so that link creation order and cli.log are reproducible
    # (glob returns entries in arbitrary directory order).
    to_copy = sorted(
        filepath
        for filepath in glob.glob(os.path.join(source_dir, filename_filter))
        if os.path.basename(filepath) in wanted_names)

    loglines.append("Using slice results from {}\n".format(slice_json.name))
    for filepath in to_copy:
        filename = os.path.basename(filepath)
        loglines.append("Copying {}\n".format(filename))
        try:
            # A relative link target is resolved against the directory that
            # holds the link, not the current directory, so the target must
            # be absolute or the link dangles.
            os.symlink(
                os.path.abspath(filepath), os.path.join(output_path, filename))
        except OSError as e:
            print(e)
            sys.exit(1)
    with open('cli.log', 'w') as fp:
        fp.writelines(loglines)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
282 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
283 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
284 def query_isatab(options): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
285 source_dir = options.source_dir if options.source_dir else "" |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
286 galaxy_parameters_file = options.galaxy_parameters_file |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
287 output = options.output |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
288 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
289 debug = True |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
290 if galaxy_parameters_file: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
291 galaxy_parameters = json.load(galaxy_parameters_file) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
292 print('Galaxy parameters:') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
293 print(json.dumps(galaxy_parameters, indent=4)) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
294 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
295 raise IOError('Could not load Galaxy parameters file!') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
296 if source_dir: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
297 if not os.path.exists(source_dir): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
298 raise IOError('Source path does not exist!') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
299 query = galaxy_parameters['query'] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
300 if debug: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
301 print('Query is:') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
302 print(json.dumps(query, indent=4)) # for debugging only |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
303 if source_dir: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
304 investigation = isatab.load(source_dir) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
305 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
306 tmp = tempfile.mkdtemp() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
307 _ = MTBLS.get(galaxy_parameters['input']['mtbls_id'], tmp) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
308 investigation = isatab.load(tmp) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
309 # filter assays by mt/tt |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
310 matching_assays = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
311 mt = query.get('measurement_type').strip() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
312 tt = query.get('technology_type').strip() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
313 if mt and tt: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
314 for study in investigation.studies: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
315 matching_assays.extend( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
316 [x for x in study.assays if x.measurement_type.term == mt |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
317 and x.technology_type.term == tt]) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
318 elif mt and not tt: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
319 for study in investigation.studies: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
320 matching_assays.extend( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
321 [x for x in study.assays if x.measurement_type.term == mt]) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
322 elif not mt and tt: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
323 for study in investigation.studies: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
324 matching_assays.extend( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
325 [x for x in study.assays if x.technology_type.term == tt]) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
326 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
327 for study in investigation.studies: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
328 matching_assays.extend(study.assays) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
329 assay_samples = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
330 for assay in matching_assays: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
331 assay_samples.extend(assay.samples) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
332 if debug: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
333 print('Total samples: {}'.format(len(assay_samples))) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
334 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
335 # filter samples by fv |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
336 factor_selection = { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
337 x.get('factor_name').strip(): x.get('factor_value').strip() for x in |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
338 query.get('factor_selection', [])} |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
339 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
340 fv_samples = set() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
341 if factor_selection: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
342 samples_to_remove = set() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
343 for f, v in factor_selection.items(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
344 for sample in assay_samples: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
345 for fv in [x for x in sample.factor_values if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
346 x.factor_name.name == f]: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
347 if isinstance(fv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
348 if fv.value.term == v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
349 fv_samples.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
350 elif fv.value == v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
351 fv_samples.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
352 for f, v in factor_selection.items(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
353 for sample in fv_samples: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
354 for fv in [x for x in sample.factor_values if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
355 x.factor_name.name == f]: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
356 if isinstance(fv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
357 if fv.value.term != v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
358 samples_to_remove.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
359 elif fv.value != v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
360 samples_to_remove.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
361 final_fv_samples = fv_samples.difference(samples_to_remove) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
362 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
363 final_fv_samples = assay_samples |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
364 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
365 # filter samples by characteristic |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
366 characteristics_selection = { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
367 x.get('characteristic_name').strip(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
368 x.get('characteristic_value').strip() for x in |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
369 query.get('characteristics_selection', [])} |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
370 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
371 cv_samples = set() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
372 if characteristics_selection: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
373 first_pass = True |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
374 samples_to_remove = set() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
375 for c, v in characteristics_selection.items(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
376 if first_pass: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
377 for sample in final_fv_samples: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
378 for cv in [x for x in sample.characteristics if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
379 x.category.term == c]: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
380 if isinstance(cv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
381 if cv.value.term == v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
382 cv_samples.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
383 elif cv.value == v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
384 cv_samples.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
385 for source in sample.derives_from: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
386 for cv in [x for x in source.characteristics if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
387 x.category.term == c]: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
388 if isinstance(cv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
389 if cv.value.term == v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
390 cv_samples.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
391 elif cv.value == v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
392 cv_samples.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
393 first_pass = False |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
394 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
395 for sample in cv_samples: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
396 for cv in [x for x in sample.characteristics if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
397 x.category.term == c]: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
398 if isinstance(cv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
399 if cv.value.term != v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
400 samples_to_remove.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
401 elif cv.value != v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
402 samples_to_remove.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
403 for source in sample.derives_from: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
404 for cv in [x for x in source.characteristics if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
405 x.category.term == c]: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
406 if isinstance(cv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
407 if cv.value.term != v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
408 samples_to_remove.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
409 elif cv.value != v: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
410 samples_to_remove.add(sample) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
411 final_cv_samples = cv_samples.difference(samples_to_remove) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
412 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
413 final_cv_samples = final_fv_samples |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
414 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
415 # filter samples by process parameter |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
416 parameters_selection = { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
417 x.get('parameter_name').strip(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
418 x.get('parameter_value').strip() for x in |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
419 query.get('parameter_selection', [])} |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
420 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
421 final_samples = final_cv_samples |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
422 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
423 if debug: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
424 print('Final number of samples: {}'.format(len(final_samples))) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
425 results = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
426 for sample in final_samples: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
427 results.append({ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
428 'sample_name': sample.name, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
429 'data_files': [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
430 }) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
431 for result in results: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
432 sample_name = result['sample_name'] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
433 if source_dir: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
434 table_files = glob.iglob(os.path.join(source_dir, 'a_*')) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
435 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
436 table_files = glob.iglob(os.path.join(tmp, 'a_*')) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
437 for table_file in table_files: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
438 with open(table_file) as fp: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
439 df = isatab.load_table(fp) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
440 data_files = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
441 table_headers = list(df.columns.values) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
442 sample_rows = df.loc[df['Sample Name'] == sample_name] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
443 data_node_labels = [ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
444 'Raw Data File', 'Raw Spectral Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
445 'Derived Spectral Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
446 'Derived Array Data File', 'Array Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
447 'Protein Assignment File', 'Peptide Assignment File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
448 'Post Translational Modification Assignment File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
449 'Acquisition Parameter Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
450 'Free Induction Decay Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
451 'Derived Array Data Matrix File', 'Image File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
452 'Derived Data File', 'Metabolite Assignment File'] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
453 if parameters_selection: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
454 for p, v in parameters_selection.items(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
455 sample_pv_rows = sample_rows.loc[ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
456 sample_rows['Parameter Value[{}]'.format(p)] == v] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
457 for node_label in data_node_labels: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
458 if node_label in table_headers: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
459 data_files.extend( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
460 list(sample_pv_rows[node_label])) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
461 result['data_files'].extend(list(set( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
462 i for i in list(data_files) if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
463 str(i) not in ('nan', '')))) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
464 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
465 for node_label in data_node_labels: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
466 if node_label in table_headers: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
467 data_files.extend(list(sample_rows[node_label])) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
468 result['data_files'].extend( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
469 list(set(i for i in list(data_files) if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
470 str(i) not in ('nan', '')))) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
471 results_json = { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
472 'query': query, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
473 'results': results |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
474 } |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
475 json.dump(results_json, output, indent=4) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
476 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
477 # if galaxy_parameters['input']['collection_output']: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
478 # logger = logging.getLogger() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
479 # logger.debug("copying data files to %s", os.path.dirname(output)) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
480 # for result in results: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
481 # for data_file_name in result['data_files']: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
482 # logging.info("Copying {}".format(data_file_name)) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
483 # shutil.copy(os.path.join(source_dir, data_file_name), |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
484 # os.path.dirname(output)) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
485 # logger.info( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
486 # "Finished writing data files to {}".format(os.path.dirname(output))) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
487 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
488 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_archive_command(options):
    """Download a study and pack the downloaded directory into an archive.

    Reads ``options.study_id``, ``options.output`` (base path handed to
    ``shutil.make_archive``) and ``options.format`` (archive format handed to
    ``shutil.make_archive``).

    The directory returned by ``MTBLS.get`` is removed once archiving has
    been attempted, whether or not it succeeded.

    Raises:
        RuntimeError: if ``MTBLS.get`` returns ``None``.
    """
    study_id = options.study_id

    logger.info("Downloading study %s into archive at path %s.%s",
                study_id, options.output, options.format)

    download_dir = MTBLS.get(study_id)
    logger.debug("MTBLS.get returned '%s'", download_dir)

    # Nothing to archive (and nothing to clean up) without a download.
    if download_dir is None:
        raise RuntimeError("Error downloading ISA study")

    try:
        shutil.make_archive(
            options.output, options.format, download_dir, logger=logger)
        logger.info("ISA archive written")
    finally:
        # Best-effort removal; errors while deleting are ignored.
        logger.debug("Trying to clean up tmp dir %s", download_dir)
        shutil.rmtree(download_dir, ignore_errors=True)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
507 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
508 # mtblisa commands |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
509 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
510 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_command(options):
    """Download a study and store it at ``options.output``.

    ``options.isa_format`` selects what is written:

    * ``"isa-tab"``: the directory returned by ``MTBLS.get`` is moved to
      ``options.output``.
    * ``"isa-json"``: ``options.output`` is created as a directory and the
      object returned by ``MTBLS.getj`` is dumped into
      ``<identifier>.json`` inside it.

    Raises:
        RuntimeError: if ``options.output`` already exists, or if the
            download returns ``None``.
        ValueError: if ``options.isa_format`` is neither of the two
            supported values.
    """
    destination = options.output
    if os.path.exists(destination):
        raise RuntimeError(
            "Selected output path {} already exists!".format(destination))

    isa_format = options.isa_format

    if isa_format == "isa-json":
        isajson = MTBLS.getj(options.study_id)
        if isajson is None:
            raise RuntimeError("Error downloading ISA study")

        logger.debug(
            "Finished downloading data. Dumping json to final location %s",
            destination)
        os.makedirs(destination)
        file_name = "{}.json".format(isajson['identifier'])
        with open(os.path.join(destination, file_name), 'w') as fd:
            json.dump(isajson, fd)
        logger.info("ISA-JSON written to %s", destination)
    elif isa_format == "isa-tab":
        downloaded = None
        try:
            logger.info("Downloading study %s", options.study_id)
            downloaded = MTBLS.get(options.study_id)
            if downloaded is None:
                raise RuntimeError("Error downloading ISA study")

            logger.debug(
                "Finished downloading data. Moving to final location %s",
                destination)
            shutil.move(downloaded, destination)
            logger.info("ISA archive written to %s", destination)
        finally:
            if downloaded:
                # try to clean up any temporary files left behind
                logger.debug(
                    "Deleting %s, if there's anything there", downloaded)
                shutil.rmtree(downloaded, ignore_errors=True)
    else:
        raise ValueError(
            "BUG! Got an invalid isa format '{}'".format(isa_format))
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
551 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
552 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_factors_command(options):
    """Dump the factor names of a study to ``options.output`` as a JSON list.

    ``options.output`` is used as a writable file object (it is handed to
    ``json.dump`` and its ``name`` is logged).

    Raises:
        RuntimeError: if ``MTBLS.get_factor_names`` returns ``None``.
    """
    logger.info("Getting factors for study %s. Writing to %s.",
                options.study_id, options.output.name)

    names = MTBLS.get_factor_names(options.study_id)
    if names is None:
        raise RuntimeError("Error downloading factors.")

    json.dump(list(names), options.output, indent=4)
    logger.debug("Factor names written")
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
562 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
563 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_factor_values_command(options):
    """Dump the values of one study factor to ``options.output`` as JSON.

    Reads ``options.study_id`` and ``options.factor``; ``options.output`` is
    used as a writable file object (it is handed to ``json.dump`` and its
    ``name`` is logged).

    Raises:
        RuntimeError: if ``MTBLS.get_factor_values`` returns ``None``.
    """
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        options.factor, options.study_id, options.output.name)

    fvs = MTBLS.get_factor_values(options.study_id, options.factor)
    if fvs is None:
        raise RuntimeError("Error getting factor values")

    json.dump(list(fvs), options.output, indent=4)
    # Log the file's name, not the file object's repr, so this message
    # matches the "Writing to ..." message above.
    logger.debug("Factor values written to %s", options.output.name)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
573 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
574 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_data_files_command(options):
    """Dump the data files of a study to ``options.output`` as a JSON list.

    The query handed to ``MTBLS.get_data_files`` is chosen as follows:

    * ``options.json_query`` (a JSON string), when truthy, is parsed and
      used as the query;
    * otherwise ``options.galaxy_parameters_file`` (a path), when truthy, is
      loaded and its ``factor_value_series`` entries are turned into a
      ``{factor_name: factor_value}`` query;
    * otherwise ``MTBLS.get_data_files`` is called without a query.

    ``options.output`` is used as a writable file object.

    Raises:
        RuntimeError: if ``MTBLS.get_data_files`` returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.study_id, options.output.name)

    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        json_struct = json.loads(options.json_query)
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        json_struct = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    else:
        logger.debug("No query was specified")
        data_files = MTBLS.get_data_files(options.study_id)

    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # Log the file's name, not the file object's repr, so this message
    # matches the other messages in this function.
    logger.info("Finished writing data files to %s", options.output.name)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
602 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
603 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def build_html_data_files_list(data_files_list):
    """Render a list of per-sample data files as an HTML page.

    Args:
        data_files_list: iterable of mappings, each with a ``'sample'`` key
            (the sample name) and a ``'data_files'`` key (an iterable of
            file-name strings).

    Returns:
        A string holding an HTML document whose body is a two-column table
        (sample name, comma-separated data file names), one row per entry.
    """
    # Function-scope import: keeps the block self-contained without touching
    # the file's import section.
    from html import escape

    rows = ['<tr><th>Sample Name</th><th>Data File Names</th></tr>']
    for data_file in data_files_list:
        # Names come from study metadata; escape them so characters such as
        # '<' or '&' cannot break the markup.
        sample_name = escape(str(data_file['sample']))
        data_files = escape(', '.join(data_file['data_files']))
        # Each row is closed explicitly with </tr>.
        rows.append(
            '<tr><td>{sample_name}</td><td>{data_files}</td></tr>'.format(
                sample_name=sample_name, data_files=data_files))

    # The table is closed explicitly with </table>.
    data_files_table = '<table>{}</table>'.format(''.join(rows))

    html_data_files_list = """
    <html>
    <head>
    <title>ISA-Tab Factors Summary</title>
    </head>
    <body>
    {summary_table}
    </body>
    </html>
""".format(summary_table=data_files_table)
    return html_data_files_list
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
623 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
624 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def build_html_summary(summary):
    """Render a study-variable summary as a small HTML page.

    Samples are grouped by their combination of factor values (every key of
    a summary item other than ``sample_name``); the page contains one table
    row per group with the number of samples in that group.

    :param summary: iterable of dicts, each holding a ``sample_name`` key
        plus one key per study factor.
    :return: the HTML document as a string.
    :raises KeyError: if an item has no ``sample_name`` key.
    """
    # Function-scope import so the block does not depend on the file header.
    from html import escape

    study_groups = {}
    for entry in summary:
        sample_name = entry['sample_name']
        # format() instead of str.join so non-string factor values
        # (e.g. numbers) do not raise TypeError.
        study_group = ', '.join(
            '{}: {}'.format(factor, value)
            for factor, value in entry.items()
            if factor != 'sample_name')
        study_groups.setdefault(study_group, []).append(sample_name)

    # Factor names/values come from study metadata: escape them before they
    # are embedded in markup, and close every row with </tr>.
    rows = ''.join(
        '<tr><td>{study_group}</td><td>{num_samples}</td></tr>'.format(
            study_group=escape(study_group, quote=False),
            num_samples=len(sample_names))
        for study_group, sample_names in study_groups.items())
    summary_table = (
        '<table>'
        '<tr><th>Study group</th><th>Number of samples</th></tr>'
        + rows +
        '</table>')

    return """
<html>
<head>
<title>ISA-Tab Factors Summary</title>
</head>
<body>
{summary_table}
</body>
</html>
""".format(summary_table=summary_table)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
655 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
656 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_summary_command(options):
    """Fetch a study's variable summary and write it as JSON and HTML.

    The summary returned by ``MTBLS.get_study_variable_summary`` is dumped
    to ``options.json_output`` and its HTML rendering is written to
    ``options.html_output``, which is used as a context manager.

    :param options: parsed arguments providing ``study_id``,
        ``json_output`` and ``html_output``.
    :raises RuntimeError: if no summary was returned for the study.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.json_output.name)

    study_summary = MTBLS.get_study_variable_summary(options.study_id)
    if study_summary is None:
        raise RuntimeError("Error getting study summary")

    json.dump(study_summary, options.json_output, indent=4)
    logger.debug("Summary dumped to JSON")

    page = build_html_summary(study_summary)
    with options.html_output as html_fp:
        html_fp.write(page)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
675 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
676 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
677 # isaslicer commands |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
678 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def isatab_get_data_files_list_command(options):
    """Write the data files list of an ISA-Tab directory as JSON.

    The factor selection is taken from ``options.json_query`` (a JSON
    string) or, failing that, from the ``factor_value_series`` entries of
    the JSON file named by ``options.galaxy_parameters_file``. Without
    either, no selection is passed to ``slice_data_files``.

    :param options: parsed arguments providing ``input_path``, ``output``
        (a writable file object), ``json_query`` and
        ``galaxy_parameters_file``.
    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)

    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    data_files = slice_data_files(
        options.input_path, factor_selection=factor_selection)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # Log the file name, not the repr of the file object.
    logger.info("Finished writing data files to %s", options.output.name)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
707 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
708 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def zip_get_data_files_list_command(options):
    """Write the data files list of a zipped ISA-Tab archive as JSON.

    The archive at ``options.input_path`` is extracted into a temporary
    directory, which is passed to ``slice_data_files``. The factor
    selection is taken from ``options.json_query`` (a JSON string) or,
    failing that, from the ``factor_value_series`` entries of the JSON file
    named by ``options.galaxy_parameters_file``.

    :param options: parsed arguments providing ``input_path``, ``output``
        (a writable file object), ``json_query`` and
        ``galaxy_parameters_file``.
    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)

    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(options.input_path) as zfp:
            zfp.extractall(path=tmpdir)

        data_files = slice_data_files(
            tmpdir, factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")

        logger.debug("dumping data files to %s", options.output.name)
        json.dump(list(data_files), options.output, indent=4)
        # Log the file name, not the repr of the file object.
        logger.info("Finished writing data files to %s",
                    options.output.name)
    finally:
        # Remove the extraction directory on failure paths too, so a bad
        # archive or a slicing error does not leak temporary files.
        shutil.rmtree(tmpdir)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
740 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
741 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def isatab_get_data_files_collection_command(options):
    """Copy the selected data files of an ISA-Tab directory to a folder.

    The factor selection is taken from ``options.json_query`` (a JSON
    string) or, failing that, from the ``factor_value_series`` entries of
    the JSON file named by ``options.galaxy_parameters_file``. Every file
    named in the ``data_files`` entries returned by ``slice_data_files`` is
    copied from ``options.input_path`` to ``options.output_path``.

    :param options: parsed arguments providing ``input_path``,
        ``output_path``, ``json_query`` and ``galaxy_parameters_file``.
    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    input_path = options.input_path
    output_path = options.output_path
    logger.info("Getting data files for study %s. Writing to %s.",
                input_path, output_path)

    # One truthiness-based decision for both logging and parsing: an empty
    # query string counts as "no query" instead of reaching json.loads("").
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    data_files = slice_data_files(
        input_path, factor_selection=factor_selection)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("copying data files to %s", output_path)
    for sample_entry in data_files:
        for data_file_name in sample_entry['data_files']:
            # Module logger rather than the root logger (logging.info).
            logger.info("Copying %s", data_file_name)
            shutil.copy(os.path.join(input_path, data_file_name),
                        output_path)
    logger.info("Finished writing data files to %s", output_path)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
776 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
777 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def zip_get_data_files_collection_command(options):
    """Copy the selected data files of a zipped ISA-Tab archive to a folder.

    The archive at ``options.input_path`` is extracted into a temporary
    directory, which is passed to ``slice_data_files``. Every file named in
    the returned ``data_files`` entries is copied from the extraction
    directory to ``options.output_path``.

    The factor selection is taken from ``options.json_query`` (a JSON
    string) or, failing that, from the ``factor_value_series`` entries of
    the JSON file named by ``options.galaxy_parameters_file`` when that
    attribute is present.

    :param options: parsed arguments providing ``input_path``,
        ``output_path``, ``json_query`` and optionally
        ``galaxy_parameters_file``.
    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    input_path = options.input_path
    output_path = options.output_path
    logger.info("Getting data files for study %s. Writing to %s.",
                input_path, output_path)

    # getattr: this command did not read galaxy_parameters_file before, so
    # the attribute may be missing from the parsed options.
    galaxy_parameters_file = getattr(options, 'galaxy_parameters_file', None)

    # One truthiness-based decision for both logging and parsing: an empty
    # query string counts as "no query" instead of reaching json.loads("").
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     galaxy_parameters_file)
        with open(galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    tmpdir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)

        data_files = slice_data_files(
            tmpdir, factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")

        logger.debug("copying data files to %s", output_path)
        for sample_entry in data_files:
            for data_file_name in sample_entry['data_files']:
                # Module logger rather than the root logger (logging.info).
                logger.info("Copying %s", data_file_name)
                shutil.copy(os.path.join(tmpdir, data_file_name),
                            output_path)
        logger.info("Finished writing data files to %s", output_path)
    finally:
        # Remove the extraction directory on failure paths too, so a bad
        # archive or a failed copy does not leak temporary files.
        shutil.rmtree(tmpdir)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
807 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
808 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
809 def slice_data_files(dir, factor_selection=None): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
810 results = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
811 # first collect matching samples |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
812 for table_file in glob.iglob(os.path.join(dir, '[a|s]_*')): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
813 logger.info('Loading {table_file}'.format(table_file=table_file)) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
814 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
815 with open(os.path.join(dir, table_file)) as fp: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
816 df = isatab.load_table(fp) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
817 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
818 if factor_selection is None: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
819 matches = df['Sample Name'].items() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
820 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
821 for indx, match in matches: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
822 sample_name = match |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
823 if len([r for r in results if r['sample'] == |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
824 sample_name]) == 1: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
825 continue |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
826 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
827 results.append( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
828 { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
829 'sample': sample_name, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
830 'data_files': [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
831 } |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
832 ) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
833 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
834 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
835 for factor_name, factor_value in factor_selection.items(): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
836 if 'Factor Value[{}]'.format(factor_name) in list( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
837 df.columns.values): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
838 matches = df.loc[df['Factor Value[{factor}]'.format( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
839 factor=factor_name)] == factor_value][ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
840 'Sample Name'].items() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
841 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
842 for indx, match in matches: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
843 sample_name = match |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
844 if len([r for r in results if r['sample'] == |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
845 sample_name]) == 1: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
846 continue |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
847 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
848 results.append( |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
849 { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
850 'sample': sample_name, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
851 'data_files': [], |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
852 'query_used': factor_selection |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
853 } |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
854 ) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
855 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
856 # now collect the data files relating to the samples |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
857 for result in results: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
858 sample_name = result['sample'] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
859 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
860 for table_file in glob.iglob(os.path.join(dir, 'a_*')): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
861 with open(table_file) as fp: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
862 df = isatab.load_table(fp) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
863 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
864 data_files = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
865 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
866 table_headers = list(df.columns.values) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
867 sample_rows = df.loc[df['Sample Name'] == sample_name] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
868 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
869 data_node_labels = [ |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
870 'Raw Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
871 'Raw Spectral Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
872 'Derived Spectral Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
873 'Derived Array Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
874 'Array Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
875 'Protein Assignment File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
876 'Peptide Assignment File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
877 'Post Translational Modification Assignment File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
878 'Acquisition Parameter Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
879 'Free Induction Decay Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
880 'Derived Array Data Matrix File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
881 'Image File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
882 'Derived Data File', |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
883 'Metabolite Assignment File'] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
884 for node_label in data_node_labels: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
885 if node_label in table_headers: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
886 data_files.extend(list(sample_rows[node_label])) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
887 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
888 result['data_files'] = [i for i in list(data_files) if |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
889 str(i) != 'nan'] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
890 return results |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
891 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
892 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def isatab_get_factor_names_command(options):
    """Write the factor names found in an ISA-Tab directory as a JSON list.

    Every file directly inside ``options.input_path`` whose name starts with
    ``a_`` or ``s_`` is loaded with ``isatab.load_table`` and its column
    headers are searched for ``Factor Value[<name>]``. The distinct names are
    dumped to ``options.output`` (an empty list when none are found).

    :param options: parsed command-line options. Must provide ``input_path``
        (directory to scan) and ``output`` (writable text file object that
        has a ``name`` attribute).
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    factor_header_rx = re.compile(r'Factor Value\[(.*?)\]')
    factors = set()
    # '[as]' instead of '[a|s]': inside a glob character class '|' is a
    # literal character, not an alternation operator.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # iglob already yields paths prefixed with input_path; joining the
        # prefix a second time breaks when input_path is relative.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        for header in df.columns.values:
            found = factor_header_rx.match(header)
            if found:
                # Use the captured group rather than fixed slice offsets so
                # text after the closing bracket cannot leak into the name.
                factors.add(found.group(1))

    # Sorted so that the output is deterministic across runs.
    json.dump(sorted(factors), options.output, indent=4)
    logger.debug("Factor names written")
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
913 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
914 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def zip_get_factor_names_command(options):
    """Write the factor names found in a zipped ISA-Tab as a JSON list.

    The archive at ``options.input_path`` is extracted to a temporary
    directory. Every extracted top-level file whose name starts with ``a_``
    or ``s_`` is loaded with ``isatab.load_table`` and its column headers are
    searched for ``Factor Value[<name>]``. The distinct names are dumped to
    ``options.output`` (an empty list when none are found).

    :param options: parsed command-line options. Must provide ``input_path``
        (path of the zip archive) and ``output`` (writable text file object
        that has a ``name`` attribute).
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    factor_header_rx = re.compile(r'Factor Value\[(.*?)\]')
    factors = set()
    # TemporaryDirectory removes the extracted files even when loading a
    # table raises, which a trailing shutil.rmtree() call would not.
    with tempfile.TemporaryDirectory() as tmpdir:
        # unpack input_path
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)

        # '[as]' instead of '[a|s]': inside a glob character class '|' is a
        # literal character, not an alternation operator.
        for table_file in glob.iglob(os.path.join(tmpdir, '[as]_*')):
            # Module logger (not the root logger) for consistency with the
            # other messages emitted by this function.
            logger.info('Searching %s', table_file)
            # iglob already yields paths prefixed with tmpdir.
            with open(table_file) as fp:
                df = isatab.load_table(fp)

            for header in df.columns.values:
                found = factor_header_rx.match(header)
                if found:
                    # Captured group instead of fixed slice offsets, so text
                    # after the closing bracket cannot leak into the name.
                    factors.add(found.group(1))

    # Sorted so that the output is deterministic across runs.
    json.dump(sorted(factors), options.output, indent=4)
    logger.debug("Factor names written")
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
941 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
942 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def isatab_get_factor_values_command(options):
    """Write the distinct values of one factor in an ISA-Tab directory as JSON.

    Every file directly inside ``options.input_path`` whose name starts with
    ``a_`` or ``s_`` is loaded with ``isatab.load_table``. When the table has
    a ``Factor Value[<options.factor>]`` column, its non-missing ``str``,
    ``int`` and ``float`` cells are collected. The distinct values are dumped
    as a JSON list to ``options.output`` (an empty list when none are found).

    :param options: parsed command-line options. Must provide ``input_path``
        (directory to scan), ``factor`` (factor name) and ``output``
        (writable text file object that has a ``name`` attribute).
    """
    input_path = options.input_path
    factor_name = options.factor
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        factor_name, input_path, options.output.name)

    factor_column = 'Factor Value[{factor}]'.format(factor=factor_name)
    fvs = set()

    # '[as]' instead of '[a|s]': inside a glob character class '|' is a
    # literal character, not an alternation operator.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # iglob already yields paths prefixed with input_path; joining the
        # prefix a second time breaks when input_path is relative.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        if factor_column not in df.columns:
            continue

        # .items() replaces .iteritems(), which pandas 2.0 removed.
        for _, value in df[factor_column].items():
            try:
                # Unwrap numpy scalars into plain Python values so they can
                # be serialised by json; plain str cells have no .item().
                value = value.item()
            except AttributeError:
                pass

            # str(nan) == 'nan': skip missing cells.
            if isinstance(value, (str, int, float)) and str(value) != 'nan':
                fvs.add(value)

    # Not sorted: the set may mix str and numeric values, which do not
    # compare with each other.
    json.dump(list(fvs), options.output, indent=4)
    logger.debug("Factor values written to %s", options.output)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
973 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
974 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def zip_get_factor_values_command(options):
    """Write the distinct values of one factor in a zipped ISA-Tab as JSON.

    The archive at ``options.input_path`` is extracted to a temporary
    directory. Every extracted top-level file whose name starts with ``a_``
    or ``s_`` is loaded with ``isatab.load_table``. When the table has a
    ``Factor Value[<options.factor>]`` column, its non-missing ``str``,
    ``int`` and ``float`` cells are collected. The distinct values are dumped
    as a JSON list to ``options.output`` (an empty list when none are found).

    :param options: parsed command-line options. Must provide ``input_path``
        (path of the zip archive), ``factor`` (factor name) and ``output``
        (writable text file object that has a ``name`` attribute).
    """
    input_path = options.input_path
    factor_name = options.factor
    # NOTE(review): this first message looks like a copy-paste from the
    # factor-names command; kept so the log output is unchanged.
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        factor_name, input_path, options.output.name)

    factor_column = 'Factor Value[{factor}]'.format(factor=factor_name)
    fvs = set()

    # TemporaryDirectory removes the extracted files even when loading a
    # table raises, which a trailing shutil.rmtree() call would not.
    with tempfile.TemporaryDirectory() as tmpdir:
        # unpack input_path
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)

        # '[as]' instead of '[a|s]': inside a glob character class '|' is a
        # literal character, not an alternation operator.
        for table_file in glob.glob(os.path.join(tmpdir, '[as]_*')):
            # Module logger (not the root logger) for consistency with the
            # other messages emitted by this function.
            logger.info('Searching %s', table_file)
            # Open the extracted table directly. Joining it onto input_path
            # (the zip file) only worked because table_file is absolute.
            with open(table_file) as fp:
                df = isatab.load_table(fp)

            if factor_column not in df.columns:
                continue

            # .items() replaces .iteritems(), which pandas 2.0 removed.
            for _, value in df[factor_column].items():
                try:
                    # Unwrap numpy scalars into plain Python values so they
                    # can be serialised by json; str cells have no .item().
                    value = value.item()
                except AttributeError:
                    pass

                # str(nan) == 'nan': skip missing cells.
                if isinstance(value, (str, int, float)) \
                        and str(value) != 'nan':
                    fvs.add(value)

    # Not sorted: the set may mix str and numeric values, which do not
    # compare with each other.
    json.dump(list(fvs), options.output, indent=4)
    logger.debug("Factor values written to %s", options.output)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1013 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1014 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1015 def isatab_get_factors_summary_command(options): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1016 logger.info("Getting summary for study %s. Writing to %s.", |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1017 options.input_path, options.output.name) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1018 input_path = options.input_path |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1019 ISA = isatab.load(input_path) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1020 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1021 all_samples = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1022 for study in ISA.studies: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1023 all_samples.extend(study.samples) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1024 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1025 samples_and_fvs = [] |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1026 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1027 for sample in all_samples: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1028 sample_and_fvs = { |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1029 'sample_name': sample.name, |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1030 } |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1031 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1032 for fv in sample.factor_values: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1033 if isinstance(fv.value, (str, int, float)): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1034 fv_value = fv.value |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1035 sample_and_fvs[fv.factor_name.name] = fv_value |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1036 elif isinstance(fv.value, OntologyAnnotation): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1037 fv_value = fv.value.term |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1038 sample_and_fvs[fv.factor_name.name] = fv_value |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1039 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1040 samples_and_fvs.append(sample_and_fvs) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1041 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1042 df = pd.DataFrame(samples_and_fvs) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1043 nunique = df.apply(pd.Series.nunique) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1044 cols_to_drop = nunique[nunique == 1].index |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1045 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1046 df = df.drop(cols_to_drop, axis=1) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1047 summary = df.to_dict(orient='records') |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1048 if summary is not None: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1049 json.dump(summary, options.output, indent=4) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1050 logger.debug("Summary dumped to JSON") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1051 # html_summary = build_html_summary(summary) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1052 # with options.html_output as html_fp: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1053 # html_fp.write(html_summary) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1054 else: |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1055 raise RuntimeError("Error getting study summary") |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1056 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1057 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def zip_get_factors_summary_command(options):
    """Summarise the varying factor values of a zipped ISA-Tab study.

    The archive at ``options.input_path`` is unpacked into a temporary
    directory and loaded with ``isatab.load``.  One record is built per
    sample, holding the sample name and its factor values; columns that have
    a single distinct value across all samples are dropped.  The records are
    dumped as JSON to ``options.json_output``, echoed to stdout, and rendered
    with ``build_html_summary`` into ``options.html_output``.

    The temporary directory is removed even when loading or summarising
    fails.

    :param options: Object exposing ``input_path`` (path of the zip archive),
        ``json_output`` (writable file object with a ``name`` attribute) and
        ``html_output`` (writable file object usable as a context manager).
    :raises RuntimeError: If the computed summary is ``None``.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.json_output.name)

    samples_and_fvs = []
    # TemporaryDirectory guarantees cleanup on every exit path.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(options.input_path) as zfp:
            zfp.extractall(path=tmpdir)
        ISA = isatab.load(tmpdir)

        # Build the plain-Python records while the extracted files still
        # exist, so nothing below depends on the temporary directory.
        for study in ISA.studies:
            for sample in study.samples:
                sample_and_fvs = {
                    'sample_name': sample.name,
                }
                for fv in sample.factor_values:
                    if isinstance(fv.value, (str, int, float)):
                        fv_value = fv.value
                    elif isinstance(fv.value, OntologyAnnotation):
                        fv_value = fv.value.term
                    else:
                        # Other value types are left out of the summary.
                        continue
                    sample_and_fvs[fv.factor_name.name] = fv_value
                samples_and_fvs.append(sample_and_fvs)

    df = pd.DataFrame(samples_and_fvs)
    # Keep only the columns whose value actually varies between samples.
    nunique = df.apply(pd.Series.nunique)
    cols_to_drop = nunique[nunique == 1].index
    df = df.drop(cols_to_drop, axis=1)
    summary = df.to_dict(orient='records')

    # Defensive guard kept from the original implementation.
    if summary is None:
        raise RuntimeError("Error getting study summary")

    json.dump(summary, options.json_output, indent=4)
    logger.debug("Summary dumped to JSON")
    print(json.dumps(summary, indent=4))
    html_summary = build_html_summary(summary)
    with options.html_output as html_fp:
        html_fp.write(html_summary)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1097 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1098 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_groups(input_path):
    """Group sample names by their combination of factor values.

    :param input_path: Forwarded as ``input_path`` to
        ``isatab_get_factors_summary_command``.
    :return: A dict mapping each tuple of factor values to the list of
        ``'name'`` entries of the summary items sharing that combination.
    """
    # NOTE(review): this assumes isatab_get_factors_summary_command accepts
    # an ``input_path`` keyword and returns a list of dicts carrying a
    # 'name' key -- confirm against its definition.
    groups = {}
    for item in isatab_get_factors_summary_command(input_path=input_path):
        # Everything except the sample name identifies the study group.
        combination = tuple(
            value for key, value in item.items() if key != 'name')
        groups.setdefault(combination, []).append(item['name'])
    return groups
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1111 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1112 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_groups_samples_sizes(input_path):
    """Return the number of samples in each study group.

    :param input_path: Forwarded to ``get_study_groups``.
    :return: A list of ``(factor_values, sample_count)`` tuples, one per
        study group.
    """
    groups = get_study_groups(input_path=input_path)
    return [(factor_values, len(names))
            for factor_values, names in groups.items()]
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1116 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1117 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_sources_for_sample(input_path, sample_name):
    """Collect the names of the sources a sample derives from.

    :param input_path: Path handed to ``isatab.load``.
    :param sample_name: Name of the sample to look up.
    :return: A list with the ``name`` of every source in ``derives_from``
        of each sample, in any study, whose name equals ``sample_name``.
    """
    investigation = isatab.load(input_path)
    source_names = []

    for study in investigation.studies:
        for sample in study.samples:
            if sample.name != sample_name:
                continue
            print('found a hit: {sample_name}'.format(
                sample_name=sample.name))
            source_names.extend(
                source.name for source in sample.derives_from)
    return source_names
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1131 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1132 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_data_for_sample(input_path, sample_name):
    """Collect the data files generated from a given sample.

    :param input_path: Path handed to ``isatab.load``.
    :param sample_name: Name of the sample to look up.
    :return: A list of the data-file objects, across all assays of all
        studies, whose ``generated_from`` contains an item named
        ``sample_name``.
    """
    investigation = isatab.load(input_path)
    matches = []
    for study in investigation.studies:
        for assay in study.assays:
            for data_file in assay.data_files:
                origin_names = [
                    origin.name for origin in data_file.generated_from]
                if sample_name not in origin_names:
                    continue
                logger.info('found a hit: {filename}'.format(
                    filename=data_file.filename))
                matches.append(data_file)
    return matches
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1144 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1145 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_groups_data_sizes(input_path):
    """Return the size of each study group.

    :param input_path: Forwarded to ``get_study_groups``.
    :return: A list of ``(factor_values, count)`` tuples, one per study
        group, where ``count`` is the length of the group's name list.
    """
    # NOTE(review): this counts the names returned by get_study_groups,
    # exactly like get_study_groups_samples_sizes; no data files are
    # inspected here -- confirm that is the intended meaning of "data size".
    groups = get_study_groups(input_path=input_path)
    return [(factor_values, len(names))
            for factor_values, names in groups.items()]
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1149 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1150 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_characteristics_summary(input_path):
    """
        Build a per-sample summary of source characteristics for a
        MetaboLights study.

    :param input_path: Input path to ISA-tab
    :return: A list of dicts, one per sample, holding the sample name and
    the characteristic names and values of the sources it derives from

    Note: characteristics that take a single value across all samples are
    left out, so only variable characteristics are reported.

    Example usage:
        characteristics_summary = get_characteristics_summary('/path/to/my/study/')
        [
            {
                "name": "6089if_9",
                "Variant": "Synechocystis sp. PCC 6803.sll0171.ko"
            },
            {
                "name": "6089if_43",
                "Variant": "Synechocystis sp. PCC 6803.WT.none"
            },
        ]


    """
    investigation = isatab.load(input_path)

    rows = []
    for study in investigation.studies:
        for sample in study.samples:
            row = {
                'name': sample.name
            }

            for source in sample.derives_from:
                for charac in source.characteristics:
                    raw = charac.value
                    if isinstance(raw, (str, int, float)):
                        resolved = raw
                    elif isinstance(raw, OntologyAnnotation):
                        # Ontology-backed values are reported by term.
                        resolved = raw.term
                    else:
                        # Any other value type is not summarised.
                        continue
                    row[charac.category.term] = resolved

            rows.append(row)

    frame = pd.DataFrame(rows)
    # Columns with exactly one distinct value carry no information.
    distinct_counts = frame.apply(pd.Series.nunique)
    constant_columns = distinct_counts[distinct_counts == 1].index

    return frame.drop(constant_columns, axis=1).to_dict(orient='records')
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1206 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1207 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_variable_summary(input_path):
    """Summarise the variable factor values and characteristics per sample.

    Each record starts with ``sample_name``, gains the sample's factor
    values, then ``source_name`` and the characteristics of every source the
    sample derives from (later sources overwrite earlier ones on key
    clashes).  Columns with a single distinct value across all samples are
    dropped.

    :param input_path: Path handed to ``isatab.load``.
    :return: A list of dicts, one per sample.
    """
    investigation = isatab.load(input_path)

    rows = []
    for study in investigation.studies:
        for sample in study.samples:
            row = {
                'sample_name': sample.name
            }

            for fv in sample.factor_values:
                raw = fv.value
                if isinstance(raw, (str, int, float)):
                    resolved = raw
                elif isinstance(raw, OntologyAnnotation):
                    # Ontology-backed values are reported by term.
                    resolved = raw.term
                else:
                    # Any other value type is not summarised.
                    continue
                row[fv.factor_name.name] = resolved

            for source in sample.derives_from:
                row['source_name'] = source.name
                for charac in source.characteristics:
                    raw = charac.value
                    if isinstance(raw, (str, int, float)):
                        resolved = raw
                    elif isinstance(raw, OntologyAnnotation):
                        resolved = raw.term
                    else:
                        continue
                    row[charac.category.term] = resolved

            rows.append(row)

    frame = pd.DataFrame(rows)
    # Columns with exactly one distinct value carry no information.
    distinct_counts = frame.apply(pd.Series.nunique)
    constant_columns = distinct_counts[distinct_counts == 1].index

    return frame.drop(constant_columns, axis=1).to_dict(orient='records')
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1247 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1248 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_study_group_factors(input_path):
    """Return the distinct factor-value combinations found in a study.

    Every file in ``input_path`` whose name starts with ``a_`` or ``s_``
    (presumably the ISA-Tab assay and study tables -- confirm) is loaded with
    ``isatab.load_table``.  For a table that has ``Factor Value`` columns,
    the de-duplicated rows of those columns become the result; when several
    tables have such columns, the last one in sorted path order wins.

    :param input_path: Directory containing the table files.
    :return: A list of dicts mapping factor column names to values, or an
        empty list when no table has factor columns.
    """
    factors_list = []

    # '[as]' is the intended character class; the former '[a|s]' also
    # matched a literal '|'.  Sorting makes the "last table wins" outcome
    # independent of directory listing order.
    pattern = os.path.join(input_path, '[as]_*')
    for table_file in sorted(glob.glob(pattern)):
        # glob already returns paths that include input_path; joining it on
        # again doubled the directory for relative paths.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        factor_columns = [x for x in df.columns if x.startswith(
            'Factor Value')]
        if factor_columns:
            factors_list = df[factor_columns].drop_duplicates()\
                .to_dict(orient='records')
    return factors_list
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1262 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1263 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def get_filtered_df_on_factors_list(input_path):
    """Build one pandas query per study group and print the matching rows.

    The factor-value combinations returned by ``get_study_group_factors``
    are each turned into a ``DataFrame.query`` expression of the form
    ``<column> == '<value>' and ...``.  Every ``a_*``/``s_*`` table found in
    ``input_path`` is then filtered with each query, and the ``Sample_Name``,
    ``Source_Name`` and ``Raw_Spectral_Data_File`` columns of the matching
    rows are printed when the table has them.

    :param input_path: directory containing the ISA-Tab table files.
    :return: list of query strings, one per factor-value combination.  A
        combination with no string-valued factor yields an empty string.
    """
    # Spaces and square brackets cannot appear in a bare column name inside
    # a query expression, so they are replaced by underscores.
    to_identifier = str.maketrans(' []', '___')

    factors_list = get_study_group_factors(input_path=input_path)
    queries = []

    for item in factors_list:
        clauses = []
        for k, v in item.items():
            # Only string-valued factors take part in the query.
            if isinstance(v, str):
                # NOTE(review): only the column names of the tables are
                # rewritten below, not their cell values, so a factor value
                # containing a space or bracket presumably never matches --
                # confirm before relying on such groups.
                clauses.append("{k} == '{v}' and ".format(
                    k=k.translate(to_identifier),
                    v=v.translate(to_identifier)))
        # Strip the trailing "and " left behind by the last clause.
        queries.append(''.join(clauses)[:-4])

    # '[as]_*' rather than '[a|s]_*': '|' is a literal inside a glob class.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already yields paths that include input_path; joining the
        # directory a second time breaks relative input paths.
        with open(table_file) as fp:
            df = isatab.load_table(fp)

        # Rename the columns the same way the query keys were rewritten.
        df.columns = df.columns.map(
            lambda x: x.translate(to_identifier) if isinstance(x, str) else x)

        for query in queries:
            if not query:
                # DataFrame.query rejects an empty expression.
                continue
            # query uses pandas.eval, which evaluates queries like pure
            # Python notation
            df2 = df.query(query)
            if 'Sample_Name' in df.columns:
                print('Group: {query} / Sample_Name: {sample_name}'.format(
                    query=query, sample_name=list(df2['Sample_Name'])))

            if 'Source_Name' in df.columns:
                print('Group: {} / Sources_Name: {}'.format(
                    query, list(df2['Source_Name'])))

            if 'Raw_Spectral_Data_File' in df.columns:
                print('Group: {query} / Raw_Spectral_Data_File: {filename}'
                      .format(query=query[13:-2],
                              filename=list(df2['Raw_Spectral_Data_File'])))
    return queries
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1316 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1317 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def datatype_get_summary_command(options):
    """Write the variable summary of a study as JSON.

    The summary for ``options.study_id`` is obtained from
    ``get_study_variable_summary``, echoed to standard output and dumped as
    indented JSON to the open file object ``options.output``.

    :param options: parsed command-line options providing ``study_id`` and
        ``output``.
    :raises RuntimeError: if no summary could be obtained (``None``).
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.output.name)

    summary = get_study_variable_summary(options.study_id)
    # Check for failure before touching the result: list(None) would raise
    # TypeError and hide the intended RuntimeError.
    if summary is None:
        raise RuntimeError("Error getting study summary")

    print('summary: ', list(summary))
    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped")
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1329 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1330 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1331 # logging and argument parsing |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1332 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1333 def _configure_logger(options): |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1334 logging_level = getattr(logging, options.log_level, logging.INFO) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1335 logging.basicConfig(level=logging_level) |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1336 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1337 global logger |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1338 logger = logging.getLogger() |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1339 logger.setLevel(logging_level) # there's a bug somewhere. The level set through basicConfig isn't taking effect |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1340 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1341 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def _parse_args(args):
    """Parse ``args`` with the parser built by ``make_parser``.

    :param args: list of command-line argument strings.
    :return: the options object produced by ``parse_args``.
    """
    return make_parser().parse_args(args)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1346 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1347 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
def main(args):
    """Parse the command line, set up logging and run the chosen subcommand.

    :param args: list of command-line argument strings.
    """
    parsed = _parse_args(args)
    _configure_logger(parsed)
    # The selected subcommand is exposed by the parser as ``func``.
    subcommand = parsed.func
    subcommand(parsed)
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1353 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
1354 |
8dab200e02cb
"planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff
changeset
|
if __name__ == '__main__':
    # Script entry point: exit with 0 on success; on failure log the error
    # and exit with the exception's ``code`` attribute when it has one,
    # otherwise with 99.
    try:
        main(sys.argv[1:])
    except Exception as err:
        logger.exception(err)
        logger.error(err)
        exit_status = getattr(err, "code", 99)
    else:
        exit_status = 0
    sys.exit(exit_status)