annotate isaslicer.py @ 1:1fd8547867be draft default tip

"planemo upload commit 76293bd47447c171c939b4f3c194fd0cfbd7f69c-dirty"
author prog
date Thu, 04 Mar 2021 11:21:03 +0000
parents 8dab200e02cb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1 #!/usr/bin/env python3
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
2
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
3 import argparse
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
4 import glob
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
5 import json
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
6 import logging
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
7 import os
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
8 import re
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
9 import shutil
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
10 import sys
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
11 import tempfile
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
12 import zipfile
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
13
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
14 import pandas as pd
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
15 from isatools import isatab
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
16 from isatools.model import OntologyAnnotation
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
17 from isatools.net import mtbls as MTBLS
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
18
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
19 logger = None
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
20
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
21 # Usage: isaslicer.py [--log-level LEVEL] <command> <study_id | input_path> [command-specific options]
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
22
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
23
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
24 def make_parser():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
25 parser = argparse.ArgumentParser( description="ISA slicer")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
26
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
27 parser.add_argument('--log-level', choices=[
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
28 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
29 default='INFO', help="Set the desired logging level")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
30
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
31 subparsers = parser.add_subparsers(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
32 title='Actions',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
33 dest='command') # specified subcommand will be available in attribute 'command'
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
34 subparsers.required = True
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
35
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
36 # mtblisa commands
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
37
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
38 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
39 'mtbls-get-study-archive', aliases=['gsa'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
40 help="Get ISA study from MetaboLights as zip archive")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
41 subparser.set_defaults(func=get_study_archive_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
42 subparser.add_argument('study_id')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
43 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
44 'output', metavar="OUTPUT",
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
45 help="Name of output archive (extension will be added)")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
46 subparser.add_argument('--format', metavar="FMT", choices=[
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
47 'zip', 'tar', 'gztar', 'bztar', 'xztar'], default='zip',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
48 help="Type of archive to create")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
49
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
50 subparser = subparsers.add_parser('mtbls-get-study', aliases=['gs'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
51 help="Get ISA study from MetaboLights")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
52 subparser.set_defaults(func=get_study_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
53 subparser.add_argument('study_id')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
54 subparser.add_argument('output', metavar="PATH", help="Name of output")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
55 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
56 '-f', '--isa-format', choices=['isa-tab', 'isa-json'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
57 metavar="FORMAT", default='isa-tab', help="Desired ISA format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
58
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
59 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
60 'mtbls-get-factors', aliases=['gf'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
61 help="Get factor names from a study in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
62 subparser.set_defaults(func=get_factors_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
63 subparser.add_argument('study_id')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
64 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
65 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
66 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
67
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
68 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
69 'mtbls-get-factor-values', aliases=['gfv'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
70 help="Get factor values from a study in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
71 subparser.set_defaults(func=get_factor_values_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
72 subparser.add_argument('study_id')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
73 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
74 'factor', help="The desired factor. Use `get-factors` to get the list "
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
75 "of available factors")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
76 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
77 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
78 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
79
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
80 subparser = subparsers.add_parser('mtbls-get-data-list', aliases=['gd'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
81 help="Get data files list in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
82 subparser.set_defaults(func=get_data_files_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
83 subparser.add_argument('study_id')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
84 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
85 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
86 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
87 '--json-query',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
88 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
89 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
90 '--galaxy_parameters_file',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
91 help="Path to JSON file containing input Galaxy JSON")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
92
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
93 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
94 'mtbls-get-factors-summary', aliases=['gsum'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
95 help="Get the variables summary from a study, in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
96 subparser.set_defaults(func=get_summary_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
97 subparser.add_argument('study_id')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
98 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
99 'json_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
100 help="Output JSON file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
101 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
102 'html_output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
103 help="Output HTML file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
104
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
105 # isaslicer commands on path to unpacked ISA-Tab as input
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
106
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
107 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
108 'isa-tab-get-factors', aliases=['isagf'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
109 help="Get factor names from a study in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
110 subparser.set_defaults(func=isatab_get_factor_names_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
111 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
112 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
113 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
114 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
115
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
116 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
117 'zip-get-factors', aliases=['zipgf'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
118 help="Get factor names from a study in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
119 subparser.set_defaults(func=zip_get_factor_names_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
120 subparser.add_argument('input_path', type=str,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
121 help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
122 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
123 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
124 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
125
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
126 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
127 'isa-tab-get-factor-values', aliases=['isagfv'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
128 help="Get factor values from a study in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
129 subparser.set_defaults(func=isatab_get_factor_values_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
130 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
131 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
132 'factor', help="The desired factor. Use `get-factors` to get the list "
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
133 "of available factors")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
134 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
135 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
136 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
137
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
138 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
139 'zip-get-factor-values', aliases=['zipgfv'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
140 help="Get factor values from a study in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
141 subparser.set_defaults(func=zip_get_factor_values_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
142 subparser.add_argument('input_path', type=str,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
143 help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
144 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
145 'factor', help="The desired factor. Use `get-factors` to get the list "
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
146 "of available factors")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
147 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
148 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
149 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
150
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
151 subparser = subparsers.add_parser('isa-tab-get-data-list', aliases=['isagdl'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
152 help="Get data files list in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
153 subparser.set_defaults(func=isatab_get_data_files_list_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
154 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
155 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
156 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
157 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
158 '--json-query',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
159 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
160 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
161 '--galaxy_parameters_file',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
162 help="Path to JSON file containing input Galaxy JSON")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
163
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
164 subparser = subparsers.add_parser('zip-get-data-list', aliases=['zipgdl'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
165 help="Get data files list in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
166 subparser.set_defaults(func=zip_get_data_files_list_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
167 subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
168 subparser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
169 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
170 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
171 '--json-query',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
172 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
173 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
174 '--galaxy_parameters_file',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
175 help="Path to JSON file containing input Galaxy JSON")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
176
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
177 subparser = subparsers.add_parser('isa-tab-get-data-collection', aliases=['isagdc'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
178 help="Get data files collection")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
179 subparser.set_defaults(func=isatab_get_data_files_collection_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
180 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
181 subparser.add_argument('output_path', type=str, help="Output data files path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
182 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
183 '--json-query',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
184 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
185 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
186 '--galaxy_parameters_file',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
187 help="Path to JSON file containing input Galaxy JSON")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
188
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
189 subparser = subparsers.add_parser('zip-get-data-collection', aliases=['zipgdc'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
190 help="Get data files collection")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
191 subparser.set_defaults(func=zip_get_data_files_collection_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
192 subparser.add_argument('input_path', type=str, help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
193 subparser.add_argument('output_path', type=str, help="Output data files path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
194 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
195 '--json-query',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
196 help="Factor query in JSON (e.g., '{\"Gender\":\"Male\"}'")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
197
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
198 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
199 'isa-tab-get-factors-summary', aliases=['isasum'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
200 help="Get the variables summary from a study, in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
201 subparser.set_defaults(func=isatab_get_factors_summary_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
202 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
203 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
204 'output', nargs='?', type=argparse.FileType('w'), default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
205 help="Output file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
206
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
207 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
208 'zip-get-factors-summary', aliases=['zipsum'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
209 help="Get the variables summary from a study, in json format")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
210 subparser.set_defaults(func=zip_get_factors_summary_command)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
211 subparser.add_argument('input_path', type=str,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
212 help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
213 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
214 'json_output', nargs='?', type=argparse.FileType('w'),
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
215 default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
216 help="Output JSON file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
217 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
218 'html_output', nargs='?', type=argparse.FileType('w'),
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
219 default=sys.stdout,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
220 help="Output HTML file")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
221
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
222 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
223 'isaslicer2-slice', aliases=['slice2'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
224 help="Slice ISA-Tabs version 2")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
225 subparser.set_defaults(func=query_isatab)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
226 subparser.add_argument('--source_dir', type=str,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
227 help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
228 subparser.add_argument(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
229 '--galaxy_parameters_file', type=argparse.FileType(mode='r'),
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
230 help="Path to JSON file containing input Galaxy JSON")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
231 subparser.add_argument('--output', type=argparse.FileType(mode='w'),
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
232 help="Input ISA-Tab zip path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
233
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
234 subparser = subparsers.add_parser(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
235 'filter-data', aliases=['filter'],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
236 help="Filter out data based on slicer2")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
237 subparser.set_defaults(func=filter_data)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
238 subparser.add_argument('input_path', type=str, help="Input ISA-Tab path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
239 subparser.add_argument('output_path', type=str, help="Output data files path")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
240 subparser.add_argument('--slice', type=argparse.FileType(mode='r'),
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
241 help="slice")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
242 subparser.add_argument('--filename_filter', type=str, help="shell-like wildcard to filter files")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
243
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
244 return parser
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
245
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
246
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def filter_data(options):
    """Symlink the data files named in a slice result into an output directory.

    Reads the JSON document given by ``options.slice``, collects every file
    name listed under ``results[*].data_files``, and creates a symlink in
    ``options.output_path`` for each file in ``options.input_path`` that both
    matches ``options.filename_filter`` and is named in the slice. A summary
    of what was linked is written to ``cli.log`` in the current directory.

    :param options: parsed command-line namespace providing ``input_path``
        (directory holding the data files; empty/None means the current
        directory), ``output_path`` (directory receiving the symlinks),
        ``slice`` (open, readable file object holding the slice JSON) and
        ``filename_filter`` (shell-like wildcard; every file is a candidate
        when it is not given).
    :raises IOError: if ``input_path`` is set but does not exist, or if no
        slice file was supplied.
    :raises SystemExit: with status 1 if a symlink cannot be created.
    """
    source_dir = options.input_path if options.input_path else ""
    output_path = options.output_path
    # --filename_filter is optional; os.path.join() would raise a TypeError
    # on None, so fall back to a wildcard that matches every file.
    filename_filter = options.filename_filter or '*'
    if source_dir and not os.path.exists(source_dir):
        raise IOError('Source path does not exist!')
    slice_json = options.slice
    if slice_json is None:
        # --slice is optional on the command line but required here.
        raise IOError('Could not load slice results file!')

    # A set gives de-duplication and constant-time membership tests.
    wanted = set()
    for result in json.load(slice_json)['results']:
        wanted.update(result.get('data_files', []))

    to_copy = [
        filepath
        for filepath in glob.glob(os.path.join(source_dir, filename_filter))
        if os.path.basename(filepath) in wanted]

    loglines = ["Using slice results from {}\n".format(slice_json.name)]
    for filepath in to_copy:
        filename = os.path.basename(filepath)
        loglines.append("Copying {}\n".format(filename))
        try:
            # A symlink target is resolved relative to the directory that
            # contains the link, so a relative source path would dangle once
            # linked from output_path; always link to the absolute path.
            os.symlink(
                os.path.abspath(filepath), os.path.join(output_path, filename))
        except OSError as e:
            print(e, file=sys.stderr)
            sys.exit(1)
    with open('cli.log', 'w') as fp:
        fp.writelines(loglines)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
282
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
283
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
284 def query_isatab(options):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
285 source_dir = options.source_dir if options.source_dir else ""
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
286 galaxy_parameters_file = options.galaxy_parameters_file
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
287 output = options.output
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
288
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
289 debug = True
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
290 if galaxy_parameters_file:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
291 galaxy_parameters = json.load(galaxy_parameters_file)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
292 print('Galaxy parameters:')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
293 print(json.dumps(galaxy_parameters, indent=4))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
294 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
295 raise IOError('Could not load Galaxy parameters file!')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
296 if source_dir:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
297 if not os.path.exists(source_dir):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
298 raise IOError('Source path does not exist!')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
299 query = galaxy_parameters['query']
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
300 if debug:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
301 print('Query is:')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
302 print(json.dumps(query, indent=4)) # for debugging only
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
303 if source_dir:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
304 investigation = isatab.load(source_dir)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
305 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
306 tmp = tempfile.mkdtemp()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
307 _ = MTBLS.get(galaxy_parameters['input']['mtbls_id'], tmp)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
308 investigation = isatab.load(tmp)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
309 # filter assays by mt/tt
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
310 matching_assays = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
311 mt = query.get('measurement_type').strip()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
312 tt = query.get('technology_type').strip()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
313 if mt and tt:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
314 for study in investigation.studies:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
315 matching_assays.extend(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
316 [x for x in study.assays if x.measurement_type.term == mt
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
317 and x.technology_type.term == tt])
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
318 elif mt and not tt:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
319 for study in investigation.studies:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
320 matching_assays.extend(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
321 [x for x in study.assays if x.measurement_type.term == mt])
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
322 elif not mt and tt:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
323 for study in investigation.studies:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
324 matching_assays.extend(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
325 [x for x in study.assays if x.technology_type.term == tt])
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
326 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
327 for study in investigation.studies:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
328 matching_assays.extend(study.assays)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
329 assay_samples = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
330 for assay in matching_assays:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
331 assay_samples.extend(assay.samples)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
332 if debug:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
333 print('Total samples: {}'.format(len(assay_samples)))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
334
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
335 # filter samples by fv
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
336 factor_selection = {
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
337 x.get('factor_name').strip(): x.get('factor_value').strip() for x in
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
338 query.get('factor_selection', [])}
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
339
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
340 fv_samples = set()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
341 if factor_selection:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
342 samples_to_remove = set()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
343 for f, v in factor_selection.items():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
344 for sample in assay_samples:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
345 for fv in [x for x in sample.factor_values if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
346 x.factor_name.name == f]:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
347 if isinstance(fv.value, OntologyAnnotation):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
348 if fv.value.term == v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
349 fv_samples.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
350 elif fv.value == v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
351 fv_samples.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
352 for f, v in factor_selection.items():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
353 for sample in fv_samples:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
354 for fv in [x for x in sample.factor_values if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
355 x.factor_name.name == f]:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
356 if isinstance(fv.value, OntologyAnnotation):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
357 if fv.value.term != v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
358 samples_to_remove.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
359 elif fv.value != v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
360 samples_to_remove.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
361 final_fv_samples = fv_samples.difference(samples_to_remove)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
362 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
363 final_fv_samples = assay_samples
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
364
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
365 # filter samples by characteristic
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
366 characteristics_selection = {
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
367 x.get('characteristic_name').strip():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
368 x.get('characteristic_value').strip() for x in
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
369 query.get('characteristics_selection', [])}
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
370
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
371 cv_samples = set()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
372 if characteristics_selection:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
373 first_pass = True
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
374 samples_to_remove = set()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
375 for c, v in characteristics_selection.items():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
376 if first_pass:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
377 for sample in final_fv_samples:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
378 for cv in [x for x in sample.characteristics if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
379 x.category.term == c]:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
380 if isinstance(cv.value, OntologyAnnotation):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
381 if cv.value.term == v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
382 cv_samples.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
383 elif cv.value == v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
384 cv_samples.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
385 for source in sample.derives_from:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
386 for cv in [x for x in source.characteristics if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
387 x.category.term == c]:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
388 if isinstance(cv.value, OntologyAnnotation):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
389 if cv.value.term == v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
390 cv_samples.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
391 elif cv.value == v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
392 cv_samples.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
393 first_pass = False
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
394 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
395 for sample in cv_samples:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
396 for cv in [x for x in sample.characteristics if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
397 x.category.term == c]:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
398 if isinstance(cv.value, OntologyAnnotation):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
399 if cv.value.term != v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
400 samples_to_remove.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
401 elif cv.value != v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
402 samples_to_remove.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
403 for source in sample.derives_from:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
404 for cv in [x for x in source.characteristics if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
405 x.category.term == c]:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
406 if isinstance(cv.value, OntologyAnnotation):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
407 if cv.value.term != v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
408 samples_to_remove.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
409 elif cv.value != v:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
410 samples_to_remove.add(sample)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
411 final_cv_samples = cv_samples.difference(samples_to_remove)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
412 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
413 final_cv_samples = final_fv_samples
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
414
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
415 # filter samples by process parameter
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
416 parameters_selection = {
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
417 x.get('parameter_name').strip():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
418 x.get('parameter_value').strip() for x in
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
419 query.get('parameter_selection', [])}
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
420
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
421 final_samples = final_cv_samples
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
422
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
423 if debug:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
424 print('Final number of samples: {}'.format(len(final_samples)))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
425 results = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
426 for sample in final_samples:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
427 results.append({
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
428 'sample_name': sample.name,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
429 'data_files': []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
430 })
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
431 for result in results:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
432 sample_name = result['sample_name']
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
433 if source_dir:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
434 table_files = glob.iglob(os.path.join(source_dir, 'a_*'))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
435 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
436 table_files = glob.iglob(os.path.join(tmp, 'a_*'))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
437 for table_file in table_files:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
438 with open(table_file) as fp:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
439 df = isatab.load_table(fp)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
440 data_files = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
441 table_headers = list(df.columns.values)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
442 sample_rows = df.loc[df['Sample Name'] == sample_name]
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
443 data_node_labels = [
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
444 'Raw Data File', 'Raw Spectral Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
445 'Derived Spectral Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
446 'Derived Array Data File', 'Array Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
447 'Protein Assignment File', 'Peptide Assignment File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
448 'Post Translational Modification Assignment File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
449 'Acquisition Parameter Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
450 'Free Induction Decay Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
451 'Derived Array Data Matrix File', 'Image File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
452 'Derived Data File', 'Metabolite Assignment File']
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
453 if parameters_selection:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
454 for p, v in parameters_selection.items():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
455 sample_pv_rows = sample_rows.loc[
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
456 sample_rows['Parameter Value[{}]'.format(p)] == v]
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
457 for node_label in data_node_labels:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
458 if node_label in table_headers:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
459 data_files.extend(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
460 list(sample_pv_rows[node_label]))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
461 result['data_files'].extend(list(set(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
462 i for i in list(data_files) if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
463 str(i) not in ('nan', ''))))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
464 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
465 for node_label in data_node_labels:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
466 if node_label in table_headers:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
467 data_files.extend(list(sample_rows[node_label]))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
468 result['data_files'].extend(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
469 list(set(i for i in list(data_files) if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
470 str(i) not in ('nan', ''))))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
471 results_json = {
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
472 'query': query,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
473 'results': results
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
474 }
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
475 json.dump(results_json, output, indent=4)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
476
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
477 # if galaxy_parameters['input']['collection_output']:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
478 # logger = logging.getLogger()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
479 # logger.debug("copying data files to %s", os.path.dirname(output))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
480 # for result in results:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
481 # for data_file_name in result['data_files']:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
482 # logging.info("Copying {}".format(data_file_name))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
483 # shutil.copy(os.path.join(source_dir, data_file_name),
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
484 # os.path.dirname(output))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
485 # logger.info(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
486 # "Finished writing data files to {}".format(os.path.dirname(output)))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
487
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
488
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_archive_command(options):
    """Download a study and pack it into an archive file.

    Reads ``options.study_id``, ``options.output`` (archive base name) and
    ``options.format`` (archive format handed to ``shutil.make_archive``).

    :raises RuntimeError: if ``MTBLS.get`` returns ``None``
    """
    accession = options.study_id

    logger.info("Downloading study %s into archive at path %s.%s",
                accession, options.output, options.format)

    download_dir = MTBLS.get(accession)
    logger.debug("MTBLS.get returned '%s'", download_dir)
    if download_dir is None:
        raise RuntimeError("Error downloading ISA study")

    try:
        shutil.make_archive(
            options.output, options.format, download_dir, logger=logger)
        logger.info("ISA archive written")
    finally:
        # The downloaded tree is removed whether or not archiving succeeded.
        logger.debug("Trying to clean up tmp dir %s", download_dir)
        shutil.rmtree(download_dir, ignore_errors=True)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
507
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
508 # mtblisa commands
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
509
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
510
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_command(options):
    """Fetch a study and store it at ``options.output``.

    ``options.isa_format`` selects the representation: ``"isa-tab"`` moves
    the tree returned by ``MTBLS.get`` to the output path, while
    ``"isa-json"`` creates the output directory and dumps the object returned
    by ``MTBLS.getj`` into ``<identifier>.json`` inside it.

    :raises RuntimeError: if the output path already exists or the download
        returns ``None``
    :raises ValueError: if ``options.isa_format`` is neither supported value
    """
    destination = options.output
    if os.path.exists(destination):
        raise RuntimeError(
            "Selected output path {} already exists!".format(destination))

    isa_format = options.isa_format
    if isa_format not in ("isa-tab", "isa-json"):
        raise ValueError(
            "BUG! Got an invalid isa format '{}'".format(isa_format))

    if isa_format == "isa-json":
        study_json = MTBLS.getj(options.study_id)
        if study_json is None:
            raise RuntimeError("Error downloading ISA study")

        logger.debug(
            "Finished downloading data. Dumping json to final location %s",
            destination)
        os.makedirs(destination)
        json_name = "{}.json".format(study_json['identifier'])
        with open(os.path.join(destination, json_name), 'w') as handle:
            json.dump(study_json, handle)
        logger.info("ISA-JSON written to %s", destination)
        return

    # Remaining case: isa-tab.
    download_dir = None
    try:
        logger.info("Downloading study %s", options.study_id)
        download_dir = MTBLS.get(options.study_id)
        if download_dir is None:
            raise RuntimeError("Error downloading ISA study")

        logger.debug(
            "Finished downloading data. Moving to final location %s",
            destination)
        shutil.move(download_dir, destination)
        logger.info("ISA archive written to %s", destination)
    finally:
        if download_dir:
            # try to clean up any temporary files left behind
            logger.debug(
                "Deleting %s, if there's anything there", download_dir)
            shutil.rmtree(download_dir, ignore_errors=True)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
551
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
552
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_factors_command(options):
    """Write the factor names of a study as a JSON list.

    The names come from ``MTBLS.get_factor_names(options.study_id)`` and are
    dumped to the open file object ``options.output``.

    :raises RuntimeError: if no factor names are returned (``None``)
    """
    logger.info("Getting factors for study %s. Writing to %s.",
                options.study_id, options.output.name)
    names = MTBLS.get_factor_names(options.study_id)
    if names is None:
        raise RuntimeError("Error downloading factors.")

    json.dump(list(names), options.output, indent=4)
    logger.debug("Factor names written")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
562
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
563
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_factor_values_command(options):
    """Write the values of one study factor as a JSON list.

    The values come from
    ``MTBLS.get_factor_values(options.study_id, options.factor)`` and are
    dumped to the open file object ``options.output``.

    :raises RuntimeError: if no factor values are returned (``None``)
    """
    # Lazy %-style arguments, matching the other command functions, so the
    # message is only built when the log level is enabled.
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        options.factor, options.study_id, options.output.name)
    fvs = MTBLS.get_factor_values(options.study_id, options.factor)
    if fvs is None:
        raise RuntimeError("Error getting factor values")

    json.dump(list(fvs), options.output, indent=4)
    # Log the file name rather than the repr of the file object.
    logger.debug("Factor values written to %s", options.output.name)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
573
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
574
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_data_files_command(options):
    """Write the data files of a study, optionally filtered, as JSON.

    The filter is taken from the first available source:

    1. ``options.json_query``: a JSON string parsed and passed as the query.
    2. ``options.galaxy_parameters_file``: path to a JSON file whose
       ``factor_value_series`` entries (``factor_name`` / ``factor_value``)
       are turned into the query mapping.
    3. Neither: ``MTBLS.get_data_files`` is called without a query.

    The result is dumped to the open file object ``options.output``.

    :raises RuntimeError: if ``MTBLS.get_data_files`` returns ``None``
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.study_id, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        json_struct = json.loads(options.json_query)
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        json_struct = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
        data_files = MTBLS.get_data_files(options.study_id, json_struct)
    else:
        logger.debug("No query was specified")
        data_files = MTBLS.get_data_files(options.study_id)

    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # Log the file name (not the file object's repr), with lazy formatting
    # like the other messages in this function.
    logger.info("Finished writing data files to %s", options.output.name)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
602
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
603
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def build_html_data_files_list(data_files_list):
    """Render per-sample data file names as a small HTML page.

    :param data_files_list: iterable of dicts, each with a ``'sample'`` entry
        and a ``'data_files'`` entry holding an iterable of file-name strings
    :return: the HTML document as a string, containing one table with a
        header row followed by one row per input entry
    """
    import html  # function-scope import: only this helper needs it

    rows = ['<tr><th>Sample Name</th><th>Data File Names</th></tr>']
    for entry in data_files_list:
        # Escape the values so names containing '<', '>' or '&' cannot break
        # the markup, and close every row that is opened.
        rows.append(
            '<tr><td>{sample_name}</td><td>{data_files}</td></tr>'.format(
                sample_name=html.escape(str(entry['sample'])),
                data_files=html.escape(', '.join(entry['data_files']))))
    # The table is explicitly closed, as build_html_summary does for its own.
    data_files_table = '<table>' + ''.join(rows) + '</table>'
    html_data_files_list = """
    <html>
    <head>
    <title>ISA-Tab Factors Summary</title>
    </head>
    <body>
    {summary_table}
    </body>
    </html>
""".format(summary_table=data_files_table)
    return html_data_files_list
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
623
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
624
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def build_html_summary(summary):
    """Render a study-factor summary as a small HTML page.

    Samples are grouped by their combination of factor values (every key of
    an entry other than ``sample_name``). The page contains a table with one
    row per group, giving the group label and its number of samples.

    :param summary: iterable of dicts, each holding a ``sample_name`` key
        plus one key per study factor.
    :return: the HTML document as a string.
    :raises KeyError: if an entry has no ``sample_name`` key.
    """
    # Local import so this function does not depend on the module's imports.
    from html import escape

    study_groups = {}
    for entry in summary:
        sample_name = entry['sample_name']
        # format() instead of str.join so non-string factor values
        # (e.g. numbers) do not raise TypeError.
        study_factors = [
            '{}: {}'.format(name, value)
            for name, value in entry.items() if name != 'sample_name'
        ]
        study_group = ', '.join(study_factors)
        study_groups.setdefault(study_group, []).append(sample_name)

    rows = ['<tr><th>Study group</th><th>Number of samples</th></tr>']
    for study_group, sample_names in study_groups.items():
        # Escape the label (factor text may contain '<' or '&') and close
        # the row so the table is well-formed HTML.
        rows.append(
            '<tr><td>{study_group}</td><td>{num_samples}</td></tr>'.format(
                study_group=escape(study_group, quote=False),
                num_samples=len(sample_names)))
    summary_table = '<table>' + ''.join(rows) + '</table>'

    html_summary = """
<html>
<head>
<title>ISA-Tab Factors Summary</title>
</head>
<body>
{summary_table}
</body>
</html>
""".format(summary_table=summary_table)
    return html_summary
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
655
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
656
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_summary_command(options):
    """Fetch a study's variable summary and write it out as JSON and HTML.

    Reads ``options.study_id``, dumps the summary to ``options.json_output``
    and writes the HTML rendering to ``options.html_output``, which is closed
    once written.

    :raises RuntimeError: if no summary could be obtained for the study.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.json_output.name)

    study_summary = MTBLS.get_study_variable_summary(options.study_id)
    if study_summary is None:
        raise RuntimeError("Error getting study summary")

    json.dump(study_summary, options.json_output, indent=4)
    logger.debug("Summary dumped to JSON")

    page = build_html_summary(study_summary)
    with options.html_output as html_fp:
        html_fp.write(page)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
675
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
676
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
677 # isaslicer commands
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
678
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def isatab_get_data_files_list_command(options):
    """Write the list of data files of an ISA-Tab directory as JSON.

    The factor selection comes from ``options.json_query`` (a JSON string)
    or, failing that, from the ``factor_value_series`` entries of the JSON
    file named by ``options.galaxy_parameters_file``. With neither, no
    selection is applied. The result of ``slice_data_files`` is dumped to
    the open file ``options.output``.

    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    data_files = slice_data_files(
        options.input_path, factor_selection=factor_selection)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("dumping data files to %s", options.output.name)
    json.dump(list(data_files), options.output, indent=4)
    # Log the file name, not the repr of the file object.
    logger.info("Finished writing data files to %s", options.output.name)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
707
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
708
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def zip_get_data_files_list_command(options):
    """Write the list of data files of a zipped ISA-Tab archive as JSON.

    The archive at ``options.input_path`` is extracted to a temporary
    directory, which is removed again even when an error occurs. The factor
    selection comes from ``options.json_query`` (a JSON string) or, failing
    that, from the ``factor_value_series`` entries of the JSON file named by
    ``options.galaxy_parameters_file``. The result of ``slice_data_files``
    is dumped to the open file ``options.output``.

    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    logger.info("Getting data files for study %s. Writing to %s.",
                options.input_path, options.output.name)
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    # The context manager removes the extraction directory on every exit
    # path; the previous trailing rmtree() was skipped whenever extraction,
    # slicing or the None check raised.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(options.input_path) as zfp:
            zfp.extractall(path=tmpdir)
        data_files = slice_data_files(
            tmpdir, factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")
        logger.debug("dumping data files to %s", options.output.name)
        json.dump(list(data_files), options.output, indent=4)
        # Log the file name, not the repr of the file object.
        logger.info("Finished writing data files to %s", options.output.name)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
740
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
741
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def isatab_get_data_files_collection_command(options):
    """Copy the selected data files of an ISA-Tab directory to an output path.

    The factor selection comes from ``options.json_query`` (a JSON string)
    or, failing that, from the ``factor_value_series`` entries of the JSON
    file named by ``options.galaxy_parameters_file``. With neither, no
    selection is applied. Every file listed under ``data_files`` in the
    result of ``slice_data_files`` is copied from ``options.input_path`` to
    ``options.output_path``.

    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    input_path = options.input_path
    output_path = options.output_path
    logger.info("Getting data files for study %s. Writing to %s.",
                input_path, output_path)

    # Truthiness (not "is not None") so an empty query string is treated as
    # "no query" instead of being handed to json.loads, which would raise.
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif options.galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     options.galaxy_parameters_file)
        with open(options.galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    data_files = slice_data_files(
        input_path, factor_selection=factor_selection)
    logger.debug("Result data files list: %s", data_files)
    if data_files is None:
        raise RuntimeError("Error getting data files with isatools")

    logger.debug("copying data files to %s", output_path)
    for entry in data_files:
        for data_file_name in entry['data_files']:
            # Module logger, not logging.info(): the root-level helper
            # implicitly configures the root logger when it has no handlers.
            logger.info("Copying %s", data_file_name)
            shutil.copy(os.path.join(input_path, data_file_name),
                        output_path)
    logger.info("Finished writing data files to %s", output_path)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
776
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
777
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def zip_get_data_files_collection_command(options):
    """Copy the selected data files of a zipped ISA-Tab archive to a path.

    The archive at ``options.input_path`` is extracted to a temporary
    directory, which is removed again even when an error occurs. The factor
    selection comes from ``options.json_query`` (a JSON string) or, failing
    that, from the ``factor_value_series`` entries of the JSON file named by
    ``options.galaxy_parameters_file`` when that option is present. Every
    file listed under ``data_files`` in the result of ``slice_data_files``
    is copied to ``options.output_path``.

    :raises RuntimeError: if ``slice_data_files`` returns ``None``.
    """
    input_path = options.input_path
    output_path = options.output_path
    logger.info("Getting data files for study %s. Writing to %s.",
                input_path, output_path)

    # getattr: this sub-command previously never read the Galaxy parameters
    # file, so the attribute may not be defined on its options namespace.
    galaxy_parameters_file = getattr(options, 'galaxy_parameters_file', None)

    # Truthiness (not "is not None") so an empty query string is treated as
    # "no query" instead of being handed to json.loads, which would raise.
    if options.json_query:
        logger.debug("This is the specified query:\n%s", options.json_query)
        factor_selection = json.loads(options.json_query)
    elif galaxy_parameters_file:
        logger.debug("Using input Galaxy JSON parameters from:\n%s",
                     galaxy_parameters_file)
        with open(galaxy_parameters_file) as json_fp:
            galaxy_json = json.load(json_fp)
        factor_selection = {
            fv_item['factor_name']: fv_item['factor_value']
            for fv_item in galaxy_json['factor_value_series']
        }
    else:
        logger.debug("No query was specified")
        factor_selection = None

    # The context manager removes the extraction directory on every exit
    # path; the previous trailing rmtree() was skipped whenever extraction,
    # slicing, the None check or a copy raised.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)
        data_files = slice_data_files(
            tmpdir, factor_selection=factor_selection)
        logger.debug("Result data files list: %s", data_files)
        if data_files is None:
            raise RuntimeError("Error getting data files with isatools")

        logger.debug("copying data files to %s", output_path)
        for entry in data_files:
            for data_file_name in entry['data_files']:
                # Module logger, not logging.info(): the root-level helper
                # implicitly configures the root logger when it has no
                # handlers.
                logger.info("Copying %s", data_file_name)
                shutil.copy(os.path.join(tmpdir, data_file_name),
                            output_path)
        logger.info("Finished writing data files to %s", output_path)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
807
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
808
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
809 def slice_data_files(dir, factor_selection=None):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
810 results = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
811 # first collect matching samples
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
812 for table_file in glob.iglob(os.path.join(dir, '[a|s]_*')):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
813 logger.info('Loading {table_file}'.format(table_file=table_file))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
814
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
815 with open(os.path.join(dir, table_file)) as fp:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
816 df = isatab.load_table(fp)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
817
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
818 if factor_selection is None:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
819 matches = df['Sample Name'].items()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
820
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
821 for indx, match in matches:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
822 sample_name = match
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
823 if len([r for r in results if r['sample'] ==
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
824 sample_name]) == 1:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
825 continue
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
826 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
827 results.append(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
828 {
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
829 'sample': sample_name,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
830 'data_files': []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
831 }
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
832 )
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
833
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
834 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
835 for factor_name, factor_value in factor_selection.items():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
836 if 'Factor Value[{}]'.format(factor_name) in list(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
837 df.columns.values):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
838 matches = df.loc[df['Factor Value[{factor}]'.format(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
839 factor=factor_name)] == factor_value][
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
840 'Sample Name'].items()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
841
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
842 for indx, match in matches:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
843 sample_name = match
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
844 if len([r for r in results if r['sample'] ==
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
845 sample_name]) == 1:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
846 continue
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
847 else:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
848 results.append(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
849 {
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
850 'sample': sample_name,
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
851 'data_files': [],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
852 'query_used': factor_selection
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
853 }
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
854 )
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
855
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
856 # now collect the data files relating to the samples
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
857 for result in results:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
858 sample_name = result['sample']
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
859
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
860 for table_file in glob.iglob(os.path.join(dir, 'a_*')):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
861 with open(table_file) as fp:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
862 df = isatab.load_table(fp)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
863
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
864 data_files = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
865
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
866 table_headers = list(df.columns.values)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
867 sample_rows = df.loc[df['Sample Name'] == sample_name]
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
868
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
869 data_node_labels = [
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
870 'Raw Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
871 'Raw Spectral Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
872 'Derived Spectral Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
873 'Derived Array Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
874 'Array Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
875 'Protein Assignment File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
876 'Peptide Assignment File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
877 'Post Translational Modification Assignment File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
878 'Acquisition Parameter Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
879 'Free Induction Decay Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
880 'Derived Array Data Matrix File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
881 'Image File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
882 'Derived Data File',
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
883 'Metabolite Assignment File']
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
884 for node_label in data_node_labels:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
885 if node_label in table_headers:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
886 data_files.extend(list(sample_rows[node_label]))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
887
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
888 result['data_files'] = [i for i in list(data_files) if
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
889 str(i) != 'nan']
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
890 return results
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
891
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
892
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def isatab_get_factor_names_command(options):
    """Write the factor names found in an ISA-Tab directory as a JSON list.

    Every study (``s_*``) and assay (``a_*``) table located directly inside
    ``options.input_path`` is loaded, and the name enclosed in each
    ``Factor Value[<name>]`` column header is collected.

    :param options: parsed command-line options. ``input_path`` is the
        directory holding the tables; ``output`` is the file object the JSON
        is written to (its ``name`` attribute is only used for logging).
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    factor_header = re.compile(r'Factor Value\[(.*?)\]')
    prefix_len = len('Factor Value[')
    factors = set()
    # '[as]' rather than '[a|s]': inside a glob character class '|' is a
    # literal character, not an alternation.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already returns paths that include input_path, so open them
        # as-is; joining input_path on again breaks relative directories.
        with open(table_file) as fp:
            df = isatab.load_table(fp)
            for header in df.columns.values:
                if factor_header.match(header):
                    # Drop the leading 'Factor Value[' and the trailing ']'.
                    factors.add(header[prefix_len:-1])
    # Sorted so the output does not depend on set iteration order.
    json.dump(sorted(factors), options.output, indent=4)
    logger.debug("Factor names written")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
913
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
914
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def zip_get_factor_names_command(options):
    """Write the factor names found in a zipped ISA-Tab archive as JSON.

    The archive at ``options.input_path`` is unpacked into a temporary
    directory. Every study (``s_*``) and assay (``a_*``) table at the top
    level of the unpacked archive is loaded, and the name enclosed in each
    ``Factor Value[<name>]`` column header is collected.

    :param options: parsed command-line options. ``input_path`` is the zip
        file; ``output`` is the file object the JSON is written to (its
        ``name`` attribute is only used for logging).
    """
    input_path = options.input_path
    logger.info("Getting factors for study %s. Writing to %s.",
                input_path, options.output.name)
    factor_header = re.compile(r'Factor Value\[(.*?)\]')
    prefix_len = len('Factor Value[')
    factors = set()
    # The context manager removes the unpacked files even when extraction or
    # table loading raises, so no temporary directory is left behind.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)
        # '[as]' rather than '[a|s]': inside a glob character class '|' is a
        # literal character, not an alternation.
        for table_file in glob.iglob(os.path.join(tmpdir, '[as]_*')):
            logger.info('Searching %s', table_file)
            # glob already returns the full path below tmpdir.
            with open(table_file) as fp:
                df = isatab.load_table(fp)
                for header in df.columns.values:
                    if factor_header.match(header):
                        # Drop the leading 'Factor Value[' and trailing ']'.
                        factors.add(header[prefix_len:-1])
    # Sorted so the output does not depend on set iteration order.
    json.dump(sorted(factors), options.output, indent=4)
    logger.debug("Factor names written")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
941
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
942
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def isatab_get_factor_values_command(options):
    """Write the distinct values of one factor in an ISA-Tab directory as JSON.

    Every study (``s_*``) and assay (``a_*``) table located directly inside
    ``options.input_path`` is loaded. Where a table has a
    ``Factor Value[<options.factor>]`` column, its str/int/float cells are
    collected, skipping cells whose string form is ``'nan'``.

    :param options: parsed command-line options. ``input_path`` is the
        directory holding the tables, ``factor`` the factor name, and
        ``output`` the file object the JSON list is written to.
    """
    input_path = options.input_path
    factor_name = options.factor
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        factor_name, input_path, options.output.name)
    column = 'Factor Value[{factor}]'.format(factor=factor_name)
    fvs = set()
    # '[as]' rather than '[a|s]': inside a glob character class '|' is a
    # literal character, not an alternation.
    for table_file in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already returns paths that include input_path, so open them
        # as-is; joining input_path on again breaks relative directories.
        with open(table_file) as fp:
            df = isatab.load_table(fp)
            if column not in list(df.columns.values):
                continue
            # Series.items() replaces iteritems(), which pandas 2.0 removed.
            for _, match in df[column].items():
                try:
                    # Unwrap values that expose .item() (e.g. numpy scalars)
                    # into plain Python objects.
                    match = match.item()
                except AttributeError:
                    # Plain Python values have no .item(); use them as-is.
                    pass
                if (isinstance(match, (str, int, float))
                        and str(match) != 'nan'):
                    fvs.add(match)
    json.dump(list(fvs), options.output, indent=4)
    logger.debug("Factor values written to %s", options.output)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
973
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
974
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def zip_get_factor_values_command(options):
    """Write the distinct values of one factor in a zipped ISA-Tab archive.

    The archive at ``options.input_path`` is unpacked into a temporary
    directory and every study (``s_*``) and assay (``a_*``) table at its top
    level is loaded. Where a table has a ``Factor Value[<options.factor>]``
    column, its str/int/float cells are collected, skipping cells whose
    string form is ``'nan'``. The result is written as a JSON list.

    :param options: parsed command-line options. ``input_path`` is the zip
        file, ``factor`` the factor name, and ``output`` the file object the
        JSON list is written to.
    """
    input_path = options.input_path
    factor_name = options.factor
    logger.info(
        "Getting values for factor %s in study %s. Writing to %s.",
        factor_name, input_path, options.output.name)
    column = 'Factor Value[{factor}]'.format(factor=factor_name)
    fvs = set()
    # The context manager removes the unpacked files even when extraction or
    # table loading raises, so no temporary directory is left behind.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(input_path) as zfp:
            zfp.extractall(path=tmpdir)
        # '[as]' rather than '[a|s]': inside a glob character class '|' is a
        # literal character, not an alternation.
        for table_file in glob.glob(os.path.join(tmpdir, '[as]_*')):
            logger.info('Searching %s', table_file)
            # glob already returns the full path below tmpdir; it must not
            # be joined onto input_path, which is the zip file itself.
            with open(table_file) as fp:
                df = isatab.load_table(fp)
                if column not in list(df.columns.values):
                    continue
                # Series.items() replaces iteritems(), removed in pandas 2.0.
                for _, match in df[column].items():
                    try:
                        # Unwrap values that expose .item() (e.g. numpy
                        # scalars) into plain Python objects.
                        match = match.item()
                    except AttributeError:
                        # Plain Python values have no .item(); keep as-is.
                        pass
                    if (isinstance(match, (str, int, float))
                            and str(match) != 'nan'):
                        fvs.add(match)
    json.dump(list(fvs), options.output, indent=4)
    logger.debug("Factor values written to %s", options.output)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1013
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1014
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def isatab_get_factors_summary_command(options):
    """Dump a per-sample summary of factor values for an ISA-Tab study as JSON.

    The investigation at ``options.input_path`` is loaded and one record is
    built per sample of every study: the sample name plus one entry per
    factor value (the raw value for str/int/float, the ``term`` for an
    ``OntologyAnnotation``; other value types are left out). Columns holding
    exactly one distinct value are dropped before writing; this applies to
    every column, ``sample_name`` included.

    :param options: parsed command-line options. ``input_path`` is passed to
        ``isatab.load``; ``output`` is the file object the JSON records are
        written to (its ``name`` attribute is only used for logging).
    :raises RuntimeError: if the summary could not be built.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.output.name)
    investigation = isatab.load(options.input_path)

    # Flatten the samples of all studies into one list.
    samples = [sample
               for study in investigation.studies
               for sample in study.samples]

    records = []
    for sample in samples:
        record = {'sample_name': sample.name}
        for fv in sample.factor_values:
            if isinstance(fv.value, (str, int, float)):
                record[fv.factor_name.name] = fv.value
            elif isinstance(fv.value, OntologyAnnotation):
                record[fv.factor_name.name] = fv.value.term
        records.append(record)

    frame = pd.DataFrame(records)
    # Count distinct values per column and drop the columns that never vary.
    distinct_counts = frame.apply(pd.Series.nunique)
    constant_columns = distinct_counts[distinct_counts == 1].index
    frame = frame.drop(constant_columns, axis=1)

    summary = frame.to_dict(orient='records')
    if summary is None:
        raise RuntimeError("Error getting study summary")
    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped to JSON")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1056
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1057
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def zip_get_factors_summary_command(options):
    """Write a factor-value summary for a zipped ISA-Tab study.

    The archive is unpacked into a temporary directory, loaded with
    ``isatab.load`` and one record is built per sample holding the sample
    name and its factor values. Only factor values that are ``str``, ``int``,
    ``float`` or ``OntologyAnnotation`` (its ``term`` is used) are recorded.
    Columns that hold exactly one distinct value across all samples are
    dropped before the summary is emitted as JSON (to ``json_output`` and to
    stdout) and as HTML (to ``html_output``).

    :param options: object providing ``input_path`` (path of the zip
        archive), ``json_output`` (writable file object exposing ``name``)
        and ``html_output`` (writable object used as a context manager)
    :raises RuntimeError: if the computed summary is ``None``
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.input_path, options.json_output.name)
    input_path = options.input_path
    with zipfile.ZipFile(input_path) as zfp:
        # TemporaryDirectory removes the extracted files even when loading
        # or writing fails, so a failed run no longer leaks the directory.
        with tempfile.TemporaryDirectory() as tmpdir:
            zfp.extractall(path=tmpdir)
            ISA = isatab.load(tmpdir)
            all_samples = []
            for study in ISA.studies:
                all_samples.extend(study.samples)
            samples_and_fvs = []
            for sample in all_samples:
                sample_and_fvs = {
                    'sample_name': sample.name,
                }
                for fv in sample.factor_values:
                    if isinstance(fv.value, (str, int, float)):
                        sample_and_fvs[fv.factor_name.name] = fv.value
                    elif isinstance(fv.value, OntologyAnnotation):
                        sample_and_fvs[fv.factor_name.name] = fv.value.term
                samples_and_fvs.append(sample_and_fvs)
            df = pd.DataFrame(samples_and_fvs)
            # Keep only the columns whose value actually varies.
            nunique = df.apply(pd.Series.nunique)
            cols_to_drop = nunique[nunique == 1].index
            df = df.drop(cols_to_drop, axis=1)
            summary = df.to_dict(orient='records')
            if summary is None:
                raise RuntimeError("Error getting study summary")
            json.dump(summary, options.json_output, indent=4)
            logger.debug("Summary dumped to JSON")
            print(json.dumps(summary, indent=4))
            html_summary = build_html_summary(summary)
            with options.html_output as html_fp:
                html_fp.write(html_summary)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1097
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1098
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_groups(input_path):
    """Group the names in the factors summary by their other values.

    :param input_path: forwarded to ``isatab_get_factors_summary_command``
    :return: dict mapping a tuple of every non-``'name'`` value of a summary
        record to the list of ``'name'`` values of the records sharing it
    """
    groups = {}
    summary = isatab_get_factors_summary_command(input_path=input_path)

    for record in summary:
        group_key = tuple(
            value for key, value in record.items() if key != 'name')
        groups.setdefault(group_key, []).append(record['name'])
    return groups
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1111
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1112
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_groups_samples_sizes(input_path):
    """Return ``(group_key, number_of_names)`` pairs, one per study group.

    :param input_path: forwarded to ``get_study_groups``
    :return: list of 2-tuples in the iteration order of the groups dict
    """
    groups = get_study_groups(input_path=input_path)
    return [(group_key, len(names)) for group_key, names in groups.items()]
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1116
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1117
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_sources_for_sample(input_path, sample_name):
    """Collect the names of the sources a named sample derives from.

    Every study is searched; each matching sample is reported on stdout.

    :param input_path: passed to ``isatab.load``
    :param sample_name: name of the sample to look for
    :return: list of source names (empty when no sample matches)
    """
    investigation = isatab.load(input_path)
    source_names = []

    for study in investigation.studies:
        for candidate in study.samples:
            if candidate.name != sample_name:
                continue
            print('found a hit: {sample_name}'.format(
                sample_name=candidate.name))
            source_names.extend(
                source.name for source in candidate.derives_from)
    return source_names
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1131
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1132
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_data_for_sample(input_path, sample_name):
    """Collect the assay data files generated from a named sample.

    :param input_path: passed to ``isatab.load``
    :param sample_name: name of the sample to look for
    :return: list of data-file objects whose ``generated_from`` contains an
        item with that name
    """
    investigation = isatab.load(input_path)
    matches = []
    for study in investigation.studies:
        for assay in study.assays:
            for data_file in assay.data_files:
                origin_names = [
                    origin.name for origin in data_file.generated_from]
                if sample_name not in origin_names:
                    continue
                logger.info('found a hit: {filename}'.format(
                    filename=data_file.filename))
                matches.append(data_file)
    return matches
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1144
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1145
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_groups_data_sizes(input_path):
    """Return ``(group_key, number_of_names)`` pairs, one per study group.

    As written, this counts the names collected by ``get_study_groups`` for
    each group, exactly like ``get_study_groups_samples_sizes``.

    :param input_path: forwarded to ``get_study_groups``
    :return: list of 2-tuples in the iteration order of the groups dict
    """
    sizes = []
    for group_key, names in get_study_groups(input_path=input_path).items():
        sizes.append((group_key, len(names)))
    return sizes
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1149
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1150
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_characteristics_summary(input_path):
    """
    Build a per-sample summary of source characteristics for a
    MetaboLights study.

    :param input_path: Input path to ISA-tab
    :return: A list of dicts, one per sample, holding the sample name and
        the characteristic names and values of the sources it derives from

    Note: characteristics (columns) with exactly one distinct value across
    all samples are left out, so only varying ones are reported.

    Example usage:
        characteristics_summary = get_characteristics_summary('/path/to/my/study/')
        [
            {
                "name": "6089if_9",
                "Variant": "Synechocystis sp. PCC 6803.sll0171.ko"
            },
            {
                "name": "6089if_43",
                "Variant": "Synechocystis sp. PCC 6803.WT.none"
            },
        ]
    """
    investigation = isatab.load(input_path)

    samples = [
        sample
        for study in investigation.studies
        for sample in study.samples
    ]

    records = []
    for sample in samples:
        record = {'name': sample.name}
        for source in sample.derives_from:
            for characteristic in source.characteristics:
                # Plain values are stored as-is, ontology annotations by
                # their term; any other value type is skipped.
                if isinstance(characteristic.value, (str, int, float)):
                    record[characteristic.category.term] = \
                        characteristic.value
                elif isinstance(characteristic.value, OntologyAnnotation):
                    record[characteristic.category.term] = \
                        characteristic.value.term
        records.append(record)

    table = pd.DataFrame(records)
    distinct_counts = table.apply(pd.Series.nunique)
    constant_columns = distinct_counts[distinct_counts == 1].index
    return table.drop(constant_columns, axis=1).to_dict(orient='records')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1206
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1207
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_variable_summary(input_path):
    """Summarise the varying factor values and characteristics per sample.

    Each record starts with the sample name, then receives the sample's
    factor values, the name of the source(s) it derives from (a later source
    overwrites an earlier one) and those sources' characteristics. Values
    are kept when they are ``str``/``int``/``float`` or an
    ``OntologyAnnotation`` (stored as its ``term``).

    :param input_path: passed to ``isatab.load``
    :return: list of dicts, one per sample, without the columns that hold
        exactly one distinct value across all samples
    """
    investigation = isatab.load(input_path)

    samples = [
        sample
        for study in investigation.studies
        for sample in study.samples
    ]

    records = []
    for sample in samples:
        record = {'sample_name': sample.name}

        for factor_value in sample.factor_values:
            if isinstance(factor_value.value, (str, int, float)):
                record[factor_value.factor_name.name] = factor_value.value
            elif isinstance(factor_value.value, OntologyAnnotation):
                record[factor_value.factor_name.name] = \
                    factor_value.value.term

        for source in sample.derives_from:
            record['source_name'] = source.name
            for characteristic in source.characteristics:
                if isinstance(characteristic.value, (str, int, float)):
                    record[characteristic.category.term] = \
                        characteristic.value
                elif isinstance(characteristic.value, OntologyAnnotation):
                    record[characteristic.category.term] = \
                        characteristic.value.term

        records.append(record)

    table = pd.DataFrame(records)
    distinct_counts = table.apply(pd.Series.nunique)
    constant_columns = distinct_counts[distinct_counts == 1].index
    return table.drop(constant_columns, axis=1).to_dict(orient='records')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1247
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1248
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def get_study_group_factors(input_path):
    """Return the distinct factor-value combinations found in a study.

    Every file in ``input_path`` whose name starts with ``a_`` or ``s_`` is
    loaded with ``isatab.load_table``. For a table that has columns starting
    with ``'Factor Value'``, the de-duplicated rows of those columns become
    the result; a later such table replaces the result of an earlier one.

    :param input_path: directory containing the ISA-Tab table files
    :return: list of dicts (one per distinct factor-value row), or an empty
        list when no table has factor-value columns
    """
    factors_list = []

    # '[as]' is the intended character class; the former '[a|s]' also
    # matched a literal '|'.
    for table_path in glob.iglob(os.path.join(input_path, '[as]_*')):
        # glob already yields paths that include input_path, so open them
        # as-is: joining them onto input_path again duplicated the directory
        # for relative paths.
        with open(table_path) as fp:
            df = isatab.load_table(fp)

        factor_columns = [
            column for column in df.columns
            if column.startswith('Factor Value')]
        if factor_columns:
            factors_list = df[factor_columns].drop_duplicates()\
                .to_dict(orient='records')
    return factors_list
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1262
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1263
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1264 def get_filtered_df_on_factors_list(input_path):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1265 factors_list = get_study_group_factors(input_path=input_path)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1266 queries = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1267
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1268 for item in factors_list:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1269 query_str = []
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1270
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1271 for k, v in item.items():
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1272 k = k.replace(' ', '_').replace('[', '_').replace(']', '_')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1273 if isinstance(v, str):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1274 v = v.replace(' ', '_').replace('[', '_').replace(']', '_')
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1275 query_str.append("{k} == '{v}' and ".format(k=k, v=v))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1276
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1277 query_str = ''.join(query_str)[:-4]
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1278 queries.append(query_str)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1279
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1280 for table_file in glob.iglob(os.path.join(input_path, '[a|s]_*')):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1281 with open(os.path.join(input_path, table_file)) as fp:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1282 df = isatab.load_table(fp)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1283
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1284 cols = df.columns
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1285 cols = cols.map(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1286 lambda x: x.replace(' ', '_') if isinstance(x, str) else x)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1287 df.columns = cols
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1288
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1289 cols = df.columns
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1290 cols = cols.map(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1291 lambda x: x.replace('[', '_') if isinstance(x, str) else x)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1292 df.columns = cols
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1293
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1294 cols = df.columns
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1295 cols = cols.map(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1296 lambda x: x.replace(']', '_') if isinstance(x, str) else x)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1297 df.columns = cols
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1298
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1299 for query in queries:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1300 # query uses pandas.eval, which evaluates queries like pure Python
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1301 # notation
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1302 df2 = df.query(query)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1303 if 'Sample_Name' in df.columns:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1304 print('Group: {query} / Sample_Name: {sample_name}'.format(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1305 query=query, sample_name=list(df2['Sample_Name'])))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1306
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1307 if 'Source_Name' in df.columns:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1308 print('Group: {} / Sources_Name: {}'.format(
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1309 query, list(df2['Source_Name'])))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1310
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1311 if 'Raw_Spectral_Data_File' in df.columns:
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1312 print('Group: {query} / Raw_Spectral_Data_File: {filename}'
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1313 .format(query=query[13:-2],
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1314 filename=list(df2['Raw_Spectral_Data_File'])))
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1315 return queries
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1316
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1317
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def datatype_get_summary_command(options):
    """Write the variable summary of a study as JSON.

    Looks up the summary for ``options.study_id`` with
    ``get_study_variable_summary`` and dumps it, indented by four spaces,
    to ``options.output``.

    :param options: parsed command-line options; must provide ``study_id``
        and ``output`` (a writable text file object exposing ``name``).
    :raises RuntimeError: if the lookup returned ``None``, i.e. no summary
        could be obtained.
    """
    logger.info("Getting summary for study %s. Writing to %s.",
                options.study_id, options.output.name)

    summary = get_study_variable_summary(options.study_id)
    # Check for a missing summary before touching it: calling list() on
    # None would raise a TypeError and hide the intended RuntimeError.
    if summary is None:
        raise RuntimeError("Error getting study summary")

    print('summary: ', list(summary))
    json.dump(summary, options.output, indent=4)
    logger.debug("Summary dumped")
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1329
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1330
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1331 # logging and argument parsing
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1332
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1333 def _configure_logger(options):
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1334 logging_level = getattr(logging, options.log_level, logging.INFO)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1335 logging.basicConfig(level=logging_level)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1336
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1337 global logger
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1338 logger = logging.getLogger()
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1339 logger.setLevel(logging_level) # there's a bug somewhere. The level set through basicConfig isn't taking effect
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1340
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1341
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def _parse_args(args):
    """Parse command-line arguments with the parser from ``make_parser``.

    :param args: list of argument strings to parse.
    :return: the options object produced by the parser's ``parse_args``.
    """
    return make_parser().parse_args(args)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1346
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1347
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
def main(args):
    """Parse ``args``, set up logging and run the selected subcommand.

    :param args: list of command-line argument strings.
    """
    parsed = _parse_args(args)
    _configure_logger(parsed)
    # Run the subcommand handler stored on the parsed options as ``func``;
    # it receives the parsed options themselves.
    handler = parsed.func
    handler(parsed)
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1353
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
1354
8dab200e02cb "planemo upload commit 239561a6401593c5f87df40ac971a9aa393c4663-dirty"
prog
parents:
diff changeset
if __name__ == '__main__':
    # Run the command-line interface and turn the outcome into an exit
    # status: 0 on success; on failure the exception's ``code`` attribute
    # if it has one, otherwise 99.
    try:
        main(sys.argv[1:])
    except Exception as err:
        # Logged twice on purpose to match existing output: once with the
        # traceback, once as a plain error line.
        logger.exception(err)
        logger.error(err)
        sys.exit(getattr(err, "code", 99))
    else:
        sys.exit(0)