Mercurial > repos > rhpvorderman > data_manager_select_index_by_path
comparison data_manager/path_name_value_key_manager.py @ 1:8495c49cd056 draft default tip
planemo upload for repository https://github.com/LUMC/lumc-galaxy-tools/tree/master/data_manager_select_index_by_path commit 9061997af3bc94f49653ffd42f10b973578e371d
author | rhpvorderman |
---|---|
date | Mon, 16 Jul 2018 10:58:36 -0400 |
parents | 5f8d9309058b |
children |
comparison legend (how each row of the table below is classified):
equal
deleted
inserted
replaced
0:5f8d9309058b | 1:8495c49cd056 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python3 |
2 | 2 """Script to create data manager jsons""" |
3 | |
4 import argparse | |
3 import json | 5 import json |
4 import argparse | 6 from pathlib import Path |
5 import os | 7 |
6 import yaml | 8 import yaml |
7 | 9 from schema import Schema, Optional |
8 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): | 10 |
9 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 11 |
10 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) | 12 def indexes_schema(): |
11 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) | 13 return Schema( |
12 return data_manager_dict | 14 {'name': str, |
13 | 15 Optional('prefix'): bool, |
14 | 16 Optional('extensions'): [str], |
15 def check_param(name, value, default=None, check_tab=True): | 17 Optional('prefix_strip_extension'): bool, |
16 if value in [ None, '', '?' ]: | 18 Optional('extra_columns'): [str], |
17 if default: | 19 Optional('folder'): [str]}) |
18 print "Using {0} for {1} as no value provided".format( default, name ) | 20 |
19 value = default | 21 |
20 else: | 22 def argument_parser(): |
21 raise Exception( '{0} is not a valid {1}. You must specify a valid {1}.'.format( value, name ) ) | 23 parser = argparse.ArgumentParser() |
22 if check_tab and "\t" in value: | 24 parser.add_argument('--value', type=str, help='value') |
23 raise Exception( '{0} is not a valid {1}. It may not contain a tab because these are used as seperators by galaxy .'.format( value, name ) ) | 25 parser.add_argument('--dbkey', type=str, help='dbkey') |
24 return value | 26 parser.add_argument('--name', type=str, help='name') |
25 | 27 parser.add_argument('--path', type=Path, help='path', |
26 def prefix_exists(directory, prefix): | 28 required=True) |
27 '''checks if files exist with prefix in a directory. Returns Boolean''' | 29 parser.add_argument('--data_table_name', action='store', type=str, |
28 matched_files = [] | 30 help='Name of the data table', |
29 directory_files = os.listdir(directory) | 31 required=True) |
30 for directory_file in directory_files: | 32 parser.add_argument('--json_output_file', action='store', type=Path, |
31 if directory_file.startswith(prefix): | 33 help='Json output file', |
32 matched_files.append(directory_file) | 34 required=True) |
35 parser.add_argument("--extra-columns", type=str, | |
36 help='Yaml formatted string with extra columns ' | |
37 'and their values. For example ' | |
38 '\'{"with-gtf":"0"}\' for STAR indexes') | |
39 return parser | |
40 | |
41 | |
42 def check_tab(name: str, value: str): | |
43 if '\t' in value: | |
44 raise ValueError( | |
45 "'{0}' is not a valid '{1}'. It may not contain a tab because " | |
46 "these are used as seperators by galaxy .".format( | |
47 value, name)) | |
48 | |
49 | |
50 def prefix_plus_extension_exists(directory: Path, prefix: str, extension: str): | |
51 """checks if files exist with prefix in a directory. Returns Boolean""" | |
52 matched_files = [directory_file for directory_file in directory.iterdir() | |
53 if | |
54 directory_file.name.startswith( | |
55 prefix) and directory_file.suffix == extension] | |
33 # Empty list should return False | 56 # Empty list should return False |
34 return bool(matched_files) | 57 return bool(matched_files) |
35 | 58 |
36 def prefix_plus_extension_exists(directory, prefix, extension): | 59 |
37 '''checks if files exist with prefix in a directory. Returns Boolean''' | 60 class DataTable(object): |
38 matched_files = [] | 61 |
39 directory_files = os.listdir(directory) | 62 def __init__(self, |
40 for directory_file in directory_files: | 63 index_path: Path, |
41 if directory_file.startswith(prefix) and directory_file.endswith(extension): | 64 data_table_name: str, |
42 matched_files.append(directory_file) | 65 indexes_properties_file: Path, |
43 # Empty list should return False | 66 name: str = None, |
44 return bool(matched_files) | 67 dbkey: str = None, |
68 value: str = None, | |
69 extra_columns: dict = None | |
70 ): | |
71 self.index_path = index_path | |
72 self.data_table_name = data_table_name | |
73 self.name = name if name else str(self.index_path.with_suffix( | |
74 '').name) | |
75 self.value = value if value else self.name | |
76 self.dbkey = dbkey if dbkey else self.value | |
77 self.extra_columns = extra_columns if extra_columns is not None else {} | |
78 self.indexes_properties_file = indexes_properties_file | |
79 | |
80 self.check_params() | |
81 | |
82 self.index_properties = self.get_index_properties() | |
83 | |
84 self.check_index_file_presence() | |
85 | |
86 def check_params(self): | |
87 | |
88 check_tab('name', self.name) | |
89 check_tab('index_path', str(self.index_path.absolute().name)) | |
90 check_tab('value', self.value) | |
91 check_tab('dbkey', self.dbkey) | |
92 self.check_extra_columns() | |
93 | |
94 def check_extra_columns(self): | |
95 index_properties = self.get_index_properties() | |
96 index_extra_columns = set(index_properties.get("extra_columns", [])) | |
97 given_extra_columns = self.extra_columns.keys() | |
98 if index_extra_columns != given_extra_columns: | |
99 if len(index_extra_columns) > 0: | |
100 raise ValueError( | |
101 "Values for the following columns should be " | |
102 "supplied: {0}.".format( | |
103 str(index_extra_columns).strip("{}"))) | |
104 if len(index_extra_columns) == 0: | |
105 raise ValueError( | |
106 "The table '{0}' does not have extra columns".format( | |
107 self.data_table_name)) | |
108 for key, value in self.extra_columns.items(): | |
109 check_tab(key, value) | |
110 | |
111 def get_index_properties(self) -> dict: | |
112 with self.indexes_properties_file.open('r') as properties_file: | |
113 indexes = yaml.safe_load(properties_file) | |
114 index_properties = indexes.get(self.data_table_name) | |
115 if index_properties is None: | |
116 raise ValueError( | |
117 "'{0}' not a supported table name".format( | |
118 self.data_table_name)) | |
119 return indexes_schema().validate(index_properties) | |
120 | |
121 def check_index_file_presence(self): | |
122 index_name = self.index_properties.get('name') | |
123 if index_name is None: | |
124 raise NotImplementedError( | |
125 "Property 'name' not defined for '{0}'," | |
126 " please contact the developers to correct the mistake.") | |
127 index_extensions = self.index_properties.get('extensions', ['']) | |
128 | |
129 # Sometimes an index path is a prefix. | |
130 # For example, with BWA. 'reference.fa' is the index. | |
131 # But the actual index files are | |
132 # 'reference.fa.amb', 'reference.fa.ann' etc. | |
133 | |
134 # If the index is not a prefix, | |
135 # the index file is taken to be the path itself. | |
136 index_is_a_prefix = self.index_properties.get('prefix', True) | |
137 prefix_strip_extension = self.index_properties.get( | |
138 'prefix_strip_extension', False) | |
139 if index_is_a_prefix: | |
140 if prefix_strip_extension: | |
141 prefix = str(self.index_path.with_suffix("").name) | |
142 else: | |
143 prefix = str(self.index_path.name) | |
144 for extension in index_extensions: | |
145 if not prefix_plus_extension_exists(self.index_path.parent, | |
146 prefix, extension): | |
147 raise FileNotFoundError( | |
148 "Unable to find files with prefix '{0}' " | |
149 "and extension '{1}' in {2}. Is this a valid {3}?" | |
150 .format( | |
151 prefix, | |
152 extension, | |
153 str(self.index_path.parent), | |
154 index_name)) | |
155 elif self.index_properties.get('folder') is not None: | |
156 for file in self.index_properties.get('folder'): | |
157 if not (self.index_path / Path(file)).exists(): | |
158 raise FileNotFoundError( | |
159 "A file named '{0}' was not found in '{1}'".format( | |
160 file, str(self.index_path))) | |
161 elif not self.index_path.exists() and not self.index_path.is_dir(): | |
162 raise FileNotFoundError( | |
163 'Unable to find path {0}.'.format(self.index_path)) | |
164 elif self.index_path.is_dir() and self.index_properties.get( | |
165 'folder') is None: | |
166 raise IsADirectoryError( | |
167 '{0} is a directory not a file'.format(self.index_path)) | |
168 elif self.index_path.exists(): | |
169 pass | |
170 else: | |
171 raise NotImplementedError("This condition was not expected " | |
172 "and should not be reached. Please " | |
173 "contact the developers.") | |
174 | |
175 @property | |
176 def data_manager_dict(self) -> dict: | |
177 data_table_entry = dict(value=self.value, dbkey=self.dbkey, | |
178 name=self.name, | |
179 path=str(self.index_path), | |
180 **self.extra_columns) | |
181 data_manager_dict = dict(data_tables=dict()) | |
182 data_manager_dict["data_tables"][ | |
183 self.data_table_name] = [data_table_entry] | |
184 return data_manager_dict | |
185 | |
186 @property | |
187 def data_manager_json(self) -> str: | |
188 return json.dumps(self.data_manager_dict) | |
189 | |
45 | 190 |
46 def main(): | 191 def main(): |
47 | 192 options = argument_parser().parse_args() |
48 #value = "test_value" | 193 |
49 #name = "test_name" | 194 if options.json_output_file.exists(): |
50 #print '{0} other {1} more{0}'.format(value, name ) | 195 pass # Do not raise error. |
51 #print '{0} is not a valid {1}. It may not contain a tab.'.format( value, name ) | 196 |
52 | 197 if options.extra_columns is None: |
53 #Parse Command Line | 198 extra_columns = dict() |
54 parser = argparse.ArgumentParser() | |
55 parser.add_argument( '--value', action='store', type=str, default=None, help='value' ) | |
56 parser.add_argument( '--dbkey', action='store', type=str, default=None, help='dbkey' ) | |
57 parser.add_argument( '--name', action='store', type=str, default=None, help='name' ) | |
58 parser.add_argument( '--path', action='store', type=str, default=None, help='path' ) | |
59 parser.add_argument( '--data_table_name', action='store', type=str, default=None, help='path' ) | |
60 parser.add_argument( '--json_output_file', action='store', type=str, default=None, help='path' ) | |
61 options = parser.parse_args() | |
62 | |
63 path = check_param("path", options.path) | |
64 basename = os.path.basename(path) | |
65 filename = os.path.splitext(basename)[0] | |
66 name = check_param("name", options.name, default=filename) | |
67 value = check_param("value", options.value, default=name) | |
68 dbkey = check_param("dbkey", options.dbkey, default=value) | |
69 data_table_name = check_param("data_table_name", options.data_table_name) | |
70 json_output_file = check_param("json_output_file", options.json_output_file, check_tab=False) | |
71 | |
72 # Check if file or prefix exists | |
73 indexes = yaml.load(file(os.path.join(os.path.dirname(__file__), 'indexes.yml'))) | |
74 index_dict = indexes.get(data_table_name,{}) | |
75 index_name = index_dict.get('name','index') | |
76 index_extensions = index_dict.get('extensions', ['']) | |
77 no_prefix = index_dict.get('no_prefix', False) | |
78 if not no_prefix: | |
79 dirname = os.path.dirname(path) | |
80 prefix = basename | |
81 for extension in index_extensions: | |
82 if not prefix_plus_extension_exists(dirname,prefix,extension): | |
83 raise Exception( 'Unable to find files with prefix "{0}" and extension "{1}" in {2}. Is this a valid {3}?'.format( prefix, extension, dirname, index_name ) ) | |
84 else: | 199 else: |
85 if not os.path.exists(path): | 200 try: |
86 raise Exception( 'Unable to find path {0}.'.format( path ) ) | 201 extra_columns = yaml.safe_load(options.extra_columns) |
87 | 202 except yaml.parser.ParserError as e: |
88 if os.path.exists(json_output_file): | 203 raise yaml.parser.ParserError( |
89 params = json.loads( open( json_output_file ).read() ) | 204 "Invalid yaml string for --extra_indexes. \nError {0}".format( |
90 print "params", params | 205 e)) |
91 else: | 206 |
92 params = {} | 207 index_properties_file = Path(__file__).parent / Path("indexes.yml") |
93 | 208 data_table = DataTable(index_path=options.path, |
94 data_manager_dict = {} | 209 data_table_name=options.data_table_name, |
95 data_table_entry = dict( value=value, dbkey=dbkey, name=name, path=path ) | 210 name=options.name, |
96 _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) | 211 value=options.value, |
97 | 212 dbkey=options.dbkey, |
98 #save info to json file | 213 indexes_properties_file=index_properties_file, |
99 with open( json_output_file, 'wb' ) as output_file: | 214 extra_columns=extra_columns) |
100 output_file.write( json.dumps( data_manager_dict ) ) | 215 |
101 output_file.write( "\n" ) | 216 # save info to json file |
217 with options.json_output_file.open('w') as output_file: | |
218 output_file.write(data_table.data_manager_json) | |
219 | |
102 | 220 |
103 if __name__ == "__main__": | 221 if __name__ == "__main__": |
104 main() | 222 main() |