0
|
1 import os
|
|
2 import sys
|
|
3 import json
|
|
4 import subprocess
|
|
5 import argparse
|
|
6 import logging
|
|
7 import itertools
|
2
|
8 import copy
|
0
|
9
|
2
|
10 import numpy as np
|
|
11
|
|
12 from collections import defaultdict, OrderedDict
|
0
|
13
|
|
14 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
|
|
15 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
|
|
16
|
|
17 from logging_module import initLogger
|
|
18
|
|
class ModelFile(dict):
    """Dictionary of ``Model`` objects keyed by model name.

    Besides the mapping itself, an instance tracks:

    * ``inds`` -- the unique individuals found across the stored models, in
      first-seen order;
    * ``ind_file`` -- path of the individuals file written by
      ``create_ind_file`` ('' when none exists);
    * ``exclude_file`` -- path of the exclusion file written by
      ``create_exclude_ind_file`` ('' when none exists).
    """

    def __init__(self, *arg, **kw):
        """Build the mapping exactly like ``dict`` and collect the individuals."""
        super(ModelFile, self).__init__(*arg, **kw)
        self.inds = []
        self.ind_file = ''
        self.exclude_file = ''

        # dict() takes at most one positional argument, so looking at arg[1]
        # could only ever raise IndexError. Rebuild the individual list from
        # whatever models were actually stored instead.
        self.update_inds()

    def __setitem__(self, *arg, **kw):
        """Store a value; when it is a ``Model``, merge its individuals."""
        super(ModelFile, self).__setitem__(*arg, **kw)

        if len(arg) > 1 and self.confirm_model_instance(arg[1]):
            self.update_inds(model=arg[1])

    def __delitem__(self, key):
        """Remove a model and rebuild the individual list from the rest."""
        super(ModelFile, self).__delitem__(key)
        self.update_inds()

    def confirm_model_instance(self, unknown):
        """Return True when ``unknown`` is a ``Model`` instance."""
        return isinstance(unknown, Model)

    def copy_model(self, src_model_name, new_model_name):
        """Store a deep copy of an existing model under a new name.

        Raises:
            KeyError: if ``src_model_name`` is not stored.
        """
        src_model = super(ModelFile, self).__getitem__(src_model_name)

        src_model_copy = copy.deepcopy(src_model)
        src_model_copy.name = new_model_name

        # The copy holds the same individuals as its source, so the plain
        # dict setter is used and ``inds`` is left untouched.
        super(ModelFile, self).__setitem__(new_model_name, src_model_copy)

    def rename_model(self, src_model_name, new_model_name):
        """Move a stored model to a new key and update its ``name``.

        Raises:
            KeyError: if ``src_model_name`` is not stored.
        """
        src_model = super(ModelFile, self).pop(src_model_name)
        src_model.name = new_model_name

        # Same individuals as before, so ``inds`` does not need refreshing.
        super(ModelFile, self).__setitem__(new_model_name, src_model)

    @staticmethod
    def _unique(items):
        """Return ``items`` without duplicates, keeping first-seen order."""
        seen = set()
        ordered = []
        for item in items:
            if item not in seen:
                seen.add(item)
                ordered.append(item)
        return ordered

    def update_inds(self, model=None):
        """Refresh ``self.inds``.

        With a ``Model`` argument, its individuals are merged into the current
        list. Otherwise the list is rebuilt from every stored ``Model``
        (stored values that are not models are ignored).

        Raises:
            IOError: if ``model`` is a ``Model`` without any individuals.
        """
        if model is not None and self.confirm_model_instance(model):

            # Return error if inds is empty
            if not model.inds:
                raise IOError('No individuals found in %s.' % model.name)

            # Merge the new individuals into the current list
            self.inds = self._unique(self.inds + model.inds)

        else:

            # Gather the individuals of every model in the file
            collected_inds = []
            for model_in_file in super(ModelFile, self).values():
                if self.confirm_model_instance(model_in_file):
                    collected_inds.extend(model_in_file.inds)

            self.inds = self._unique(collected_inds)

    @staticmethod
    def _write_ind_file(ind_filename, inds, file_path, overwrite):
        """Write one individual per line and return the path that was written.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Refuse to clobber an existing file unless asked to
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        # The context manager closes the handle even if the write fails
        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(inds))

        return ind_filename

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write ``self.inds`` to ``unique_individuals<file_ext>``.

        The resulting path is saved in ``self.ind_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        self.ind_file = self._write_ind_file('unique_individuals' + file_ext,
                                             self.inds, file_path, overwrite)

    def delete_ind_file(self):
        """Delete the file recorded in ``self.ind_file``, if any."""
        # Check if an individuals file was created
        if self.ind_file:

            # Delete the individuals file
            os.remove(self.ind_file)

            # Remove the filename
            self.ind_file = ''

    def create_exclude_ind_file(self, inds_to_include=None, file_ext='', file_path='', overwrite=False):
        """Write every individual NOT in ``inds_to_include`` to
        ``exclude_individuals<file_ext>``.

        The excluded individuals keep the order they have in ``self.inds``.
        The resulting path is saved in ``self.exclude_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        # None (rather than a shared mutable []) stands for "include nothing"
        included = set(inds_to_include or [])

        # Create exclude list by removing included individuals
        exclude_inds = [ind for ind in self._unique(self.inds) if ind not in included]

        self.exclude_file = self._write_ind_file('exclude_individuals' + file_ext,
                                                 exclude_inds, file_path, overwrite)

    def delete_exclude_ind_file(self):
        """Delete the file recorded in ``self.exclude_file``, if any."""
        # Check if an exclusion file was created
        if self.exclude_file:

            # Delete the exclusion file
            os.remove(self.exclude_file)

            # Remove the filename
            self.exclude_file = ''

    def to_json(self):
        """Return a list with the ``to_json()`` result of each stored model."""
        return [model_data.to_json() for model_data in self.values()]
|
|
168
|
|
169
|
0
|
class Model:
    """A named model made of populations and the individuals in each.

    Attributes set by ``__init__``:

    * ``name`` -- the model name;
    * ``tree`` -- tree string assigned via ``assign_tree`` ('' by default);
    * ``pop_list`` -- population names (as ``str``) in assignment order;
    * ``ind_dict`` -- population name -> list of individual names;
    * ``nind`` -- population name -> number of individuals;
    * ``pop_files`` -- paths written by ``create_pop_files``;
    * ``ind_file`` -- path written by ``create_ind_file`` ('' when none).
    """

    def __init__(self, name):
        self.name = name
        self.tree = ''
        self.pop_list = []
        self.ind_dict = defaultdict(list)
        self.nind = defaultdict(int)
        self.pop_files = []
        self.ind_file = ''

    @property
    def npop(self):
        """Number of populations assigned to the model."""
        return len(self.pop_list)

    @property
    def inds(self):
        """Flat list of the individuals of every population."""
        return list(itertools.chain.from_iterable(self.ind_dict.values()))

    def assign_tree(self, tree):
        """Store ``tree`` as a string."""
        self.tree = str(tree)

    def assign_pop(self, pop, inds=None):
        """Add a population and, optionally, its individuals.

        The population name and the individual names are stored as ``str``.
        """
        # Use the same (str) key everywhere: pop_list, ind_dict and nind must
        # agree or later lookups by pop_list entries silently find nothing.
        pop = str(pop)
        self.pop_list.append(pop)
        if inds:
            self.ind_dict[pop] = [str(ind) for ind in inds]
        self.nind[pop] = len(self.ind_dict[pop])

    @staticmethod
    def _coerce_sample_size(sample_size):
        """Return ``sample_size`` as an int, raising Exception when it is not one."""
        try:
            return int(sample_size)
        except (TypeError, ValueError, OverflowError):
            # Raise error if sample_size not an int
            raise Exception('%s not int' % sample_size)

    def _check_sample_fits(self, pop, sample_size):
        """Raise Exception when ``sample_size`` exceeds the size of ``pop``."""
        if sample_size > self.nind[pop]:
            raise Exception('%s is larger than %s' % (sample_size, pop))

    def _resample(self, pop, sample_size, with_replacements):
        """Replace the individuals of ``pop`` with a random sample of them."""
        # Use numpy choice to randomly sample the pop
        sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace=with_replacements)

        # tolist() converts the NumPy scalars back to plain Python values
        self.ind_dict[pop] = sampled_inds.tolist()

        # Keep the cached population size in step with the sampled list
        self.nind[pop] = len(self.ind_dict[pop])

    def sample_pop(self, pop, sample_size, with_replacements=False):
        """Randomly down-sample a single population in place.

        Raises:
            Exception: if ``pop`` is unknown, ``sample_size`` is not an int,
                or ``sample_size`` is larger than the population.
        """
        pop = str(pop)

        # Confirm the pop is in the model
        if pop not in self.pop_list:
            raise Exception('%s not found' % pop)

        sample_size = self._coerce_sample_size(sample_size)
        self._check_sample_fits(pop, sample_size)
        self._resample(pop, sample_size, with_replacements)

    def sample_pops(self, sample_size, with_replacements=False):
        """Randomly down-sample every population in place to ``sample_size``.

        All populations are validated before any of them is modified.

        Raises:
            Exception: if ``sample_size`` is not an int or is larger than any
                population.
        """
        sample_size = self._coerce_sample_size(sample_size)

        # Check every pop first so a failure leaves the model untouched
        for pop in self.pop_list:
            self._check_sample_fits(pop, sample_size)

        for pop in self.pop_list:
            self._resample(pop, sample_size, with_replacements)

    def create_pop_files(self, file_ext='', file_path='', overwrite=False):
        """Write one file per population (``<pop><file_ext>``), one individual per line.

        Each path written is appended to ``self.pop_files``.

        Raises:
            IOError: if a population file exists and ``overwrite`` is false.
        """
        for pop in self.pop_list:
            # Assign the filename for the population file
            pop_filename = pop + file_ext

            # If a path is assigned, create the file at the specified location
            if file_path:
                pop_filename = os.path.join(file_path, pop_filename)

            # Refuse to clobber an existing file unless asked to
            if not overwrite and os.path.isfile(pop_filename):
                raise IOError('Population file exists.')

            # The context manager closes the handle even if the write fails
            with open(pop_filename, 'w') as pop_file:
                pop_file.write('%s\n' % '\n'.join(self.ind_dict[pop]))

            # Save the population filename
            self.pop_files.append(pop_filename)

    def delete_pop_files(self):
        """Delete every file recorded in ``self.pop_files`` and clear the list."""
        # Loop the created pop files (no-op when none were created)
        for pop_file in self.pop_files:
            os.remove(pop_file)

        # Remove the filenames
        self.pop_files = []

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write all individuals to ``individual.keep<file_ext>``, one per line.

        The resulting path is saved in ``self.ind_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        # Assign the filename for the individuals file
        ind_filename = 'individual.keep' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Refuse to clobber an existing file unless asked to
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(self.inds))

        # Save the individuals filename
        self.ind_file = ind_filename

    def delete_ind_file(self):
        """Delete the file recorded in ``self.ind_file``, if any."""
        # Check if an individuals file was created
        if self.ind_file:

            # Delete the individuals file
            os.remove(self.ind_file)

            # Remove the filename
            self.ind_file = ''

    def to_json(self):
        """Return the model as a JSON-serialisable ordered mapping.

        Layout: ``{'name': <name>, 'pops': {<pop>: {'inds': [<ind>, ...]}}}``.
        """
        model_json = OrderedDict()
        model_json['name'] = self.name

        pop_json = OrderedDict()
        for pop in self.pop_list:
            pop_json[pop] = OrderedDict()
            # 'inds' is the key the model-file reader looks up; writing any
            # other key produces files that cannot be read back.
            pop_json[pop]['inds'] = self.ind_dict[pop]

        model_json['pops'] = pop_json

        return model_json
|
|
341
|
|
def read_model_file(filename):
    """Parse a JSON model file into a ``ModelFile``.

    The file is expected to hold a list of objects shaped like
    ``{"name": ..., "pops": {<pop>: {"inds": [...]}}}``. The ``"indv"``
    spelling of the individuals key is accepted as well, so files written
    with that key can still be read.

    Args:
        filename: path of the model file.

    Returns:
        A ``ModelFile`` mapping each model name (``str``) to its ``Model``.

    Raises:
        IOError: if ``filename`` is not an existing file, or (via
            ``ModelFile``) if a model contains no individuals.
        KeyError: if a required key is missing from the file.
    """
    # Check that the file exists
    if not os.path.isfile(filename):
        raise IOError('Model file not found: %s' % filename)

    # Create ModelFile object
    models_to_return = ModelFile()

    # Check if using python 2 or 3
    if sys.version_info[0] == 2:
        # Open the model file in python 2 (universal newlines)
        model_file = open(filename, 'rU')
    else:
        # Open the model file in python 3
        model_file = open(filename, 'r', newline=None)

    # Parse the model file using the json reader; the context manager makes
    # sure the handle is closed even when the JSON is malformed
    with model_file:
        models_dict = json.load(model_file)

    # Loop the parsed models
    for model_dict in models_dict:

        # Create the model
        model = Model(str(model_dict['name']))

        # Loop the populations in the model
        for pop, pop_dict in model_dict['pops'].items():

            # Prefer 'inds'; fall back to 'indv' only when it is the key present
            if 'inds' not in pop_dict and 'indv' in pop_dict:
                raw_inds = pop_dict['indv']
            else:
                raw_inds = pop_dict['inds']

            # Convert all individuals names to str
            ind_list = [str(pop_ind) for pop_ind in raw_inds]

            # Assign the population and its individuals to the model
            model.assign_pop(str(pop), ind_list)

        # Save the model
        models_to_return[str(model.name)] = model

    logging.info('Finished reading model file (%s)' % filename)

    # Return the models
    return models_to_return
|
2
|
393
|
|
def write_model_file(model_file, filename, overwrite=False):
    """Write ``model_file.to_json()`` to ``filename`` as indented JSON.

    Args:
        model_file: object exposing ``to_json()`` that returns
            JSON-serialisable data.
        filename: path of the file to write.
        overwrite: when false (the default), refuse to replace an existing
            path.

    Raises:
        Exception: if ``filename`` exists and ``overwrite`` is false.
    """
    # Check if the file is to be overwritten, and if not, that it is absent
    if not overwrite and os.path.exists(filename):
        raise Exception('%s already exists' % filename)

    # Serialise before opening the output: opening with 'w' truncates, so a
    # failure in to_json()/json.dumps must not leave an empty file behind
    json_text = json.dumps(model_file.to_json(), indent=4)

    # Write the json-formatted data; the handle is closed on any outcome
    with open(filename, 'w') as output_file:
        output_file.write(json_text)

    logging.info('Finished writing model file (%s)' % filename)
|