comparison model.py @ 2:54c84f7dcb2c draft

Uploaded
author jaredgk
date Wed, 17 Oct 2018 17:20:47 -0400
parents 3830d29fca6a
children
comparison
equal deleted inserted replaced
1:15245deda141 2:54c84f7dcb2c
3 import json 3 import json
4 import subprocess 4 import subprocess
5 import argparse 5 import argparse
6 import logging 6 import logging
7 import itertools 7 import itertools
8 8 import copy
9 from collections import defaultdict 9
10 import numpy as np
11
12 from collections import defaultdict, OrderedDict
10 13
11 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. 14 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
12 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) 15 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
13 16
14 from logging_module import initLogger 17 from logging_module import initLogger
18 super(ModelFile, self).__init__(*arg, **kw) 21 super(ModelFile, self).__init__(*arg, **kw)
19 self.inds = [] 22 self.inds = []
20 self.ind_file = '' 23 self.ind_file = ''
21 self.exclude_file = '' 24 self.exclude_file = ''
22 25
23 def assign_inds (self, inds = []): 26 if arg and self.confirm_model_instance(arg[1]):
24 # Return error if inds is empty 27 self.update_inds(arg[1])
25 if not inds: 28
26 raise IOError('No individuals found in the model file.') 29 def __setitem__(self, *arg, **kw):
30 super(ModelFile, self).__setitem__(*arg, **kw)
31
32 if arg and self.confirm_model_instance(arg[1]):
33 self.update_inds(model = arg[1])
34
35 def __delitem__(self, key):
36 super(ModelFile, self).__delitem__(key)
37 self.update_inds()
38
39 def confirm_model_instance (self, unknown):
40
41 if isinstance(unknown, Model):
42
43 return True
44
45 else:
46
47 return False
48
49 def copy_model (self, src_model_name, new_model_name):
50
51 src_model = super(ModelFile, self).__getitem__(src_model_name)
52
53 src_model_copy = copy.deepcopy(src_model)
54
55 src_model_copy.name = new_model_name
56
57 super(ModelFile, self).__setitem__(new_model_name, src_model_copy)
58
59 def rename_model (self, src_model_name, new_model_name):
60
61 src_model = super(ModelFile, self).pop(src_model_name)
62
63 src_model.name = new_model_name
64
65 super(ModelFile, self).__setitem__(new_model_name, src_model)
66
67 def update_inds (self, model = None):
68
69 if self.confirm_model_instance(model):
70
71 # Return error if inds is empty
72 if not model.inds:
73 raise IOError('No individuals found in %s.' % model.name)
74
75 # Create a list of the unique individuals
76 unique_inds = list(set(self.inds + model.inds))
77
78 else:
79
80 # Create an empty list for the unique individuals
81 unique_inds = []
82
83 # Loop the models in the file
84 for model_in_file in super(ModelFile, self).values():
85
86 # Create a list of the unique individuals
87 unique_inds = list(set(unique_inds + model_in_file.inds))
88
89
27 # Store the individuals 90 # Store the individuals
28 self.inds = [str(ind) for ind in inds] 91 self.inds = unique_inds
29 92
30 def create_ind_file (self, file_ext = '', file_path = '', overwrite = False): 93 def create_ind_file (self, file_ext = '', file_path = '', overwrite = False):
31 # Assign the filename for the individuals file 94 # Assign the filename for the individuals file
32 ind_filename = 'unique_individuals' + file_ext 95 ind_filename = 'unique_individuals' + file_ext
33 96
82 ind_file.close() 145 ind_file.close()
83 146
84 # Save the individuals filename 147 # Save the individuals filename
85 self.exclude_file = ind_filename 148 self.exclude_file = ind_filename
86 149
87 def delete_ind_file (self): 150 def delete_exclude_ind_file (self):
88 # Check if an individuals file was created 151 # Check if an individuals file was created
89 if self.exclude_file: 152 if self.exclude_file:
90 153
91 # Delete the individuals file 154 # Delete the individuals file
92 os.remove(self.exclude_file) 155 os.remove(self.exclude_file)
93 156
94 # Remove the filename 157 # Remove the filename
95 self.exclude_file = '' 158 self.exclude_file = ''
159
160 def to_json (self):
161
162 model_file_json = []
163
164 for model_name, model_data in super(ModelFile, self).items():
165 model_file_json.append(model_data.to_json())
166
167 return model_file_json
168
96 169
97 class Model: 170 class Model:
98 def __init__ (self, name): 171 def __init__ (self, name):
99 self.name = name 172 self.name = name
100 self.tree = '' 173 self.tree = ''
101 self.npop = 0
102 self.pop_list = [] 174 self.pop_list = []
175 self.ind_dict = defaultdict(list)
103 self.nind = defaultdict(int) 176 self.nind = defaultdict(int)
104 self.ind_dict = defaultdict(list)
105 self.pop_files = [] 177 self.pop_files = []
106 self.ind_file = '' 178 self.ind_file = ''
107 179
108 @property 180 @property
109 def inds(self): 181 def npop (self):
182 return len(self.pop_list)
183
184 @property
185 def inds (self):
110 return list(itertools.chain.from_iterable(self.ind_dict.values())) 186 return list(itertools.chain.from_iterable(self.ind_dict.values()))
111 187
112 def assign_tree (self, tree): 188 def assign_tree (self, tree):
113 self.tree = str(tree) 189 self.tree = str(tree)
114 190
115 def assign_pop (self, pop, inds = []): 191 def assign_pop (self, pop, inds = []):
116 self.npop += 1
117 self.pop_list.append(str(pop)) 192 self.pop_list.append(str(pop))
118 if inds: 193 if inds:
119 self.nind[pop] = len(inds)
120 self.ind_dict[pop] = [str(ind) for ind in inds] 194 self.ind_dict[pop] = [str(ind) for ind in inds]
195 self.nind[pop] = len(self.ind_dict[pop])
196
197 def sample_pop (self, pop, sample_size, with_replacements = False):
198
199 # Confirm the pop is in the model
200 if str(pop) not in self.pop_list:
201
202 # Raise error if pop not found
203 raise Exception('%s not found' % pop)
204
205 # Confirm the sample size is an int
206 try:
207
208 sample_size = int(sample_size)
209
210 except:
211
212 # Raise error if sample_size not an int
213 raise Exception('%s not int' % sample_size)
214
215 # Check if the sample size is larger than the pop
216 if int(sample_size) > self.nind[pop]:
217
218 # Raise error if sample_size is larger
219 raise Exception('%s is larger than %s' % (sample_size, pop))
220
221 # Use numpy choice to randomly sample the pop
222 sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements)
223
224 # Save the sampled inds as a list
225 self.ind_dict[pop] = list(sampled_inds)
226
227 def sample_pops (self, sample_size, with_replacements = False):
228
229 # Confirm the sample size is an int
230 try:
231
232 sample_size = int(sample_size)
233
234 except:
235
236 # Raise error if sample_size not an int
237 raise Exception('%s not int' % sample_size)
238
239 # Loop each pop in the pop list
240 for pop in self.pop_list:
241
242 # Check if the sample size is larger than the pop
243 if int(sample_size) > self.nind[pop]:
244
245 # Raise error if sample_size is larger
246 raise Exception('%s is larger than %s' % (sample_size, pop))
247
248 # Loop each pop in the pop list, if no error raised
249 for pop in self.pop_list:
250
251 # Use numpy choice to randomly sample the pop
252 sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements)
253
254 # Save the sampled inds as a list
255 self.ind_dict[pop] = list(sampled_inds)
121 256
122 def create_pop_files (self, file_ext = '', file_path = '', overwrite = False): 257 def create_pop_files (self, file_ext = '', file_path = '', overwrite = False):
123 for pop in self.pop_list: 258 for pop in self.pop_list:
124 # Assign the filename for the population file 259 # Assign the filename for the population file
125 pop_filename = pop + file_ext 260 pop_filename = pop + file_ext
184 os.remove(self.ind_file) 319 os.remove(self.ind_file)
185 320
186 # Remove the filename 321 # Remove the filename
187 self.ind_file = '' 322 self.ind_file = ''
188 323
189 def read_model_file (model_filename): 324 def to_json (self):
325
326 model_json = OrderedDict()
327
328 model_json['name'] = self.name
329
330 pop_json = OrderedDict()
331
332 for pop in self.pop_list:
333
334 pop_json[pop] = OrderedDict()
335
336 pop_json[pop]['indv'] = self.ind_dict[pop]
337
338 model_json['pops'] = pop_json
339
340 return model_json
341
342 def read_model_file (filename):
190 343
191 # Check that the file exists 344 # Check that the file exists
192 if not os.path.isfile(model_filename): 345 if not os.path.isfile(filename):
193 raise IOError 346 raise IOError
194 347
195 # Create ModelFile object 348 # Create ModelFile object
196 models_to_return = ModelFile() 349 models_to_return = ModelFile()
197 350
198 # Check if using python 2 or 3 351 # Check if using python 2 or 3
199 if sys.version_info[0] == 2: 352 if sys.version_info[0] == 2:
200 # Open the model file in python 2 353 # Open the model file in python 2
201 model_file = open(model_filename, 'rU') 354 model_file = open(filename, 'rU')
202 else: 355 else:
203 # Open the model file in python 3 356 # Open the model file in python 3
204 model_file = open(model_filename, 'r', newline=None) 357 model_file = open(filename, 'r', newline=None)
205 358
206 # Parse the model file using the json reader 359 # Parse the model file using the json reader
207 models_dict = json.load(model_file) 360 models_dict = json.load(model_file)
208 361
209 # List to store all unique individuals (i.e. individuals in all models) 362 # List to store all unique individuals (i.e. individuals in all models)
211 364
212 # Loop the parsed models 365 # Loop the parsed models
213 for model_dict in models_dict: 366 for model_dict in models_dict:
214 367
215 # Create the model 368 # Create the model
216 model = Model(model_dict['name']) 369 model = Model(str(model_dict['name']))
217 370
218 # Loop the populations in the model 371 # Loop the populations in the model
219 for pop, pop_dict in model_dict['pops'].items(): 372 for pop, pop_dict in model_dict['pops'].items():
220 373
374 # Convert all individual names to str
375 ind_list = [str(pop_ind) for pop_ind in pop_dict['inds']]
376
221 # Assign the population and its individuals to the model 377 # Assign the population and its individuals to the model
222 model.assign_pop(pop, pop_dict['inds']) 378 model.assign_pop(str(pop), ind_list)
379
223 # Assign the individuals to the unique individual list 380 # Assign the individuals to the unique individual list
224 individual_list.extend(pop_dict['inds']) 381 individual_list.extend(ind_list)
225 382
226 # Remove duplicates from the unique individual list 383 # Remove duplicates from the unique individual list
227 individual_list = list(set(individual_list)) 384 individual_list = list(set(individual_list))
228 385
229 # Save the model 386 # Save the model
230 models_to_return[str(model.name)] = model 387 models_to_return[str(model.name)] = model
231 388
232 # Store the unique individuals within the ModelFile object 389 logging.info('Finished reading model file (%s)' % filename)
233 models_to_return.assign_inds(individual_list)
234 390
235 # Return the models 391 # Return the models
236 return models_to_return 392 return models_to_return
393
394 def write_model_file (model_file, filename, overwrite = False):
395
396 # Check if the file is to be overwritten
397 if not overwrite:
398
399 # Check if the file exists
400 if os.path.exists(filename):
401 raise Exception('%s already exists' % filename)
402
403 # Open the output file
404 output_file = open(filename, 'w')
405
406 # Write the JSON-formatted data to the output file
407 output_file.write(json.dumps(model_file.to_json(), indent = 4))
408
409 # Close the output file
410 output_file.close()
411
412 logging.info('Finished writing model file (%s)' % filename)