Mercurial > repos > jaredgk > ppp_vcfphase
comparison model.py @ 2:54c84f7dcb2c draft
Uploaded
author | jaredgk |
---|---|
date | Wed, 17 Oct 2018 17:20:47 -0400 |
parents | 3830d29fca6a |
children |
comparison
equal
deleted
inserted
replaced
1:15245deda141 | 2:54c84f7dcb2c |
---|---|
3 import json | 3 import json |
4 import subprocess | 4 import subprocess |
5 import argparse | 5 import argparse |
6 import logging | 6 import logging |
7 import itertools | 7 import itertools |
8 | 8 import copy |
9 from collections import defaultdict | 9 |
10 import numpy as np | |
11 | |
12 from collections import defaultdict, OrderedDict | |
10 | 13 |
11 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. | 14 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. |
12 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | 15 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) |
13 | 16 |
14 from logging_module import initLogger | 17 from logging_module import initLogger |
18 super(ModelFile, self).__init__(*arg, **kw) | 21 super(ModelFile, self).__init__(*arg, **kw) |
19 self.inds = [] | 22 self.inds = [] |
20 self.ind_file = '' | 23 self.ind_file = '' |
21 self.exclude_file = '' | 24 self.exclude_file = '' |
22 | 25 |
23 def assign_inds (self, inds = []): | 26 if arg and self.confirm_model_instance(arg[1]): |
24 # Return error if inds is empty | 27 self.update_inds(arg[1]) |
25 if not inds: | 28 |
26 raise IOError('No individuals found in the model file.') | 29 def __setitem__(self, *arg, **kw): |
30 super(ModelFile, self).__setitem__(*arg, **kw) | |
31 | |
32 if arg and self.confirm_model_instance(arg[1]): | |
33 self.update_inds(model = arg[1]) | |
34 | |
35 def __delitem__(self, key): | |
36 super(ModelFile, self).__delitem__(key) | |
37 self.update_inds() | |
38 | |
39 def confirm_model_instance (self, unknown): | |
40 | |
41 if isinstance(unknown, Model): | |
42 | |
43 return True | |
44 | |
45 else: | |
46 | |
47 return False | |
48 | |
49 def copy_model (self, src_model_name, new_model_name): | |
50 | |
51 src_model = super(ModelFile, self).__getitem__(src_model_name) | |
52 | |
53 src_model_copy = copy.deepcopy(src_model) | |
54 | |
55 src_model_copy.name = new_model_name | |
56 | |
57 super(ModelFile, self).__setitem__(new_model_name, src_model_copy) | |
58 | |
59 def rename_model (self, src_model_name, new_model_name): | |
60 | |
61 src_model = super(ModelFile, self).pop(src_model_name) | |
62 | |
63 src_model.name = new_model_name | |
64 | |
65 super(ModelFile, self).__setitem__(new_model_name, src_model) | |
66 | |
67 def update_inds (self, model = None): | |
68 | |
69 if self.confirm_model_instance(model): | |
70 | |
71 # Return error if inds is empty | |
72 if not model.inds: | |
73 raise IOError('No individuals found in %s.' % model.name) | |
74 | |
75 # Create a list of the unique individuals | |
76 unique_inds = list(set(self.inds + model.inds)) | |
77 | |
78 else: | |
79 | |
80 # Create an empty list for the unique individuals | |
81 unique_inds = [] | |
82 | |
83 # Loop the models in the file | |
84 for model_in_file in super(ModelFile, self).values(): | |
85 | |
86 # Create a list of the unique individuals | |
87 unique_inds = list(set(unique_inds + model_in_file.inds)) | |
88 | |
89 | |
27 # Store the individuals | 90 # Store the individuals |
28 self.inds = [str(ind) for ind in inds] | 91 self.inds = unique_inds |
29 | 92 |
30 def create_ind_file (self, file_ext = '', file_path = '', overwrite = False): | 93 def create_ind_file (self, file_ext = '', file_path = '', overwrite = False): |
31 # Assign the filename for the population file | 94 # Assign the filename for the population file |
32 ind_filename = 'unique_individuals' + file_ext | 95 ind_filename = 'unique_individuals' + file_ext |
33 | 96 |
82 ind_file.close() | 145 ind_file.close() |
83 | 146 |
84 # Save the individuals filename | 147 # Save the individuals filename |
85 self.exclude_file = ind_filename | 148 self.exclude_file = ind_filename |
86 | 149 |
87 def delete_ind_file (self): | 150 def delete_exclude_ind_file (self): |
88 # Check if an individuals file was created | 151 # Check if an individuals file was created |
89 if self.exclude_file: | 152 if self.exclude_file: |
90 | 153 |
91 # Delete the individuals file | 154 # Delete the individuals file |
92 os.remove(self.exclude_file) | 155 os.remove(self.exclude_file) |
93 | 156 |
94 # Remove the filename | 157 # Remove the filename |
95 self.exclude_file = '' | 158 self.exclude_file = '' |
159 | |
160 def to_json (self): | |
161 | |
162 model_file_json = [] | |
163 | |
164 for model_name, model_data in super(ModelFile, self).items(): | |
165 model_file_json.append(model_data.to_json()) | |
166 | |
167 return model_file_json | |
168 | |
96 | 169 |
97 class Model: | 170 class Model: |
98 def __init__ (self, name): | 171 def __init__ (self, name): |
99 self.name = name | 172 self.name = name |
100 self.tree = '' | 173 self.tree = '' |
101 self.npop = 0 | |
102 self.pop_list = [] | 174 self.pop_list = [] |
175 self.ind_dict = defaultdict(list) | |
103 self.nind = defaultdict(int) | 176 self.nind = defaultdict(int) |
104 self.ind_dict = defaultdict(list) | |
105 self.pop_files = [] | 177 self.pop_files = [] |
106 self.ind_file = '' | 178 self.ind_file = '' |
107 | 179 |
108 @property | 180 @property |
109 def inds(self): | 181 def npop (self): |
182 return len(self.pop_list) | |
183 | |
184 @property | |
185 def inds (self): | |
110 return list(itertools.chain.from_iterable(self.ind_dict.values())) | 186 return list(itertools.chain.from_iterable(self.ind_dict.values())) |
111 | 187 |
112 def assign_tree (self, tree): | 188 def assign_tree (self, tree): |
113 self.tree = str(tree) | 189 self.tree = str(tree) |
114 | 190 |
115 def assign_pop (self, pop, inds = []): | 191 def assign_pop (self, pop, inds = []): |
116 self.npop += 1 | |
117 self.pop_list.append(str(pop)) | 192 self.pop_list.append(str(pop)) |
118 if inds: | 193 if inds: |
119 self.nind[pop] = len(inds) | |
120 self.ind_dict[pop] = [str(ind) for ind in inds] | 194 self.ind_dict[pop] = [str(ind) for ind in inds] |
195 self.nind[pop] = len(self.ind_dict[pop]) | |
196 | |
197 def sample_pop (self, pop, sample_size, with_replacements = False): | |
198 | |
199 # Confirm the pop is in the model | |
200 if str(pop) not in self.pop_list: | |
201 | |
202 # Raise error if pop not found | |
203 raise Exception('%s not found' % pop) | |
204 | |
205 # Confirm the sample size is an int | |
206 try: | |
207 | |
208 sample_size = int(sample_size) | |
209 | |
210 except: | |
211 | |
212 # Raise error if sample_size not an int | |
213 raise Exception('%s not int' % sample_size) | |
214 | |
215 # Check if the sample size is larger than the pop | |
216 if int(sample_size) > self.nind[pop]: | |
217 | |
218 # Raise error if sample_size is larger | |
219 raise Exception('%s is larger than %s' % (sample_size, pop)) | |
220 | |
221 # Use numpy choice to randomly sample the pop | |
222 sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements) | |
223 | |
224 # Save the sampled inds as a list | |
225 self.ind_dict[pop] = list(sampled_inds) | |
226 | |
227 def sample_pops (self, sample_size, with_replacements = False): | |
228 | |
229 # Confirm the sample size is an int | |
230 try: | |
231 | |
232 sample_size = int(sample_size) | |
233 | |
234 except: | |
235 | |
236 # Raise error if sample_size not an int | |
237 raise Exception('%s not int' % sample_size) | |
238 | |
239 # Loop each pop in the pop list | |
240 for pop in self.pop_list: | |
241 | |
242 # Check if the sample size is larger than the pop | |
243 if int(sample_size) > self.nind[pop]: | |
244 | |
245 # Raise error if sample_size is larger | |
246 raise Exception('%s is larger than %s' % (sample_size, pop)) | |
247 | |
248 # Loop each pop in the pop list, if no error raised | |
249 for pop in self.pop_list: | |
250 | |
251 # Use numpy choice to randomly sample the pop | |
252 sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements) | |
253 | |
254 # Save the sampled inds as a list | |
255 self.ind_dict[pop] = list(sampled_inds) | |
121 | 256 |
122 def create_pop_files (self, file_ext = '', file_path = '', overwrite = False): | 257 def create_pop_files (self, file_ext = '', file_path = '', overwrite = False): |
123 for pop in self.pop_list: | 258 for pop in self.pop_list: |
124 # Assign the filename for the population file | 259 # Assign the filename for the population file |
125 pop_filename = pop + file_ext | 260 pop_filename = pop + file_ext |
184 os.remove(self.ind_file) | 319 os.remove(self.ind_file) |
185 | 320 |
186 # Remove the filename | 321 # Remove the filename |
187 self.ind_file = '' | 322 self.ind_file = '' |
188 | 323 |
189 def read_model_file (model_filename): | 324 def to_json (self): |
325 | |
326 model_json = OrderedDict() | |
327 | |
328 model_json['name'] = self.name | |
329 | |
330 pop_json = OrderedDict() | |
331 | |
332 for pop in self.pop_list: | |
333 | |
334 pop_json[pop] = OrderedDict() | |
335 | |
336 pop_json[pop]['indv'] = self.ind_dict[pop] | |
337 | |
338 model_json['pops'] = pop_json | |
339 | |
340 return model_json | |
341 | |
342 def read_model_file (filename): | |
190 | 343 |
191 # Check that the file exists | 344 # Check that the file exists |
192 if not os.path.isfile(model_filename): | 345 if not os.path.isfile(filename): |
193 raise IOError | 346 raise IOError |
194 | 347 |
195 # Create ModelFile object | 348 # Create ModelFile object |
196 models_to_return = ModelFile() | 349 models_to_return = ModelFile() |
197 | 350 |
198 # Check if using python 2 or 3 | 351 # Check if using python 2 or 3 |
199 if sys.version_info[0] == 2: | 352 if sys.version_info[0] == 2: |
200 # Open the model file in python 2 | 353 # Open the model file in python 2 |
201 model_file = open(model_filename, 'rU') | 354 model_file = open(filename, 'rU') |
202 else: | 355 else: |
203 # Open the model file in python 3 | 356 # Open the model file in python 3 |
204 model_file = open(model_filename, 'r', newline=None) | 357 model_file = open(filename, 'r', newline=None) |
205 | 358 |
206 # Parse the model file using the json reader | 359 # Parse the model file using the json reader |
207 models_dict = json.load(model_file) | 360 models_dict = json.load(model_file) |
208 | 361 |
209 # List to store all unique individuals (i.e. individuals in all models) | 362 # List to store all unique individuals (i.e. individuals in all models) |
211 | 364 |
212 # Loop the parsed models | 365 # Loop the parsed models |
213 for model_dict in models_dict: | 366 for model_dict in models_dict: |
214 | 367 |
215 # Create the model | 368 # Create the model |
216 model = Model(model_dict['name']) | 369 model = Model(str(model_dict['name'])) |
217 | 370 |
218 # Loop the populations in the model | 371 # Loop the populations in the model |
219 for pop, pop_dict in model_dict['pops'].items(): | 372 for pop, pop_dict in model_dict['pops'].items(): |
220 | 373 |
374 # Convert all individuals names to str | |
375 ind_list = [str(pop_ind) for pop_ind in pop_dict['inds']] | |
376 | |
221 # Assign the population and its individuals to the model | 377 # Assign the population and its individuals to the model |
222 model.assign_pop(pop, pop_dict['inds']) | 378 model.assign_pop(str(pop), ind_list) |
379 | |
223 # Assign the individuals to the unique individual list | 380 # Assign the individuals to the unique individual list |
224 individual_list.extend(pop_dict['inds']) | 381 individual_list.extend(ind_list) |
225 | 382 |
226 # Remove duplicates from the unique individual list | 383 # Remove duplicates from the unique individual list |
227 individual_list = list(set(individual_list)) | 384 individual_list = list(set(individual_list)) |
228 | 385 |
229 # Save the model | 386 # Save the model |
230 models_to_return[str(model.name)] = model | 387 models_to_return[str(model.name)] = model |
231 | 388 |
232 # Store the unique individuals within the ModelFile object | 389 logging.info('Finished reading model file (%s)' % filename) |
233 models_to_return.assign_inds(individual_list) | |
234 | 390 |
235 # Return the models | 391 # Return the models |
236 return models_to_return | 392 return models_to_return |
393 | |
394 def write_model_file (model_file, filename, overwrite = False): | |
395 | |
396 # Check if the file is to be overwritten | |
397 if not overwrite: | |
398 | |
399 # Check if the file exists | |
400 if os.path.exists(filename): | |
401 raise Exception('%s already exists' % filename) | |
402 | |
403 # Open the output file | |
404 output_file = open(filename, 'w') | |
405 | |
406 # Write the json-formatted data to the output file | |
407 output_file.write(json.dumps(model_file.to_json(), indent = 4)) | |
408 | |
409 # Close the output file | |
410 output_file.close() | |
411 | |
412 logging.info('Finished writing model file (%s)' % filename) |