annotate model.py @ 4:901857c9b24f draft

Uploaded
author jaredgk
date Wed, 17 Oct 2018 17:30:37 -0400
parents 54c84f7dcb2c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
1 import os
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
2 import sys
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
3 import json
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
4 import subprocess
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
5 import argparse
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
6 import logging
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
7 import itertools
2
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
8 import copy
0
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
9
2
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
10 import numpy as np
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
11
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
12 from collections import defaultdict, OrderedDict
0
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
13
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
14 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
15 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
16
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
17 from logging_module import initLogger
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
18
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
class ModelFile(dict):
    """Dictionary of models keyed by model name.

    Besides the stored models, the container keeps:
      inds          -- the unique individuals found across the stored models
      ind_file      -- path written by create_ind_file ('' when none)
      exclude_file  -- path written by create_exclude_ind_file ('' when none)
    """

    def __init__(self, *arg, **kw):
        super(ModelFile, self).__init__(*arg, **kw)
        self.inds = []
        self.ind_file = ''
        self.exclude_file = ''

        # dict() accepts at most one positional argument, so the initial
        # models can only be reached through the populated dict itself.
        if self:
            self.update_inds()

    def __setitem__(self, *arg, **kw):
        # Remember whether an existing entry is about to be overwritten
        replacing = bool(arg) and arg[0] in self

        super(ModelFile, self).__setitem__(*arg, **kw)

        if arg and self.confirm_model_instance(arg[1]):
            # Validates the model (raises IOError if it has no individuals)
            # and merges its individuals into self.inds
            self.update_inds(model = arg[1])

            # Drop individuals that only the replaced model contributed
            if replacing:
                self.update_inds()

    def __delitem__(self, key):
        super(ModelFile, self).__delitem__(key)
        self.update_inds()

    def confirm_model_instance(self, unknown):
        """Return True if unknown is a Model instance, False otherwise."""
        return isinstance(unknown, Model)

    def copy_model(self, src_model_name, new_model_name):
        """Store a deep copy of the model src_model_name as new_model_name.

        Raises KeyError if src_model_name is not stored.
        """
        src_model = super(ModelFile, self).__getitem__(src_model_name)

        src_model_copy = copy.deepcopy(src_model)
        src_model_copy.name = new_model_name

        # The copy holds the same individuals, so self.inds is unaffected
        super(ModelFile, self).__setitem__(new_model_name, src_model_copy)

    def rename_model(self, src_model_name, new_model_name):
        """Move the model src_model_name to the key new_model_name.

        Raises KeyError if src_model_name is not stored.
        """
        src_model = super(ModelFile, self).pop(src_model_name)
        src_model.name = new_model_name

        super(ModelFile, self).__setitem__(new_model_name, src_model)

    @staticmethod
    def _unique_in_order(inds):
        # Remove duplicates while keeping first-seen order, so that the
        # contents of the written files are reproducible
        return list(OrderedDict.fromkeys(inds))

    def update_inds(self, model = None):
        """Refresh self.inds.

        With a Model argument, its individuals are merged into the current
        list. Without one, the list is rebuilt from the stored models.

        Raises IOError if the given model has no individuals.
        """
        if self.confirm_model_instance(model):

            # Return error if inds is empty
            if not model.inds:
                raise IOError('No individuals found in %s.' % model.name)

            # Merge the new individuals into the current list
            self.inds = self._unique_in_order(self.inds + model.inds)
            return

        # Rebuild the list from the models in the file
        collected_inds = []
        for model_in_file in super(ModelFile, self).values():
            # __setitem__ tolerates non-Model values; skip them here
            if self.confirm_model_instance(model_in_file):
                collected_inds.extend(model_in_file.inds)

        # Store the individuals
        self.inds = self._unique_in_order(collected_inds)

    def _write_ind_file(self, base_name, inds, file_ext, file_path, overwrite):
        # Write one individual per line to base_name + file_ext (inside
        # file_path when given) and return the path that was written
        ind_filename = base_name + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Refuse to replace an existing file unless overwriting was requested
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(inds))

        return ind_filename

    def create_ind_file(self, file_ext = '', file_path = '', overwrite = False):
        """Write self.inds to 'unique_individuals' + file_ext.

        The path is stored in self.ind_file. Raises IOError if the file
        exists and overwrite is False.
        """
        self.ind_file = self._write_ind_file('unique_individuals', self.inds,
                                             file_ext, file_path, overwrite)

    def delete_ind_file(self):
        """Delete the file recorded in self.ind_file, if any."""
        if self.ind_file:
            os.remove(self.ind_file)

            # Remove the filename
            self.ind_file = ''

    def create_exclude_ind_file(self, inds_to_include = None, file_ext = '', file_path = '', overwrite = False):
        """Write the individuals of self.inds not in inds_to_include.

        The file is named 'exclude_individuals' + file_ext and its path is
        stored in self.exclude_file. Raises IOError if the file exists and
        overwrite is False.
        """
        included = set(inds_to_include) if inds_to_include else set()

        # Create exclude list by removing included individuals
        exclude_inds = [ind for ind in self.inds if ind not in included]

        self.exclude_file = self._write_ind_file('exclude_individuals', exclude_inds,
                                                 file_ext, file_path, overwrite)

    def delete_exclude_ind_file(self):
        """Delete the file recorded in self.exclude_file, if any."""
        if self.exclude_file:
            os.remove(self.exclude_file)

            # Remove the filename
            self.exclude_file = ''

    def to_json(self):
        """Return a list holding the to_json() result of each stored model."""
        return [model_data.to_json() for model_data in super(ModelFile, self).values()]
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
168
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
169
0
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
class Model(object):
    """A named model: a list of populations and the individuals in each.

    Attributes:
      name       -- model name
      tree       -- tree stored by assign_tree (kept as str, '' when unset)
      pop_list   -- population names (str) in assignment order
      ind_dict   -- population name -> list of individual names
      nind       -- population name -> number of individuals
      pop_files  -- paths written by create_pop_files
      ind_file   -- path written by create_ind_file ('' when none)
    """

    def __init__(self, name):
        self.name = name
        self.tree = ''
        self.pop_list = []
        self.ind_dict = defaultdict(list)
        self.nind = defaultdict(int)
        self.pop_files = []
        self.ind_file = ''

    @property
    def npop(self):
        """Number of populations assigned to the model."""
        return len(self.pop_list)

    @property
    def inds(self):
        """Flat list of the individuals of every population."""
        return list(itertools.chain.from_iterable(self.ind_dict.values()))

    def assign_tree(self, tree):
        """Store tree, converted to str."""
        self.tree = str(tree)

    def assign_pop(self, pop, inds = None):
        """Register population pop, optionally with its individuals.

        The population name and the individual names are stored as str.
        """
        # Use one spelling of the name for pop_list, ind_dict and nind
        pop = str(pop)

        # Re-assigning a population must not list it twice
        if pop not in self.pop_list:
            self.pop_list.append(pop)

        if inds:
            self.ind_dict[pop] = [str(ind) for ind in inds]

        self.nind[pop] = len(self.ind_dict[pop])

    @staticmethod
    def _sample_size_as_int(sample_size):
        # Convert sample_size to int, raising Exception when that fails
        try:
            return int(sample_size)
        except (TypeError, ValueError, OverflowError):
            # Raise error if sample_size not an int
            raise Exception('%s not int' % (sample_size,))

    def _check_sample_fits(self, pop, sample_size):
        # Raise Exception if sample_size exceeds the size of pop
        if sample_size > self.nind[pop]:
            raise Exception('%s is larger than %s' % (sample_size, pop))

    def _resample(self, pop, sample_size, with_replacements):
        # Use numpy choice to randomly sample the pop
        sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements)

        # tolist() converts the numpy scalars back to plain Python strings
        self.ind_dict[pop] = sampled_inds.tolist()

        # Keep the individual count in step with the sampled list
        self.nind[pop] = len(self.ind_dict[pop])

    def sample_pop(self, pop, sample_size, with_replacements = False):
        """Replace the individuals of pop with a random sample of them.

        Raises Exception if pop is not in the model, if sample_size cannot
        be converted to int, or if it exceeds the size of the population.
        """
        pop = str(pop)

        # Confirm the pop is in the model
        if pop not in self.pop_list:
            raise Exception('%s not found' % pop)

        sample_size = self._sample_size_as_int(sample_size)
        self._check_sample_fits(pop, sample_size)
        self._resample(pop, sample_size, with_replacements)

    def sample_pops(self, sample_size, with_replacements = False):
        """Replace the individuals of every population with a random sample.

        Raises Exception if sample_size cannot be converted to int or if it
        exceeds the size of any population; in that case nothing is sampled.
        """
        sample_size = self._sample_size_as_int(sample_size)

        # Check every pop first, so an error leaves all pops untouched
        for pop in self.pop_list:
            self._check_sample_fits(pop, sample_size)

        # Sample each pop, if no error raised
        for pop in self.pop_list:
            self._resample(pop, sample_size, with_replacements)

    def create_pop_files(self, file_ext = '', file_path = '', overwrite = False):
        """Write one file per population, one individual per line.

        Each file is named pop + file_ext (inside file_path when given) and
        its path is recorded in self.pop_files. Raises IOError if a file
        exists and overwrite is False.
        """
        for pop in self.pop_list:
            # Assign the filename for the population file
            pop_filename = pop + file_ext

            # If a path is assigned, create the file at the specified location
            if file_path:
                pop_filename = os.path.join(file_path, pop_filename)

            # Refuse to replace an existing file unless overwriting was requested
            if not overwrite and os.path.isfile(pop_filename):
                raise IOError('Population file exists.')

            # Create the population file
            with open(pop_filename, 'w') as pop_file:
                pop_file.write('%s\n' % '\n'.join(self.ind_dict[pop]))

            # Record each path once, so delete_pop_files removes it once
            if pop_filename not in self.pop_files:
                self.pop_files.append(pop_filename)

    def delete_pop_files(self):
        """Delete the files recorded in self.pop_files, if any."""
        for pop_file in self.pop_files:
            os.remove(pop_file)

        # Remove the filenames
        self.pop_files = []

    def create_ind_file(self, file_ext = '', file_path = '', overwrite = False):
        """Write the individuals of the model to 'individual.keep' + file_ext.

        The path is stored in self.ind_file. Raises IOError if the file
        exists and overwrite is False.
        """
        # Assign the filename for the individuals file
        ind_filename = 'individual.keep' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Refuse to replace an existing file unless overwriting was requested
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        # Create the individuals file
        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(self.inds))

        # Save the individuals filename
        self.ind_file = ind_filename

    def delete_ind_file(self):
        """Delete the file recorded in self.ind_file, if any."""
        if self.ind_file:
            os.remove(self.ind_file)

            # Remove the filename
            self.ind_file = ''

    def to_json(self):
        """Return the model as an OrderedDict ready for json serialisation.

        Layout: {'name': name, 'pops': {pop: {'inds': [individuals]}}}.
        """
        model_json = OrderedDict()
        model_json['name'] = self.name

        pop_json = OrderedDict()
        for pop in self.pop_list:
            pop_json[pop] = OrderedDict()
            # 'inds' is the key read_model_file looks up when parsing a file
            pop_json[pop]['inds'] = self.ind_dict[pop]

        model_json['pops'] = pop_json

        return model_json
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
341
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
def read_model_file(filename):
    """Read a JSON model file and return its models as a ModelFile.

    The file is parsed as a list of objects, each with a 'name' and a
    'pops' mapping of population name -> {'inds': [individual, ...]}.
    Names of models, populations and individuals are converted to str.

    Raises IOError if filename is not an existing file.
    """
    # Check that the file exists
    if not os.path.isfile(filename):
        raise IOError('%s not found' % filename)

    # Create ModelFile object
    models_to_return = ModelFile()

    # Python 2 and 3 spell universal-newline reading differently
    if sys.version_info[0] == 2:
        model_file = open(filename, 'rU')
    else:
        model_file = open(filename, 'r', newline=None)

    # Parse the model file using the json reader; close the handle even if
    # parsing fails. OrderedDict keeps the populations in file order.
    try:
        models_dict = json.load(model_file, object_pairs_hook=OrderedDict)
    finally:
        model_file.close()

    # Loop the parsed models
    for model_dict in models_dict:

        # Create the model
        model = Model(str(model_dict['name']))

        # Loop the populations in the model
        for pop, pop_dict in model_dict['pops'].items():

            # 'inds' is the expected key; 'indv' is also accepted because
            # model files serialised by Model.to_json may carry that spelling
            if 'indv' in pop_dict and 'inds' not in pop_dict:
                ind_key = 'indv'
            else:
                ind_key = 'inds'

            # Convert all individuals names to str
            ind_list = [str(pop_ind) for pop_ind in pop_dict[ind_key]]

            # Assign the population and its individuals to the model
            model.assign_pop(str(pop), ind_list)

        # Save the model
        models_to_return[str(model.name)] = model

    logging.info('Finished reading model file (%s)', filename)

    # Return the models
    return models_to_return
2
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
393
54c84f7dcb2c Uploaded
jaredgk
parents: 0
diff changeset
def write_model_file(model_file, filename, overwrite = False):
    """Write model_file.to_json() to filename as indented JSON.

    Raises Exception if filename exists and overwrite is False.
    """
    # Refuse to replace an existing path unless overwriting was requested
    if not overwrite and os.path.exists(filename):
        raise Exception('%s already exists' % filename)

    # Serialise before opening the output: a serialisation failure must not
    # leave behind an empty file or truncate the one being overwritten
    json_text = json.dumps(model_file.to_json(), indent = 4)

    # Write the json-formatted data to the output file
    with open(filename, 'w') as output_file:
        output_file.write(json_text)

    logging.info('Finished writing model file (%s)', filename)