annotate model.py @ 0:3830d29fca6a draft

Uploaded
author jaredgk
date Mon, 15 Oct 2018 18:15:47 -0400
parents
children 54c84f7dcb2c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
1 import os
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
2 import sys
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
3 import json
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
4 import subprocess
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
5 import argparse
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
6 import logging
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
7 import itertools
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
8
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
9 from collections import defaultdict
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
10
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
11 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
12 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
13
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
14 from logging_module import initLogger
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
15
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
class ModelFile(dict):
    """Dictionary of models plus the individuals found across all of them.

    ``read_model_file`` fills this mapping with ``Model`` objects keyed by
    their name. In addition to the ``dict`` contents, the object keeps the
    individuals passed to ``assign_inds`` and remembers the paths of the
    helper files it has written so that they can be removed again.
    """

    def __init__(self, *arg, **kw):
        super(ModelFile, self).__init__(*arg, **kw)
        # Individuals stored by assign_inds()
        self.inds = []
        # Path written by create_ind_file(); '' when no file exists
        self.ind_file = ''
        # Path written by create_exclude_ind_file(); '' when no file exists
        self.exclude_file = ''

    def assign_inds(self, inds=None):
        """Store ``inds`` (each converted to ``str``) as the individuals.

        Raises:
            IOError: if ``inds`` is empty or not given.
        """
        # An empty list means the model file defined no individuals
        if not inds:
            raise IOError('No individuals found in the model file.')
        # Store the individuals
        self.inds = [str(ind) for ind in inds]

    def _write_ind_file(self, basename, inds, file_ext, file_path, overwrite):
        # Shared writer: build the path, refuse to clobber an existing file
        # unless overwrite is set, then write one individual per line.
        ind_filename = basename + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Only check for a previous file when it must not be overwritten
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        # The context manager closes the handle even if the write fails
        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(inds))

        return ind_filename

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write the stored individuals, one per line, to a file.

        The file is named ``'unique_individuals' + file_ext`` and is placed
        in ``file_path`` when one is given. Its path is saved in
        ``self.ind_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        self.ind_file = self._write_ind_file('unique_individuals', self.inds,
                                             file_ext, file_path, overwrite)

    def delete_ind_file(self):
        """Delete the file written by ``create_ind_file``, if there is one."""
        # Check if an individuals file was created
        if self.ind_file:
            # Delete the individuals file
            os.remove(self.ind_file)
            # Remove the filename
            self.ind_file = ''

    def create_exclude_ind_file(self, inds_to_include=None, file_ext='',
                                file_path='', overwrite=False):
        """Write the stored individuals that are NOT in ``inds_to_include``.

        The file is named ``'exclude_individuals' + file_ext`` and is placed
        in ``file_path`` when one is given. Its path is saved in
        ``self.exclude_file``. Individuals are written in sorted order so
        the output is reproducible.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        # Create exclude list by removing included individuals
        exclude_inds = sorted(set(self.inds) - set(inds_to_include or ()))

        self.exclude_file = self._write_ind_file('exclude_individuals',
                                                 exclude_inds, file_ext,
                                                 file_path, overwrite)

    def delete_exclude_ind_file(self):
        """Delete the file written by ``create_exclude_ind_file``, if any.

        This method was previously a second definition of
        ``delete_ind_file``, which replaced the first one and made the
        individuals file impossible to delete.
        """
        # Check if an exclude file was created
        if self.exclude_file:
            # Delete the exclude file
            os.remove(self.exclude_file)
            # Remove the filename
            self.exclude_file = ''
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
96
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
class Model(object):
    """A named model: a set of populations and the individuals in each.

    Attributes are filled by ``assign_tree`` and ``assign_pop``. The
    ``create_*`` methods write helper files and remember their paths so the
    matching ``delete_*`` methods can remove them.
    """

    def __init__(self, name):
        self.name = name
        self.tree = ''
        # Number of assign_pop() calls
        self.npop = 0
        # Population names (as str), in assignment order
        self.pop_list = []
        # Population name -> number of individuals
        self.nind = defaultdict(int)
        # Population name -> list of individuals (as str)
        self.ind_dict = defaultdict(list)
        # Paths written by create_pop_files()
        self.pop_files = []
        # Path written by create_ind_file(); '' when no file exists
        self.ind_file = ''

    @property
    def inds(self):
        """Return every individual of every population as one flat list."""
        return list(itertools.chain.from_iterable(self.ind_dict.values()))

    def assign_tree(self, tree):
        """Store ``tree`` converted to ``str``."""
        self.tree = str(tree)

    def assign_pop(self, pop, inds=None):
        """Register population ``pop`` and, if given, its individuals.

        The population name is converted to ``str`` once and that same key
        is used for ``pop_list``, ``nind`` and ``ind_dict``, so later lookups
        driven by ``pop_list`` always find the stored individuals.
        """
        pop = str(pop)
        self.npop += 1
        self.pop_list.append(pop)
        if inds:
            self.nind[pop] = len(inds)
            self.ind_dict[pop] = [str(ind) for ind in inds]

    def create_pop_files(self, file_ext='', file_path='', overwrite=False):
        """Write one file per population listing its individuals.

        Each file is named ``pop + file_ext`` and is placed in ``file_path``
        when one is given. The paths are collected in ``self.pop_files``.

        Raises:
            IOError: if a file exists and ``overwrite`` is false.
        """
        for pop in self.pop_list:
            # Assign the filename for the population file
            pop_filename = pop + file_ext

            # If a path is assigned, create the file at the specified location
            if file_path:
                pop_filename = os.path.join(file_path, pop_filename)

            # Only check for a previous file when it must not be overwritten
            if not overwrite and os.path.isfile(pop_filename):
                raise IOError('Population file exists.')

            # The context manager closes the handle even if the write fails
            with open(pop_filename, 'w') as pop_file:
                pop_file.write('%s\n' % '\n'.join(self.ind_dict[pop]))

            # Save each path once; a duplicate entry would make
            # delete_pop_files() try to remove the same file twice
            if pop_filename not in self.pop_files:
                self.pop_files.append(pop_filename)

    def delete_pop_files(self):
        """Delete every file recorded by ``create_pop_files``."""
        # Loop the created pop files (no-op when none were created)
        for pop_file in self.pop_files:
            os.remove(pop_file)

        # Remove the filenames
        self.pop_files = []

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write all individuals of the model, one per line, to a file.

        The file is named ``'individual.keep' + file_ext`` and is placed in
        ``file_path`` when one is given. Its path is saved in
        ``self.ind_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        # Assign the filename for the individuals file
        ind_filename = 'individual.keep' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Only check for a previous file when it must not be overwritten
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        # The context manager closes the handle even if the write fails
        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(self.inds))

        # Save the individuals filename
        self.ind_file = ind_filename

    def delete_ind_file(self):
        """Delete the file written by ``create_ind_file``, if there is one."""
        # Check if an individuals file was created
        if self.ind_file:
            # Delete the individuals file
            os.remove(self.ind_file)
            # Remove the filename
            self.ind_file = ''
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
188
3830d29fca6a Uploaded
jaredgk
parents:
diff changeset
def read_model_file(model_filename):
    """Parse a JSON model file into a ``ModelFile`` of ``Model`` objects.

    The JSON document is iterated as a sequence of model entries. Each entry
    is read through the keys ``'name'`` and ``'pops'``; ``'pops'`` maps a
    population name to an entry whose ``'inds'`` key lists its individuals.

    Args:
        model_filename: path of the JSON model file.

    Returns:
        A ``ModelFile`` mapping ``str(model name)`` to its ``Model``. Its
        ``inds`` attribute holds every individual seen in any model, each
        listed once, in order of first appearance.

    Raises:
        IOError: if ``model_filename`` is not an existing file, or if the
            models contain no individuals (raised by ``assign_inds``).
    """
    # Check that the file exists
    if not os.path.isfile(model_filename):
        raise IOError('Model file not found: %s' % model_filename)

    # Create ModelFile object
    models_to_return = ModelFile()

    # Universal-newline reading is requested differently in python 2 and 3
    if sys.version_info[0] == 2:
        model_file = open(model_filename, 'rU')
    else:
        model_file = open(model_filename, 'r', newline=None)

    # Parse the model file using the json reader; the context manager
    # closes the handle even when the JSON is malformed
    with model_file:
        models_dict = json.load(model_file)

    # Unique individuals (i.e. individuals in all models), in first-seen order
    individual_list = []
    seen_inds = set()

    # Loop the parsed models
    for model_dict in models_dict:

        # Create the model
        model = Model(model_dict['name'])

        # Loop the populations in the model
        for pop, pop_dict in model_dict['pops'].items():

            # Assign the population and its individuals to the model
            model.assign_pop(pop, pop_dict['inds'])

            # Add the individuals not seen before to the unique list.
            # Compare on the str form, which is what gets stored.
            for ind in pop_dict['inds']:
                ind = str(ind)
                if ind not in seen_inds:
                    seen_inds.add(ind)
                    individual_list.append(ind)

        # Save the model
        models_to_return[str(model.name)] = model

    # Store the unique individuals within the ModelFile object
    models_to_return.assign_inds(individual_list)

    # Return the models
    return models_to_return