Mercurial > repos > jaredgk > ppp_vcfphase
comparison model.py @ 0:3830d29fca6a draft
Uploaded
author | jaredgk |
---|---|
date | Mon, 15 Oct 2018 18:15:47 -0400 |
parents | |
children | 54c84f7dcb2c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3830d29fca6a |
---|---|
1 import os | |
2 import sys | |
3 import json | |
4 import subprocess | |
5 import argparse | |
6 import logging | |
7 import itertools | |
8 | |
9 from collections import defaultdict | |
10 | |
11 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. | |
12 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | |
13 | |
14 from logging_module import initLogger | |
15 | |
class ModelFile(dict):
    """Dictionary of models that also tracks the individuals they contain.

    Besides the normal ``dict`` behaviour, the object stores the list of
    individuals assigned to it and can write that list (or the subset that
    is *not* in a given include list) to one-individual-per-line text files.
    """

    def __init__(self, *arg, **kw):
        super(ModelFile, self).__init__(*arg, **kw)
        # Individuals assigned via assign_inds, stored as strings
        self.inds = []
        # Path written by create_ind_file ('' when no file exists)
        self.ind_file = ''
        # Path written by create_exclude_ind_file ('' when no file exists)
        self.exclude_file = ''

    def assign_inds(self, inds=None):
        """Store the given individuals as strings.

        Raises:
            IOError: if ``inds`` is empty or not given.
        """
        # Return error if inds is empty
        if not inds:
            raise IOError('No individuals found in the model file.')
        # Store the individuals
        self.inds = [str(ind) for ind in inds]

    @staticmethod
    def _write_ind_list(basename, inds, file_ext, file_path, overwrite):
        """Write ``inds`` one per line and return the filename used."""
        ind_filename = basename + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Refuse to clobber an existing file unless overwriting was requested
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        # The context manager closes the handle even if the write fails
        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(inds))

        return ind_filename

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write every stored individual to ``unique_individuals<file_ext>``.

        The file is created in ``file_path`` when given, otherwise in the
        current directory; its name is saved in ``self.ind_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        self.ind_file = self._write_ind_list('unique_individuals', self.inds,
                                             file_ext, file_path, overwrite)

    def delete_ind_file(self):
        """Delete the file written by create_ind_file, if there is one."""
        if self.ind_file:
            os.remove(self.ind_file)
            # Remove the filename
            self.ind_file = ''

    def create_exclude_ind_file(self, inds_to_include=None, file_ext='',
                                file_path='', overwrite=False):
        """Write the stored individuals that are not in ``inds_to_include``.

        The output is ``exclude_individuals<file_ext>`` (inside ``file_path``
        when given); its name is saved in ``self.exclude_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        # Create exclude list by removing included individuals; sorted so the
        # file content does not depend on set iteration order
        exclude_inds = sorted(set(self.inds) - set(inds_to_include or []))

        self.exclude_file = self._write_ind_list('exclude_individuals',
                                                 exclude_inds, file_ext,
                                                 file_path, overwrite)

    def delete_exclude_ind_file(self):
        """Delete the file written by create_exclude_ind_file, if any.

        This method was originally a second definition of delete_ind_file,
        which shadowed the first and made the individuals file undeletable.
        """
        if self.exclude_file:
            os.remove(self.exclude_file)
            # Remove the filename
            self.exclude_file = ''
96 | |
class Model(object):
    """A named model made of populations and the individuals in each.

    Attributes:
        name: the model name given at construction.
        tree: string set by assign_tree ('' until assigned).
        npop: number of assign_pop calls made.
        pop_list: population names (as strings) in assignment order.
        nind: population name -> number of individuals.
        ind_dict: population name -> list of individuals (as strings).
        pop_files: filenames written by create_pop_files.
        ind_file: filename written by create_ind_file ('' when none).
    """

    def __init__(self, name):
        self.name = name
        self.tree = ''
        self.npop = 0
        self.pop_list = []
        self.nind = defaultdict(int)
        self.ind_dict = defaultdict(list)
        self.pop_files = []
        self.ind_file = ''

    @property
    def inds(self):
        """All individuals of every population, flattened into one list."""
        return list(itertools.chain.from_iterable(self.ind_dict.values()))

    def assign_tree(self, tree):
        """Store ``tree`` as a string."""
        self.tree = str(tree)

    def assign_pop(self, pop, inds=None):
        """Register population ``pop`` and, if given, its individuals."""
        # Use the string form everywhere so that pop_list, nind and ind_dict
        # agree on the key even when ``pop`` is not already a string
        pop_name = str(pop)
        self.npop += 1
        self.pop_list.append(pop_name)
        if inds:
            self.nind[pop_name] = len(inds)
            self.ind_dict[pop_name] = [str(ind) for ind in inds]

    @staticmethod
    def _write_lines(filename, lines, overwrite, exists_message):
        """Write ``lines`` one per line to ``filename``."""
        # Refuse to clobber an existing file unless overwriting was requested
        if not overwrite and os.path.isfile(filename):
            raise IOError(exists_message)

        # The context manager closes the handle even if the write fails
        with open(filename, 'w') as out_file:
            out_file.write('%s\n' % '\n'.join(lines))

    def create_pop_files(self, file_ext='', file_path='', overwrite=False):
        """Write one ``<pop><file_ext>`` file per population.

        Each file lists that population's individuals, one per line, and is
        placed in ``file_path`` when given. Created filenames are recorded in
        ``self.pop_files``.

        Raises:
            IOError: if a file exists and ``overwrite`` is false.
        """
        for pop in self.pop_list:
            # Assign the filename for the population file
            pop_filename = pop + file_ext

            # If a path is assigned, create the file at the specified location
            if file_path:
                pop_filename = os.path.join(file_path, pop_filename)

            self._write_lines(pop_filename, self.ind_dict[pop], overwrite,
                              'Population file exists.')

            # Record each filename once, so that repeated calls do not make
            # delete_pop_files try to remove the same file twice
            if pop_filename not in self.pop_files:
                self.pop_files.append(pop_filename)

    def delete_pop_files(self):
        """Delete every file recorded in ``self.pop_files``."""
        for pop_file in self.pop_files:
            os.remove(pop_file)

        # Remove the filenames
        self.pop_files = []

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write all individuals to ``individual.keep<file_ext>``.

        The file is placed in ``file_path`` when given; its name is saved in
        ``self.ind_file``.

        Raises:
            IOError: if the file exists and ``overwrite`` is false.
        """
        ind_filename = 'individual.keep' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        self._write_lines(ind_filename, self.inds, overwrite,
                          'Individuals file exists.')

        # Save the individuals filename
        self.ind_file = ind_filename

    def delete_ind_file(self):
        """Delete the file written by create_ind_file, if there is one."""
        if self.ind_file:
            os.remove(self.ind_file)
            # Remove the filename
            self.ind_file = ''
188 | |
def read_model_file(model_filename):
    """Parse a JSON model file into a ModelFile of Model objects.

    The JSON document is iterated as a sequence of model entries; each entry
    is read through its ``'name'`` key and its ``'pops'`` mapping, whose
    values each provide an ``'inds'`` list.

    Args:
        model_filename: path of the JSON model file.

    Returns:
        A ModelFile keyed by model name, with the unique individuals of all
        models stored through ``assign_inds``.

    Raises:
        IOError: if the file does not exist, or (from ``assign_inds``) if no
            individuals were found.
    """
    # Check that the file exists
    if not os.path.isfile(model_filename):
        raise IOError('Model file not found: %s' % model_filename)

    # Create ModelFile object
    models_to_return = ModelFile()

    # Python 2 needs 'rU' for universal newlines; on Python 3 this is already
    # the default behaviour of text mode
    open_mode = 'rU' if sys.version_info[0] == 2 else 'r'

    # Parse the model file using the json reader, closing it when done
    with open(model_filename, open_mode) as model_file:
        models_dict = json.load(model_file)

    # Unique individuals (i.e. individuals in all models)
    unique_individuals = set()

    # Loop the parsed models
    for model_dict in models_dict:

        # Create the model
        model = Model(model_dict['name'])

        # Loop the populations in the model
        for pop, pop_dict in model_dict['pops'].items():

            # Assign the population and its individuals to the model
            model.assign_pop(pop, pop_dict['inds'])
            # Add the individuals to the unique individual set
            unique_individuals.update(pop_dict['inds'])

        # Save the model
        models_to_return[str(model.name)] = model

    # Store the unique individuals within the ModelFile object
    models_to_return.assign_inds(list(unique_individuals))

    # Return the models
    return models_to_return