Mercurial > repos > jaredgk > ppp_vcfphase
changeset 2:54c84f7dcb2c draft
Uploaded
author | jaredgk |
---|---|
date | Wed, 17 Oct 2018 17:20:47 -0400 |
parents | 15245deda141 |
children | d1e3db7f6521 |
files | beagle.py model.py |
diffstat | 2 files changed, 198 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/beagle.py Tue Oct 16 18:03:51 2018 -0400 +++ b/beagle.py Wed Oct 17 17:20:47 2018 -0400 @@ -144,7 +144,7 @@ output_format : str Output file format ''' - print beagle_call_args + print (beagle_call_args) # Standard call to beagle standard_beagle_call(beagle_path, beagle_call_args, output_prefix)
--- a/model.py Tue Oct 16 18:03:51 2018 -0400 +++ b/model.py Wed Oct 17 17:20:47 2018 -0400 @@ -5,8 +5,11 @@ import argparse import logging import itertools +import copy -from collections import defaultdict +import numpy as np + +from collections import defaultdict, OrderedDict # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) @@ -20,12 +23,72 @@ self.ind_file = '' self.exclude_file = '' - def assign_inds (self, inds = []): - # Return error if inds is empty - if not inds: - raise IOError('No individuals found in the model file.') + if arg and self.confirm_model_instance(arg[1]): + self.update_inds(arg[1]) + + def __setitem__(self, *arg, **kw): + super(ModelFile, self).__setitem__(*arg, **kw) + + if arg and self.confirm_model_instance(arg[1]): + self.update_inds(model = arg[1]) + + def __delitem__(self, key): + super(ModelFile, self).__delitem__(key) + self.update_inds() + + def confirm_model_instance (self, unknown): + + if isinstance(unknown, Model): + + return True + + else: + + return False + + def copy_model (self, src_model_name, new_model_name): + + src_model = super(ModelFile, self).__getitem__(src_model_name) + + src_model_copy = copy.deepcopy(src_model) + + src_model_copy.name = new_model_name + + super(ModelFile, self).__setitem__(new_model_name, src_model_copy) + + def rename_model (self, src_model_name, new_model_name): + + src_model = super(ModelFile, self).pop(src_model_name) + + src_model.name = new_model_name + + super(ModelFile, self).__setitem__(new_model_name, src_model) + + def update_inds (self, model = None): + + if self.confirm_model_instance(model): + + # Return error if inds is empty + if not model.inds: + raise IOError('No individuals found in %s.' % model.name) + + # Create a list of the unique individuals + unique_inds = list(set(self.inds + model.inds)) + + else: + + # Create an empty list for the unique individuals + unique_inds = [] + + # Loop the models in the file + for model_in_file in super(ModelFile, self).values(): + + # Create a list of the unique individuals + unique_inds = list(set(unique_inds + model_in_file.inds)) + + # Store the individuals - self.inds = [str(ind) for ind in inds] + self.inds = unique_inds def create_ind_file (self, file_ext = '', file_path = '', overwrite = False): # Assign the filename for the population file @@ -84,7 +147,7 @@ # Save the individuals filename self.exclude_file = ind_filename - def delete_ind_file (self): + def delete_exclude_ind_file (self): # Check if an individuals file was created if self.exclude_file: @@ -94,30 +157,102 @@ # Remove the filename self.exclude_file = '' + def to_json (self): + + model_file_json = [] + + for model_name, model_data in super(ModelFile, self).items(): + model_file_json.append(model_data.to_json()) + + return model_file_json + + class Model: def __init__ (self, name): self.name = name self.tree = '' - self.npop = 0 self.pop_list = [] + self.ind_dict = defaultdict(list) self.nind = defaultdict(int) - self.ind_dict = defaultdict(list) self.pop_files = [] self.ind_file = '' @property - def inds(self): + def npop (self): + return len(self.pop_list) + + @property + def inds (self): return list(itertools.chain.from_iterable(self.ind_dict.values())) def assign_tree (self, tree): self.tree = str(tree) def assign_pop (self, pop, inds = []): - self.npop += 1 self.pop_list.append(str(pop)) if inds: - self.nind[pop] = len(inds) self.ind_dict[pop] = [str(ind) for ind in inds] + self.nind[pop] = len(self.ind_dict[pop]) + + def sample_pop (self, pop, sample_size, with_replacements = False): + + # Confirm the pop is in the model + if str(pop) not in self.pop_list: + + # Raise error if pop not found + raise Exception('%s not found' % pop) + + # Confirm the sample size is an int + try: + + sample_size = int(sample_size) + + except: + + # Raise error if sample_size not an int + raise Exception('%s not int' % sample_size) + + # Check if the sample size is larger than the pop + if int(sample_size) > self.nind[pop]: + + # Raise error if sample_size is larger + raise Exception('%s is larger than %s' % (sample_size, pop)) + + # Use numpy choice to randomly sample the pop + sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements) + + # Save the sampled inds as a list + self.ind_dict[pop] = list(sampled_inds) + + def sample_pops (self, sample_size, with_replacements = False): + + # Confirm the sample size is an int + try: + + sample_size = int(sample_size) + + except: + + # Raise error if sample_size not an int + raise Exception('%s not int' % sample_size) + + # Loop each pop in the pop list + for pop in self.pop_list: + + # Check if the sample size is larger than the pop + if int(sample_size) > self.nind[pop]: + + # Raise error if sample_size is larger + raise Exception('%s is larger than %s' % (sample_size, pop)) + + # Loop each pop in the pop list, if no error raised + for pop in self.pop_list: + + # Use numpy choice to randomly sample the pop + sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements) + + # Save the sampled inds as a list + self.ind_dict[pop] = list(sampled_inds) def create_pop_files (self, file_ext = '', file_path = '', overwrite = False): for pop in self.pop_list: @@ -186,10 +321,28 @@ # Remove the filename self.ind_file = '' -def read_model_file (model_filename): + def to_json (self): + + model_json = OrderedDict() + + model_json['name'] = self.name + + pop_json = OrderedDict() + + for pop in self.pop_list: + + pop_json[pop] = OrderedDict() + + pop_json[pop]['indv'] = self.ind_dict[pop] + + model_json['pops'] = pop_json + + return model_json + +def read_model_file (filename): # Check that the file exists - if not os.path.isfile(model_filename): + if not os.path.isfile(filename): raise IOError # Create ModelFile object @@ -198,10 +351,10 @@ # Check if using python 2 or 3 if sys.version_info[0] == 2: # Open the model file in python 2 - model_file = open(model_filename, 'rU') + model_file = open(filename, 'rU') else: # Open the model file in python 3 - model_file = open(model_filename, 'r', newline=None) + model_file = open(filename, 'r', newline=None) # Parse the model file using the json reader models_dict = json.load(model_file) @@ -213,15 +366,19 @@ for model_dict in models_dict: # Create the model - model = Model(model_dict['name']) + model = Model(str(model_dict['name'])) # Loop the populations in the model for pop, pop_dict in model_dict['pops'].items(): + # Convert all individuals names to str + ind_list = [str(pop_ind) for pop_ind in pop_dict['inds']] + # Assign the population ans it's individuals to the model - model.assign_pop(pop, pop_dict['inds']) + model.assign_pop(str(pop), ind_list) + # Assign the individuals to the unique individual list - individual_list.extend(pop_dict['inds']) + individual_list.extend(ind_list) # Remove duplicates from the unique individual list individual_list = list(set(individual_list)) @@ -229,8 +386,27 @@ # Save the model models_to_return[str(model.name)] = model - # Store the unique individuals within the ModelFile object - models_to_return.assign_inds(individual_list) + logging.info('Finished reading model file (%s)' % filename) # Return the models return models_to_return + +def write_model_file (model_file, filename, overwrite = False): + + # Check if the file is to be overwritten + if not overwrite: + + # Check if the file exists + if os.path.exists(filename): + raise Exception('%s already exists' % filename) + + # Open the output file + output_file = open(filename, 'w') + + # Write the json-formmated data to the output file + output_file.write(json.dumps(model_file.to_json(), indent = 4)) + + # Close the output file + output_file.close() + + logging.info('Finished writing model file (%s)' % filename)