Previous changeset 1:15245deda141 (2018-10-16) Next changeset 3:d1e3db7f6521 (2018-10-17) |
Commit message:
Uploaded |
modified:
beagle.py model.py |
b |
diff -r 15245deda141 -r 54c84f7dcb2c beagle.py --- a/beagle.py Tue Oct 16 18:03:51 2018 -0400 +++ b/beagle.py Wed Oct 17 17:20:47 2018 -0400 |
b |
@@ -144,7 +144,7 @@ output_format : str Output file format ''' - print beagle_call_args + print (beagle_call_args) # Standard call to beagle standard_beagle_call(beagle_path, beagle_call_args, output_prefix) |
b |
diff -r 15245deda141 -r 54c84f7dcb2c model.py --- a/model.py Tue Oct 16 18:03:51 2018 -0400 +++ b/model.py Wed Oct 17 17:20:47 2018 -0400 |
[ |
b"@@ -5,8 +5,11 @@\n import argparse\n import logging\n import itertools\n+import copy\n \n-from collections import defaultdict\n+import numpy as np\n+\n+from collections import defaultdict, OrderedDict\n \n # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes.\n sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))\n@@ -20,12 +23,72 @@\n self.ind_file = ''\n self.exclude_file = ''\n \n- def assign_inds (self, inds = []):\n- # Return error if inds is empty\n- if not inds:\n- raise IOError('No individuals found in the model file.')\n+ if arg and self.confirm_model_instance(arg[1]):\n+ self.update_inds(arg[1])\n+\n+ def __setitem__(self, *arg, **kw):\n+ super(ModelFile, self).__setitem__(*arg, **kw)\n+\n+ if arg and self.confirm_model_instance(arg[1]):\n+ self.update_inds(model = arg[1])\n+\n+ def __delitem__(self, key):\n+ super(ModelFile, self).__delitem__(key)\n+ self.update_inds()\n+\n+ def confirm_model_instance (self, unknown):\n+\n+ if isinstance(unknown, Model):\n+\n+ return True\n+\n+ else:\n+\n+ return False\n+\n+ def copy_model (self, src_model_name, new_model_name):\n+\n+ src_model = super(ModelFile, self).__getitem__(src_model_name)\n+\n+ src_model_copy = copy.deepcopy(src_model)\n+\n+ src_model_copy.name = new_model_name\n+\n+ super(ModelFile, self).__setitem__(new_model_name, src_model_copy)\n+\n+ def rename_model (self, src_model_name, new_model_name):\n+\n+ src_model = super(ModelFile, self).pop(src_model_name)\n+\n+ src_model.name = new_model_name\n+\n+ super(ModelFile, self).__setitem__(new_model_name, src_model)\n+\n+ def update_inds (self, model = None):\n+\n+ if self.confirm_model_instance(model):\n+\n+ # Return error if inds is empty\n+ if not model.inds:\n+ raise IOError('No individuals found in %s.' % model.name)\n+\n+ # Create a list of the unique individuals\n+ unique_inds = list(set(self.inds + model.inds))\n+\n+ else:\n+\n+ # Create an empty list for the unique individuals\n+ unique_inds = []\n+\n+ # Loop the models in the file\n+ for model_in_file in super(ModelFile, self).values():\n+\n+ # Create a list of the unique individuals\n+ unique_inds = list(set(unique_inds + model_in_file.inds))\n+\n+\n # Store the individuals\n- self.inds = [str(ind) for ind in inds]\n+ self.inds = unique_inds\n \n def create_ind_file (self, file_ext = '', file_path = '', overwrite = False):\n # Assign the filename for the population file\n@@ -84,7 +147,7 @@\n # Save the individuals filename\n self.exclude_file = ind_filename\n \n- def delete_ind_file (self):\n+ def delete_exclude_ind_file (self):\n # Check if an individuals file was created\n if self.exclude_file:\n \n@@ -94,30 +157,102 @@\n # Remove the filename\n self.exclude_file = ''\n \n+ def to_json (self):\n+\n+ model_file_json = []\n+\n+ for model_name, model_data in super(ModelFile, self).items():\n+ model_file_json.append(model_data.to_json())\n+\n+ return model_file_json\n+\n+\n class Model:\n def __init__ (self, name):\n self.name = name\n self.tree = ''\n- self.npop = 0\n self.pop_list = []\n+ self.ind_dict = defaultdict(list)\n self.nind = defaultdict(int)\n- self.ind_dict = defaultdict(list)\n self.pop_files = []\n self.ind_file = ''\n \n @property\n- def inds(self):\n+ def npop (self):\n+ return len(self.pop_list)\n+\n+ @property\n+ def inds (self):\n return list(itertools.chain.from_iterable(self.ind_dict.values()))\n \n def assign_tree (self, tree):\n self.tree = str(tree)\n \n def assign_pop (self, pop, inds = []):\n- "..b"int\n+ try:\n+\n+ sample_size = int(sample_size)\n+\n+ except:\n+\n+ # Raise error if sample_size not an int\n+ raise Exception('%s not int' % sample_size)\n+\n+ # Loop each pop in the pop list\n+ for pop in self.pop_list:\n+\n+ # Check if the sample size is larger than the pop\n+ if int(sample_size) > self.nind[pop]:\n+\n+ # Raise error if sample_size is larger\n+ raise Exception('%s is larger than %s' % (sample_size, pop))\n+\n+ # Loop each pop in the pop list, if no error raised\n+ for pop in self.pop_list:\n+\n+ # Use numpy choice to randomly sample the pop\n+ sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements)\n+\n+ # Save the sampled inds as a list\n+ self.ind_dict[pop] = list(sampled_inds)\n \n def create_pop_files (self, file_ext = '', file_path = '', overwrite = False):\n for pop in self.pop_list:\n@@ -186,10 +321,28 @@\n # Remove the filename\n self.ind_file = ''\n \n-def read_model_file (model_filename):\n+ def to_json (self):\n+\n+ model_json = OrderedDict()\n+\n+ model_json['name'] = self.name\n+\n+ pop_json = OrderedDict()\n+\n+ for pop in self.pop_list:\n+\n+ pop_json[pop] = OrderedDict()\n+\n+ pop_json[pop]['indv'] = self.ind_dict[pop]\n+\n+ model_json['pops'] = pop_json\n+\n+ return model_json\n+\n+def read_model_file (filename):\n \n # Check that the file exists\n- if not os.path.isfile(model_filename):\n+ if not os.path.isfile(filename):\n raise IOError\n \n # Create ModelFile object\n@@ -198,10 +351,10 @@\n # Check if using python 2 or 3\n if sys.version_info[0] == 2:\n # Open the model file in python 2\n- model_file = open(model_filename, 'rU')\n+ model_file = open(filename, 'rU')\n else:\n # Open the model file in python 3\n- model_file = open(model_filename, 'r', newline=None)\n+ model_file = open(filename, 'r', newline=None)\n \n # Parse the model file using the json reader\n models_dict = json.load(model_file)\n@@ -213,15 +366,19 @@\n for model_dict in models_dict:\n \n # Create the model\n- model = Model(model_dict['name'])\n+ model = Model(str(model_dict['name']))\n \n # Loop the populations in the model\n for pop, pop_dict in model_dict['pops'].items():\n \n+ # Convert all individuals names to str\n+ ind_list = [str(pop_ind) for pop_ind in pop_dict['inds']]\n+\n # Assign the population ans it's individuals to the model\n- model.assign_pop(pop, pop_dict['inds'])\n+ model.assign_pop(str(pop), ind_list)\n+\n # Assign the individuals to the unique individual list\n- individual_list.extend(pop_dict['inds'])\n+ individual_list.extend(ind_list)\n \n # Remove duplicates from the unique individual list\n individual_list = list(set(individual_list))\n@@ -229,8 +386,27 @@\n # Save the model\n models_to_return[str(model.name)] = model\n \n- # Store the unique individuals within the ModelFile object\n- models_to_return.assign_inds(individual_list)\n+ logging.info('Finished reading model file (%s)' % filename)\n \n # Return the models\n return models_to_return\n+\n+def write_model_file (model_file, filename, overwrite = False):\n+\n+ # Check if the file is to be overwritten\n+ if not overwrite:\n+\n+ # Check if the file exists\n+ if os.path.exists(filename):\n+ raise Exception('%s already exists' % filename)\n+\n+ # Open the output file\n+ output_file = open(filename, 'w')\n+\n+ # Write the json-formmated data to the output file\n+ output_file.write(json.dumps(model_file.to_json(), indent = 4))\n+\n+ # Close the output file\n+ output_file.close()\n+\n+ logging.info('Finished writing model file (%s)' % filename)\n" |