annotate Population.py @ 21:d6b961721037

Miller Lab Devshed version 4c04e35b18f6
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 05 Nov 2012 12:44:17 -0500
parents 2c498d40ecde
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 from OrderedDict import OrderedDict
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
class Individual(object):
    """A single individual identified by a column and a name.

    An optional alias may be attached; when present it is what ``name``
    reports, while ``real_name`` always reports the original name.
    Equality and hashing consider only the column and the real name, so
    the alias never affects identity.
    """

    __slots__ = ['_column', '_name', '_alias']

    def __init__(self, column, name, alias=None):
        self._column = column
        self._name = name
        self._alias = alias

    @property
    def column(self):
        """Column this individual was created with."""
        return self._column

    @property
    def name(self):
        """Display name: the alias when one is set, otherwise the real name."""
        return self._name if self._alias is None else self._alias

    @property
    def alias(self):
        """Alias of this individual, or None when no alias is set."""
        return self._alias

    @alias.setter
    def alias(self, alias):
        self._alias = alias

    @property
    def real_name(self):
        """Name given at construction time, ignoring any alias."""
        return self._name

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on a missing '_column'.
        if not isinstance(other, Individual):
            return NotImplemented
        return self._column == other._column and self._name == other._name

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Must agree with __eq__: equal individuals hash alike.  Column and
        # real name have no setters, so the hash is stable.
        return hash((self._column, self._name))

    def __repr__(self):
        return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
41
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
42
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
class Population(object):
    """An insertion-ordered collection of individuals keyed by column.

    Individuals are stored in an ``OrderedDict`` that maps each
    individual's ``column`` to the individual object itself.
    """

    def __init__(self, name=None):
        self._columns = OrderedDict()
        self._name = name

    @property
    def name(self):
        """Name of this population (None unless one was supplied)."""
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    def add_individual(self, individual, alias=None):
        """Store ``individual`` under its column.

        Re-adding an individual equal to the one already stored for that
        column is a no-op; the existing entry is kept unchanged.

        ``alias`` is accepted for interface compatibility but is currently
        ignored.

        Raises:
            ValueError: if the column is already taken by a different
                individual.
        """
        column = individual.column
        if column not in self._columns:
            self._columns[column] = individual
        elif self._columns[column] == individual:
            # should this be an error?
            # should we replace the alias using this entry?
            pass
        else:
            # Raising a plain string is not a valid exception; use a real
            # exception type carrying the same message.
            raise ValueError('Duplicate column: {0}'.format(individual))

    def is_superset(self, other):
        """Return True if every individual of ``other`` is also in this population."""
        for column, other_individual in other._columns.items():
            our_individual = self._columns.get(column)
            if our_individual is None or our_individual != other_individual:
                return False
        return True

    def is_disjoint(self, other):
        """Return True if no individual is shared with ``other``."""
        for column, our_individual in self._columns.items():
            other_individual = other._columns.get(column)
            if other_individual is not None and other_individual == our_individual:
                return False
        return True

    def column_list(self):
        """Return the columns, in insertion order, as a list."""
        # Materialize so callers get a real list on Python 3 as well,
        # where keys() would otherwise be a live view.
        return list(self._columns.keys())

    def individual_with_column(self, column):
        """Return the individual stored for ``column``, or None if absent."""
        if column in self._columns:
            return self._columns[column]
        return None

    def tag_list(self, delimiter=':'):
        """Return a list of '<column><delimiter><name>' strings, one per individual."""
        return [
            '{0}{1}{2}'.format(column, delimiter, individual.name)
            for column, individual in self._columns.items()
        ]

    def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
        """Join '<column><delimiter><name>' entries with ``separator``.

        When ``replace_names_with`` is not None it is used in place of
        every individual's name.
        """
        entries = []
        for column, individual in self._columns.items():
            if replace_names_with is None:
                value = individual.name
            else:
                value = replace_names_with
            entries.append('{0}{1}{2}'.format(column, delimiter, value))
        return separator.join(entries)

    def __str__(self):
        return self.to_string()

    def from_population_file(self, filename):
        """Add individuals read from a tab-separated file.

        Each non-empty line must hold exactly three tab-separated fields:
        column, name and alias.  An alias that is empty after stripping
        whitespace leaves the individual without an alias.

        Raises:
            ValueError: if a line does not have exactly three fields, or
                if a column clashes with a different individual.
        """
        with open(filename) as fh:
            for line in fh:
                line = line.rstrip('\r\n')
                if not line:
                    # Tolerate empty lines (e.g. a trailing newline at EOF).
                    continue
                column, name, alias = line.split('\t')
                alias = alias.strip()
                individual = Individual(column, name)
                if alias:
                    individual.alias = alias
                self.add_individual(individual)

    def from_tag_list(self, tag_list, delimiter=':'):
        """Add individuals parsed from '<column><delimiter><name>' tags.

        Only the first ``delimiter`` separates column from name, so names
        that themselves contain the delimiter are preserved.

        Raises:
            ValueError: if a tag does not contain ``delimiter``, or if a
                column clashes with a different individual.
        """
        for tag in tag_list:
            column, name = tag.split(delimiter, 1)
            individual = Individual(column, name)
            self.add_individual(individual)

    def individual_names(self):
        """Yield the display name of each individual in insertion order."""
        for _column, individual in self._columns.items():
            yield individual.name
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
128