Mercurial > repos > davidvanzessen > shm_csr
comparison change_o/DefineClones.py @ 52:22dddabe3637 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 23 May 2017 08:32:58 -0400 |
parents | c33d93683a09 |
children |
comparison
equal
deleted
inserted
replaced
51:8fa8836bd605 | 52:22dddabe3637 |
---|---|
8 | 8 |
9 # Imports | 9 # Imports |
10 import os | 10 import os |
11 import re | 11 import re |
12 import sys | 12 import sys |
13 import csv | |
13 import numpy as np | 14 import numpy as np |
14 from argparse import ArgumentParser | 15 from argparse import ArgumentParser |
15 from collections import OrderedDict | 16 from collections import OrderedDict |
16 from itertools import chain | 17 from itertools import chain |
17 from textwrap import dedent | 18 from textwrap import dedent |
23 from presto.Defaults import default_out_args | 24 from presto.Defaults import default_out_args |
24 from presto.IO import getFileType, getOutputHandle, printLog, printProgress | 25 from presto.IO import getFileType, getOutputHandle, printLog, printProgress |
25 from presto.Multiprocessing import manageProcesses | 26 from presto.Multiprocessing import manageProcesses |
26 from presto.Sequence import getDNAScoreDict | 27 from presto.Sequence import getDNAScoreDict |
27 from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs | 28 from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs |
28 from changeo.Distance import getDNADistMatrix, getAADistMatrix, \ | 29 from changeo.Distance import distance_models, calcDistances, formClusters |
29 hs1f_model, m1n_model, hs5f_model, \ | |
30 calcDistances, formClusters | |
31 from changeo.IO import getDbWriter, readDbFile, countDbFile | 30 from changeo.IO import getDbWriter, readDbFile, countDbFile |
32 from changeo.Multiprocessing import DbData, DbResult | 31 from changeo.Multiprocessing import DbData, DbResult |
32 | |
33 ## Set maximum field size for csv.reader | |
34 csv.field_size_limit(sys.maxsize) | |
33 | 35 |
34 # Defaults | 36 # Defaults |
35 default_translate = False | 37 default_translate = False |
36 default_distance = 0.0 | 38 default_distance = 0.0 |
37 default_bygroup_model = 'hs1f' | 39 default_index_mode = 'gene' |
40 default_index_action = 'set' | |
41 default_bygroup_model = 'ham' | |
38 default_hclust_model = 'chen2010' | 42 default_hclust_model = 'chen2010' |
39 default_seq_field = 'JUNCTION' | 43 default_seq_field = 'JUNCTION' |
40 default_norm = 'len' | 44 default_norm = 'len' |
41 default_sym = 'avg' | 45 default_sym = 'avg' |
42 default_linkage = 'single' | 46 default_linkage = 'single' |
43 | 47 choices_bygroup_model = ('ham', 'aa', 'hh_s1f', 'hh_s5f', 'mk_rs1nf', 'mk_rs5nf', 'hs1f_compat', 'm1n_compat') |
44 # TODO: should be in Distance, but need to be after function definitions | 48 |
45 # Amino acid Hamming distance | 49 |
46 aa_model = getAADistMatrix(mask_dist=1, gap_dist=0) | 50 def indexByIdentity(index, key, rec, fields=None): |
47 | 51 """ |
48 # DNA Hamming distance | 52 Updates a preclone index with a simple key |
49 ham_model = getDNADistMatrix(mask_dist=0, gap_dist=0) | |
50 | |
51 | |
52 # TODO: this function is an abstraction to facilitate later cleanup | |
53 def getModelMatrix(model): | |
54 """ | |
55 Simple wrapper to get distance matrix from model name | |
56 | 53 |
57 Arguments: | 54 Arguments: |
58 model = model name | 55 index = preclone index from indexJunctions |
59 | 56 key = index key |
60 Return: | 57 rec = IgRecord to add to the index |
61 a pandas.DataFrame containing the character distance matrix | 58 fields = additional annotation fields to use to group preclones; |
62 """ | 59 if None use only V, J and junction length |
63 if model == 'aa': | 60 |
64 return(aa_model) | 61 Returns: |
65 elif model == 'ham': | 62 None. Updates index with new key and records. |
66 return(ham_model) | 63 """ |
67 elif model == 'm1n': | 64 index.setdefault(tuple(key), []).append(rec) |
68 return(m1n_model) | 65 |
69 elif model == 'hs1f': | 66 |
70 return(hs1f_model) | 67 def indexByUnion(index, key, rec, fields=None): |
71 elif model == 'hs5f': | 68 """ |
72 return(hs5f_model) | 69 Updates a preclone index with the union of nested keys |
70 | |
71 Arguments: | |
72 index = preclone index from indexJunctions | |
73 key = index key | |
74 rec = IgRecord to add to the index | |
75 fields = additional annotation fields to use to group preclones; | |
76 if None use only V, J and junction length | |
77 | |
78 Returns: | |
79 None. Updates index with new key and records. | |
80 """ | |
81 # List of values for this/new key | |
82 val = [rec] | |
83 f_range = list(range(2, 3 + (len(fields) if fields else 0))) | |
84 | |
85 # See if field/junction length combination exists in index | |
86 outer_dict = index | |
87 for field in f_range: | |
88 try: | |
89 outer_dict = outer_dict[key[field]] | |
90 except (KeyError): | |
91 outer_dict = None | |
92 break | |
93 # If field combination exists, look through Js | |
94 j_matches = [] | |
95 if outer_dict is not None: | |
96 for j in outer_dict.keys(): | |
97 if not set(key[1]).isdisjoint(set(j)): | |
98 key[1] = tuple(set(key[1]).union(set(j))) | |
99 j_matches += [j] | |
100 # If J overlap exists, look through Vs for each J | |
101 for j in j_matches: | |
102 v_matches = [] | |
103 # Collect V matches for this J | |
104 for v in outer_dict[j].keys(): | |
105 if not set(key[0]).isdisjoint(set(v)): | |
106 key[0] = tuple(set(key[0]).union(set(v))) | |
107 v_matches += [v] | |
108 # If there are V overlaps for this J, pop them out | |
109 if v_matches: | |
110 val += list(chain(*(outer_dict[j].pop(v) for v in v_matches))) | |
111 # If the J dict is now empty, remove it | |
112 if not outer_dict[j]: | |
113 outer_dict.pop(j, None) | |
114 | |
115 # Add value(s) into index nested dictionary | |
116 # OMG Python pointers are the best! | |
117 # Add field dictionaries into index | |
118 outer_dict = index | |
119 for field in f_range: | |
120 outer_dict.setdefault(key[field], {}) | |
121 outer_dict = outer_dict[key[field]] | |
122 # Add J, then V into index | |
123 if key[1] in outer_dict: | |
124 outer_dict[key[1]].update({key[0]: val}) | |
73 else: | 125 else: |
74 sys.stderr.write('Unrecognized distance model: %s.\n' % model) | 126 outer_dict[key[1]] = {key[0]: val} |
75 | 127 |
76 | 128 |
77 def indexJunctions(db_iter, fields=None, mode='gene', action='first'): | 129 def indexJunctions(db_iter, fields=None, mode=default_index_mode, |
130 action=default_index_action): | |
78 """ | 131 """ |
79 Identifies preclonal groups by V, J and junction length | 132 Identifies preclonal groups by V, J and junction length |
80 | 133 |
81 Arguments: | 134 Arguments: |
82 db_iter = an iterator of IgRecords defined by readDbFile | 135 db_iter = an iterator of IgRecords defined by readDbFile |
88 one of ('first', 'set') | 141 one of ('first', 'set') |
89 | 142 |
90 Returns: | 143 Returns: |
91 a dictionary of {(V, J, junction length):[IgRecords]} | 144 a dictionary of {(V, J, junction length):[IgRecords]} |
92 """ | 145 """ |
146 # print(fields) | |
93 # Define functions for grouping keys | 147 # Define functions for grouping keys |
94 if mode == 'allele' and fields is None: | 148 if mode == 'allele' and fields is None: |
95 def _get_key(rec, act): | 149 def _get_key(rec, act): |
96 return (rec.getVAllele(act), rec.getJAllele(act), | 150 return [rec.getVAllele(act), rec.getJAllele(act), |
97 None if rec.junction is None else len(rec.junction)) | 151 None if rec.junction is None else len(rec.junction)] |
98 elif mode == 'gene' and fields is None: | 152 elif mode == 'gene' and fields is None: |
99 def _get_key(rec, act): | 153 def _get_key(rec, act): |
100 return (rec.getVGene(act), rec.getJGene(act), | 154 return [rec.getVGene(act), rec.getJGene(act), |
101 None if rec.junction is None else len(rec.junction)) | 155 None if rec.junction is None else len(rec.junction)] |
102 elif mode == 'allele' and fields is not None: | 156 elif mode == 'allele' and fields is not None: |
103 def _get_key(rec, act): | 157 def _get_key(rec, act): |
104 vdj = [rec.getVAllele(act), rec.getJAllele(act), | 158 vdj = [rec.getVAllele(act), rec.getJAllele(act), |
105 None if rec.junction is None else len(rec.junction)] | 159 None if rec.junction is None else len(rec.junction)] |
106 ann = [rec.toDict().get(k, None) for k in fields] | 160 ann = [rec.toDict().get(k, None) for k in fields] |
107 return tuple(chain(vdj, ann)) | 161 return list(chain(vdj, ann)) |
108 elif mode == 'gene' and fields is not None: | 162 elif mode == 'gene' and fields is not None: |
109 def _get_key(rec, act): | 163 def _get_key(rec, act): |
110 vdj = [rec.getVGene(act), rec.getJGene(act), | 164 vdj = [rec.getVGene(act), rec.getJGene(act), |
111 None if rec.junction is None else len(rec.junction)] | 165 None if rec.junction is None else len(rec.junction)] |
112 ann = [rec.toDict().get(k, None) for k in fields] | 166 ann = [rec.toDict().get(k, None) for k in fields] |
113 return tuple(chain(vdj, ann)) | 167 return list(chain(vdj, ann)) |
168 | |
169 # Function to flatten nested dictionary | |
170 def _flatten_dict(d, parent_key=''): | |
171 items = [] | |
172 for k, v in d.items(): | |
173 new_key = parent_key + [k] if parent_key else [k] | |
174 if isinstance(v, dict): | |
175 items.extend(_flatten_dict(v, new_key).items()) | |
176 else: | |
177 items.append((new_key, v)) | |
178 flat_dict = {None if None in i[0] else tuple(i[0]): i[1] for i in items} | |
179 return flat_dict | |
180 | |
181 if action == 'first': | |
182 index_func = indexByIdentity | |
183 elif action == 'set': | |
184 index_func = indexByUnion | |
185 else: | |
186 sys.stderr.write('Unrecognized action: %s.\n' % action) | |
114 | 187 |
115 start_time = time() | 188 start_time = time() |
116 clone_index = {} | 189 clone_index = {} |
117 rec_count = 0 | 190 rec_count = 0 |
118 for rec in db_iter: | 191 for rec in db_iter: |
125 printProgress(rec_count, step=1000, start_time=start_time) | 198 printProgress(rec_count, step=1000, start_time=start_time) |
126 rec_count += 1 | 199 rec_count += 1 |
127 | 200 |
128 # Assigned passed preclone records to key and failed to index None | 201 # Assigned passed preclone records to key and failed to index None |
129 if all([k is not None and k != '' for k in key]): | 202 if all([k is not None and k != '' for k in key]): |
130 #print key | 203 # Update index dictionary |
131 # TODO: Has much slow. Should have less slow. | 204 index_func(clone_index, key, rec, fields) |
132 if action == 'set': | |
133 | |
134 f_range = list(range(2, 3 + (len(fields) if fields else 0))) | |
135 vdj_range = list(range(2)) | |
136 | |
137 # Check for any keys that have matching columns and junction length and overlapping genes/alleles | |
138 to_remove = [] | |
139 if len(clone_index) > (1 if None in clone_index else 0) and key not in clone_index: | |
140 key = list(key) | |
141 for k in clone_index: | |
142 if k is not None and all([key[i] == k[i] for i in f_range]): | |
143 if all([not set(key[i]).isdisjoint(set(k[i])) for i in vdj_range]): | |
144 for i in vdj_range: key[i] = tuple(set(key[i]).union(set(k[i]))) | |
145 to_remove.append(k) | |
146 | |
147 # Remove original keys, replace with union of all genes/alleles and append values to new key | |
148 val = [rec] | |
149 val += list(chain(*(clone_index.pop(k) for k in to_remove))) | |
150 clone_index[tuple(key)] = clone_index.get(tuple(key),[]) + val | |
151 | |
152 elif action == 'first': | |
153 clone_index.setdefault(key, []).append(rec) | |
154 else: | 205 else: |
155 clone_index.setdefault(None, []).append(rec) | 206 clone_index.setdefault(None, []).append(rec) |
156 | 207 |
157 printProgress(rec_count, step=1000, start_time=start_time, end=True) | 208 printProgress(rec_count, step=1000, start_time=start_time, end=True) |
209 | |
210 if action == 'set': | |
211 clone_index = _flatten_dict(clone_index) | |
158 | 212 |
159 return clone_index | 213 return clone_index |
160 | 214 |
161 | 215 |
162 def distanceClones(records, model=default_bygroup_model, distance=default_distance, | 216 def distanceClones(records, model=default_bygroup_model, distance=default_distance, |
177 | 231 |
178 Returns: | 232 Returns: |
179 a dictionary of lists defining {clone number: [IgRecords clonal group]} | 233 a dictionary of lists defining {clone number: [IgRecords clonal group]} |
180 """ | 234 """ |
181 # Get distance matrix if not provided | 235 # Get distance matrix if not provided |
182 if dist_mat is None: dist_mat = getModelMatrix(model) | 236 if dist_mat is None: |
183 | 237 try: |
238 dist_mat = distance_models[model] | |
239 except KeyError: | |
240 sys.exit('Unrecognized distance model: %s' % args_dict['model']) | |
241 | |
242 # TODO: can be cleaned up with abstract model class | |
184 # Determine length of n-mers | 243 # Determine length of n-mers |
185 if model in ['hs1f', 'm1n', 'aa', 'ham']: | 244 if model in ['hs1f_compat', 'm1n_compat', 'aa', 'ham', 'hh_s1f', 'mk_rs1nf']: |
186 nmer_len = 1 | 245 nmer_len = 1 |
187 elif model in ['hs5f']: | 246 elif model in ['hh_s5f', 'mk_rs5nf']: |
188 nmer_len = 5 | 247 nmer_len = 5 |
189 else: | 248 else: |
190 sys.stderr.write('Unrecognized distance model: %s.\n' % model) | 249 sys.exit('Unrecognized distance model: %s.\n' % model) |
191 | 250 |
192 # Define unique junction mapping | 251 # Define unique junction mapping |
193 seq_map = {} | 252 seq_map = {} |
194 for ig in records: | 253 for ig in records: |
195 seq = ig.getSeqField(seq_field) | 254 seq = ig.getSeqField(seq_field) |
196 # Check if sequence length is 0 | 255 # Check if sequence length is 0 |
197 if len(seq) == 0: | 256 if len(seq) == 0: |
198 return None | 257 return None |
199 | 258 |
200 seq = re.sub('[\.-]','N', str(seq)) | 259 seq = re.sub('[\.-]', 'N', str(seq)) |
201 if model == 'aa': seq = translate(seq) | 260 if model == 'aa': seq = translate(seq) |
202 | 261 |
203 seq_map.setdefault(seq, []).append(ig) | 262 seq_map.setdefault(seq, []).append(ig) |
204 | 263 |
205 # Process records | 264 # Process records |
208 | 267 |
209 # Define sequences | 268 # Define sequences |
210 seqs = list(seq_map.keys()) | 269 seqs = list(seq_map.keys()) |
211 | 270 |
212 # Calculate pairwise distance matrix | 271 # Calculate pairwise distance matrix |
213 dists = calcDistances(seqs, nmer_len, dist_mat, norm, sym) | 272 dists = calcDistances(seqs, nmer_len, dist_mat, sym=sym, norm=norm) |
214 | 273 |
215 # Perform hierarchical clustering | 274 # Perform hierarchical clustering |
216 clusters = formClusters(dists, linkage, distance) | 275 clusters = formClusters(dists, linkage, distance) |
217 | 276 |
218 # Turn clusters into clone dictionary | 277 # Turn clusters into clone dictionary |
447 # Exit upon reaching sentinel | 506 # Exit upon reaching sentinel |
448 if data is None: break | 507 if data is None: break |
449 | 508 |
450 # Define result object for iteration and get data records | 509 # Define result object for iteration and get data records |
451 records = data.data | 510 records = data.data |
511 # print(data.id) | |
452 result = DbResult(data.id, records) | 512 result = DbResult(data.id, records) |
453 | 513 |
454 # Check for invalid data (due to failed indexing) and add failed result | 514 # Check for invalid data (due to failed indexing) and add failed result |
455 if not data: | 515 if not data: |
456 result_queue.put(result) | 516 result_queue.put(result) |
899 database with assigned clonal group numbers. | 959 database with assigned clonal group numbers. |
900 clone-fail | 960 clone-fail |
901 database with records failing clonal grouping. | 961 database with records failing clonal grouping. |
902 | 962 |
903 required fields: | 963 required fields: |
904 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION_LENGTH | 964 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION |
905 | 965 |
906 <field> | 966 <field> |
907 sequence field specified by the --sf parameter | 967 sequence field specified by the --sf parameter |
908 | 968 |
909 output fields: | 969 output fields: |
924 parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, | 984 parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, |
925 multiproc=True) | 985 multiproc=True) |
926 | 986 |
927 # Distance cloning method | 987 # Distance cloning method |
928 parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent], | 988 parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent], |
929 formatter_class=CommonHelpFormatter, | 989 formatter_class=CommonHelpFormatter, |
930 help='''Defines clones as having same V assignment, | 990 help='''Defines clones as having same V assignment, |
931 J assignment, and junction length with | 991 J assignment, and junction length with |
932 specified substitution distance model.''') | 992 specified substitution distance model.''', |
993 description='''Defines clones as having same V assignment, | |
994 J assignment, and junction length with | |
995 specified substitution distance model.''') | |
933 parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None, | 996 parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None, |
934 help='Additional fields to use for grouping clones (non VDJ)') | 997 help='Additional fields to use for grouping clones (non VDJ)') |
935 parser_bygroup.add_argument('--mode', action='store', dest='mode', | 998 parser_bygroup.add_argument('--mode', action='store', dest='mode', |
936 choices=('allele', 'gene'), default='gene', | 999 choices=('allele', 'gene'), default=default_index_mode, |
937 help='''Specifies whether to use the V(D)J allele or gene for | 1000 help='''Specifies whether to use the V(D)J allele or gene for |
938 initial grouping.''') | 1001 initial grouping.''') |
939 parser_bygroup.add_argument('--act', action='store', dest='action', default='set', | 1002 parser_bygroup.add_argument('--act', action='store', dest='action', |
940 choices=('first', 'set'), | 1003 choices=('first', 'set'), default=default_index_action, |
941 help='''Specifies how to handle multiple V(D)J assignments | 1004 help='''Specifies how to handle multiple V(D)J assignments |
942 for initial grouping.''') | 1005 for initial grouping.''') |
943 parser_bygroup.add_argument('--model', action='store', dest='model', | 1006 parser_bygroup.add_argument('--model', action='store', dest='model', |
944 choices=('aa', 'ham', 'm1n', 'hs1f', 'hs5f'), | 1007 choices=choices_bygroup_model, |
945 default=default_bygroup_model, | 1008 default=default_bygroup_model, |
946 help='''Specifies which substitution model to use for | 1009 help='''Specifies which substitution model to use for calculating distance |
947 calculating distance between sequences. Where m1n is the | 1010 between sequences. The "ham" model is nucleotide Hamming distance and |
948 mouse single nucleotide transition/trasversion model | 1011 "aa" is amino acid Hamming distance. The "hh_s1f" and "hh_s5f" models are |
949 of Smith et al, 1996; hs1f is the human single | 1012 human specific single nucleotide and 5-mer content models, respectively, |
950 nucleotide model derived from Yaari et al, 2013; hs5f | 1013 from Yaari et al, 2013. The "mk_rs1nf" and "mk_rs5nf" models are |
951 is the human S5F model of Yaari et al, 2013; ham is | 1014 mouse specific single nucleotide and 5-mer content models, respectively, |
952 nucleotide Hamming distance; and aa is amino acid | 1015 from Cui et al, 2016. The "m1n_compat" and "hs1f_compat" models are |
953 Hamming distance. The hs5f data should be | 1016 deprecated models provided backwards compatibility with the "m1n" and |
954 considered experimental.''') | 1017 "hs1f" models in Change-O v0.3.3 and SHazaM v0.1.4. Both |
1018 5-mer models should be considered experimental.''') | |
955 parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float, | 1019 parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float, |
956 default=default_distance, | 1020 default=default_distance, |
957 help='The distance threshold for clonal grouping') | 1021 help='The distance threshold for clonal grouping') |
958 parser_bygroup.add_argument('--norm', action='store', dest='norm', | 1022 parser_bygroup.add_argument('--norm', action='store', dest='norm', |
959 choices=('len', 'mut', 'none'), default=default_norm, | 1023 choices=('len', 'mut', 'none'), default=default_norm, |
975 parser_bygroup.set_defaults(work_func=processQueue) | 1039 parser_bygroup.set_defaults(work_func=processQueue) |
976 parser_bygroup.set_defaults(collect_func=collectQueue) | 1040 parser_bygroup.set_defaults(collect_func=collectQueue) |
977 parser_bygroup.set_defaults(group_func=indexJunctions) | 1041 parser_bygroup.set_defaults(group_func=indexJunctions) |
978 parser_bygroup.set_defaults(clone_func=distanceClones) | 1042 parser_bygroup.set_defaults(clone_func=distanceClones) |
979 | 1043 |
980 | 1044 # Chen2010 |
981 # Hierarchical clustering cloning method | 1045 parser_chen = subparsers.add_parser('chen2010', parents=[parser_parent], |
982 parser_hclust = subparsers.add_parser('hclust', parents=[parser_parent], | |
983 formatter_class=CommonHelpFormatter, | 1046 formatter_class=CommonHelpFormatter, |
984 help='Defines clones by specified distance metric on CDR3s and \ | 1047 help='''Defines clones by method specified in Chen, 2010.''', |
985 cutting of hierarchical clustering tree') | 1048 description='''Defines clones by method specified in Chen, 2010.''') |
986 # parser_hclust.add_argument('-f', nargs='+', action='store', dest='fields', default=None, | 1049 parser_chen.set_defaults(feed_func=feedQueueClust) |
987 # help='Fields to use for grouping clones (non VDJ)') | 1050 parser_chen.set_defaults(work_func=processQueueClust) |
988 parser_hclust.add_argument('--method', action='store', dest='method', | 1051 parser_chen.set_defaults(collect_func=collectQueueClust) |
989 choices=('chen2010', 'ademokun2011'), default=default_hclust_model, | 1052 parser_chen.set_defaults(cluster_func=hierClust) |
990 help='Specifies which cloning method to use for calculating distance \ | 1053 |
991 between CDR3s, computing linkage, and cutting clusters') | 1054 # Ademokun2011 |
992 parser_hclust.set_defaults(feed_func=feedQueueClust) | 1055 parser_ade = subparsers.add_parser('ademokun2011', parents=[parser_parent], |
993 parser_hclust.set_defaults(work_func=processQueueClust) | 1056 formatter_class=CommonHelpFormatter, |
994 parser_hclust.set_defaults(collect_func=collectQueueClust) | 1057 help='''Defines clones by method specified in Ademokun, 2011.''', |
995 parser_hclust.set_defaults(cluster_func=hierClust) | 1058 description='''Defines clones by method specified in Ademokun, 2011.''') |
1059 parser_ade.set_defaults(feed_func=feedQueueClust) | |
1060 parser_ade.set_defaults(work_func=processQueueClust) | |
1061 parser_ade.set_defaults(collect_func=collectQueueClust) | |
1062 parser_ade.set_defaults(cluster_func=hierClust) | |
996 | 1063 |
997 return parser | 1064 return parser |
998 | 1065 |
999 | 1066 |
1000 if __name__ == '__main__': | 1067 if __name__ == '__main__': |
1021 'norm': args_dict['norm'], | 1088 'norm': args_dict['norm'], |
1022 'sym': args_dict['sym'], | 1089 'sym': args_dict['sym'], |
1023 'linkage': args_dict['linkage'], | 1090 'linkage': args_dict['linkage'], |
1024 'seq_field': args_dict['seq_field']} | 1091 'seq_field': args_dict['seq_field']} |
1025 | 1092 |
1026 # TODO: can be cleaned up with abstract model class | 1093 # Get distance matrix |
1027 args_dict['clone_args']['dist_mat'] = getModelMatrix(args_dict['model']) | 1094 try: |
1095 args_dict['clone_args']['dist_mat'] = distance_models[args_dict['model']] | |
1096 except KeyError: | |
1097 sys.exit('Unrecognized distance model: %s' % args_dict['model']) | |
1028 | 1098 |
1029 del args_dict['fields'] | 1099 del args_dict['fields'] |
1030 del args_dict['action'] | 1100 del args_dict['action'] |
1031 del args_dict['mode'] | 1101 del args_dict['mode'] |
1032 del args_dict['model'] | 1102 del args_dict['model'] |
1035 del args_dict['sym'] | 1105 del args_dict['sym'] |
1036 del args_dict['linkage'] | 1106 del args_dict['linkage'] |
1037 del args_dict['seq_field'] | 1107 del args_dict['seq_field'] |
1038 | 1108 |
1039 # Define clone_args | 1109 # Define clone_args |
1040 if args.command == 'hclust': | 1110 if args.command == 'chen2010': |
1041 dist_funcs = {'chen2010':distChen2010, 'ademokun2011':distAdemokun2011} | 1111 args_dict['clone_func'] = distChen2010 |
1042 args_dict['clone_func'] = dist_funcs[args_dict['method']] | 1112 args_dict['cluster_args'] = {'method': args.command } |
1043 args_dict['cluster_args'] = {'method': args_dict['method']} | 1113 |
1044 #del args_dict['fields'] | 1114 if args.command == 'ademokun2011': |
1045 del args_dict['method'] | 1115 args_dict['clone_func'] = distAdemokun2011 |
1116 args_dict['cluster_args'] = {'method': args.command } | |
1046 | 1117 |
1047 # Call defineClones | 1118 # Call defineClones |
1048 del args_dict['command'] | 1119 del args_dict['command'] |
1049 del args_dict['db_files'] | 1120 del args_dict['db_files'] |
1050 for f in args.__dict__['db_files']: | 1121 for f in args.__dict__['db_files']: |