comparison change_o/DefineClones.py @ 52:22dddabe3637 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 23 May 2017 08:32:58 -0400 |
| parents | c33d93683a09 |
| children | |
| 51:8fa8836bd605 | 52:22dddabe3637 |
|---|---|
| 8 | 8 |
| 9 # Imports | 9 # Imports |
| 10 import os | 10 import os |
| 11 import re | 11 import re |
| 12 import sys | 12 import sys |
| | 13 import csv |
| 13 import numpy as np | 14 import numpy as np |
| 14 from argparse import ArgumentParser | 15 from argparse import ArgumentParser |
| 15 from collections import OrderedDict | 16 from collections import OrderedDict |
| 16 from itertools import chain | 17 from itertools import chain |
| 17 from textwrap import dedent | 18 from textwrap import dedent |
| 23 from presto.Defaults import default_out_args | 24 from presto.Defaults import default_out_args |
| 24 from presto.IO import getFileType, getOutputHandle, printLog, printProgress | 25 from presto.IO import getFileType, getOutputHandle, printLog, printProgress |
| 25 from presto.Multiprocessing import manageProcesses | 26 from presto.Multiprocessing import manageProcesses |
| 26 from presto.Sequence import getDNAScoreDict | 27 from presto.Sequence import getDNAScoreDict |
| 27 from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs | 28 from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs |
| 28 from changeo.Distance import getDNADistMatrix, getAADistMatrix, \ | 29 from changeo.Distance import distance_models, calcDistances, formClusters |
| 29 hs1f_model, m1n_model, hs5f_model, \ | |
| 30 calcDistances, formClusters | |
| 31 from changeo.IO import getDbWriter, readDbFile, countDbFile | 30 from changeo.IO import getDbWriter, readDbFile, countDbFile |
| 32 from changeo.Multiprocessing import DbData, DbResult | 31 from changeo.Multiprocessing import DbData, DbResult |
| | 32 |
| | 33 ## Set maximum field size for csv.reader |
| | 34 csv.field_size_limit(sys.maxsize) |
| 33 | 35 |
| 34 # Defaults | 36 # Defaults |
| 35 default_translate = False | 37 default_translate = False |
| 36 default_distance = 0.0 | 38 default_distance = 0.0 |
| 37 default_bygroup_model = 'hs1f' | 39 default_index_mode = 'gene' |
| | 40 default_index_action = 'set' |
| | 41 default_bygroup_model = 'ham' |
| 38 default_hclust_model = 'chen2010' | 42 default_hclust_model = 'chen2010' |
| 39 default_seq_field = 'JUNCTION' | 43 default_seq_field = 'JUNCTION' |
| 40 default_norm = 'len' | 44 default_norm = 'len' |
| 41 default_sym = 'avg' | 45 default_sym = 'avg' |
| 42 default_linkage = 'single' | 46 default_linkage = 'single' |
| 43 | 47 choices_bygroup_model = ('ham', 'aa', 'hh_s1f', 'hh_s5f', 'mk_rs1nf', 'mk_rs5nf', 'hs1f_compat', 'm1n_compat') |
| 44 # TODO: should be in Distance, but need to be after function definitions | 48 |
| 45 # Amino acid Hamming distance | 49 |
| 46 aa_model = getAADistMatrix(mask_dist=1, gap_dist=0) | 50 def indexByIdentity(index, key, rec, fields=None): |
| 47 | 51 """ |
| 48 # DNA Hamming distance | 52 Updates a preclone index with a simple key |
| 49 ham_model = getDNADistMatrix(mask_dist=0, gap_dist=0) | |
| 50 | |
| 51 | |
| 52 # TODO: this function is an abstraction to facilitate later cleanup | |
| 53 def getModelMatrix(model): | |
| 54 """ | |
| 55 Simple wrapper to get distance matrix from model name | |
| 56 | 53 |
| 57 Arguments: | 54 Arguments: |
| 58 model = model name | 55 index = preclone index from indexJunctions |
| 59 | 56 key = index key |
| 60 Return: | 57 rec = IgRecord to add to the index |
| 61 a pandas.DataFrame containing the character distance matrix | 58 fields = additional annotation fields to use to group preclones; |
| 62 """ | 59 if None use only V, J and junction length |
| 63 if model == 'aa': | 60 |
| 64 return(aa_model) | 61 Returns: |
| 65 elif model == 'ham': | 62 None. Updates index with new key and records. |
| 66 return(ham_model) | 63 """ |
| 67 elif model == 'm1n': | 64 index.setdefault(tuple(key), []).append(rec) |
| 68 return(m1n_model) | 65 |
| 69 elif model == 'hs1f': | 66 |
| 70 return(hs1f_model) | 67 def indexByUnion(index, key, rec, fields=None): |
| 71 elif model == 'hs5f': | 68 """ |
| 72 return(hs5f_model) | 69 Updates a preclone index with the union of nested keys |
| | 70 |
| | 71 Arguments: |
| | 72 index = preclone index from indexJunctions |
| | 73 key = index key |
| | 74 rec = IgRecord to add to the index |
| | 75 fields = additional annotation fields to use to group preclones; |
| | 76 if None use only V, J and junction length |
| | 77 |
| | 78 Returns: |
| | 79 None. Updates index with new key and records. |
| | 80 """ |
| | 81 # List of values for this/new key |
| | 82 val = [rec] |
| | 83 f_range = list(range(2, 3 + (len(fields) if fields else 0))) |
| | 84 |
| | 85 # See if field/junction length combination exists in index |
| | 86 outer_dict = index |
| | 87 for field in f_range: |
| | 88 try: |
| | 89 outer_dict = outer_dict[key[field]] |
| | 90 except (KeyError): |
| | 91 outer_dict = None |
| | 92 break |
| | 93 # If field combination exists, look through Js |
| | 94 j_matches = [] |
| | 95 if outer_dict is not None: |
| | 96 for j in outer_dict.keys(): |
| | 97 if not set(key[1]).isdisjoint(set(j)): |
| | 98 key[1] = tuple(set(key[1]).union(set(j))) |
| | 99 j_matches += [j] |
| | 100 # If J overlap exists, look through Vs for each J |
| | 101 for j in j_matches: |
| | 102 v_matches = [] |
| | 103 # Collect V matches for this J |
| | 104 for v in outer_dict[j].keys(): |
| | 105 if not set(key[0]).isdisjoint(set(v)): |
| | 106 key[0] = tuple(set(key[0]).union(set(v))) |
| | 107 v_matches += [v] |
| | 108 # If there are V overlaps for this J, pop them out |
| | 109 if v_matches: |
| | 110 val += list(chain(*(outer_dict[j].pop(v) for v in v_matches))) |
| | 111 # If the J dict is now empty, remove it |
| | 112 if not outer_dict[j]: |
| | 113 outer_dict.pop(j, None) |
| | 114 |
| | 115 # Add value(s) into index nested dictionary |
| | 116 # OMG Python pointers are the best! |
| | 117 # Add field dictionaries into index |
| | 118 outer_dict = index |
| | 119 for field in f_range: |
| | 120 outer_dict.setdefault(key[field], {}) |
| | 121 outer_dict = outer_dict[key[field]] |
| | 122 # Add J, then V into index |
| | 123 if key[1] in outer_dict: |
| | 124 outer_dict[key[1]].update({key[0]: val}) |
| 73 else: | 125 else: |
| 74 sys.stderr.write('Unrecognized distance model: %s.\n' % model) | 126 outer_dict[key[1]] = {key[0]: val} |
| 75 | 127 |
| 76 | 128 |
| 77 def indexJunctions(db_iter, fields=None, mode='gene', action='first'): | 129 def indexJunctions(db_iter, fields=None, mode=default_index_mode, |
| | 130 action=default_index_action): |
| 78 """ | 131 """ |
| 79 Identifies preclonal groups by V, J and junction length | 132 Identifies preclonal groups by V, J and junction length |
| 80 | 133 |
| 81 Arguments: | 134 Arguments: |
| 82 db_iter = an iterator of IgRecords defined by readDbFile | 135 db_iter = an iterator of IgRecords defined by readDbFile |
| 88 one of ('first', 'set') | 141 one of ('first', 'set') |
| 89 | 142 |
| 90 Returns: | 143 Returns: |
| 91 a dictionary of {(V, J, junction length):[IgRecords]} | 144 a dictionary of {(V, J, junction length):[IgRecords]} |
| 92 """ | 145 """ |
| | 146 # print(fields) |
| 93 # Define functions for grouping keys | 147 # Define functions for grouping keys |
| 94 if mode == 'allele' and fields is None: | 148 if mode == 'allele' and fields is None: |
| 95 def _get_key(rec, act): | 149 def _get_key(rec, act): |
| 96 return (rec.getVAllele(act), rec.getJAllele(act), | 150 return [rec.getVAllele(act), rec.getJAllele(act), |
| 97 None if rec.junction is None else len(rec.junction)) | 151 None if rec.junction is None else len(rec.junction)] |
| 98 elif mode == 'gene' and fields is None: | 152 elif mode == 'gene' and fields is None: |
| 99 def _get_key(rec, act): | 153 def _get_key(rec, act): |
| 100 return (rec.getVGene(act), rec.getJGene(act), | 154 return [rec.getVGene(act), rec.getJGene(act), |
| 101 None if rec.junction is None else len(rec.junction)) | 155 None if rec.junction is None else len(rec.junction)] |
| 102 elif mode == 'allele' and fields is not None: | 156 elif mode == 'allele' and fields is not None: |
| 103 def _get_key(rec, act): | 157 def _get_key(rec, act): |
| 104 vdj = [rec.getVAllele(act), rec.getJAllele(act), | 158 vdj = [rec.getVAllele(act), rec.getJAllele(act), |
| 105 None if rec.junction is None else len(rec.junction)] | 159 None if rec.junction is None else len(rec.junction)] |
| 106 ann = [rec.toDict().get(k, None) for k in fields] | 160 ann = [rec.toDict().get(k, None) for k in fields] |
| 107 return tuple(chain(vdj, ann)) | 161 return list(chain(vdj, ann)) |
| 108 elif mode == 'gene' and fields is not None: | 162 elif mode == 'gene' and fields is not None: |
| 109 def _get_key(rec, act): | 163 def _get_key(rec, act): |
| 110 vdj = [rec.getVGene(act), rec.getJGene(act), | 164 vdj = [rec.getVGene(act), rec.getJGene(act), |
| 111 None if rec.junction is None else len(rec.junction)] | 165 None if rec.junction is None else len(rec.junction)] |
| 112 ann = [rec.toDict().get(k, None) for k in fields] | 166 ann = [rec.toDict().get(k, None) for k in fields] |
| 113 return tuple(chain(vdj, ann)) | 167 return list(chain(vdj, ann)) |
| | 168 |
| | 169 # Function to flatten nested dictionary |
| | 170 def _flatten_dict(d, parent_key=''): |
| | 171 items = [] |
| | 172 for k, v in d.items(): |
| | 173 new_key = parent_key + [k] if parent_key else [k] |
| | 174 if isinstance(v, dict): |
| | 175 items.extend(_flatten_dict(v, new_key).items()) |
| | 176 else: |
| | 177 items.append((new_key, v)) |
| | 178 flat_dict = {None if None in i[0] else tuple(i[0]): i[1] for i in items} |
| | 179 return flat_dict |
| | 180 |
| | 181 if action == 'first': |
| | 182 index_func = indexByIdentity |
| | 183 elif action == 'set': |
| | 184 index_func = indexByUnion |
| | 185 else: |
| | 186 sys.stderr.write('Unrecognized action: %s.\n' % action) |
| 114 | 187 |
| 115 start_time = time() | 188 start_time = time() |
| 116 clone_index = {} | 189 clone_index = {} |
| 117 rec_count = 0 | 190 rec_count = 0 |
| 118 for rec in db_iter: | 191 for rec in db_iter: |
| 125 printProgress(rec_count, step=1000, start_time=start_time) | 198 printProgress(rec_count, step=1000, start_time=start_time) |
| 126 rec_count += 1 | 199 rec_count += 1 |
| 127 | 200 |
| 128 # Assigned passed preclone records to key and failed to index None | 201 # Assigned passed preclone records to key and failed to index None |
| 129 if all([k is not None and k != '' for k in key]): | 202 if all([k is not None and k != '' for k in key]): |
| 130 #print key | 203 # Update index dictionary |
| 131 # TODO: Has much slow. Should have less slow. | 204 index_func(clone_index, key, rec, fields) |
| 132 if action == 'set': | |
| 133 | |
| 134 f_range = list(range(2, 3 + (len(fields) if fields else 0))) | |
| 135 vdj_range = list(range(2)) | |
| 136 | |
| 137 # Check for any keys that have matching columns and junction length and overlapping genes/alleles | |
| 138 to_remove = [] | |
| 139 if len(clone_index) > (1 if None in clone_index else 0) and key not in clone_index: | |
| 140 key = list(key) | |
| 141 for k in clone_index: | |
| 142 if k is not None and all([key[i] == k[i] for i in f_range]): | |
| 143 if all([not set(key[i]).isdisjoint(set(k[i])) for i in vdj_range]): | |
| 144 for i in vdj_range: key[i] = tuple(set(key[i]).union(set(k[i]))) | |
| 145 to_remove.append(k) | |
| 146 | |
| 147 # Remove original keys, replace with union of all genes/alleles and append values to new key | |
| 148 val = [rec] | |
| 149 val += list(chain(*(clone_index.pop(k) for k in to_remove))) | |
| 150 clone_index[tuple(key)] = clone_index.get(tuple(key),[]) + val | |
| 151 | |
| 152 elif action == 'first': | |
| 153 clone_index.setdefault(key, []).append(rec) | |
| 154 else: | 205 else: |
| 155 clone_index.setdefault(None, []).append(rec) | 206 clone_index.setdefault(None, []).append(rec) |
| 156 | 207 |
| 157 printProgress(rec_count, step=1000, start_time=start_time, end=True) | 208 printProgress(rec_count, step=1000, start_time=start_time, end=True) |
| | 209 |
| | 210 if action == 'set': |
| | 211 clone_index = _flatten_dict(clone_index) |
| 158 | 212 |
| 159 return clone_index | 213 return clone_index |
| 160 | 214 |
| 161 | 215 |
| 162 def distanceClones(records, model=default_bygroup_model, distance=default_distance, | 216 def distanceClones(records, model=default_bygroup_model, distance=default_distance, |
| 177 | 231 |
| 178 Returns: | 232 Returns: |
| 179 a dictionary of lists defining {clone number: [IgRecords clonal group]} | 233 a dictionary of lists defining {clone number: [IgRecords clonal group]} |
| 180 """ | 234 """ |
| 181 # Get distance matrix if not provided | 235 # Get distance matrix if not provided |
| 182 if dist_mat is None: dist_mat = getModelMatrix(model) | 236 if dist_mat is None: |
| 183 | 237 try: |
| | 238 dist_mat = distance_models[model] |
| | 239 except KeyError: |
| | 240 sys.exit('Unrecognized distance model: %s' % args_dict['model']) |
| | 241 |
| | 242 # TODO: can be cleaned up with abstract model class |
| 184 # Determine length of n-mers | 243 # Determine length of n-mers |
| 185 if model in ['hs1f', 'm1n', 'aa', 'ham']: | 244 if model in ['hs1f_compat', 'm1n_compat', 'aa', 'ham', 'hh_s1f', 'mk_rs1nf']: |
| 186 nmer_len = 1 | 245 nmer_len = 1 |
| 187 elif model in ['hs5f']: | 246 elif model in ['hh_s5f', 'mk_rs5nf']: |
| 188 nmer_len = 5 | 247 nmer_len = 5 |
| 189 else: | 248 else: |
| 190 sys.stderr.write('Unrecognized distance model: %s.\n' % model) | 249 sys.exit('Unrecognized distance model: %s.\n' % model) |
| 191 | 250 |
| 192 # Define unique junction mapping | 251 # Define unique junction mapping |
| 193 seq_map = {} | 252 seq_map = {} |
| 194 for ig in records: | 253 for ig in records: |
| 195 seq = ig.getSeqField(seq_field) | 254 seq = ig.getSeqField(seq_field) |
| 196 # Check if sequence length is 0 | 255 # Check if sequence length is 0 |
| 197 if len(seq) == 0: | 256 if len(seq) == 0: |
| 198 return None | 257 return None |
| 199 | 258 |
| 200 seq = re.sub('[\.-]','N', str(seq)) | 259 seq = re.sub('[\.-]', 'N', str(seq)) |
| 201 if model == 'aa': seq = translate(seq) | 260 if model == 'aa': seq = translate(seq) |
| 202 | 261 |
| 203 seq_map.setdefault(seq, []).append(ig) | 262 seq_map.setdefault(seq, []).append(ig) |
| 204 | 263 |
| 205 # Process records | 264 # Process records |
| 208 | 267 |
| 209 # Define sequences | 268 # Define sequences |
| 210 seqs = list(seq_map.keys()) | 269 seqs = list(seq_map.keys()) |
| 211 | 270 |
| 212 # Calculate pairwise distance matrix | 271 # Calculate pairwise distance matrix |
| 213 dists = calcDistances(seqs, nmer_len, dist_mat, norm, sym) | 272 dists = calcDistances(seqs, nmer_len, dist_mat, sym=sym, norm=norm) |
| 214 | 273 |
| 215 # Perform hierarchical clustering | 274 # Perform hierarchical clustering |
| 216 clusters = formClusters(dists, linkage, distance) | 275 clusters = formClusters(dists, linkage, distance) |
| 217 | 276 |
| 218 # Turn clusters into clone dictionary | 277 # Turn clusters into clone dictionary |
| 447 # Exit upon reaching sentinel | 506 # Exit upon reaching sentinel |
| 448 if data is None: break | 507 if data is None: break |
| 449 | 508 |
| 450 # Define result object for iteration and get data records | 509 # Define result object for iteration and get data records |
| 451 records = data.data | 510 records = data.data |
| | 511 # print(data.id) |
| 452 result = DbResult(data.id, records) | 512 result = DbResult(data.id, records) |
| 453 | 513 |
| 454 # Check for invalid data (due to failed indexing) and add failed result | 514 # Check for invalid data (due to failed indexing) and add failed result |
| 455 if not data: | 515 if not data: |
| 456 result_queue.put(result) | 516 result_queue.put(result) |
| 899 database with assigned clonal group numbers. | 959 database with assigned clonal group numbers. |
| 900 clone-fail | 960 clone-fail |
| 901 database with records failing clonal grouping. | 961 database with records failing clonal grouping. |
| 902 | 962 |
| 903 required fields: | 963 required fields: |
| 904 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION_LENGTH | 964 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION |
| 905 | 965 |
| 906 <field> | 966 <field> |
| 907 sequence field specified by the --sf parameter | 967 sequence field specified by the --sf parameter |
| 908 | 968 |
| 909 output fields: | 969 output fields: |
| 924 parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, | 984 parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, |
| 925 multiproc=True) | 985 multiproc=True) |
| 926 | 986 |
| 927 # Distance cloning method | 987 # Distance cloning method |
| 928 parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent], | 988 parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent], |
| 929 formatter_class=CommonHelpFormatter, | 989 formatter_class=CommonHelpFormatter, |
| 930 help='''Defines clones as having same V assignment, | 990 help='''Defines clones as having same V assignment, |
| 931 J assignment, and junction length with | 991 J assignment, and junction length with |
| 932 specified substitution distance model.''') | 992 specified substitution distance model.''', |
| | 993 description='''Defines clones as having same V assignment, |
| | 994 J assignment, and junction length with |
| | 995 specified substitution distance model.''') |
| 933 parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None, | 996 parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None, |
| 934 help='Additional fields to use for grouping clones (non VDJ)') | 997 help='Additional fields to use for grouping clones (non VDJ)') |
| 935 parser_bygroup.add_argument('--mode', action='store', dest='mode', | 998 parser_bygroup.add_argument('--mode', action='store', dest='mode', |
| 936 choices=('allele', 'gene'), default='gene', | 999 choices=('allele', 'gene'), default=default_index_mode, |
| 937 help='''Specifies whether to use the V(D)J allele or gene for | 1000 help='''Specifies whether to use the V(D)J allele or gene for |
| 938 initial grouping.''') | 1001 initial grouping.''') |
| 939 parser_bygroup.add_argument('--act', action='store', dest='action', default='set', | 1002 parser_bygroup.add_argument('--act', action='store', dest='action', |
| 940 choices=('first', 'set'), | 1003 choices=('first', 'set'), default=default_index_action, |
| 941 help='''Specifies how to handle multiple V(D)J assignments | 1004 help='''Specifies how to handle multiple V(D)J assignments |
| 942 for initial grouping.''') | 1005 for initial grouping.''') |
| 943 parser_bygroup.add_argument('--model', action='store', dest='model', | 1006 parser_bygroup.add_argument('--model', action='store', dest='model', |
| 944 choices=('aa', 'ham', 'm1n', 'hs1f', 'hs5f'), | 1007 choices=choices_bygroup_model, |
| 945 default=default_bygroup_model, | 1008 default=default_bygroup_model, |
| 946 help='''Specifies which substitution model to use for | 1009 help='''Specifies which substitution model to use for calculating distance |
| 947 calculating distance between sequences. Where m1n is the | 1010 between sequences. The "ham" model is nucleotide Hamming distance and |
| 948 mouse single nucleotide transition/trasversion model | 1011 "aa" is amino acid Hamming distance. The "hh_s1f" and "hh_s5f" models are |
| 949 of Smith et al, 1996; hs1f is the human single | 1012 human specific single nucleotide and 5-mer content models, respectively, |
| 950 nucleotide model derived from Yaari et al, 2013; hs5f | 1013 from Yaari et al, 2013. The "mk_rs1nf" and "mk_rs5nf" models are |
| 951 is the human S5F model of Yaari et al, 2013; ham is | 1014 mouse specific single nucleotide and 5-mer content models, respectively, |
| 952 nucleotide Hamming distance; and aa is amino acid | 1015 from Cui et al, 2016. The "m1n_compat" and "hs1f_compat" models are |
| 953 Hamming distance. The hs5f data should be | 1016 deprecated models provided backwards compatibility with the "m1n" and |
| 954 considered experimental.''') | 1017 "hs1f" models in Change-O v0.3.3 and SHazaM v0.1.4. Both |
| | 1018 5-mer models should be considered experimental.''') |
| 955 parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float, | 1019 parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float, |
| 956 default=default_distance, | 1020 default=default_distance, |
| 957 help='The distance threshold for clonal grouping') | 1021 help='The distance threshold for clonal grouping') |
| 958 parser_bygroup.add_argument('--norm', action='store', dest='norm', | 1022 parser_bygroup.add_argument('--norm', action='store', dest='norm', |
| 959 choices=('len', 'mut', 'none'), default=default_norm, | 1023 choices=('len', 'mut', 'none'), default=default_norm, |
| 975 parser_bygroup.set_defaults(work_func=processQueue) | 1039 parser_bygroup.set_defaults(work_func=processQueue) |
| 976 parser_bygroup.set_defaults(collect_func=collectQueue) | 1040 parser_bygroup.set_defaults(collect_func=collectQueue) |
| 977 parser_bygroup.set_defaults(group_func=indexJunctions) | 1041 parser_bygroup.set_defaults(group_func=indexJunctions) |
| 978 parser_bygroup.set_defaults(clone_func=distanceClones) | 1042 parser_bygroup.set_defaults(clone_func=distanceClones) |
| 979 | 1043 |
| 980 | 1044 # Chen2010 |
| 981 # Hierarchical clustering cloning method | 1045 parser_chen = subparsers.add_parser('chen2010', parents=[parser_parent], |
| 982 parser_hclust = subparsers.add_parser('hclust', parents=[parser_parent], | |
| 983 formatter_class=CommonHelpFormatter, | 1046 formatter_class=CommonHelpFormatter, |
| 984 help='Defines clones by specified distance metric on CDR3s and \ | 1047 help='''Defines clones by method specified in Chen, 2010.''', |
| 985 cutting of hierarchical clustering tree') | 1048 description='''Defines clones by method specified in Chen, 2010.''') |
| 986 # parser_hclust.add_argument('-f', nargs='+', action='store', dest='fields', default=None, | 1049 parser_chen.set_defaults(feed_func=feedQueueClust) |
| 987 # help='Fields to use for grouping clones (non VDJ)') | 1050 parser_chen.set_defaults(work_func=processQueueClust) |
| 988 parser_hclust.add_argument('--method', action='store', dest='method', | 1051 parser_chen.set_defaults(collect_func=collectQueueClust) |
| 989 choices=('chen2010', 'ademokun2011'), default=default_hclust_model, | 1052 parser_chen.set_defaults(cluster_func=hierClust) |
| 990 help='Specifies which cloning method to use for calculating distance \ | 1053 |
| 991 between CDR3s, computing linkage, and cutting clusters') | 1054 # Ademokun2011 |
| 992 parser_hclust.set_defaults(feed_func=feedQueueClust) | 1055 parser_ade = subparsers.add_parser('ademokun2011', parents=[parser_parent], |
| 993 parser_hclust.set_defaults(work_func=processQueueClust) | 1056 formatter_class=CommonHelpFormatter, |
| 994 parser_hclust.set_defaults(collect_func=collectQueueClust) | 1057 help='''Defines clones by method specified in Ademokun, 2011.''', |
| 995 parser_hclust.set_defaults(cluster_func=hierClust) | 1058 description='''Defines clones by method specified in Ademokun, 2011.''') |
| | 1059 parser_ade.set_defaults(feed_func=feedQueueClust) |
| | 1060 parser_ade.set_defaults(work_func=processQueueClust) |
| | 1061 parser_ade.set_defaults(collect_func=collectQueueClust) |
| | 1062 parser_ade.set_defaults(cluster_func=hierClust) |
| 996 | 1063 |
| 997 return parser | 1064 return parser |
| 998 | 1065 |
| 999 | 1066 |
| 1000 if __name__ == '__main__': | 1067 if __name__ == '__main__': |
| 1021 'norm': args_dict['norm'], | 1088 'norm': args_dict['norm'], |
| 1022 'sym': args_dict['sym'], | 1089 'sym': args_dict['sym'], |
| 1023 'linkage': args_dict['linkage'], | 1090 'linkage': args_dict['linkage'], |
| 1024 'seq_field': args_dict['seq_field']} | 1091 'seq_field': args_dict['seq_field']} |
| 1025 | 1092 |
| 1026 # TODO: can be cleaned up with abstract model class | 1093 # Get distance matrix |
| 1027 args_dict['clone_args']['dist_mat'] = getModelMatrix(args_dict['model']) | 1094 try: |
| | 1095 args_dict['clone_args']['dist_mat'] = distance_models[args_dict['model']] |
| | 1096 except KeyError: |
| | 1097 sys.exit('Unrecognized distance model: %s' % args_dict['model']) |
| 1028 | 1098 |
| 1029 del args_dict['fields'] | 1099 del args_dict['fields'] |
| 1030 del args_dict['action'] | 1100 del args_dict['action'] |
| 1031 del args_dict['mode'] | 1101 del args_dict['mode'] |
| 1032 del args_dict['model'] | 1102 del args_dict['model'] |
| 1035 del args_dict['sym'] | 1105 del args_dict['sym'] |
| 1036 del args_dict['linkage'] | 1106 del args_dict['linkage'] |
| 1037 del args_dict['seq_field'] | 1107 del args_dict['seq_field'] |
| 1038 | 1108 |
| 1039 # Define clone_args | 1109 # Define clone_args |
| 1040 if args.command == 'hclust': | 1110 if args.command == 'chen2010': |
| 1041 dist_funcs = {'chen2010':distChen2010, 'ademokun2011':distAdemokun2011} | 1111 args_dict['clone_func'] = distChen2010 |
| 1042 args_dict['clone_func'] = dist_funcs[args_dict['method']] | 1112 args_dict['cluster_args'] = {'method': args.command } |
| 1043 args_dict['cluster_args'] = {'method': args_dict['method']} | 1113 |
| 1044 #del args_dict['fields'] | 1114 if args.command == 'ademokun2011': |
| 1045 del args_dict['method'] | 1115 args_dict['clone_func'] = distAdemokun2011 |
| | 1116 args_dict['cluster_args'] = {'method': args.command } |
| 1046 | 1117 |
| 1047 # Call defineClones | 1118 # Call defineClones |
| 1048 del args_dict['command'] | 1119 del args_dict['command'] |
| 1049 del args_dict['db_files'] | 1120 del args_dict['db_files'] |
| 1050 for f in args.__dict__['db_files']: | 1121 for f in args.__dict__['db_files']: |
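
The most substantial change in this revision is the preclone indexing behind `--act set`: instead of rescanning every existing key in `clone_index` for each record (the old `# TODO: Has much slow` block), the new `indexByUnion` keeps a nested field/junction-length → J → V dictionary and merges records whose ambiguous V and J calls overlap, with `_flatten_dict` collapsing the nesting afterwards. The sketch below is a minimal, self-contained illustration of that grouping behaviour on toy data; it is not the changeo API: `Record` and `index_by_union_flat` are hypothetical stand-ins, and a flat key merge replaces the nested dictionaries used in the real code.

```python
from collections import namedtuple

# Hypothetical stand-in for changeo's IgRecord; only the fields the toy needs.
Record = namedtuple('Record', ['id', 'v_genes', 'j_genes', 'junction_len'])


def index_by_union_flat(records):
    """Group records whose V and J call sets overlap at the same junction length."""
    index = {}
    for rec in records:
        key = (frozenset(rec.v_genes), frozenset(rec.j_genes), rec.junction_len)
        val = [rec]
        # Existing groups with the same junction length and overlapping V and J sets
        matches = [k for k in index
                   if k[2] == key[2] and (k[0] & key[0]) and (k[1] & key[1])]
        # Union the gene sets and pool the members of every matching group
        for k in matches:
            key = (key[0] | k[0], key[1] | k[1], key[2])
            val.extend(index.pop(k))
        index.setdefault(key, []).extend(val)
    return index


if __name__ == '__main__':
    recs = [Record('A', {'IGHV1-2'}, {'IGHJ4'}, 48),
            Record('B', {'IGHV1-2', 'IGHV1-3'}, {'IGHJ4'}, 48),
            Record('C', {'IGHV3-23'}, {'IGHJ6'}, 48)]
    for key, members in index_by_union_flat(recs).items():
        print(sorted(key[0]), sorted(key[1]), key[2], [r.id for r in members])
    # A and B share IGHV1-2 and IGHJ4, so they form one preclone group; C stays separate.
```

Running the sketch puts records A and B (shared IGHV1-2 and IGHJ4 at the same junction length) into one group while C stays on its own, which mirrors what the `set` action in the diff above does with real IgRecords; the nested dictionaries in `indexByUnion` serve to avoid this sketch's scan over every existing key by descending straight to the matching field/junction-length bucket.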
