def _getLength(align, field, default=0):
    """
    Reads an integer field from a record, tolerating unparsable values

    Arguments:
      align = dictionary of sample sequence data
      field = name of the field to read
      default = value returned when the field holds None or a non-integer string

    Returns:
      the field value as an int, or default. A missing field raises KeyError.
    """
    try:
        return int(align[field])
    except (TypeError, ValueError):
        return default


def joinGermline(align, repo_dict, germ_types, v_field, seq_field):
    """
    Join gapped germline sequences aligned with a sample sequence

    Arguments:
      align = dictionary of sample sequence data for a single record
      repo_dict = dictionary of IMGT gapped germline sequences
      germ_types = types of germline sequences to be output
                   (full germline, D-region masked, only V-region germline)
      v_field = field in which to look for V call
      seq_field = field in which to look for sequence

    Returns:
      tuple of (log dictionary, dictionary of germline_type: germline_sequence);
      the log dictionary contains an 'ERROR' key when assembly failed
    """
    j_field = 'J_CALL'
    germlines = {'full': '', 'dmask': '', 'vonly': '', 'regions': ''}
    result_log = OrderedDict()
    result_log['ID'] = align['SEQUENCE_ID']

    # Find germline V-region gene
    if v_field == 'V_CALL_GENOTYPED':
        vgene = parseAllele(align[v_field], allele_regex, 'list')
        vkey = vgene
    else:
        vgene = parseAllele(align[v_field], allele_regex, 'first')
        vkey = (vgene, )

    # Records carrying all six integer P/N length fields get IMGT style region
    # annotations; anything else falls back to the IgBLAST style NP lengths.
    imgt_fields = ('P3V_LENGTH', 'N1_LENGTH', 'P5D_LENGTH',
                   'P3D_LENGTH', 'N2_LENGTH', 'P5J_LENGTH')
    try:
        imgt_len = {f: int(align[f]) for f in imgt_fields}
    except (KeyError, TypeError, ValueError):
        imgt_len = None

    # Build V-region germline
    if vgene is not None:
        result_log['V_CALL'] = ','.join(vkey)
        if vkey not in repo_dict:
            result_log['ERROR'] = 'Germline %s not in repertoire' % ','.join(vkey)
            return result_log, germlines
        vseq = repo_dict[vkey]
        # Germline start is 1-based in the record; an unparsable start means position 0
        vstart = _getLength(align, 'V_GERM_START_IMGT', default=1) - 1
        vlen = _getLength(align, 'V_GERM_LENGTH_IMGT')
        # Pad with Ns when the requested length runs past the end of the reference
        vpad = max(0, vlen - len(vseq[vstart:]))
        germ_vseq = vseq[vstart:(vstart + vlen)] + ('N' * vpad)
    else:
        result_log['V_CALL'] = None
        germ_vseq = 'N' * _getLength(align, 'V_GERM_LENGTH_IMGT')

    # Find germline D-region gene
    dgene = parseAllele(align['D_CALL'], allele_regex, 'first')

    # Build D-region germline
    if dgene is not None:
        result_log['D_CALL'] = dgene
        dkey = (dgene, )
        if dkey not in repo_dict:
            result_log['ERROR'] = 'Germline %s not in repertoire' % dgene
            return result_log, germlines
        dstart = _getLength(align, 'D_GERM_START', default=1) - 1
        dlen = _getLength(align, 'D_GERM_LENGTH')
        germ_dseq = repo_dict[dkey][dstart:(dstart + dlen)]
    else:
        result_log['D_CALL'] = None
        germ_dseq = ''

    # Find germline J-region gene
    jgene = parseAllele(align[j_field], allele_regex, 'first')

    # Build J-region germline
    if jgene is not None:
        result_log['J_CALL'] = jgene
        jkey = (jgene, )
        if jkey not in repo_dict:
            result_log['ERROR'] = 'Germline %s not in repertoire' % jgene
            return result_log, germlines
        jseq = repo_dict[jkey]
        jstart = _getLength(align, 'J_GERM_START', default=1) - 1
        jlen = _getLength(align, 'J_GERM_LENGTH')
        # Pad with Ns when the requested length runs past the end of the reference
        jpad = max(0, jlen - len(jseq[jstart:]))
        germ_jseq = jseq[jstart:(jstart + jlen)] + ('N' * jpad)
    else:
        result_log['J_CALL'] = None
        germ_jseq = 'N' * _getLength(align, 'J_GERM_LENGTH')

    # Assemble pieces starting with V-region
    germ_seq = germ_vseq
    regions = 'V' * len(germ_vseq)

    np1_len = _getLength(align, 'NP1_LENGTH')

    # NP nucleotide additions after V
    if imgt_len is not None:
        # P (3' of V), N1 and P (5' of D) additions, annotated separately
        for field, code in (('P3V_LENGTH', 'P'), ('N1_LENGTH', 'N'), ('P5D_LENGTH', 'P')):
            if imgt_len[field] < 0:
                result_log['ERROR'] = '%s is negative' % field
                return result_log, germlines
            regions += code * imgt_len[field]
    else:
        # IgBLAST style: a single combined NP1 stretch
        if np1_len < 0:
            result_log['ERROR'] = 'NP1_LENGTH is negative'
            return result_log, germlines
        regions += 'N' * np1_len

    germ_seq += 'N' * np1_len

    # Add D-region
    germ_seq += germ_dseq
    regions += 'D' * len(germ_dseq)

    np2_len = _getLength(align, 'NP2_LENGTH')

    # NP nucleotide additions before J
    if imgt_len is not None:
        # P (3' of D), N2 and P (5' of J) additions, annotated separately
        for field, code in (('P3D_LENGTH', 'P'), ('N2_LENGTH', 'N'), ('P5J_LENGTH', 'P')):
            if imgt_len[field] < 0:
                result_log['ERROR'] = '%s is negative' % field
                return result_log, germlines
            regions += code * imgt_len[field]
    else:
        # IgBLAST style: a single combined NP2 stretch
        if np2_len < 0:
            result_log['ERROR'] = 'NP2_LENGTH is negative'
            return result_log, germlines
        regions += 'N' * np2_len

    germ_seq += 'N' * np2_len

    # Add J-region
    germ_seq += germ_jseq
    regions += 'J' * len(germ_jseq)

    # Define return germlines
    germlines['full'] = germ_seq
    germlines['regions'] = regions

    if 'dmask' in germ_types:
        # Build from the V and J pieces directly. Slicing germ_seq with
        # [-len(germ_jseq):] returns the WHOLE sequence when the J piece is empty.
        n_mask = len(germ_seq) - len(germ_vseq) - len(germ_jseq)
        germlines['dmask'] = germ_vseq + 'N' * n_mask + germ_jseq
    if 'vonly' in germ_types:
        germlines['vonly'] = germ_vseq

    # Check that input and germline sequence match
    if len(align[seq_field]) == 0:
        result_log['ERROR'] = 'Sequence is missing from %s column' % seq_field
    elif len(germlines['full']) != len(align[seq_field]):
        result_log['ERROR'] = 'Germline sequence is %d nucleotides longer than input sequence' % \
                              (len(germlines['full']) - len(align[seq_field]))

    # Convert to uppercase
    germlines = {k: v.upper() for k, v in germlines.items()}

    return result_log, germlines
def assembleEachGermline(db_file, repo, germ_types, v_field, seq_field, out_args=default_out_args):
    """
    Write germline sequences to tab-delimited database file

    Arguments:
      db_file = input tab-delimited database file
      repo = folder with germline repertoire files
      germ_types = types of germline sequences to be output
                   (full germline, D-region masked, only V-region germline)
      v_field = field in which to look for V call
      seq_field = field in which to look for sequence
      out_args = arguments for output preferences

    Returns:
      None
    """
    # Print parameter info
    log = OrderedDict()
    log['START'] = 'CreateGermlines'
    log['DB_FILE'] = os.path.basename(db_file)
    log['GERM_TYPES'] = germ_types if isinstance(germ_types, str) else ','.join(germ_types)
    log['CLONED'] = 'False'
    log['V_FIELD'] = v_field
    log['SEQ_FIELD'] = seq_field
    printLog(log)

    # Get repertoire and open Db reader
    repo_dict = readRepo(repo)
    reader = readDbFile(db_file, ig=False)

    # Exit if V call field does not exist in reader
    if v_field not in reader.fieldnames:
        sys.exit('Error: V field does not exist in input database file.')

    # Output column for each requested germline type, in output order
    seq_type = seq_field.split('_')[-1]
    germ_columns = [(germ, column)
                    for germ, column in (('full', 'GERMLINE_' + seq_type),
                                         ('dmask', 'GERMLINE_' + seq_type + '_D_MASK'),
                                         ('vonly', 'GERMLINE_' + seq_type + '_V_REGION'),
                                         ('regions', 'GERMLINE_REGIONS'))
                    if germ in germ_types]
    add_fields = [column for _, column in germ_columns]

    log_handle = pass_handle = fail_handle = None
    try:
        # Define log handle
        if out_args['log_file'] is not None:
            log_handle = open(out_args['log_file'], 'w')

        # Create output file handles and Db writers
        pass_handle = getOutputHandle(db_file, 'germ-pass',
                                      out_dir=out_args['out_dir'],
                                      out_name=out_args['out_name'],
                                      out_type=out_args['out_type'])
        pass_writer = getDbWriter(pass_handle, db_file, add_fields=add_fields)

        fail_writer = None
        if out_args['failed']:
            fail_handle = getOutputHandle(db_file, 'germ-fail',
                                          out_dir=out_args['out_dir'],
                                          out_name=out_args['out_name'],
                                          out_type=out_args['out_type'])
            fail_writer = getDbWriter(fail_handle, db_file, add_fields=add_fields)

        # Initialize time and total count for progress bar
        start_time = time()
        rec_count = countDbFile(db_file)
        pass_count = fail_count = 0

        # Iterate over rows; row_count stays 0 when the input has no records
        row_count = 0
        for row_count, row in enumerate(reader, start=1):
            # Print progress
            printProgress(row_count - 1, rec_count, 0.05, start_time)

            result_log, germlines = joinGermline(row, repo_dict, germ_types, v_field, seq_field)

            # Add germline field(s) to dictionary
            for germ, column in germ_columns:
                row[column] = germlines[germ]

            # Write row to pass or fail file
            if 'ERROR' in result_log:
                fail_count += 1
                if fail_writer is not None:
                    fail_writer.writerow(row)
            else:
                result_log['SEQUENCE'] = row[seq_field]
                result_log['GERMLINE'] = germlines['full']
                result_log['REGIONS'] = germlines['regions']

                pass_count += 1
                pass_writer.writerow(row)
            printLog(result_log, handle=log_handle)

        # Print log
        printProgress(row_count, rec_count, 0.05, start_time)
        log = OrderedDict()
        log['OUTPUT'] = os.path.basename(pass_handle.name)
        log['RECORDS'] = rec_count
        log['PASS'] = pass_count
        log['FAIL'] = fail_count
        log['END'] = 'CreateGermlines'
        printLog(log)
    finally:
        # Close file handles even when processing fails part way through
        for handle in (pass_handle, fail_handle, log_handle):
            if handle is not None:
                handle.close()
def makeCloneGermline(clone, clone_dict, repo_dict, germ_types, v_field,
                      seq_field, counts, writers, out_args):
    """
    Determine consensus clone sequence and create germline for clone

    Arguments:
      clone = clone ID
      clone_dict = dictionary of {row index: sequence data dictionary} for the clone
      repo_dict = dictionary of IMGT gapped germline sequences
      germ_types = types of germline sequences to be output
                   (full germline, D-region masked, only V-region germline)
      v_field = field in which to look for V call
      seq_field = field in which to look for sequence
      counts = dictionary of pass counter and fail counter; updated in place
      writers = dictionary with pass and fail DB writers; the fail writer may be None
      out_args = arguments for output preferences

    Returns:
      log dictionary for the clone; contains an 'ERROR' key when no germline was built
    """
    seq_type = seq_field.split('_')[-1]
    j_field = 'J_CALL'
    records = list(clone_dict.values())

    # Count observed V/J calls and find the longest sequence in the clone
    v_dict = OrderedDict()
    j_dict = OrderedDict()
    max_length = 0
    for val in records:
        v = val[v_field]
        v_dict[v] = v_dict.get(v, 0) + 1
        j = val[j_field]
        j_dict[j] = j_dict.get(j, 0) + 1
        max_length = max(max_length, len(val[seq_field]))

    # Consensus V and J having most observations (ties keep every tied call)
    v_max = max(v_dict.values()) if v_dict else 0
    j_max = max(j_dict.values()) if j_dict else 0
    v_cons = [k for k, n in v_dict.items() if n == v_max]
    j_cons = [k for k, n in j_dict.items() if n == j_max]

    # Sequences carrying both a consensus V and a consensus J call,
    # and the subset of those that is also of maximal length
    matches = [val for val in records
               if val.get(v_field, '') in v_cons and val.get(j_field, '') in j_cons]
    longest = [val for val in matches if len(val[seq_field]) == max_length]

    germlines = None
    if not matches:
        # No sequence has both consensus V and J call
        result_log = OrderedDict()
        result_log['ID'] = clone
        result_log['V_CALL'] = ','.join(v_cons)
        result_log['J_CALL'] = ','.join(j_cons)
        result_log['ERROR'] = 'No consensus sequence for clone found'
    else:
        if longest:
            cons = longest[0]
        else:
            # Pad end of consensus sequence with gaps to make it the max length
            cons = matches[0]
            pad = max_length - len(cons[seq_field])
            cons['J_GERM_LENGTH'] = str(int(cons['J_GERM_LENGTH'] or 0) + pad)
            cons[seq_field] += '.' * pad
        result_log, germlines = joinGermline(cons, repo_dict, germ_types, v_field, seq_field)
        result_log['ID'] = clone
        result_log['CONSENSUS'] = cons['SEQUENCE_ID']

    # Write sequences of clone
    if 'ERROR' in result_log:
        # Write to fail file
        for val in records:
            counts['fail'] += 1
            if writers['fail'] is not None:
                writers['fail'].writerow(val)
        return result_log

    result_log['SEQUENCE'] = cons[seq_field]
    result_log['GERMLINE'] = germlines['full']
    result_log['REGIONS'] = germlines['regions']

    for val in records:
        # Update lengths padded to longest sequence in clone
        pad = max_length - len(val[seq_field])
        val['J_GERM_LENGTH'] = str(int(val['J_GERM_LENGTH'] or 0) + pad)
        val[seq_field] += '.' * pad

        # Add column(s) to tab-delimited database file
        if 'full' in germ_types: val['GERMLINE_' + seq_type] = germlines['full']
        if 'dmask' in germ_types: val['GERMLINE_' + seq_type + '_D_MASK'] = germlines['dmask']
        if 'vonly' in germ_types: val['GERMLINE_' + seq_type + '_V_REGION'] = germlines['vonly']
        if 'regions' in germ_types: val['GERMLINE_REGIONS'] = germlines['regions']

        # Record the germline calls used for the whole clone
        val['GERMLINE_V_CALL'] = result_log['V_CALL']
        val['GERMLINE_D_CALL'] = result_log['D_CALL']
        val['GERMLINE_J_CALL'] = result_log['J_CALL']

        # Write to pass file
        counts['pass'] += 1
        writers['pass'].writerow(val)

    # Return log
    return result_log
def assembleCloneGermline(db_file, repo, germ_types, v_field, seq_field, out_args=default_out_args):
    """
    Assemble one germline sequence for each clone in a tab-delimited database file

    Arguments:
      db_file = input tab-delimited database file, sorted by the CLONE column
      repo = folder with germline repertoire files
      germ_types = types of germline sequences to be output
                   (full germline, D-region masked, only V-region germline)
      v_field = field in which to look for V call
      seq_field = field in which to look for sequence
      out_args = arguments for output preferences

    Returns:
      None
    """
    # Print parameter info
    log = OrderedDict()
    log['START'] = 'CreateGermlines'
    log['DB_FILE'] = os.path.basename(db_file)
    log['GERM_TYPES'] = germ_types if isinstance(germ_types, str) else ','.join(germ_types)
    log['CLONED'] = 'True'
    log['V_FIELD'] = v_field
    log['SEQ_FIELD'] = seq_field
    printLog(log)

    # Get repertoire and open Db reader
    repo_dict = readRepo(repo)
    reader = readDbFile(db_file, ig=False)

    # Exit if V call field does not exist in reader
    if v_field not in reader.fieldnames:
        sys.exit('Error: V field does not exist in input database file.')
    # Exit early instead of failing with a KeyError on the first row
    if 'CLONE' not in reader.fieldnames:
        sys.exit('Error: CLONE field does not exist in input database file.')

    add_fields = []
    seq_type = seq_field.split('_')[-1]
    if 'full' in germ_types: add_fields += ['GERMLINE_' + seq_type]
    if 'dmask' in germ_types: add_fields += ['GERMLINE_' + seq_type + '_D_MASK']
    if 'vonly' in germ_types: add_fields += ['GERMLINE_' + seq_type + '_V_REGION']
    if 'regions' in germ_types: add_fields += ['GERMLINE_REGIONS']

    add_fields += ['GERMLINE_V_CALL']
    add_fields += ['GERMLINE_D_CALL']
    add_fields += ['GERMLINE_J_CALL']

    log_handle = pass_handle = fail_handle = None
    try:
        # Define log handle
        if out_args['log_file'] is not None:
            log_handle = open(out_args['log_file'], 'w')

        # Create output file handles and Db writers
        writers = {}
        pass_handle = getOutputHandle(db_file, 'germ-pass', out_dir=out_args['out_dir'],
                                      out_name=out_args['out_name'], out_type=out_args['out_type'])
        writers['pass'] = getDbWriter(pass_handle, db_file, add_fields=add_fields)

        if out_args['failed']:
            fail_handle = getOutputHandle(db_file, 'germ-fail', out_dir=out_args['out_dir'],
                                          out_name=out_args['out_name'], out_type=out_args['out_type'])
            writers['fail'] = getDbWriter(fail_handle, db_file, add_fields=add_fields)
        else:
            writers['fail'] = None

        # Initialize time and total count for progress bar
        start_time = time()
        rec_count = countDbFile(db_file)
        counts = {'pass': 0, 'fail': 0}
        clone_count = 0

        # Iterate over rows, collecting consecutive rows that share a clone ID
        clone = 'initial'
        clone_dict = OrderedDict()
        row_count = 0
        for row_count, row in enumerate(reader, start=1):
            # Print progress
            printProgress(row_count - 1, rec_count, 0.05, start_time)

            # Previous clone just finished; build its germline before starting the next
            if clone_dict and row['CLONE'] != clone:
                clone_count += 1
                result_log = makeCloneGermline(clone, clone_dict, repo_dict, germ_types,
                                               v_field, seq_field, counts, writers, out_args)
                printLog(result_log, handle=log_handle)
                clone_dict = OrderedDict()

            clone = row['CLONE']
            clone_dict[row_count - 1] = row

        # Process the final clone; skipped entirely when the input had no records
        if clone_dict:
            clone_count += 1
            result_log = makeCloneGermline(clone, clone_dict, repo_dict, germ_types, v_field,
                                           seq_field, counts, writers, out_args)
            printLog(result_log, handle=log_handle)

        # Print log
        printProgress(row_count, rec_count, 0.05, start_time)
        log = OrderedDict()
        log['OUTPUT'] = os.path.basename(pass_handle.name)
        log['CLONES'] = clone_count
        log['RECORDS'] = rec_count
        log['PASS'] = counts['pass']
        log['FAIL'] = counts['fail']
        log['END'] = 'CreateGermlines'
        printLog(log)
    finally:
        # Close file handles even when processing fails part way through
        for handle in (pass_handle, fail_handle, log_handle):
            if handle is not None:
                handle.close()
def getArgParser():
    """
    Defines the ArgumentParser

    Arguments:
      None

    Returns:
      an ArgumentParser object
    """
    # Define input and output field help message
    fields = dedent(
             '''
             output files:
                 germ-pass
                    database with assigned germline sequences.
                 germ-fail
                    database with records failing germline assignment.

             required fields:
                 SEQUENCE_ID, SEQUENCE_VDJ or SEQUENCE_IMGT,
                 V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL,
                 V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_IMGT, V_GERM_LENGTH_IMGT,
                 D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH,
                 J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH,
                 NP1_LENGTH, NP2_LENGTH

             optional fields:
                 N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, P5J_LENGTH,
                 CLONE


             output fields:
                 GERMLINE_VDJ, GERMLINE_VDJ_D_MASK, GERMLINE_VDJ_V_REGION,
                 GERMLINE_IMGT, GERMLINE_IMGT_D_MASK, GERMLINE_IMGT_V_REGION,
                 GERMLINE_V_CALL, GERMLINE_D_CALL, GERMLINE_J_CALL,
                 GERMLINE_REGIONS
              ''')

    # Parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       annotation=False)
    # Define argument parser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            parents=[parser_parent],
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' %(__version__, __date__))

    parser.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                        help='''List of folders and/or fasta files (with .fasta, .fna or .fa
                         extension) with germline sequences.''')
    # nargs='+' yields a list, so the default is a list too; a bare string default
    # only works with the downstream "x in germ_types" tests by substring accident.
    parser.add_argument('-g', action='store', dest='germ_types', default=[default_germ_types],
                        nargs='+', choices=('full', 'dmask', 'vonly', 'regions'),
                        help='''Specify type(s) of germlines to include full germline,
                             germline with D-region masked, or germline for V region only.''')
    parser.add_argument('--cloned', action='store_true', dest='cloned',
                        help='''Specify to create only one germline per clone. Assumes input file is
                             sorted by clone column, and will not yield correct results if the data
                             is unsorted. Note, if allele calls are ambiguous within a clonal group,
                             this will place the germline call used for the entire clone within the
                             GERMLINE_V_CALL, GERMLINE_D_CALL and GERMLINE_J_CALL fields.''')
    parser.add_argument('--vf', action='store', dest='v_field', default=default_v_field,
                        help='Specify field to use for germline V call')
    parser.add_argument('--sf', action='store', dest='seq_field', default=default_seq_field,
                        help='Specify field to use for sequence')

    return parser
if __name__ == "__main__":
    """
    Parses command line arguments and calls main
    """
    # Build the shared keyword arguments from the command line
    cli_parser = getArgParser()
    checkArgs(cli_parser)
    args = cli_parser.parse_args()
    args_dict = parseCommonArgs(args)

    # These two entries drive the dispatch below and are not parameters
    # of the assemble functions
    for unused in ('db_files', 'cloned'):
        del args_dict[unused]

    # Field names are matched in upper case
    for field_arg in ('v_field', 'seq_field'):
        args_dict[field_arg] = args_dict[field_arg].upper()

    # One germline per clone when --cloned is given, otherwise one per record
    raw_args = vars(args)
    for db_path in raw_args['db_files']:
        args_dict['db_file'] = db_path
        if raw_args['cloned']:
            assembleCloneGermline(**args_dict)
        else:
            assembleEachGermline(**args_dict)
def indexByIdentity(index, key, rec, fields=None):
    """
    Updates a preclone index with a simple key

    Arguments:
      index = preclone index from indexJunctions
      key = index key
      rec = IgRecord to add to the index
      fields = additional annotation fields to use to group preclones;
               if None use only V, J and junction length

    Returns:
      None. Updates index with new key and records.
    """
    index.setdefault(tuple(key), []).append(rec)


def indexByUnion(index, key, rec, fields=None):
    """
    Updates a preclone index with the union of nested keys

    The index is a nested dictionary keyed by junction length, then each
    annotation field value, then the J call tuple, then the V call tuple.
    Existing entries whose J and V calls overlap the new key are merged into
    a single entry stored under the union of the calls.

    Arguments:
      index = preclone index from indexJunctions
      key = index key as [V calls, J calls, junction length, field values...]
      rec = IgRecord to add to the index
      fields = additional annotation fields to use to group preclones;
               if None use only V, J and junction length

    Returns:
      None. Updates index with new key and records.
    """
    # Work on a copy so the caller's key is not modified
    key = list(key)
    # List of values for this/new key
    val = [rec]
    f_range = list(range(2, 3 + (len(fields) if fields else 0)))

    # See if field/junction length combination exists in index
    outer_dict = index
    for field in f_range:
        try:
            outer_dict = outer_dict[key[field]]
        except KeyError:
            outer_dict = None
            break

    # If field combination exists, look through Js
    j_matches = []
    if outer_dict is not None:
        for j in outer_dict.keys():
            if not set(key[1]).isdisjoint(set(j)):
                # Sorted so equal call sets always produce the same dictionary key
                key[1] = tuple(sorted(set(key[1]).union(set(j))))
                j_matches += [j]

    # If J overlap exists, look through Vs for each J
    for j in j_matches:
        v_matches = []
        # Collect V matches for this J
        for v in outer_dict[j].keys():
            if not set(key[0]).isdisjoint(set(v)):
                key[0] = tuple(sorted(set(key[0]).union(set(v))))
                v_matches += [v]
        # If there are V overlaps for this J, pop them out
        if v_matches:
            val += list(chain(*(outer_dict[j].pop(v) for v in v_matches)))
            # If the J dict is now empty, remove it
            if not outer_dict[j]:
                outer_dict.pop(j, None)

    # Add value(s) into index nested dictionary
    # Add field dictionaries into index
    outer_dict = index
    for field in f_range:
        outer_dict = outer_dict.setdefault(key[field], {})
    # Add J, then V into index
    outer_dict.setdefault(key[1], {})[key[0]] = val
def indexJunctions(db_iter, fields=None, mode=default_index_mode,
                   action=default_index_action):
    """
    Identifies preclonal groups by V, J and junction length

    Arguments:
      db_iter = an iterator of IgRecords defined by readDbFile
      fields = additional annotation fields to use to group preclones;
               if None use only V, J and junction length
      mode = specificity of alignment call to use for assigning preclones;
             one of ('allele', 'gene')
      action = how to handle multiple value fields when assigning preclones;
               one of ('first', 'set')

    Returns:
      a dictionary of {(V, J, junction length):[IgRecords]}; records with a
      missing or empty key component are collected under the key None
    """
    # Define function for extracting the V and J calls
    if mode == 'allele':
        def _get_calls(rec, act):
            return rec.getVAllele(act), rec.getJAllele(act)
    elif mode == 'gene':
        def _get_calls(rec, act):
            return rec.getVGene(act), rec.getJGene(act)
    else:
        sys.exit('Unrecognized mode: %s.\n' % mode)

    # Define function for grouping keys
    def _get_key(rec, act):
        v_call, j_call = _get_calls(rec, act)
        key = [v_call, j_call, None if rec.junction is None else len(rec.junction)]
        if fields is not None:
            ann = rec.toDict()
            key.extend(ann.get(k, None) for k in fields)
        return key

    # Function to flatten nested dictionary
    def _flatten_dict(d, parent_key=''):
        items = []
        for k, v in d.items():
            new_key = parent_key + [k] if parent_key else [k]
            if isinstance(v, dict):
                items.extend(_flatten_dict(v, new_key).items())
            else:
                items.append((new_key, v))
        flat_dict = {None if None in i[0] else tuple(i[0]): i[1] for i in items}
        return flat_dict

    if action == 'first':
        index_func = indexByIdentity
    elif action == 'set':
        index_func = indexByUnion
    else:
        # Stop here; continuing would fail later on the undefined index function
        sys.exit('Unrecognized action: %s.\n' % action)

    start_time = time()
    clone_index = {}
    rec_count = 0
    for rec in db_iter:
        key = _get_key(rec, action)

        # Print progress
        if rec_count == 0:
            print('PROGRESS> Grouping sequences')

        printProgress(rec_count, step=1000, start_time=start_time)
        rec_count += 1

        # Assign passed preclone records to key and failed to index None
        if all(k is not None and k != '' for k in key):
            # Update index dictionary
            index_func(clone_index, key, rec, fields)
        else:
            clone_index.setdefault(None, []).append(rec)

    printProgress(rec_count, step=1000, start_time=start_time, end=True)

    # The union index is nested; collapse it to tuple keys
    if action == 'set':
        clone_index = _flatten_dict(clone_index)

    return clone_index
def distanceClones(records, model=default_bygroup_model, distance=default_distance,
                   dist_mat=None, norm=default_norm, sym=default_sym,
                   linkage=default_linkage, seq_field=default_seq_field):
    """
    Separates a set of IgRecords into clones

    Arguments:
      records = an iterator of IgRecords
      model = substitution model used to calculate distance
      distance = the distance threshold to assign clonal groups
      dist_mat = pandas DataFrame of pairwise nucleotide or amino acid distances
      norm = normalization method
      sym = symmetry method
      linkage = type of linkage
      seq_field = sequence field used to calculate distance between records

    Returns:
      a dictionary of lists defining {clone number: [IgRecords clonal group]},
      or None if any record has an empty sequence field
    """
    # Get distance matrix if not provided
    if dist_mat is None:
        try:
            dist_mat = distance_models[model]
        except KeyError:
            sys.exit('Unrecognized distance model: %s' % model)

    # TODO: can be cleaned up with abstract model class
    # Determine length of n-mers
    if model in ['hs1f_compat', 'm1n_compat', 'aa', 'ham', 'hh_s1f', 'mk_rs1nf']:
        nmer_len = 1
    elif model in ['hh_s5f', 'mk_rs5nf']:
        nmer_len = 5
    else:
        sys.exit('Unrecognized distance model: %s.\n' % model)

    # Define unique junction mapping
    seq_map = {}
    for ig in records:
        seq = ig.getSeqField(seq_field)
        # Check if sequence length is 0
        if len(seq) == 0:
            return None

        # Gap and missing characters are treated as N
        seq = re.sub(r'[\.-]', 'N', str(seq))
        if model == 'aa': seq = translate(seq)

        seq_map.setdefault(seq, []).append(ig)

    # A single unique sequence forms a single clone. Return the collected
    # records, as the records argument may be an iterator that is now exhausted.
    if len(seq_map) == 1:
        return {1: next(iter(seq_map.values()))}

    # Define sequences
    seqs = list(seq_map.keys())

    # Calculate pairwise distance matrix
    dists = calcDistances(seqs, nmer_len, dist_mat, sym=sym, norm=norm)

    # Perform hierarchical clustering
    clusters = formClusters(dists, linkage, distance)

    # Turn clusters into clone dictionary
    clone_dict = {}
    for i, c in enumerate(clusters):
        clone_dict.setdefault(c, []).extend(seq_map[seqs[i]])

    return clone_dict
elif records[i].getVGene() == query_v_gene: ld += 1 + elif records[i].getVFamily() == query_v_family: ld += 3 + else: ld += 5 + # Check J similarity + if records[i].getJAllele() == query_j_allele: ld += 0 + elif records[i].getJGene() == query_j_gene: ld += 1 + else: ld += 3 + # Divide by length + scores[i] = ld/max(len(records[i].junction[3:-3]), query_cdr3) + + return scores + + +def distAdemokun2011(records): + """ + Calculate pairwise distances as defined in Ademokun 2011 + + Arguments: + records = list of IgRecords where first is query to be compared to others in list + + Returns: + list of distances + """ + # Pull out query sequence and V family information + query = records.popitem(last=False) + query_cdr3 = query.junction[3:-3] + query_v_family = query.getVFamily() + # Create alignment scoring dictionary + score_dict = getDNAScoreDict() + + scores = [0]*len(records) + for i in range(len(records)): + + if abs(len(query_cdr3) - len(records[i].junction[3:-3])) > 10: + scores[i] = 1 + elif query_v_family != records[i].getVFamily(): + scores[i] = 1 + else: + ld = pairwise2.align.globalds(query_cdr3, records[i].junction[3:-3], + score_dict, -1, -1, one_alignment_only=True) + scores[i] = ld/min(len(records[i].junction[3:-3]), query_cdr3) + + return scores + + +def hierClust(dist_mat, method='chen2010'): + """ + Calculate hierarchical clustering + + Arguments: + dist_mat = square-formed distance matrix of pairwise CDR3 comparisons + + Returns: + list of cluster ids + """ + if method == 'chen2010': + clusters = formClusters(dist_mat, 'average', 0.32) + elif method == 'ademokun2011': + clusters = formClusters(dist_mat, 'complete', 0.25) + else: clusters = np.ones(dist_mat.shape[0]) + + return clusters + +# TODO: Merge duplicate feed, process and collect functions. 
+def feedQueue(alive, data_queue, db_file, group_func, group_args={}): + """ + Feeds the data queue with Ig records + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue to hold data for processing + db_file = the Ig record database file + group_func = the function to use for assigning preclones + group_args = a dictionary of arguments to pass to group_func + + Returns: + None + """ + # Open input file and perform grouping + try: + # Iterate over Ig records and assign groups + db_iter = readDbFile(db_file) + clone_dict = group_func(db_iter, **group_args) + except: + #sys.stderr.write('Exception in feeder grouping step\n') + alive.value = False + raise + + # Add groups to data queue + try: + #print 'START FEED', alive.value + # Iterate over groups and feed data queue + clone_iter = iter(clone_dict.items()) + while alive.value: + # Get data from queue + if data_queue.full(): continue + else: data = next(clone_iter, None) + # Exit upon reaching end of iterator + if data is None: break + #print "FEED", alive.value, k + + # Feed queue + data_queue.put(DbData(*data)) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in feeder queue feeding step\n') + alive.value = False + raise + + return None + + +def feedQueueClust(alive, data_queue, db_file, group_func=None, group_args={}): + """ + Feeds the data queue with Ig records + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue to hold data for processing + db_file = the Ig record database file + + Returns: + None + """ + # Open input file and perform grouping + try: + # Iterate over Ig records and order by junction length + records = {} + db_iter = readDbFile(db_file) + for rec in db_iter: + records[rec.id] = rec + records = OrderedDict(sorted(list(records.items()), key=lambda i: i[1].junction_length)) + dist_dict = {} + for __ in range(len(records)): + k,v = records.popitem(last=False) + dist_dict[k] = [v].append(list(records.values())) + except: + #sys.stderr.write('Exception in feeder grouping step\n') + alive.value = False + raise + + # Add groups to data queue + try: + # print 'START FEED', alive.value + # Iterate over groups and feed data queue + dist_iter = iter(dist_dict.items()) + while alive.value: + # Get data from queue + if data_queue.full(): continue + else: data = next(dist_iter, None) + # Exit upon reaching end of iterator + if data is None: break + #print "FEED", alive.value, k + + # Feed queue + data_queue.put(DbData(*data)) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in feeder queue feeding step\n') + alive.value = False + raise + + return None + + +def processQueue(alive, data_queue, result_queue, clone_func, clone_args): + """ + Pulls from data queue, performs calculations, and feeds results queue + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue holding data to process + result_queue = a multiprocessing.Queue to hold processed results + clone_func = the function to call for clonal assignment + clone_args = a dictionary of arguments to pass to clone_func + + Returns: + None + """ + try: + # Iterator over data queue until sentinel object reached + while alive.value: + # Get data from queue + if data_queue.empty(): continue + else: data = data_queue.get() + # Exit upon reaching sentinel + if data is None: break + + # Define result object for iteration and get data records + records = data.data + # print(data.id) + result = DbResult(data.id, records) + + # Check for invalid data (due to failed indexing) and add failed result + if not data: + result_queue.put(result) + continue + + # Add V(D)J to log + result.log['ID'] = ','.join([str(x) for x in data.id]) + result.log['VALLELE'] = ','.join(set([(r.getVAllele() or '') for r in records])) + result.log['DALLELE'] = ','.join(set([(r.getDAllele() or '') for r in records])) + result.log['JALLELE'] = ','.join(set([(r.getJAllele() or '') for r in records])) + result.log['JUNCLEN'] = ','.join(set([(str(len(r.junction)) or '0') for r in records])) + result.log['SEQUENCES'] = len(records) + + # Checking for preclone failure and assign clones + clones = clone_func(records, **clone_args) if data else None + + # import cProfile + # prof = cProfile.Profile() + # clones = prof.runcall(clone_func, records, **clone_args) + # prof.dump_stats('worker-%d.prof' % os.getpid()) + + if clones is not None: 
+ result.results = clones + result.valid = True + result.log['CLONES'] = len(clones) + else: + result.log['CLONES'] = 0 + + # Feed results to result queue + result_queue.put(result) + else: + sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in worker\n') + alive.value = False + raise + + return None + + +def processQueueClust(alive, data_queue, result_queue, clone_func, clone_args): + """ + Pulls from data queue, performs calculations, and feeds results queue + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue holding data to process + result_queue = a multiprocessing.Queue to hold processed results + clone_func = the function to call for calculating pairwise distances between sequences + clone_args = a dictionary of arguments to pass to clone_func + + Returns: + None + """ + + try: + # print 'START WORK', alive.value + # Iterator over data queue until sentinel object reached + while alive.value: + # Get data from queue + if data_queue.empty(): continue + else: data = data_queue.get() + # Exit upon reaching sentinel + if data is None: break + # print "WORK", alive.value, data['id'] + + # Define result object for iteration and get data records + records = data.data + result = DbResult(data.id, records) + + # Create row of distance matrix and check for error + dist_row = clone_func(records, **clone_args) if data else None + if dist_row is not None: + result.results = dist_row + result.valid = True + + # Feed results to result queue + result_queue.put(result) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in worker\n') + alive.value = False + raise + + return None + + +def collectQueue(alive, result_queue, collect_queue, db_file, out_args, cluster_func=None, cluster_args={}): + """ + Assembles results from a queue of individual sequence results and manages log/file I/O + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + result_queue = a multiprocessing.Queue holding processQueue results + collect_queue = a multiprocessing.Queue to store collector return values + db_file = the input database file name + out_args = common output argument dictionary from parseCommonArgs + cluster_func = the function to call for carrying out clustering on distance matrix + cluster_args = a dictionary of arguments to pass to cluster_func + + Returns: + None + (adds 'log' and 'out_files' to collect_dict) + """ + # Open output files + try: + # Count records and define output format + out_type = getFileType(db_file) if out_args['out_type'] is None \ + else out_args['out_type'] + result_count = countDbFile(db_file) + + # Defined successful output handle + pass_handle = getOutputHandle(db_file, + out_label='clone-pass', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + pass_writer = getDbWriter(pass_handle, db_file, add_fields='CLONE') + + # Defined failed alignment output handle + if out_args['failed']: + fail_handle = getOutputHandle(db_file, + out_label='clone-fail', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + fail_writer = getDbWriter(fail_handle, db_file) + else: + fail_handle = None + fail_writer = None + + # Define log handle + if out_args['log_file'] is None: + log_handle = None + else: + log_handle = open(out_args['log_file'], 'w') + except: + #sys.stderr.write('Exception in collector file opening step\n') + alive.value = False + raise + + # Get 
results from queue and write to files + try: + #print 'START COLLECT', alive.value + # Iterator over results queue until sentinel object reached + start_time = time() + rec_count = clone_count = pass_count = fail_count = 0 + while alive.value: + # Get result from queue + if result_queue.empty(): continue + else: result = result_queue.get() + # Exit upon reaching sentinel + if result is None: break + #print "COLLECT", alive.value, result['id'] + + # Print progress for previous iteration and update record count + if rec_count == 0: + print('PROGRESS> Assigning clones') + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += len(result.data) + + # Write passed and failed records + if result: + for clone in result.results.values(): + clone_count += 1 + for i, rec in enumerate(clone): + rec.annotations['CLONE'] = clone_count + pass_writer.writerow(rec.toDict()) + pass_count += 1 + result.log['CLONE%i-%i' % (clone_count, i + 1)] = str(rec.junction) + + else: + for i, rec in enumerate(result.data): + if fail_writer is not None: fail_writer.writerow(rec.toDict()) + fail_count += 1 + result.log['CLONE0-%i' % (i + 1)] = str(rec.junction) + + # Write log + printLog(result.log, handle=log_handle) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + + # Print total counts + printProgress(rec_count, result_count, 0.05, start_time) + + # Close file handles + pass_handle.close() + if fail_handle is not None: fail_handle.close() + if log_handle is not None: log_handle.close() + + # Update return list + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['CLONES'] = clone_count + log['RECORDS'] = rec_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + collect_dict = {'log':log, 'out_files': [pass_handle.name]} + collect_queue.put(collect_dict) + except: + #sys.stderr.write('Exception in collector result processing step\n') + alive.value = False + raise + + return None + + +def collectQueueClust(alive, result_queue, collect_queue, db_file, out_args, cluster_func, cluster_args): + """ + Assembles results from a queue of individual sequence results and manages log/file I/O + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + result_queue = a multiprocessing.Queue holding processQueue results + collect_queue = a multiprocessing.Queue to store collector return values + db_file = the input database file name + out_args = common output argument dictionary from parseCommonArgs + cluster_func = the function to call for carrying out clustering on distance matrix + cluster_args = a dictionary of arguments to pass to cluster_func + + Returns: + None + (adds 'log' and 'out_files' to collect_dict) + """ + # Open output files + try: + + # Iterate over Ig records to count and order by junction length + result_count = 0 + records = {} + # print 'Reading file...' 
+ db_iter = readDbFile(db_file) + for rec in db_iter: + records[rec.id] = rec + result_count += 1 + records = OrderedDict(sorted(list(records.items()), key=lambda i: i[1].junction_length)) + + # Define empty matrix to store assembled results + dist_mat = np.zeros((result_count,result_count)) + + # Count records and define output format + out_type = getFileType(db_file) if out_args['out_type'] is None \ + else out_args['out_type'] + + # Defined successful output handle + pass_handle = getOutputHandle(db_file, + out_label='clone-pass', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + pass_writer = getDbWriter(pass_handle, db_file, add_fields='CLONE') + + # Defined failed cloning output handle + if out_args['failed']: + fail_handle = getOutputHandle(db_file, + out_label='clone-fail', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + fail_writer = getDbWriter(fail_handle, db_file) + else: + fail_handle = None + fail_writer = None + + # Open log file + if out_args['log_file'] is None: + log_handle = None + else: + log_handle = open(out_args['log_file'], 'w') + except: + alive.value = False + raise + + try: + # Iterator over results queue until sentinel object reached + start_time = time() + row_count = rec_count = 0 + while alive.value: + # Get result from queue + if result_queue.empty(): continue + else: result = result_queue.get() + # Exit upon reaching sentinel + if result is None: break + + # Print progress for previous iteration + if row_count == 0: + print('PROGRESS> Assigning clones') + printProgress(row_count, result_count, 0.05, start_time) + + # Update counts for iteration + row_count += 1 + rec_count += len(result) + + # Add result row to distance matrix + if result: + dist_mat[list(range(result_count-len(result),result_count)),result_count-len(result)] = result.results + + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + + # Calculate linkage and carry out clustering + # print dist_mat + clusters = cluster_func(dist_mat, **cluster_args) if dist_mat is not None else None + clones = {} + # print clusters + for i, c in enumerate(clusters): + clones.setdefault(c, []).append(records[list(records.keys())[i]]) + + # Write passed and failed records + clone_count = pass_count = fail_count = 0 + if clones: + for clone in clones.values(): + clone_count += 1 + for i, rec in enumerate(clone): + rec.annotations['CLONE'] = clone_count + pass_writer.writerow(rec.toDict()) + pass_count += 1 + #result.log['CLONE%i-%i' % (clone_count, i + 1)] = str(rec.junction) + + else: + for i, rec in enumerate(result.data): + fail_writer.writerow(rec.toDict()) + fail_count += 1 + #result.log['CLONE0-%i' % (i + 1)] = str(rec.junction) + + # Print final progress + printProgress(row_count, result_count, 0.05, start_time) + + # Close file handles + pass_handle.close() + if fail_handle is not None: fail_handle.close() + if log_handle is not None: log_handle.close() + + # Update return list + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['CLONES'] = clone_count + log['RECORDS'] = rec_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + collect_dict = {'log':log, 'out_files': [pass_handle.name]} + collect_queue.put(collect_dict) + except: + alive.value = False + raise + + return None + + +def defineClones(db_file, feed_func, work_func, collect_func, clone_func, cluster_func=None, + group_func=None, group_args={}, clone_args={}, cluster_args={}, + out_args=default_out_args, nproc=None, queue_size=None): + """ + Define clonally related sequences + + Arguments: + db_file = filename of input database + feed_func = the function that feeds the queue + work_func = the worker function that will run on each CPU + collect_func = the function that collects results from the workers + group_func = the function to use for assigning preclones 
+ clone_func = the function to use for determining clones within preclonal groups + group_args = a dictionary of arguments to pass to group_func + clone_args = a dictionary of arguments to pass to clone_func + out_args = common output argument dictionary from parseCommonArgs + nproc = the number of processQueue processes; + if None defaults to the number of CPUs + queue_size = maximum size of the argument queue; + if None defaults to 2*nproc + + Returns: + a list of successful output file names + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'DefineClones' + log['DB_FILE'] = os.path.basename(db_file) + if group_func is not None: + log['GROUP_FUNC'] = group_func.__name__ + log['GROUP_ARGS'] = group_args + log['CLONE_FUNC'] = clone_func.__name__ + + # TODO: this is yucky, but can be fixed by using a model class + clone_log = clone_args.copy() + if 'dist_mat' in clone_log: del clone_log['dist_mat'] + log['CLONE_ARGS'] = clone_log + + if cluster_func is not None: + log['CLUSTER_FUNC'] = cluster_func.__name__ + log['CLUSTER_ARGS'] = cluster_args + log['NPROC'] = nproc + printLog(log) + + # Define feeder function and arguments + feed_args = {'db_file': db_file, + 'group_func': group_func, + 'group_args': group_args} + # Define worker function and arguments + work_args = {'clone_func': clone_func, + 'clone_args': clone_args} + # Define collector function and arguments + collect_args = {'db_file': db_file, + 'out_args': out_args, + 'cluster_func': cluster_func, + 'cluster_args': cluster_args} + + # Call process manager + result = manageProcesses(feed_func, work_func, collect_func, + feed_args, work_args, collect_args, + nproc, queue_size) + + # Print log + result['log']['END'] = 'DefineClones' + printLog(result['log']) + + return result['out_files'] + + +def getArgParser(): + """ + Defines the ArgumentParser + + Arguments: + None + + Returns: + an ArgumentParser object + """ + # Define input and output fields + fields = dedent( + ''' + output files: + 
clone-pass + database with assigned clonal group numbers. + clone-fail + database with records failing clonal grouping. + + required fields: + SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION + + + sequence field specified by the --sf parameter + + output fields: + CLONE + ''') + + # Define ArgumentParser + parser = ArgumentParser(description=__doc__, epilog=fields, + formatter_class=CommonHelpFormatter) + parser.add_argument('--version', action='version', + version='%(prog)s:' + ' %s-%s' %(__version__, __date__)) + subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='', + help='Cloning method') + # TODO: This is a temporary fix for Python issue 9253 + subparsers.required = True + + # Parent parser + parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, + multiproc=True) + + # Distance cloning method + parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='''Defines clones as having same V assignment, + J assignment, and junction length with + specified substitution distance model.''', + description='''Defines clones as having same V assignment, + J assignment, and junction length with + specified substitution distance model.''') + parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None, + help='Additional fields to use for grouping clones (non VDJ)') + parser_bygroup.add_argument('--mode', action='store', dest='mode', + choices=('allele', 'gene'), default=default_index_mode, + help='''Specifies whether to use the V(D)J allele or gene for + initial grouping.''') + parser_bygroup.add_argument('--act', action='store', dest='action', + choices=('first', 'set'), default=default_index_action, + help='''Specifies how to handle multiple V(D)J assignments + for initial grouping.''') + parser_bygroup.add_argument('--model', action='store', dest='model', + choices=choices_bygroup_model, + 
default=default_bygroup_model, + help='''Specifies which substitution model to use for calculating distance + between sequences. The "ham" model is nucleotide Hamming distance and + "aa" is amino acid Hamming distance. The "hh_s1f" and "hh_s5f" models are + human specific single nucleotide and 5-mer content models, respectively, + from Yaari et al, 2013. The "mk_rs1nf" and "mk_rs5nf" models are + mouse specific single nucleotide and 5-mer content models, respectively, + from Cui et al, 2016. The "m1n_compat" and "hs1f_compat" models are + deprecated models provided backwards compatibility with the "m1n" and + "hs1f" models in Change-O v0.3.3 and SHazaM v0.1.4. Both + 5-mer models should be considered experimental.''') + parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float, + default=default_distance, + help='The distance threshold for clonal grouping') + parser_bygroup.add_argument('--norm', action='store', dest='norm', + choices=('len', 'mut', 'none'), default=default_norm, + help='''Specifies how to normalize distances. One of none + (do not normalize), len (normalize by length), + or mut (normalize by number of mutations between sequences).''') + parser_bygroup.add_argument('--sym', action='store', dest='sym', + choices=('avg', 'min'), default=default_sym, + help='''Specifies how to combine asymmetric distances. 
One of avg + (average of A->B and B->A) or min (minimum of A->B and B->A).''') + parser_bygroup.add_argument('--link', action='store', dest='linkage', + choices=('single', 'average', 'complete'), default=default_linkage, + help='''Type of linkage to use for hierarchical clustering.''') + parser_bygroup.add_argument('--sf', action='store', dest='seq_field', + default=default_seq_field, + help='''The name of the field to be used to calculate + distance between records''') + parser_bygroup.set_defaults(feed_func=feedQueue) + parser_bygroup.set_defaults(work_func=processQueue) + parser_bygroup.set_defaults(collect_func=collectQueue) + parser_bygroup.set_defaults(group_func=indexJunctions) + parser_bygroup.set_defaults(clone_func=distanceClones) + + # Chen2010 + parser_chen = subparsers.add_parser('chen2010', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='''Defines clones by method specified in Chen, 2010.''', + description='''Defines clones by method specified in Chen, 2010.''') + parser_chen.set_defaults(feed_func=feedQueueClust) + parser_chen.set_defaults(work_func=processQueueClust) + parser_chen.set_defaults(collect_func=collectQueueClust) + parser_chen.set_defaults(cluster_func=hierClust) + + # Ademokun2011 + parser_ade = subparsers.add_parser('ademokun2011', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='''Defines clones by method specified in Ademokun, 2011.''', + description='''Defines clones by method specified in Ademokun, 2011.''') + parser_ade.set_defaults(feed_func=feedQueueClust) + parser_ade.set_defaults(work_func=processQueueClust) + parser_ade.set_defaults(collect_func=collectQueueClust) + parser_ade.set_defaults(cluster_func=hierClust) + + return parser + + +if __name__ == '__main__': + """ + Parses command line arguments and calls main function + """ + # Parse arguments + parser = getArgParser() + checkArgs(parser) + args = parser.parse_args() + args_dict = parseCommonArgs(args) + # Convert case of 
fields + if 'seq_field' in args_dict: + args_dict['seq_field'] = args_dict['seq_field'].upper() + if 'fields' in args_dict and args_dict['fields'] is not None: + args_dict['fields'] = [f.upper() for f in args_dict['fields']] + + # Define clone_args + if args.command == 'bygroup': + args_dict['group_args'] = {'fields': args_dict['fields'], + 'action': args_dict['action'], + 'mode':args_dict['mode']} + args_dict['clone_args'] = {'model': args_dict['model'], + 'distance': args_dict['distance'], + 'norm': args_dict['norm'], + 'sym': args_dict['sym'], + 'linkage': args_dict['linkage'], + 'seq_field': args_dict['seq_field']} + + # Get distance matrix + try: + args_dict['clone_args']['dist_mat'] = distance_models[args_dict['model']] + except KeyError: + sys.exit('Unrecognized distance model: %s' % args_dict['model']) + + del args_dict['fields'] + del args_dict['action'] + del args_dict['mode'] + del args_dict['model'] + del args_dict['distance'] + del args_dict['norm'] + del args_dict['sym'] + del args_dict['linkage'] + del args_dict['seq_field'] + + # Define clone_args + if args.command == 'chen2010': + args_dict['clone_func'] = distChen2010 + args_dict['cluster_args'] = {'method': args.command } + + if args.command == 'ademokun2011': + args_dict['clone_func'] = distAdemokun2011 + args_dict['cluster_args'] = {'method': args.command } + + # Call defineClones + del args_dict['command'] + del args_dict['db_files'] + for f in args.__dict__['db_files']: + args_dict['db_file'] = f + defineClones(**args_dict) diff -r 000000000000 -r 183edf446dcf IMGT_Human_IGHD.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/IMGT_Human_IGHD.fasta Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,89 @@ +>X97051|IGHD1-1*01|Homo_sapiens|F|D-REGION|33714..33730|17 nt|1| | | | |17+0=17| | | +ggtacaactggaacgac +>X13972|IGHD1-14*01|Homo_sapiens|ORF|D-REGION|14518..14534|17 nt|1| | | | |17+0=17| | | +ggtataaccggaaccac +>X97051|IGHD1-20*01|Homo_sapiens|F|D-REGION|62015..62031|17 nt|1| | | | |17+0=17| | 
| +ggtataactggaacgac +>X97051|IGHD1-26*01|Homo_sapiens|F|D-REGION|72169..72188|20 nt|1| | | | |20+0=20| | | +ggtatagtgggagctactac +>X13972|IGHD1-7*01|Homo_sapiens|F|D-REGION|5266..5282|17 nt|1| | | | |17+0=17| | | +ggtataactggaactac +>X55575|IGHD1/OR15-1a*01|Homo_sapiens|ORF|D-REGION|63..79|17 nt|1| | | | |17+0=17| | | +ggtataactggaacaac +>X55576|IGHD1/OR15-1b*01|Homo_sapiens|ORF|D-REGION|63..79|17 nt|1| | | | |17+0=17| | | +ggtataactggaacaac +>J00234|IGHD2-15*01|Homo_sapiens|F|D-REGION|29..59|31 nt|1| | | | |31+0=31| | | +aggatattgtagtggtggtagctgctactcc +>J00232|IGHD2-2*01|Homo_sapiens|F|D-REGION|29..59|31 nt|1| | | | |31+0=31| | | +aggatattgtagtagtaccagctgctatgcc +>X97051|IGHD2-2*02|Homo_sapiens|F|D-REGION|36367..36397|31 nt|1| | | | |31+0=31| | | +aggatattgtagtagtaccagctgctatacc +>M35648|IGHD2-2*03|Homo_sapiens|F|D-REGION|70..100|31 nt|1| | | | |31+0=31| | | +tggatattgtagtagtaccagctgctatgcc +>J00235|IGHD2-21*01|Homo_sapiens|F|D-REGION|29..56|28 nt|1| | | | |28+0=28| | | +agcatattgtggtggtgattgctattcc +>X97051|IGHD2-21*02|Homo_sapiens|F|D-REGION|64644..64671|28 nt|1| | | | |28+0=28| | | +agcatattgtggtggtgactgctattcc +>X13972|IGHD2-8*01|Homo_sapiens|F|D-REGION|7949..7979|31 nt|1| | | | |31+0=31| | | +aggatattgtactaatggtgtatgctatacc +>J00233|IGHD2-8*02|Homo_sapiens|F|D-REGION|29..59|31 nt|1| | | | |31+0=31| | | +aggatattgtactggtggtgtatgctatacc +>X55577|IGHD2/OR15-2a*01|Homo_sapiens|ORF|D-REGION|68..98|31 nt|1| | | | |31+0=31| | | +agaatattgtaatagtactactttctatgcc +>X55578|IGHD2/OR15-2b*01|Homo_sapiens|ORF|D-REGION|68..98|31 nt|1| | | | |31+0=31| | | +agaatattgtaatagtactactttctatgcc +>X13972|IGHD3-10*01|Homo_sapiens|F|D-REGION|10659..10689|31 nt|1| | | | |31+0=31| | | +gtattactatggttcggggagttattataac +>X93615|IGHD3-10*02|Homo_sapiens|F|D-REGION|30..59|30 nt|1| | | | |30+0=30| | | +gtattactatgttcggggagttattataac +>X93614|IGHD3-16*01|Homo_sapiens|F|D-REGION|30..66|37 nt|1| | | | |37+0=37| | | +gtattatgattacgtttgggggagttatgcttatacc 
+>X97051|IGHD3-16*02|Homo_sapiens|F|D-REGION|57552..57588|37 nt|1| | | | |37+0=37| | | +gtattatgattacgtttgggggagttatcgttatacc +>X93616|IGHD3-22*01|Homo_sapiens|F|D-REGION|30..60|31 nt|1| | | | |31+0=31| | | +gtattactatgatagtagtggttattactac +>X13972|IGHD3-3*01|Homo_sapiens|F|D-REGION|804..834|31 nt|1| | | | |31+0=31| | | +gtattacgatttttggagtggttattatacc +>X93618|IGHD3-3*02|Homo_sapiens|F|D-REGION|30..60|31 nt|1| | | | |31+0=31| | | +gtattagcatttttggagtggttattatacc +>X13972|IGHD3-9*01|Homo_sapiens|F|D-REGION|10475..10505|31 nt|1| | | | |31+0=31| | | +gtattacgatattttgactggttattataac +>X55579|IGHD3/OR15-3a*01|Homo_sapiens|ORF|D-REGION|210..240|31 nt|1| | | | |31+0=31| | | +gtattatgatttttggactggttattatacc +>X55580|IGHD3/OR15-3b*01|Homo_sapiens|ORF|D-REGION|210..240|31 nt|1| | | | |31+0=31| | | +gtattatgatttttggactggttattatacc +>X13972|IGHD4-11*01|Homo_sapiens|ORF|D-REGION|11550..11565|16 nt|1| | | | |16+0=16| | | +tgactacagtaactac +>X97051|IGHD4-17*01|Homo_sapiens|F|D-REGION|58699..58714|16 nt|1| | | | |16+0=16| | | +tgactacggtgactac +>X97051|IGHD4-23*01|Homo_sapiens|ORF|D-REGION|68334..68352|19 nt|1| | | | |19+0=19| | | +tgactacggtggtaactcc +>X13972|IGHD4-4*01|Homo_sapiens|F|D-REGION|1952..1967|16 nt|1| | | | |16+0=16| | | +tgactacagtaactac +>X55581|IGHD4/OR15-4a*01|Homo_sapiens|ORF|D-REGION|83..101|19 nt|1| | | | |19+0=19| | | +tgactatggtgctaactac +>X55582|IGHD4/OR15-4b*01|Homo_sapiens|ORF|D-REGION|82..100|19 nt|1| | | | |19+0=19| | | +tgactatggtgctaactac +>X13972|IGHD5-12*01|Homo_sapiens|F|D-REGION|12506..12528|23 nt|1| | | | |23+0=23| | | +gtggatatagtggctacgattac +>X97051|IGHD5-18*01|Homo_sapiens|F|D-REGION|59661..59680|20 nt|1| | | | |20+0=20| | | +gtggatacagctatggttac +>X97051|IGHD5-24*01|Homo_sapiens|ORF|D-REGION|69300..69319|20 nt|1| | | | |20+0=20| | | +gtagagatggctacaattac +>X13972|IGHD5-5*01|Homo_sapiens|F|D-REGION|2913..2932|20 nt|1| | | | |20+0=20| | | +gtggatacagctatggttac +>X55583|IGHD5/OR15-5a*01|Homo_sapiens|ORF|D-REGION|94..116|23 nt|1| | | | |23+0=23| 
| | +gtggatatagtgtctacgattac +>X55584|IGHD5/OR15-5b*01|Homo_sapiens|ORF|D-REGION|94..116|23 nt|1| | | | |23+0=23| | | +gtggatatagtgtctacgattac +>X13972|IGHD6-13*01|Homo_sapiens|F|D-REGION|14011..14031|21 nt|1| | | | |21+0=21| | | +gggtatagcagcagctggtac +>X97051|IGHD6-19*01|Homo_sapiens|F|D-REGION|61503..61523|21 nt|1| | | | |21+0=21| | | +gggtatagcagtggctggtac +>X97051|IGHD6-25*01|Homo_sapiens|F|D-REGION|71666..71683|18 nt|1| | | | |18+0=18| | | +gggtatagcagcggctac +>X13972|IGHD6-6*01|Homo_sapiens|F|D-REGION|4762..4779|18 nt|1| | | | |18+0=18| | | +gagtatagcagctcgtcc +>J00256|IGHD7-27*01|Homo_sapiens|F|D-REGION|621..631|11 nt|1| | | | |11+0=11| | | +ctaactgggga + diff -r 000000000000 -r 183edf446dcf IMGT_Human_IGHJ.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/IMGT_Human_IGHJ.fasta Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,31 @@ +>J00256|IGHJ1*01|Homo_sapiens|F|J-REGION|723..774|52 nt|1| | | | |52+0=52| | | +gctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag +>J00256|IGHJ2*01|Homo_sapiens|F|J-REGION|932..984|53 nt|2| | | | |53+0=53| | | +ctactggtacttcgatctctggggccgtggcaccctggtcactgtctcctcag +>J00256|IGHJ3*01|Homo_sapiens|F|J-REGION|1537..1586|50 nt|2| | | | |50+0=50| | | +tgatgcttttgatgtctggggccaagggacaatggtcaccgtctcttcag +>X86355|IGHJ3*02|Homo_sapiens|F|J-REGION|1107..1156|50 nt|2| | | | |50+0=50| | | +tgatgcttttgatatctggggccaagggacaatggtcaccgtctcttcag +>J00256|IGHJ4*01|Homo_sapiens|F|J-REGION|1912..1959|48 nt|3| | | | |48+0=48| | | +actactttgactactggggccaaggaaccctggtcaccgtctcctcag +>X86355|IGHJ4*02|Homo_sapiens|F|J-REGION|1480..1527|48 nt|3| | | | |48+0=48| | | +actactttgactactggggccagggaaccctggtcaccgtctcctcag +>M25625|IGHJ4*03|Homo_sapiens|F|J-REGION|446..493|48 nt|3| | | | |48+0=48| | | +gctactttgactactggggccaagggaccctggtcaccgtctcctcag +>J00256|IGHJ5*01|Homo_sapiens|F|J-REGION|2354..2404|51 nt|3| | | | |51+0=51| | | +acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag +>X86355|IGHJ5*02|Homo_sapiens|F|J-REGION|1878..1928|51 nt|3| | | | |51+0=51| | | 
+acaactggttcgacccctggggccagggaaccctggtcaccgtctcctcag +>J00256|IGHJ6*01|Homo_sapiens|F|J-REGION|2947..3009|63 nt|3| | | | |63+0=63| | | +attactactactactacggtatggacgtctgggggcaagggaccacggtcaccgtctcct +cag +>X86355|IGHJ6*02|Homo_sapiens|F|J-REGION|2482..2543|62 nt|3| | | | |62+0=62| | | +attactactactactacggtatggacgtctggggccaagggaccacggtcaccgtctcct +ca +>X86356|IGHJ6*03|Homo_sapiens|F|J-REGION|2482..2543|62 nt|3| | | | |62+0=62| | | +attactactactactactacatggacgtctggggcaaagggaccacggtcaccgtctcct +ca +>AJ879487|IGHJ6*04|Homo_sapiens|F|J-REGION|39..101|63 nt|3| | | | |63+0=63| | | +attactactactactacggtatggacgtctggggcaaagggaccacggtcaccgtctcct +cag + diff -r 000000000000 -r 183edf446dcf IMGT_Human_IGHV.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/IMGT_Human_IGHV.fasta Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,2442 @@ +>M99641|IGHV1-18*01|Homo_sapiens|F|V-REGION|188..483|296 nt|1| | | | |296+24=320| | | +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctatggtatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac... +...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca +gacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggcc +gtgtattactgtgcgagaga +>X60503|IGHV1-18*02|Homo_sapiens|F|V-REGION|142..417|276 nt|1| | | | |276+24=300|partial in 3'| | +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctatggtatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac... +...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca +gacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc +>HM855463|IGHV1-18*03|Homo_sapiens|F|V-REGION|21..316|296 nt|1| | | | |296+24=320| | | +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctatggtatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac... 
+...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca +gacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggcc +gtgtattactgtgcgagaga +>KC713938|IGHV1-18*04|Homo_sapiens|F|V-REGION|392..687|296 nt|1| | | | |296+24=320| | | +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctacggtatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac... +...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca +gacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggcc +gtgtattactgtgcgagaga +>X07448|IGHV1-2*01|Homo_sapiens|F|V-REGION|269..564|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagg +gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtc +gtgtattactgtgcgagaga +>X62106|IGHV1-2*02|Homo_sapiens|F|V-REGION|163..458|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagg +gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggcc +gtgtattactgtgcgagaga +>X92208|IGHV1-2*03|Homo_sapiens|F|V-REGION|160..455|296 nt|1| | || |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac +tgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac... 
+...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagg +gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggcc +gtgtattactgtgcgagaga +>KF698733|IGHV1-2*04|Homo_sapiens|F|V-REGION|393..688|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagg +gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggcc +gtgtattactgtgcgagaga +>HM855674|IGHV1-2*05|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagg +gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtc +gtgtattactgtgcgagaga +>M99642|IGHV1-24*01|Homo_sapiens|F|V-REGION|210..505|296 nt|1| | | | |296+24=320| | | +caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggtttccggatacaccctc............actgaattatccatgcac +tgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa... +...gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgag +gacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcaacaga +>X62109|IGHV1-3*01|Homo_sapiens|F|V-REGION|163..458|296 nt|1| | | | |296+24=320| | | +caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgcat +tgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc... 
+...aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagg +gacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggct +gtgtattactgtgcgagaga +>X62107|IGHV1-3*02|Homo_sapiens|F|V-REGION|157..452|296 nt|1| | | | |296+24=320| | | +caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgcat +tgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc... +...aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagg +gacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggct +gtgtattactgtgcgagaga +>KF698736|IGHV1-38-4*01|Homo_sapiens|ORF|V-REGION|391..686|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaa +gtctcctgtagtttttctgggtttaccatc............accagctacggtatacat +tgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc... +...aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagg +gacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggct +gtgtattactatgcaagaca +>X92209|IGHV1-45*01|Homo_sapiens|F|V-REGION|144..439|296 nt|1| | || |296+24=320| | | +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaag +gtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcac +tgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc... +...aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagg +gacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagcc +atgtattactgtgcaagana +>AB019438|IGHV1-45*02|Homo_sapiens|F|V-REGION|126317..126612|296 nt|1| | | | |296+24=320| | | +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaag +gtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcac +tgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc... 
+...aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagg +gacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagcc +atgtattactgtgcaagata +>Z17391|IGHV1-45*03|Homo_sapiens|F|V-REGION|1..260|260 nt|1| | | | |260+58=318|partial in 5'| | +.....................................agaagactgggtcctcagtgaag +gtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcac +tgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc... +...aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagg +gacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagcc +atgtattactgtgcaaga +>X92343|IGHV1-46*01|Homo_sapiens|F|V-REGION|295..590|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtttcctgcaaggcatctggatacaccttc............accagctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt... +...ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagg +gacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>J00240|IGHV1-46*02|Homo_sapiens|F|V-REGION|402..697|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtttcctgcaaggcatctggatacaccttc............aacagctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt... +...ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagg +gacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>L06612|IGHV1-46*03|Homo_sapiens|F|V-REGION|266..561|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtttcctgcaaggcatctggatacaccttc............accagctactatatgcac +tgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt... 
+...ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagg +gacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgctagaga +>M29809|IGHV1-58*01|Homo_sapiens|F|V-REGION|293..588|296 nt|1| | | | |296+24=320| | | +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaag +gtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcag +tgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc... +...agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagg +gacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggcc +gtgtattactgtgcggcaga +>AB019438|IGHV1-58*02|Homo_sapiens|F|V-REGION|10875..11170|296 nt|1| | | | |296+24=320| | | +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaag +gtctcctgcaaggcttctggattcaccttt............actagctctgctatgcag +tgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc... +...agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagg +gacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggcc +gtgtattactgtgcggcaga +>AB019437|IGHV1-68*01|Homo_sapiens|P|V-REGION|129383..129678|296 nt|1| | | | |296+24=320| | | +caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaag +gtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcac +tggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac... +...aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagg +gacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggct +gtgtattactgggcaagata +>L22582|IGHV1-69*01|Homo_sapiens|F|V-REGION|376..671|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... 
+...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z27506|IGHV1-69*02|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc... +...cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgaga +>X92340|IGHV1-69*03|Homo_sapiens|F|V-REGION|133..407|275 nt|1| | | | |275+24=299|partial in 3'| | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... +...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc +>M83132|IGHV1-69*04|Homo_sapiens|F|V-REGION|406..701|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc... +...cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>X67905|IGHV1-69*05|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... 
+...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgaga +>L22583|IGHV1-69*06|Homo_sapiens|F|V-REGION|376..671|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... +...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z29978|IGHV1-69*07|Homo_sapiens|F|V-REGION|1..233|233 nt|1| | | | |233+58=291|partial in 5' and in 3' | | +.....................................agaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc... +...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag +>Z14309|IGHV1-69*08|Homo_sapiens|F|V-REGION|97..392|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc... +...cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z14307|IGHV1-69*09|Homo_sapiens|F|V-REGION|97..392|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc... 
+...cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z14300|IGHV1-69*10|Homo_sapiens|F|V-REGION|97..392|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... +...cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z14296|IGHV1-69*11|Homo_sapiens|F|V-REGION|97..392|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc... +...cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z14301|IGHV1-69*12|Homo_sapiens|F|V-REGION|97..392|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... +...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>Z14214|IGHV1-69*13|Homo_sapiens|(F)|V-REGION|55..350|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... 
+...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>KC713948|IGHV1-69*14|Homo_sapiens|F|V-REGION|394..689|296 nt|1| | | | |296+24=320| | | +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... +...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>KF698734|IGHV1-69-2*01|Homo_sapiens|F|V-REGION|393..688|296 nt|1| | | | |296+24=320| | | +gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaa +atctcctgcaaggtttctggatacaccttc............accgactactacatgcac +tgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa... +...gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcg +gacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcaacaga +>Z29977|IGHV1-69-2*02|Homo_sapiens|F|V-REGION|1..233|233 nt|1| | | | |233+58=291|partial in 5'| | +.....................................agaagcctggggctacagtgaaa +atctcctgcaaggtttctggatacaccttc............accgactactacatgcac +tgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa... +...gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcg +gacacgtctacagacacagcctacatggagctgagcagcctgagatctgag +>KC713934|IGHV1-69D*01|Homo_sapiens|F|V-REGION|394..689|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaag +gtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagc +tgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc... 
+...tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcg +gacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>M99637|IGHV1-8*01|Homo_sapiens|F|V-REGION|201..496|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accagttatgatatcaac +tgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac... +...agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccagg +aacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagagg +>HM855457|IGHV1-8*02|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accagctatgatatcaac +tgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac... +...agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccagg +aacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagagg +>M13911|IGHV1-NL1*01|Homo_sapiens|P|V-REGION|125..420|296 nt|1| | | | |296+24=320| | | +caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaag +gtctcctgctaggcttccagatacaccttc............accaaatactttacacgg +tgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac... +...aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagt +gacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtc +gtgtattactgtgtgagaga +>Z29631|IGHV1/OR15-1*01|Homo_sapiens|ORF|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacatcttc............accgactactatatgcac +tgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac... 
+...agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagg +gacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggcc +acgtattactgtgcgaga +>AJ004954|IGHV1/OR15-1*02|Homo_sapiens|ORF|V-REGION|25..320|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacatcttc............accgactactatatgcac +tgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagg +gacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggcc +acgtattactgtgcgagaga +>HM855589|IGHV1/OR15-1*03|Homo_sapiens|ORF|V-REGION|23..318|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacatcttc............accgactactatatgcac +tgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagg +gacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagcc +acgtattactgtgcgagaga +>HM855394|IGHV1/OR15-1*04|Homo_sapiens|ORF|V-REGION|24..319|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacatcttc............accgactactatatgcac +tgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac... +...agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagg +gacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggcc +acgtattactgtgcgagaga +>L25543|IGHV1/OR15-2*01|Homo_sapiens|P|V-REGION|229..524|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctactatatgcac +tgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac... 
+...aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccaga +gacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggcc +gtgtattactgtgcgagaga +>HM855297|IGHV1/OR15-2*02|Homo_sapiens|P|V-REGION|24..319|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctactatatgcac +tgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac... +...aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccaga +gacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggcc +gtgtattactgtgcgagaga +>HM855556|IGHV1/OR15-2*03|Homo_sapiens|P|V-REGION|20..315|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaag +gtctcctgcaaggcttctggttacaccttt............accagctactatatgcac +tgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac... +...aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccaga +gacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggcc +gtgtattactgtgcgagaga +>Z29595|IGHV1/OR15-3*01|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accgactactttatgaac +tggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc... +...aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagg +gacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgaga +>HM855458|IGHV1/OR15-3*02|Homo_sapiens|P|V-REGION|21..316|296 nt|1| | | | |296+24=320| | | +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accgactactttatgaac +tggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc... 
+...aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagg +gacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgagaga +>J00238|IGHV1/OR15-3*03|Homo_sapiens|P|V-REGION|375..668|294 nt|1| | | | |294+24=318| | | +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accagctactatatgaac +tggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc... +...aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagg +gacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgaga +>Z29596|IGHV1/OR15-4*01|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaag +gtctccttcaaggcttctggatacaccttc............accaacaactttatgcac +tgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc... +...aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagg +gacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggcc +gtgtattactgtgcgaga +>Z29633|IGHV1/OR15-5*01|Homo_sapiens|ORF|V-REGION|1..260|260 nt|1| | | | |260+58=318|partial in 5'| | +.....................................agaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accagctactgtatgcac +tgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt... +...gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagg +gacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggcc +atgtattactgtgtgaga +>Z12314|IGHV1/OR15-5*02|Homo_sapiens|ORF|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccttc............accaactactgtatgcac +tgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt... 
+...gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagg +gacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggcc +atgtattactgtgtgaga +>L25542|IGHV1/OR15-9*01|Homo_sapiens|ORF|V-REGION|188..483|296 nt|1| | | | |296+24=320| | | +caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgagg +atctcctgcaaggcttctggatacaccttc............accagctactgtatgcac +tgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt... +...gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagg +gacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggcc +atgtattactgtgtgagaga +>AF254982|IGHV1/OR21-1*01|Homo_sapiens|ORF|V-REGION|164866..165161|296 nt|1| | | | |296+24=320| | | +caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag +gtctcctgcaaggcttctggatacaccatc............accagctactgtatgcac +tgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt... +...gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagg +gacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggcc +atgtattactgtgtgagaga +>M99647|IGHV2-10*01|Homo_sapiens|P|V-REGION|211..511|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatg +ctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggt +tagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat... +......gataataaatactacagcccatctctgaag...agtaggctcattatctccaag +gacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagcc +acacattactgtgcaaggagac +>M99648|IGHV2-26*01|Homo_sapiens|F|V-REGION|164..464|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacg +ctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat... 
+......gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaag +gacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagcc +acatattactgtgcacggatac +>X62111|IGHV2-5*01|Homo_sapiens|F|V-REGION|214..514|301 nt|1| | | | |301+21=322| | | +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat... +......gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacagac +>KF698731|IGHV2-5*02|Homo_sapiens|F|V-REGION|394..694|301 nt|1| | | | |301+21=322| | | +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat... +......gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacagac +>X93619|IGHV2-5*03|Homo_sapiens|F|V-REGION|1..210|210 nt|1| | | | |210+50=260|partial in 5' and in 3' | | +................................gctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat... +......gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaag +gacacctccaaaaaccaggt +>L21963|IGHV2-5*04|Homo_sapiens|F|V-REGION|144..438|295 nt|1| | | | |295+21=316| | | +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat... 
+......gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggc +acatattactgtgtac +>L21964|IGHV2-5*05|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat... +......gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacagac +>L21966|IGHV2-5*06|Homo_sapiens|F|V-REGION|143..442|300 nt|1| | | | |300+21=321| | | +cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat... +......gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacaga +>L21971|IGHV2-5*08|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat... +......gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacagac +>L21972|IGHV2-5*09|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggc +tggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat... 
+......gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacagac +>L21969|IGHV2-70*01|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat... +......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattactgtgcacggatac +>X92241|IGHV2-70*02|Homo_sapiens|F|V-REGION|144..433|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat... +......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggcc +gtgtattactg +>X92238|IGHV2-70*03|Homo_sapiens|F|V-REGION|144..433|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... +......gatgataaattctacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggcc +gtgtattactg +>Z12330|IGHV2-70*04|Homo_sapiens|F|V-REGION|1..288|288 nt|1| | | | |288+21=309|partial in 3'| | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... 
+......gatgataaattctacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattac +>Z27502|IGHV2-70*05|Homo_sapiens|F|V-REGION|1..237|237 nt|1| | | | |237+47=284|partial in 5' and in 3' | | +..........................t...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... +......gatgataaattctacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatgga +>X92239|IGHV2-70*06|Homo_sapiens|F|V-REGION|144..433|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... +......gatgataaattctacagcacatccctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggcc +gtgtattactg +>X92243|IGHV2-70*07|Homo_sapiens|F|V-REGION|144..433|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat... +......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggcc +gtgtattactg +>X92245|IGHV2-70*08|Homo_sapiens|F|V-REGION|144..433|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... 
+......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggcc +gtgtattactg +>L21962|IGHV2-70*09|Homo_sapiens|ORF|V-REGION|144..440|297 nt|1| | | | |297+21=318| | | +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat... +......gatgataaatactacagcacatctctgaac...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggc +acatattactgtgtacgg +>L21965|IGHV2-70*10|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat... +......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattactgtgcacggatac +>L21967|IGHV2-70*11|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... +......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattactgtgcacggatac +>L21970|IGHV2-70*12|Homo_sapiens|F|V-REGION|144..444|301 nt|1| | | | |301+21=322| | | +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacg +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat... 
+......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acatattactgtgcacacagac +>AB019437|IGHV2-70*13|Homo_sapiens|F|V-REGION|110422..110722|301 nt|1| | | | |301+21=322| | | +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat... +......gatgataaatactacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattattgtgcacggatac +>KC713935|IGHV2-70D*04|Homo_sapiens|F|V-REGION|394..694|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat... +......gatgataaattctacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattactgtgcacggatac +>KC713949|IGHV2-70D*14|Homo_sapiens|F|V-REGION|394..694|301 nt|1| | | | |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcaca +ctgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagc +tggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat... +......gatgataaattctacagcacatctctgaag...accaggctcaccatctccaag +gacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagcc +acgtattactgtgcacggatac +>L25544|IGHV2/OR16-5*01|Homo_sapiens|ORF|V-REGION|170..470|301 nt|1| | || |301+21=322| | | +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacg +ctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagc +tggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat... 
+......gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaag +gacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagcc +acgtattactgtgcatggagag +>M99652|IGHV3-11*01|Homo_sapiens|F|V-REGION|202..497|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagg +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>X92287|IGHV3-11*03|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgaga +>HM855329|IGHV3-11*04|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagg +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>HM855583|IGHV3-11*05|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... 
+...agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>KC713940|IGHV3-11*06|Homo_sapiens|F|V-REGION|405..700|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>X92217|IGHV3-13*01|Homo_sapiens|F|V-REGION|162..454|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctacgacatgcac +tgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct... +......ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccaga +gaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggct +gtgtattactgtgcaagaga +>M99653|IGHV3-13*02|Homo_sapiens|F|V-REGION|467..759|293 nt|1| | | | |293+27=320| | | +gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgaga +ctctcctgtgcagcctctggattcaccttc............agtaactacgacatgcac +tgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct... +......ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccaga +gaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggct +gtgtattactgtgcaagaga +>U29582|IGHV3-13*03|Homo_sapiens|F|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcac +tgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct... 
+......ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccaga +gaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggct +gtgtattactgtgcaaga +>HM855616|IGHV3-13*04|Homo_sapiens|F|V-REGION|22..314|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctacgacatgcac +tgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct... +......ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccaga +gaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggct +gtgtattactgtgcaagaga +>KC713939|IGHV3-13*05|Homo_sapiens|F|V-REGION|411..703|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctacgacatgcac +tgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct... +......ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccaga +gaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggct +gtgtattactgtgcaagaga +>X92216|IGHV3-15*01|Homo_sapiens|F|V-REGION|162..463|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttaga +ctctcctgtgcagcctctggattcactttc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaact +gatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtaccacaga +>M99654|IGHV3-15*02|Homo_sapiens|F|V-REGION|176..477|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttaga +ctctcctgtgcagcctctggattcactttc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaact +gatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtaccacaga +>M99408|IGHV3-15*03|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttaga 
+ctctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagct +aatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaaga +gttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtaccacaga +>M99402|IGHV3-15*04|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttaga +ctctcctgtgcagcctctggattcactttc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaact +gatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtaccacaga +>M99403|IGHV3-15*05|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttaga +ctctcctgtgcagcctctggattcactttc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaact +gatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagcc +gtgtattactgtaccacaga +>M99404|IGHV3-15*06|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttaga +ctctcctgtgcagcctctggattcactttc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaact +gatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtaccacaga +>M99406|IGHV3-15*07|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttaga +ctctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaac +tgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaact +gatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtaccacaga 
+>M99400|IGHV3-15*08|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttaga +ctctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagct +aatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggcc +gtgtattactgtaccacagg +>M99655|IGHV3-16*01|Homo_sapiens|ORF|V-REGION|188..483|296 nt|1| | | | |296+24=320| | | +gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaac +tgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat... +...ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccaga +gacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggct +gtgtattactgtgtgagaaa +>AB019440|IGHV3-16*02|Homo_sapiens|ORF|V-REGION| |296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaac +tgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat... +...ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccaga +gacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggct +gtgtattactgtgtgagaaa +>M99656|IGHV3-19*01|Homo_sapiens|P|V-REGION|296..591|296 nt|1| | | | |296+24=320| | | +acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaac +tgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat... +...ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccaga +gacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggct +gtgtattactgtgtgagaaa +>M99657|IGHV3-20*01|Homo_sapiens|F|V-REGION|170..465|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............gatgattatggcatgagc +tgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat... 
+...ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggcc +ttgtatcactgtgcgagaga +>KC713937|IGHV3-20*02|Homo_sapiens|ORF|V-REGION|411..706|296 nt|1| | || |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgaga +ctctcctttgcagcctctggattcaccttt............gatgattatggcatgagc +tgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat... +...ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggcc +ttgtatcactgtgcgagaga +>AB019439|IGHV3-21*01|Homo_sapiens|F|V-REGION|197575..197870|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt... +...agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M99658|IGHV3-21*02|Homo_sapiens|F|V-REGION|169..464|296 nt|1| | | | |296+24=320| | | +gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt... +...agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>HM855323|IGHV3-21*03|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt... 
+...agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagct +gtgtattactgtgcgagaga +>HM855688|IGHV3-21*04|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| |rev-compl| +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt... +...agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>M99659|IGHV3-22*01|Homo_sapiens|P|V-REGION|245..546|302 nt|1| | | | |302+18=320| | | +gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agttactactacatgagc +ggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagct +aatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaaga +gatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggcc +gtgtattactgttccagaga +>AB019439|IGHV3-22*02|Homo_sapiens|P|V-REGION|174880..175181|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agttactactacatgagc +ggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagct +aatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaaga +gatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggcc +gtgtattactgttccagaga +>M99660|IGHV3-23*01|Homo_sapiens|F|V-REGION|170..465|296 nt|1| | | | |296+24=320| | | +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agcagctatgccatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt... 
+...ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtatattactgtgcgaaaga +>M35415|IGHV3-23*02|Homo_sapiens|F|V-REGION|190..485|296 nt|1| | | | |296+24=320| | | +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agcagctatgccatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt... +...ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtatattactgtgcgaaaga +>AM940223|IGHV3-23*03|Homo_sapiens|F|V-REGION|1..296|296 nt|1| | | | |296+24=320| | | +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agcagctatgccatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +...ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccaga +gataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtatattactgtgcgaaaga +>AJ879486|IGHV3-23*04|Homo_sapiens|F|V-REGION|147..442|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agcagctatgccatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt... +...ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtatattactgtgcgaaaga +>AY757302|IGHV3-23*05|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318|partial in 3'| | +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agcagctatgccatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt... 
+...ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtatattactgtgcgaaa +>AC244492|IGHV3-23D*01|Homo_sapiens|F|V-REGION|21795..22090|296 nt|1| | | | |296+24=320| | | +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agcagctatgccatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt... +...ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtatattactgtgcgaaaga +>M99661|IGHV3-25*01|Homo_sapiens|P|V-REGION|236..531|296 nt|1| | | | |296+24=320| | | +gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgaga +ctctcctgtgcagcctctcaattcaccttc............agtagctactacatgaac +tgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat... +...gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccaga +gataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggcc +ctctattagtgtaccagaga +>AB019439|IGHV3-25*02|Homo_sapiens|P|V-REGION|143626..143921|296 nt|1| | | | |296+24=320| | | +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgaga +ctctcctgtgcagcctctcaattcaccttc............agtagctactacatgaac +tgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat... +...gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccaga +gataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggcc +ctctattagtgtaccagaga +>Z12356|IGHV3-25*03|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgaga +ctctcctgtgcagcctctcaattcaccttc............agtagctactacatgaac +tgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat... 
+...gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccaga +gataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggcc +ctgtattagtgtaccaga +>HM855898|IGHV3-25*04|Homo_sapiens|ORF|V-REGION|22..317|296 nt|1| | || |296+24=320| | | +gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgaga +ctctcctgtgcagcctctcaattcaccttc............agtagctactacatgaac +tgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat... +...gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccaga +gataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggcc +ctgtattactgtaccagaga +>HM855413|IGHV3-25*05|Homo_sapiens|P|V-REGION|22..317|296 nt|1| | | | |296+24=320| |rev-compl| +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgaga +ctctcctgtgcagcctctcaattcaccttc............agtagctactacatgaac +tgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat... +...gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccaga +gataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggcc +ctctattagtgtaccagaga +>AB019439|IGHV3-29*01|Homo_sapiens|P|V-REGION|101886..102183|298 nt|1| | || |298+24=322| | | +gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgaga +ctctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagc +ccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat... +...ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaa +gacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggct +gtgtatggctgtacataaggtt +>M83134|IGHV3-30*01|Homo_sapiens|F|V-REGION|1940..2235|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... 
+...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>L26401|IGHV3-30*02|Homo_sapiens|F|V-REGION|104..399|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaaaga +>M99663|IGHV3-30*03|Homo_sapiens|F|V-REGION|168..463|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>L06615|IGHV3-30*04|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77323|IGHV3-30*05|Homo_sapiens|F|V-REGION|112..406|296 nt|1| | |+1| |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... 
+...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggct +gtgtattactgtgcgagaga +>L06617|IGHV3-30*06|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>L06614|IGHV3-30*07|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M62737|IGHV3-30*08|Homo_sapiens|F|V-REGION|58..351|294 nt|1| | | | |294+24=318| | | +caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaga +>M77300|IGHV3-30*09|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... 
+...ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77326|IGHV3-30*10|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77331|IGHV3-30*11|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77338|IGHV3-30*12|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M77339|IGHV3-30*13|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... 
+...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77324|IGHV3-30*14|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77327|IGHV3-30*15|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77328|IGHV3-30*16|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77329|IGHV3-30*17|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat... 
+...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>X92214|IGHV3-30*18|Homo_sapiens|F|V-REGION|160..455|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaaaga +>L06616|IGHV3-30*19|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>AB019439|IGHV3-30-2*01|Homo_sapiens|P|V-REGION|88935..89232|298 nt|1| | || |298+24=322| | | +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaaggaac +tcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat... +...ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaa +gaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagct +gtgtgttactgtatgtgaggca +>KC162924|IGHV3-30-22*01|Homo_sapiens|P|V-REGION|41477..41774|298 nt|1| | | | |298+24=322| |rev-compl| +gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgaga +ctctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagc +cgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat... 
+...ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaa +gacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggac +gtgtatggctgtacataaggtc +>X92283|IGHV3-30-3*01|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaga +>M77302|IGHV3-30-3*02|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaaaga +>KC713945|IGHV3-30-3*03|Homo_sapiens|F|V-REGION|409..704|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>AC244456|IGHV3-30-33*01|Homo_sapiens|P|V-REGION|11005..11300|296 nt|1| | | | |296+24=320| |rev-compl| +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaaggagc +tcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat... 
+...ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaa +gaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagct +gtgtgttactgtatgtgagg +>AC244456|IGHV3-30-42*01|Homo_sapiens|P|V-REGION|22749..23046|298 nt|1| | | | |298+24=322| |rev-compl| +gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgaga +ctctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagc +ccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat... +...ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaa +gacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggct +gtgtatggctgtacataaggtt +>AC244456|IGHV3-30-5*01|Homo_sapiens|F|V-REGION|26706..27001|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaaaga +>AC245243|IGHV3-30-5*02|Homo_sapiens|F|V-REGION|3298..3593|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaaaga +>AC244456|IGHV3-30-52*01|Homo_sapiens|P|V-REGION|36011..36306|296 nt|1| | | | |296+24=320| |rev-compl| +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaaggaac +tcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat... 
+...ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaa +gaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagct +gtgtgttactgtatgtgagg +>AB019439|IGHV3-32*01|Homo_sapiens|P|V-REGION|77173..77470|298 nt|1| | | | |298+24=322| | | +gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgaga +ctctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagc +cgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat... +...ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaa +gacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggcc +gtgtatggctatacataaggtc +>AB019439|IGHV3-33*01|Homo_sapiens|F|V-REGION|73526..73821|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M99665|IGHV3-33*02|Homo_sapiens|F|V-REGION|179..474|296 nt|1| | | | |296+24=320| | | +caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat... +...ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccaga +gacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M77305|IGHV3-33*03|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat... 
+...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgaaaga +>M77335|IGHV3-33*04|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M77334|IGHV3-33*05|Homo_sapiens|F|V-REGION|41..336|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>HM855436|IGHV3-33*06|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat... +...ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgaaaga +>AB019439|IGHV3-33-2*01|Homo_sapiens|P|V-REGION|64215..64512|298 nt|1| | || |298+24=322| | | +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaatgagc +tcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat... 
+...ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaa +gaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagct +gtgtgttactgtatgtgaggca +>M99666|IGHV3-35*01|Homo_sapiens|ORF|V-REGION|298..593|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgaga +ctctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaac +tgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat... +...ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccaga +gacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggct +gtgtattactgtgtgagaaa +>M99669|IGHV3-38*01|Homo_sapiens|ORF|V-REGION|169..460|292 nt|1| | | | |292+30=322| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagc +tggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt...... +......ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggcc +gcgtattactgtgccagatata +>AB019439|IGHV3-38*02|Homo_sapiens|ORF|V-REGION|22845..23136|292 nt|1| | | | |292+30=322| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagc +tggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt...... +......ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggcc +gtgtattactgtgccagatata +>KC713943|IGHV3-38*03|Homo_sapiens|ORF|V-REGION|411..702|292 nt|1| | | | |292+30=322| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagc +tggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt...... 
+......ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggcc +gtgtattactgtgccagatata +>KF698732|IGHV3-38-3*01|Homo_sapiens|ORF|V-REGION|411..700|290 nt|1| | | | |290+30=320| | | +gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagc +tgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt...... +......ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtaagaaaga +>M99672|IGHV3-43*01|Homo_sapiens|F|V-REGION|330..627|298 nt|1| | | | |298+24=322| | | +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............gatgattataccatgcac +tgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat... +...ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccaga +gacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgcc +ttgtattactgtgcaaaagata +>HM855392|IGHV3-43*02|Homo_sapiens|F|V-REGION|22..319|298 nt|1| | | | |298+24=322| | | +gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............gatgattatgccatgcac +tgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat... +...ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccaga +gacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgcc +ttgtattactgtgcaaaagata +>KC713950|IGHV3-43D*01|Homo_sapiens|F|V-REGION|411..708|298 nt|1| | | | |298+24=322| | | +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............gatgattatgccatgcac +tgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat... 
+...ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccaga +gacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgcc +ttgtattactgtgcaaaagata +>Z18900|IGHV3-47*01|Homo_sapiens|P|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcga +ccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcac +tgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt... +......ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccaga +gacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggct +gtgtattattgtgcaaga +>AB019438|IGHV3-47*02|Homo_sapiens|P|V-REGION|114743..115035|293 nt|1| | | | |293+27=320| | | +gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcac +tgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt... +......ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccaga +gacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggct +gtgtattattgtgcaagaga +>M99675|IGHV3-48*01|Homo_sapiens|F|V-REGION|334..629|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>AB019438|IGHV3-48*02|Homo_sapiens|F|V-REGION|95434..95729|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... 
+...agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggct +gtgtattactgtgcgagaga +>U03893|IGHV3-48*03|Homo_sapiens|F|V-REGION|200..495|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaac +tgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtttattactgtgcgagaga +>HM855336|IGHV3-48*04|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| |rev-compl| +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatagcatgaac +tgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt... +...agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>M99676|IGHV3-49*01|Homo_sapiens|F|V-REGION|384..685|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgaga +ctctcctgtacagcttctggattcaccttt............ggtgattatgctatgagc +tggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagct +tatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtactagaga +>M99401|IGHV3-49*02|Homo_sapiens|F|V-REGION|128..429|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgaga +ctctcctgtacagcttctggattcaccttt............gggtattatcctatgagc +tgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagct +tatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtactagaga +>AB019438|IGHV3-49*03|Homo_sapiens|F|V-REGION|76304..76605|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgaga 
+ctctcctgtacagcttctggattcaccttt............ggtgattatgctatgagc +tggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagct +tatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtactagaga +>AM940220|IGHV3-49*04|Homo_sapiens|F|V-REGION|1..302|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgaga +ctctcctgtacagcttctggattcaccttt............ggtgattatgctatgagc +tgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagct +tatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtactagaga +>AM940221|IGHV3-49*05|Homo_sapiens|F|V-REGION|1..302|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgaga +ctctcctgtacagcttctggattcaccttt............ggtgattatgctatgagc +tggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagct +tatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagcc +gtgtattactgtactagaga +>M99678|IGHV3-52*01|Homo_sapiens|P|V-REGION|367..662|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctcctggatgcac +tgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac... +...ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccaga +gacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgacc +gtgtattactgtgtgagagg +>Z17388|IGHV3-52*02|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318|partial in 3'| | +gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctcctggatgcac +tgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac... 
+...ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccaga +gacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgacc +gtgtattactgtgtgaga +>J00237|IGHV3-52*03|Homo_sapiens|P|V-REGION|177..470|294 nt|1| | | | |294+24=318|partial in 3'| | +gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctcctggatgcac +tgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac... +...ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccaga +gacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgacc +gtgtattactgtgtgaga +>M99679|IGHV3-53*01|Homo_sapiens|F|V-REGION|196..488|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgaga +ctctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +......ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>KF698735|IGHV3-53*02|Homo_sapiens|F|V-REGION|409..701|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgaga +ctctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +......ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>J03617|IGHV3-53*03|Homo_sapiens|F|V-REGION|679..971|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgaga +ctctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagc +tgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt... 
+......ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgctaggga +>HM855453|IGHV3-53*04|Homo_sapiens|F|V-REGION|22..314|293 nt|1| | | | |293+27=320| |rev-compl| +gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +......ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccaga +cacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggcc +gtgtattactgtgcgagaga +>M99680|IGHV3-54*01|Homo_sapiens|P|V-REGION|297..592|296 nt|1| | || |296+24=320| | | +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaatgagc +tcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat... +...agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaa +gaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggcc +gtgtattactgtatgtgagt +>X92215|IGHV3-54*02|Homo_sapiens|P|V-REGION|346..641|296 nt|1| | | | |296+24=320| | | +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaatgagc +tcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat... +...agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaa +gaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggcc +gtgtattactgtatgtgagg +>AB019438|IGHV3-54*04|Homo_sapiens|P|V-REGION|31896..32191|296 nt|1| | | | |296+24=320| | | +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgaga +ctctcctgtgcagactctggattaaccttc............agtagctactgaatgagc +tcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat... 
+...agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaa +gaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggcc +gtgtattactgtatgtgagt +>AB019437|IGHV3-62*01|Homo_sapiens|P|V-REGION|190113..190408|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctctgctatgcac +tgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt... +...ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccaga +gacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagtt +gtgtactactgtgtgaaaga +>M99681|IGHV3-63*01|Homo_sapiens|P|V-REGION|170..467|298 nt|1| | | | |298+24=322| | | +gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgaga +ctctcctgtgtagcctctggattcaccttc............agtagctactgaatgagc +tgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat... +...ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaa +gacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgacc +atgcatggctgtacataaggtt +>Z15099|IGHV3-63*02|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgaga +ctctcctgtgtagcctctggattcaccttc............agtagctactgaatgagc +tgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat... +...ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaa +gacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgacc +atgcatggctgtacataa +>M99682|IGHV3-64*01|Homo_sapiens|F|V-REGION|241..536|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat... 
+...gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggct +gtgtattactgtgcgagaga +>AB019437|IGHV3-64*02|Homo_sapiens|F|V-REGION|175507..175802|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat... +...gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggct +gtgtattactgtgcgagaga +>M77298|IGHV3-64*03|Homo_sapiens|F|V-REGION|114..409|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgttcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat... +...gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggct +gtgtattactgtgtgaaaga +>M77299|IGHV3-64*04|Homo_sapiens|F|V-REGION|112..407|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgttcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat... +...gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>M77301|IGHV3-64*05|Homo_sapiens|F|V-REGION|114..409|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgttcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat... 
+...gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggct +gtgtattactgtgtgaaaga +>KC713941|IGHV3-64D*06|Homo_sapiens|F|V-REGION|407..702|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgttcagcctctggattcaccttc............agtagctatgctatgcac +tgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat... +...gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggct +gtgtattactgtgtgaaaga +>X92218|IGHV3-66*01|Homo_sapiens|F|V-REGION|160..452|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +......ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>Z27504|IGHV3-66*02|Homo_sapiens|F|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +......ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaga +>AB019437|IGHV3-66*03|Homo_sapiens|F|V-REGION|158218..158510|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgaga +ctctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt... 
+......ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgagaga +>X70208|IGHV3-66*04|Homo_sapiens|F|V-REGION|450..742|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagc +tgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt... +......ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccaga +gacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaca +>AJ879484|IGHV3-69-1*01|Homo_sapiens|P|V-REGION|169..461|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgaac +tgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt... +......agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>AJ879485|IGHV3-69-1*02|Homo_sapiens|P|V-REGION|169..461|293 nt|1| | | | |293+27=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgaac +tgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt... +......agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtttattactgtgcgagaga +>M99649|IGHV3-7*01|Homo_sapiens|F|V-REGION|344..639|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agtagctattggatgagc +tgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat... 
+...ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>X92288|IGHV3-7*02|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agtagctattggatgagc +tgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat... +...ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgaga +>HM855666|IGHV3-7*03|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| |rev-compl| +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttt............agtagctattggatgagc +tgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat... +...ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>AB019437|IGHV3-71*01|Homo_sapiens|P|V-REGION|105844..106145|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagct +aatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaaga +gatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggcc +gtgtattactgtgcgagaga +>HM855875|IGHV3-71*02|Homo_sapiens|P|V-REGION|22..323|302 nt|1| | | | |302+18=320| |rev-compl| +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgactactacatgagc +tgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagct +aatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaaga +gatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggct +gtgtattactgtgcgagaga +>HM855455|IGHV3-71*03|Homo_sapiens|P|V-REGION|22..323|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga 
+ctctcctgtgcagcctctggtttcaccttc............agtgactactacatgagc +tgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagct +aatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaaga +gatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgcgagaga +>X92206|IGHV3-72*01|Homo_sapiens|F|V-REGION|247..548|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtgaccactacatggac +tgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagct +aacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggcc +gtgtattactgtgctagaga +>Z29979|IGHV3-72*02|Homo_sapiens|F|V-REGION|1..165|165 nt|1| | | | |165+99=264|partial in 5' and in 3' | | +............................................................ +........................accttc............agtgaccactacatggac +tgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagct +aacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaaga +gatgattcaaagaactcactgtat +>X70197|IGHV3-73*01|Homo_sapiens|F|V-REGION|684..985|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaa +ctctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcac +tgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagct +aacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccaga +gatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggcc +gtgtattactgtactagaca +>AB019437|IGHV3-73*02|Homo_sapiens|F|V-REGION|78310..78611|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaa +ctctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcac +tgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagct +aacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccaga +gatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggcc +gtgtattactgtactagaca +>L33851|IGHV3-74*01|Homo_sapiens|F|V-REGION|183..478|296 nt|1| | | | |296+24=320| | 
| +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctactggatgcac +tgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat... +...gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccaga +gacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggct +gtgtattactgtgcaagaga +>Z17392|IGHV3-74*02|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctactggatgcac +tgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat... +...gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccaga +gacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggct +gtgtattactgtgcaaga +>J00239|IGHV3-74*03|Homo_sapiens|F|V-REGION|179..474|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctactggatgcac +tgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat... +...gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccaga +gacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggct +gtgtattactgtgcaagaga +>M99651|IGHV3-9*01|Homo_sapiens|F|V-REGION|280..577|298 nt|1| | | | |298+24=322| | | +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgaga +ctctcctgtgcagcctctggattcaccttt............gatgattatgccatgcac +tgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat... +...agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggcc +ttgtattactgtgcaaaagata +>HM855577|IGHV3-9*02|Homo_sapiens|F|V-REGION|22..319|298 nt|1| | | | |298+24=322| | | +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgaga +ctctcctgtgcagcctctggattcacctct............gatgattatgccatgcac +tgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat... 
+...agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggcc +ttgtattactgtgcaaaagata +>KC713947|IGHV3-9*03|Homo_sapiens|F|V-REGION|399..696|298 nt|1| | | | |298+24=322| | | +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgaga +ctctcctgtgcagcctctggattcaccttt............gatgattatgccatgcac +tgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat... +...agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccaga +gacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggcc +ttgtattactgtgcaaaagata +>HM855939|IGHV3-NL1*01|Homo_sapiens|F|V-REGION|22..317|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgaga +ctctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcac +tgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt... +...ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgcgaaaga +>Z29597|IGHV3/OR15-7*01|Homo_sapiens|ORF|V-REGION|1..300|300 nt|1| | | | |300+18=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgaga +ctctcatgtgcagcctctggattcaccttc............agtgaccactacatgagc +tgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagct +aacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaaga +gaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggcc +gtgtattactgtgctaga +>M36530|IGHV3/OR15-7*02|Homo_sapiens|ORF|V-REGION|247..546|300 nt|1| | | | |300+18=318| | | +gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgaga +ctctcatgtgctgcctctggattcaccttc............agtgaccactacatgagc +tgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagct +aacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaaga +gaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggcc +gtgtattactgtgctaga +>Z12332|IGHV3/OR15-7*03|Homo_sapiens|ORF|V-REGION|1..300|300 nt|1| | | | |300+18=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgaga 
+ctctcatgtgcagcctctggattcaccttc............agtgaccactacatgagc +tgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagct +aacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaaga +gaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggcc +gtgtattactgtgctaga +>HM855865|IGHV3/OR15-7*05|Homo_sapiens|ORF|V-REGION|22..323|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgaga +ctctcatgtgcagcctctggattcaccttc............agtgaccactacatgagc +tgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagct +aacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaaga +gaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggcc +gtgtattactgtgctagaga +>Z29607|IGHV3/OR16-10*01|Homo_sapiens|ORF|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgaga +ctctcctgtgcaggctctggattcaccttc............agtagctatgctatgcac +tgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt... +......ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggct +gtgtattactgtgcaaga +>Z12345|IGHV3/OR16-10*02|Homo_sapiens|ORF|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcaggctctggattcaccttc............agtagctatgctatgcac +tgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt... +......ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggct +gtgtattactgtgcaaga +>HM855718|IGHV3/OR16-10*03|Homo_sapiens|ORF|V-REGION|22..314|293 nt|1| | | | |293+27=320| |rev-compl| +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcaggctctggattcaccttc............agtagctatgctatgcac +tgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt... 
+......ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccaga +gacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggct +gtgtattactgtgcaagaga +>Z29609|IGHV3/OR16-12*01|Homo_sapiens|ORF|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaa +ctctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagc +tggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt... +...ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccaca +gacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggcc +gtgtattactgtgcaaga +>Z29610|IGHV3/OR16-13*01|Homo_sapiens|ORF|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctactggatgcac +tgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat... +...gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccaga +gacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggct +gtgtattactgtactaga +>Z29611|IGHV3/OR16-14*01|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtagctactggatgcac +tgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat... +...gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccaga +gacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggct +gtgtattactgtactaga +>L25546|IGHV3/OR16-15*01|Homo_sapiens|P|V-REGION|204..499|296 nt|1| | | | |296+24=320| | | +gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +ctctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaac +tgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat... 
+...ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccaga +gacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggcc +gtgtattactgtgtgagaaa +>Z29612|IGHV3/OR16-15*02|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +cactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaac +tgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat... +...ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccaga +gacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggcc +gtgtattactgtgtgaga +>Z29613|IGHV3/OR16-16*01|Homo_sapiens|P|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaga +cactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaac +tgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat... +...ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccaga +gacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggcc +gtgtattactgtgtgaga +>HM855668|IGHV3/OR16-6*02|Homo_sapiens|ORF|V-REGION|22..323|302 nt|1| | | | |302+18=320| | | +gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttaga +ctctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagc +tgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagct +aatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaaga +gatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggcc +gtgtattactgtaccacagg +>Z29605|IGHV3/OR16-8*01|Homo_sapiens|ORF|V-REGION|1..294|294 nt|1| | || |294+24=318| | | +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctgtcctgtccagcctctggattcaccttc............agtaaccactacatgagc +tgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat... 
+...agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagg +gacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggct +gtgtattactgtgtgaaa +>HM855427|IGHV3/OR16-8*02|Homo_sapiens|ORF|V-REGION|22..317|296 nt|1| | | | |296+24=320| |rev-compl| +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgaga +ctgtcctgtccagactctggattcaccttc............agtaaccactacatgagc +tgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat... +...agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagg +gacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggct +gtgtattactgtgtgaaaca +>Z29606|IGHV3/OR16-9*01|Homo_sapiens|ORF|V-REGION|1..294|294 nt|1| | || |294+24=318| | | +gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgaga +ctctcctgtgcagcctctggattcaccttc............agtaaccactacacgagc +tgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat... +...agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagg +gacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggct +gtgtattactgtgtgaaa +>X05714|IGHV4-28*01|Homo_sapiens|F|V-REGION|290..585|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggcc +gtgtattactgtgcgagaaa +>M83133|IGHV4-28*02|Homo_sapiens|F|V-REGION|811..1106|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... 
+......gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggcc +gtgtattactgtgcgagaaa +>X92233|IGHV4-28*03|Homo_sapiens|F|V-REGION|140..435|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggcc +gtgtattactgtgcgagaga +>X56358|IGHV4-28*04|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggc +gtgtattactgtgcgaga +>HM855339|IGHV4-28*05|Homo_sapiens|F|V-REGION|26..321|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... +......gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggcc +gtgtattactgtgcgagaaa +>HM855782|IGHV4-28*06|Homo_sapiens|F|V-REGION|26..321|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... 
+......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggcc +gtgtattactgtgcgagaaa +>KC713936|IGHV4-28*07|Homo_sapiens|F|V-REGION|390..685|296 nt|1| | | | |296+24=320| | | +caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggc +tggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggcc +gtgtattactgtgcgagaaa +>L10089|IGHV4-30-2*01|Homo_sapiens|F|V-REGION|1..299|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgccagaga +>M95122|IGHV4-30-2*02|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+21=315| | | +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcg +>X92229|IGHV4-30-2*03|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggct +gtgtattactgtgcgagaca +>Z75351|IGHV4-30-2*04|Homo_sapiens|F|V-REGION|1..227|227 nt|1| | | | |227+93=320|partial in 5'| | +............................................................ 
+...............tctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactgtgcgagaga +>HM855593|IGHV4-30-2*05|Homo_sapiens|F|V-REGION|40..338|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggcc +gtgtattactgtgccagaga +>KC713944|IGHV4-30-2*06|Homo_sapiens|F|V-REGION|390..688|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgccagaga +>Z14238|IGHV4-30-4*01|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagt +tggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggcc +gtgtattactgtgccagaga +>Z14239|IGHV4-30-4*02|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagt +tggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggcc +gtgtattactgtgccagaga +>X92274|IGHV4-30-4*03|Homo_sapiens|F|V-REGION|140..429|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagt +tggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactg +>X92275|IGHV4-30-4*04|Homo_sapiens|F|V-REGION|140..429|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagt +tggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggcc +gtgtattactg +>Z75353|IGHV4-30-4*05|Homo_sapiens|F|V-REGION|1..228|228 nt|1| | | | |228+92=320|partial in 5'| | +............................................................ +..............ctctggtggctccatcagc......agtggtgattactactggagt +tggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggcc +gtgtattactgtgccagaga +>Z75360|IGHV4-30-4*06|Homo_sapiens|F|V-REGION|1..227|227 nt|1| | | | |227+93=320|partial in 5'| | +............................................................ +...............tctggtggctccatcagc......agtggtgattactactggagt +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggcc +gtgtattactgtgccagaga +>KC713946|IGHV4-30-4*07|Homo_sapiens|F|V-REGION|390..688|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagc +tggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgccagaga +>L10098|IGHV4-31*01|Homo_sapiens|F|V-REGION|27..325|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactgtgcgagaga +>M99683|IGHV4-31*02|Homo_sapiens|F|V-REGION|290..588|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactgtgcgagaga +>Z14237|IGHV4-31*03|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactgtgcgagaga +>M95120|IGHV4-31*04|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+21=315| | | +caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactgtgcg +>M95121|IGHV4-31*05|Homo_sapiens|F|V-REGION|1..291|291 nt|1| | | | |291+24=315| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggcc +gtgtattactgtgcg +>X92270|IGHV4-31*06|Homo_sapiens|F|V-REGION|140..429|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactg +>X92271|IGHV4-31*07|Homo_sapiens|F|V-REGION|140..429|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactg +>X92272|IGHV4-31*08|Homo_sapiens|F|V-REGION|140..429|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactg +>X92273|IGHV4-31*09|Homo_sapiens|F|V-REGION|140..429|290 nt|1| | | | |290+21=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>Z14235|IGHV4-31*10|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagc +tggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt... +......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggcc +gtggattactgtgcgagaga +>AB019439|IGHV4-34*01|Homo_sapiens|F|V-REGION|59657..59949|293 nt|1| | | | |293+27=320| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... 
+......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcgagagg +>M99684|IGHV4-34*02|Homo_sapiens|F|V-REGION|311..603|293 nt|1| | | | |293+27=320| | | +caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcgagagg +>X92255|IGHV4-34*03|Homo_sapiens|F|V-REGION|141..424|284 nt|1| | | | |284+27=311|partial in 3'| | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>X92236|IGHV4-34*04|Homo_sapiens|F|V-REGION|141..433|293 nt|1| | | | |293+27=320| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... +......ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcgagagg +>X92237|IGHV4-34*05|Homo_sapiens|F|V-REGION|141..433|293 nt|1| | | | |293+27=320| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgc +tggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt... 
+......ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcgagagg +>X92256|IGHV4-34*06|Homo_sapiens|F|V-REGION|141..424|284 nt|1| | | | |284+27=311|partial in 3'| | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggcc +gtgtattactg +>X92258|IGHV4-34*07|Homo_sapiens|F|V-REGION|141..424|284 nt|1| | | | |284+27=311|partial in 3'| | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>M95113|IGHV4-34*08|Homo_sapiens|F|V-REGION|1..288|288 nt|1| | | | |288+27=315| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggaccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcg +>Z14241|IGHV4-34*09|Homo_sapiens|F|V-REGION|140..432|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt... 
+......ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactgtgcgagaga +>Z14242|IGHV4-34*10|Homo_sapiens|F|V-REGION|141..433|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagta +gacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagata +>X05716|IGHV4-34*11|Homo_sapiens|F|V-REGION|292..584|293 nt|1| | | | |293+27=320| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagc +tggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt... +......gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagta +gacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggcc +gtgtattgctgtgcgagaga +>X56591|IGHV4-34*12|Homo_sapiens|F|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt... +......ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcgaga +>Z75356|IGHV4-34*13|Homo_sapiens|F|V-REGION|1..221|221 nt|1| | | | |221+99=320|partial in 5'| | +............................................................ +...............tatggtgggtccttc............agtggttactactggagc +tggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt... 
+......ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggct +gtgtattactgtgcgagagg +>Z12367|IGHV4-38-2*01|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+24=318| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggc +tggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactgtgcgaga +>AC233755|IGHV4-38-2*02|Homo_sapiens|F|V-REGION|41583..41878|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggttactccatcagc.........agtggttactactggggc +tggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactgtgcgagaga +>AB019439|IGHV4-39*01|Homo_sapiens|F|V-REGION|11626..11924|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggct +gtgtattactgtgcgagaca +>X05715|IGHV4-39*02|Homo_sapiens|F|V-REGION|291..589|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggct +gtgtattactgtgcgagaga +>X92259|IGHV4-39*03|Homo_sapiens|F|V-REGION|141..430|290 nt|1| | | | |290+21=311|partial in 3'| | +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactg +>X92297|IGHV4-39*04|Homo_sapiens|F|V-REGION|1..196|196 nt|1| | | | |196+100=296|partial in 5'| | +............................................................ +......................gctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac +>M95116|IGHV4-39*05|Homo_sapiens|F|V-REGION|1..294|294 nt|1| | | | |294+21=315| | | +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggct +gtgtattactgtgcg +>Z14236|IGHV4-39*06|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>AM940222|IGHV4-39*07|Homo_sapiens|F|V-REGION|1..299|299 nt|1| | | | |299+21=320| | | +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>X05713|IGHV4-4*01|Homo_sapiens|F|V-REGION|292..587|296 nt|1| | || |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagt +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattgctgtgcgagaga +>X92232|IGHV4-4*02|Homo_sapiens|F|V-REGION|140..435|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagt +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>X92252|IGHV4-4*03|Homo_sapiens|F|V-REGION|140..426|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagt +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... 
+......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>X92253|IGHV4-4*04|Homo_sapiens|F|V-REGION|140..426|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtcc +ctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagt +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>X92254|IGHV4-4*05|Homo_sapiens|F|V-REGION|140..426|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtcc +ctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagt +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>Z75355|IGHV4-4*06|Homo_sapiens|F|V-REGION|1..224|224 nt|1| | || |224+96=320|partial in 5'| | +............................................................ +...............tctggtggctccatcagc.........agtagtaactggtggagt +tgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt... +......gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>X62112|IGHV4-4*07|Homo_sapiens|F|V-REGION|229..521|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt... 
+......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>KC713942|IGHV4-4*08|Homo_sapiens|F|V-REGION|390..682|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactgtgcgagaga +>M99685|IGHV4-55*01|Homo_sapiens|P|V-REGION|370..665|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgta +gacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagata +>X92223|IGHV4-55*02|Homo_sapiens|P|V-REGION|349..644|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagta +gacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagata +>X92263|IGHV4-55*03|Homo_sapiens|P|V-REGION|141..427|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>X92265|IGHV4-55*04|Homo_sapiens|P|V-REGION|141..427|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagta +gacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>X92266|IGHV4-55*05|Homo_sapiens|P|V-REGION|141..427|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgta +gacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactg +>X92267|IGHV4-55*06|Homo_sapiens|P|V-REGION|141..427|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgta +gacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactg +>X92268|IGHV4-55*07|Homo_sapiens|P|V-REGION|141..427|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgta +gacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactg +>X92234|IGHV4-55*08|Homo_sapiens|P|V-REGION|141..436|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagta +gacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>X92235|IGHV4-55*09|Homo_sapiens|P|V-REGION|140..435|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt... +......gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggcc +gtgtattactgtgcgagaaa +>AB019438|IGHV4-59*01|Homo_sapiens|F|V-REGION|5995..6287|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagaga +>M29812|IGHV4-59*02|Homo_sapiens|F|V-REGION|290..582|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccgtc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... 
+......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagaga +>M95114|IGHV4-59*03|Homo_sapiens|F|V-REGION|1..288|288 nt|1| | | | |288+27=315| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcg +>M95117|IGHV4-59*04|Homo_sapiens|F|V-REGION|1..288|288 nt|1| | | | |288+27=315| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggct +gtgtattactgtgcg +>M95118|IGHV4-59*05|Homo_sapiens|F|V-REGION|1..288|288 nt|1| | | | |288+27=315| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt... +......gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggct +gtgtattactgtgcg +>M95119|IGHV4-59*06|Homo_sapiens|F|V-REGION|1..288|288 nt|1| | | | |288+27=315| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtcactggtggctccatc............agtagttactactggagc +tggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt... 
+......gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagta +gacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggcc +gtgtattactgtgcg +>X56360|IGHV4-59*07|Homo_sapiens|F|V-REGION|1..291|291 nt|1| | | | |291+27=318| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgaga +>HM855471|IGHV4-59*08|Homo_sapiens|F|V-REGION|40..332|293 nt|1| | | | |293+27=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatc............agtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactgtgcgagaca +>Z75359|IGHV4-59*09|Homo_sapiens|F|V-REGION|1..221|221 nt|1| | || |221+99=320|partial in 5'| | +............................................................ +...............tctggtggctccatc............agtagttactactggagc +tggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagagg +>Z14243|IGHV4-59*10|Homo_sapiens|F|V-REGION|141..433|293 nt|1| | | | |293+27=320| | | +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtcc +ctcacctgcgctgtctatggtggctccatc............agtagttactactggagc +tggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt... 
+......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagata +>M29811|IGHV4-61*01|Homo_sapiens|F|V-REGION|290..588|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagaga +>L10097|IGHV4-61*02|Homo_sapiens|F|V-REGION|27..325|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagc +tggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggcc +gtgtattactgtgcgagaga +>X92230|IGHV4-61*03|Homo_sapiens|F|V-REGION|140..438|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagaga +>X92250|IGHV4-61*04|Homo_sapiens|F|V-REGION|140..426|287 nt|1| | | | |287+24=311|partial in 3'| | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattggatatatctattacagt... 
+......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggcc +gtgtattactg +>X56356|IGHV4-61*05|Homo_sapiens|F|V-REGION|1..297|297 nt|1| | | | |297+21=318| | | +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgaga +>Z75347|IGHV4-61*06|Homo_sapiens|ORF|V-REGION|1..227|227 nt|1| | | | |227+93=320|partial in 5'| | +............................................................ +...............tctggtggctccgtcagc......agtggtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgccagaga +>Z75348|IGHV4-61*07|Homo_sapiens|F|V-REGION|1..227|227 nt|1| | | | |227+93=320|partial in 5'| | +............................................................ +...............tctggtggctccgtcagc......agtggtagttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... +......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagaca +>AB019437|IGHV4-61*08|Homo_sapiens|F|V-REGION|194119..194417|299 nt|1| | | | |299+21=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagc +tggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt... 
+......gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagta +gacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggcc +gtgtattactgtgcgagaga +>HM855539|IGHV4/OR15-8*01|Homo_sapiens|ORF|V-REGION|40..335|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>X05712|IGHV4/OR15-8*02|Homo_sapiens|ORF|V-REGION|262..557|296 nt|1| | | | |296+24=320| | | +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaata +gacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>HM855418|IGHV4/OR15-8*03|Homo_sapiens|ORF|V-REGION|26..321|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtcc +ctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagc +tgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt... +......gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagta +gacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggcc +gtgtattactgtgcgagaga +>X92227|IGHV5-10-1*01|Homo_sapiens|F|V-REGION|13..306|294 nt|1| | | | |294+24=318| | | +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgagg +atctcctgtaagggttctggatacagcttt............accagctactggatcagc +tgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt... 
+...gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagct +gacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgaga +>X92279|IGHV5-10-1*02|Homo_sapiens|F|V-REGION|252..546|295 nt|1| | | | |295+25=320| | | +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgagg +atctcctgtaagggttctggatacagcttt............accagctactggatcagc +tgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt... +...gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagct +gacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgcc +atgtattactgtgcgagaca +>X56375|IGHV5-10-1*03|Homo_sapiens|F|V-REGION|12..305|294 nt|1| | | | |294+24=318| | | +gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgagg +atctcctgtaagggttctggatacagcttt............accagctactggatcagc +tgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt... +...gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagct +gacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgaga +>X56376|IGHV5-10-1*04|Homo_sapiens|F|V-REGION|12..305|294 nt|1| | | | |294+24=318| | | +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgagg +atctcctgtaagggttctggatacagcttt............accagctactggatcagc +tgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt... +...gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagct +gacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgaga +>M99686|IGHV5-51*01|Homo_sapiens|F|V-REGION|308..603|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaag +atctcctgtaagggttctggatacagcttt............accagctactggatcggc +tgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt... 
+...gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagcc +gacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgagaca +>M18806|IGHV5-51*02|Homo_sapiens|F|V-REGION|251..546|296 nt|1| | | | |296+24=320| | | +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaag +atctcctgtaagggttctggatacagcttt............accagctactggaccggc +tgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt... +...gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagcc +gacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgagaca +>X56368|IGHV5-51*03|Homo_sapiens|F|V-REGION|12..305|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaag +atctcctgtaagggttctggatacagcttt............accagctactggatcggc +tgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt... +...gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagcc +gacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgaga +>X56367|IGHV5-51*04|Homo_sapiens|F|V-REGION|12..305|294 nt|1| | | | |294+24=318| | | +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaag +atctcctgtaagggttctggatacagcttt............accagctactggatcggc +tgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt... +...gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagcc +gacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atgtattactgtgcgaga +>Z27449|IGHV5-51*05|Homo_sapiens|F|V-REGION|1..245|245 nt|1| | | | |245+58=303|partial in 5'| | +.....................................aaaagcccggggagtctctgaag +atctcctgtaagggttctggatacagcttt............accagctactggatcggc +tgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt... 
+...gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagcc +gacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgcc +atg +>X92213|IGHV5-78*01|Homo_sapiens|P|V-REGION|734..1027|294 nt|1| | | | |294+24=318| | | +gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgagg +atctcctgtaagacttctggatacagcttt............accagctactggatccac +tgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg... +...aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagcc +gacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgcc +atgtattattgtgtgaga +>J04097|IGHV6-1*01|Homo_sapiens|F|V-REGION|480..784|305 nt|1| | | | |305+15=320| | | +caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctca +ctcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaac +tggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc +...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaaccca +gacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggct +gtgtattactgtgcaagaga +>Z14223|IGHV6-1*02|Homo_sapiens|F|V-REGION|142..446|305 nt|1| | | | |305+15=320| | | +caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctca +ctcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaac +tggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc +...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaaccca +gacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggct +gtgtattactgtgcaagaga +>AB019439|IGHV7-34-1*01|Homo_sapiens|P|V-REGION|56018..56310|293 nt|1| | | | |293+27=320| | | +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaag +gtctcctataagtcttctggttacaccttc............accatctatggtatgaat +tgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac... 
+...actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatg +gacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggcc +gagtattactgtgcgaagta +>HM855644|IGHV7-34-1*02|Homo_sapiens|P|V-REGION|24..316|293 nt|1| | | | |293+27=320| |rev-compl| +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaag +gtctcctataagtcttctggttacaccttc............accatctatggtatgaat +tgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac... +...aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatg +gacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggcc +gagtattactgtgcgaagta +>L10057|IGHV7-4-1*01|Homo_sapiens|F|V-REGION|95..388|294 nt|1| | | | |294+24=318| | | +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac... +...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg +gacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgcc +gtgtattactgtgcgaga +>X62110|IGHV7-4-1*02|Homo_sapiens|F|V-REGION|158..453|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac... +...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg +gacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgcc +gtgtattactgtgcgagaga +>X92290|IGHV7-4-1*03|Homo_sapiens|F|V-REGION|1..274|274 nt|1| | | | |274+24=298|partial in 3'| | +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac... 
+...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg +gacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg +>HM855485|IGHV7-4-1*04|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac... +...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg +gacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgcc +gtgtattactgtgcgagaga +>HM855361|IGHV7-4-1*05|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| |rev-compl| +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag +gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat +tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac... +...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg +gacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgcc +gtgtgttactgtgcgagaga +>AC241995|IGHV7-40*03|Homo_sapiens|P|V-REGION|10101..10396|296 nt|1| | | | |296+24=320| | | +ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaat +atgatatatgtaaatcatggaatactatgc............agccagtatggtatgaat +tcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac... +...actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatg +gacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggcc +gtgtatgactgtatgagaga +>AB019437|IGHV7-81*01|Homo_sapiens|ORF|V-REGION|6456..6751|296 nt|1| | | | |296+24=320| | | +caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaag +gtctcctgcaaggcttctggttacagtttc............accacctatggtatgaat +tgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac... 
+...actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatg +gacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggcc +atgtattactgtgcgagata + diff -r 000000000000 -r 183edf446dcf LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,437 @@ +Attribution-NonCommercial-ShareAlike 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. 
Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International +Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution-NonCommercial-ShareAlike 4.0 International Public License +("Public License"). To the extent this Public License may be +interpreted as a contract, You are granted the Licensed Rights in +consideration of Your acceptance of these terms and conditions, and the +Licensor grants You such rights in consideration of benefits the +Licensor receives from making the Licensed Material available under +these terms and conditions. + + +Section 1 -- Definitions. + + a. 
Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. BY-NC-SA Compatible License means a license listed at + creativecommons.org/compatiblelicenses, approved by Creative + Commons as essentially the equivalent of this Public License. + + d. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + e. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + f. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + g. License Elements means the license attributes listed in the name + of a Creative Commons Public License. 
The License Elements of this + Public License are Attribution, NonCommercial, and ShareAlike. + + h. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + i. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + j. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + k. NonCommercial means not primarily intended for or directed towards + commercial advantage or monetary compensation. For purposes of + this Public License, the exchange of the Licensed Material for + other material subject to Copyright and Similar Rights by digital + file-sharing or similar means is NonCommercial provided there is + no payment of monetary compensation in connection with the + exchange. + + l. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + m. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + n. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. 
Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part, for NonCommercial purposes only; and + + b. produce, reproduce, and Share Adapted Material for + NonCommercial purposes only. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. Additional offer from the Licensor -- Adapted Material. + Every recipient of Adapted Material from You + automatically receives an offer from the Licensor to + exercise the Licensed Rights in the Adapted Material + under the conditions of the Adapter's License You apply. + + c. No downstream restrictions. 
You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties, including when + the Licensed Material is used other than for NonCommercial + purposes. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. 
identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + b. ShareAlike. + + In addition to the conditions in Section 3(a), if You Share + Adapted Material You produce, the following conditions also apply. + + 1. The Adapter's License You apply must be a Creative Commons + license with the same License Elements, this version or + later, or a BY-NC-SA Compatible License. + + 2. You must include the text of, or the URI or hyperlink to, the + Adapter's License You apply. You may satisfy this condition + in any reasonable manner based on the medium, means, and + context in which You Share Adapted Material. + + 3. You may not offer or impose any additional or different terms + or conditions on, or apply any Effective Technological + Measures to, Adapted Material that restrict exercise of the + rights granted under the Adapter's License You apply. 
+ + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database for NonCommercial purposes + only; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material, + including for purposes of Section 3(b); and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. 
The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. 
Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. diff -r 000000000000 -r 183edf446dcf MakeDb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MakeDb.py Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,556 @@ +#!/usr/bin/env python3 +""" +Create tab-delimited database file to store sequence alignment information +""" +# Info +__author__ = 'Namita Gupta, Jason Anthony Vander Heiden' +from changeo import __version__, __date__ + +# Imports +import os +import sys +from argparse import ArgumentParser +from collections import OrderedDict +from textwrap import dedent +from time import time +from Bio import SeqIO + +# Presto and changeo imports +from presto.Defaults import default_out_args +from presto.Annotation import parseAnnotation +from presto.IO import countSeqFile, printLog, printMessage, printProgress, readSeqFile +from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs +from changeo.IO import countDbFile, extractIMGT, getDbWriter, readRepo +from changeo.Parsers import IgBLASTReader, IMGTReader, IHMMuneReader, getIDforIMGT + + +def getFilePrefix(aligner_output, out_args): + """ + Get file name prefix and create output directory + + Arguments: + aligner_output : aligner output file or directory. + out_args : dictionary of output arguments. 
+ + Returns: + str : file name prefix. + """ + # Determine output directory + if not out_args['out_dir']: + out_dir = os.path.dirname(os.path.abspath(aligner_output)) + else: + out_dir = os.path.abspath(out_args['out_dir']) + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + # Determine file prefix + if out_args['out_name']: + file_prefix = out_args['out_name'] + else: + file_prefix = os.path.splitext(os.path.split(os.path.abspath(aligner_output))[1])[0] + + return os.path.join(out_dir, file_prefix) + + +def getSeqDict(seq_file): + """ + Create a dictionary from a sequence file. + + Arguments: + seq_file : sequence file. + + Returns: + dict : sequence description as keys with Bio.SeqRecords as values. + """ + seq_dict = SeqIO.to_dict(readSeqFile(seq_file), + key_function=lambda x: x.description) + + return seq_dict + + +def writeDb(db, fields, file_prefix, total_count, id_dict=None, no_parse=True, partial=False, + out_args=default_out_args): + """ + Writes tab-delimited database file in output directory. + + Arguments: + db : a iterator of IgRecord objects containing alignment data. + fields : a list of ordered field names to write. + file_prefix : directory and prefix for CLIP tab-delim file. + total_count : number of records (for progress bar). + id_dict : a dictionary of the truncated sequence ID mapped to the full sequence ID. + no_parse : if ID is to be parsed for pRESTO output with default delimiters. + partial : if True put incomplete alignments in the pass file. + out_args : common output argument dictionary from parseCommonArgs. 
+ + Returns: + None + """ + # Function to check for valid records strictly + def _pass_strict(rec): + valid = [rec.v_call and rec.v_call != 'None', + rec.j_call and rec.j_call != 'None', + rec.functional is not None, + rec.seq_vdj, + rec.junction] + return all(valid) + + # Function to check for valid records loosely + def _pass_gentle(rec): + valid = [rec.v_call and rec.v_call != 'None', + rec.d_call and rec.d_call != 'None', + rec.j_call and rec.j_call != 'None'] + return any(valid) + + # Set pass criteria + _pass = _pass_gentle if partial else _pass_strict + + # Define output file names + pass_file = '%s_db-pass.tab' % file_prefix + fail_file = '%s_db-fail.tab' % file_prefix + + # Initiate handles, writers and counters + pass_handle = None + fail_handle = None + pass_writer = None + fail_writer = None + start_time = time() + rec_count = pass_count = fail_count = 0 + + # Validate and write output + printProgress(0, total_count, 0.05, start_time) + for i, record in enumerate(db, start=1): + + # Replace sequence description with full string, if required + if id_dict is not None and record.id in id_dict: + record.id = id_dict[record.id] + + # Parse sequence description into new columns + if not no_parse: + try: + record.annotations = parseAnnotation(record.id, delimiter=out_args['delimiter']) + record.id = record.annotations['ID'] + del record.annotations['ID'] + + # TODO: This is not the best approach. should pass in output fields. + # If first record, use parsed description to define extra columns + if i == 1: fields.extend(list(record.annotations.keys())) + except IndexError: + # Could not parse pRESTO-style annotations so fall back to no parse + no_parse = True + sys.stderr.write('\nWARNING: Sequence annotation format not recognized. 
Sequence headers will not be parsed.\n') + + # Count pass or fail and write to appropriate file + if _pass(record): + # Open pass file + if pass_writer is None: + pass_handle = open(pass_file, 'wt') + pass_writer = getDbWriter(pass_handle, add_fields=fields) + + # Write row to pass file + pass_count += 1 + pass_writer.writerow(record.toDict()) + else: + # Open failed file + if out_args['failed'] and fail_writer is None: + fail_handle = open(fail_file, 'wt') + fail_writer = getDbWriter(fail_handle, add_fields=fields) + + # Write row to fail file if specified + fail_count += 1 + if fail_writer is not None: + fail_writer.writerow(record.toDict()) + + # Print progress + printProgress(i, total_count, 0.05, start_time) + + # Print consol log + log = OrderedDict() + log['OUTPUT'] = pass_file + log['PASS'] = pass_count + log['FAIL'] = fail_count + log['END'] = 'MakeDb' + printLog(log) + + if pass_handle is not None: pass_handle.close() + if fail_handle is not None: fail_handle.close() + + +# TODO: may be able to merge with other mains +def parseIMGT(aligner_output, seq_file=None, no_parse=True, partial=False, + parse_scores=False, parse_regions=False, parse_junction=False, + out_args=default_out_args): + """ + Main for IMGT aligned sample sequences. + + Arguments: + aligner_output : zipped file or unzipped folder output by IMGT. + seq_file : FASTA file input to IMGT (from which to get seqID). + no_parse : if ID is to be parsed for pRESTO output with default delimiters. + partial : If True put incomplete alignments in the pass file. + parse_scores : if True add alignment score fields to output file. + parse_regions : if True add FWR and CDR region fields to output file. + out_args : common output argument dictionary from parseCommonArgs. 
+ + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'MakeDb' + log['ALIGNER'] = 'IMGT' + log['ALIGNER_OUTPUT'] = aligner_output + log['SEQ_FILE'] = os.path.basename(seq_file) if seq_file else '' + log['NO_PARSE'] = no_parse + log['PARTIAL'] = partial + log['SCORES'] = parse_scores + log['REGIONS'] = parse_regions + log['JUNCTION'] = parse_junction + printLog(log) + + start_time = time() + printMessage('Loading sequence files', start_time=start_time, width=25) + # Extract IMGT files + temp_dir, imgt_files = extractIMGT(aligner_output) + # Count records in IMGT files + total_count = countDbFile(imgt_files['summary']) + # Get (parsed) IDs from fasta file submitted to IMGT + id_dict = getIDforIMGT(seq_file) if seq_file else {} + printMessage('Done', start_time=start_time, end=True, width=25) + + # Parse IMGT output and write db + with open(imgt_files['summary'], 'r') as summary_handle, \ + open(imgt_files['gapped'], 'r') as gapped_handle, \ + open(imgt_files['ntseq'], 'r') as ntseq_handle, \ + open(imgt_files['junction'], 'r') as junction_handle: + parse_iter = IMGTReader(summary_handle, gapped_handle, ntseq_handle, junction_handle, + parse_scores=parse_scores, parse_regions=parse_regions, + parse_junction=parse_junction) + file_prefix = getFilePrefix(aligner_output, out_args) + writeDb(parse_iter, parse_iter.fields, file_prefix, total_count, id_dict=id_dict, + no_parse=no_parse, partial=partial, out_args=out_args) + + # Cleanup temp directory + temp_dir.cleanup() + + return None + + +# TODO: may be able to merge with other mains +def parseIgBLAST(aligner_output, seq_file, repo, no_parse=True, partial=False, + parse_regions=False, parse_scores=False, parse_igblast_cdr3=False, + out_args=default_out_args): + """ + Main for IgBLAST aligned sample sequences. + + Arguments: + aligner_output : IgBLAST output file to process. + seq_file : fasta file input to IgBlast (from which to get sequence). 
+ repo : folder with germline repertoire files. + no_parse : if ID is to be parsed for pRESTO output with default delimiters. + partial : If True put incomplete alignments in the pass file. + parse_regions : if True add FWR and CDR fields to output file. + parse_scores : if True add alignment score fields to output file. + parse_igblast_cdr3 : if True parse CDR3 sequences generated by IgBLAST + out_args : common output argument dictionary from parseCommonArgs. + + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'MakeDB' + log['ALIGNER'] = 'IgBlast' + log['ALIGNER_OUTPUT'] = os.path.basename(aligner_output) + log['SEQ_FILE'] = os.path.basename(seq_file) + log['NO_PARSE'] = no_parse + log['PARTIAL'] = partial + log['SCORES'] = parse_scores + log['REGIONS'] = parse_regions + printLog(log) + + start_time = time() + printMessage('Loading sequence files', start_time=start_time, width=25) + # Count records in sequence file + total_count = countSeqFile(seq_file) + # Get input sequence dictionary + seq_dict = getSeqDict(seq_file) + # Create germline repo dictionary + repo_dict = readRepo(repo) + printMessage('Done', start_time=start_time, end=True, width=25) + + # Parse and write output + with open(aligner_output, 'r') as f: + parse_iter = IgBLASTReader(f, seq_dict, repo_dict, + parse_scores=parse_scores, parse_regions=parse_regions, + parse_igblast_cdr3=parse_igblast_cdr3) + file_prefix = getFilePrefix(aligner_output, out_args) + writeDb(parse_iter, parse_iter.fields, file_prefix, total_count, + no_parse=no_parse, partial=partial, out_args=out_args) + + return None + + +# TODO: may be able to merge with other mains +def parseIHMM(aligner_output, seq_file, repo, no_parse=True, partial=False, + parse_scores=False, parse_regions=False, out_args=default_out_args): + """ + Main for iHMMuneAlign aligned sample sequences. + + Arguments: + aligner_output : iHMMune-Align output file to process. 
+ seq_file : fasta file input to iHMMuneAlign (from which to get sequence). + repo : folder with germline repertoire files. + no_parse : if ID is to be parsed for pRESTO output with default delimiters. + partial : If True put incomplete alignments in the pass file. + parse_scores : if True parse alignment scores. + parse_regions : if True add FWR and CDR region fields. + out_args : common output argument dictionary from parseCommonArgs. + + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'MakeDB' + log['ALIGNER'] = 'iHMMune-Align' + log['ALIGNER_OUTPUT'] = os.path.basename(aligner_output) + log['SEQ_FILE'] = os.path.basename(seq_file) + log['NO_PARSE'] = no_parse + log['PARTIAL'] = partial + log['SCORES'] = parse_scores + log['REGIONS'] = parse_regions + printLog(log) + + start_time = time() + printMessage('Loading sequence files', start_time=start_time, width=25) + # Count records in sequence file + total_count = countSeqFile(seq_file) + # Get input sequence dictionary + seq_dict = getSeqDict(seq_file) + # Create germline repo dictionary + repo_dict = readRepo(repo) + printMessage('Done', start_time=start_time, end=True, width=25) + + # Parse and write output + with open(aligner_output, 'r') as f: + parse_iter = IHMMuneReader(f, seq_dict, repo_dict, + parse_scores=parse_scores, parse_regions=parse_regions) + file_prefix = getFilePrefix(aligner_output, out_args) + writeDb(parse_iter, parse_iter.fields, file_prefix, total_count, + no_parse=no_parse, partial=partial, out_args=out_args) + + return None + + +def getArgParser(): + """ + Defines the ArgumentParser. + + Returns: + argparse.ArgumentParser + """ + fields = dedent( + ''' + output files: + db-pass + database of alignment records with functionality information, + V and J calls, and a junction region. + db-fail + database with records that fail due to no functionality information + (did not pass IMGT), no V call, no J call, or no junction region. 
+ + universal output fields: + SEQUENCE_ID, SEQUENCE_INPUT, SEQUENCE_VDJ, SEQUENCE_IMGT, + FUNCTIONAL, IN_FRAME, STOP, MUTATED_INVARIANT, INDELS, + V_CALL, D_CALL, J_CALL, + V_SEQ_START, V_SEQ_LENGTH, + D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH, + J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH, + JUNCTION_LENGTH, JUNCTION, NP1_LENGTH, NP2_LENGTH, + FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, FWR4_IMGT, + CDR1_IMGT, CDR2_IMGT, CDR3_IMGT + + imgt specific output fields: + V_GERM_START_IMGT, V_GERM_LENGTH_IMGT, + N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, P5J_LENGTH, + D_FRAME, V_SCORE, V_IDENTITY, J_SCORE, J_IDENTITY, + + igblast specific output fields: + V_GERM_START_VDJ, V_GERM_LENGTH_VDJ, + V_EVALUE, V_SCORE, V_IDENTITY, V_BTOP, + J_EVALUE, J_SCORE, J_IDENTITY, J_BTOP. + CDR3_IGBLAST_NT, CDR3_IGBLAST_AA + + ihmm specific output fields: + V_GERM_START_VDJ, V_GERM_LENGTH_VDJ, + HMM_SCORE + ''') + + # Define ArgumentParser + parser = ArgumentParser(description=__doc__, epilog=fields, + formatter_class=CommonHelpFormatter) + parser.add_argument('--version', action='version', + version='%(prog)s:' + ' %s-%s' %(__version__, __date__)) + subparsers = parser.add_subparsers(title='subcommands', dest='command', + help='Aligner used', metavar='') + # TODO: This is a temporary fix for Python issue 9253 + subparsers.required = True + + # Parent parser + parser_parent = getCommonArgParser(seq_in=False, seq_out=False, log=False) + + # IgBlast Aligner + parser_igblast = subparsers.add_parser('igblast', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Process IgBLAST output.', + description='Process IgBLAST output.') + parser_igblast.add_argument('-i', nargs='+', action='store', dest='aligner_outputs', + required=True, + help='''IgBLAST output files in format 7 with query sequence + (IgBLAST argument \'-outfmt "7 std qseq sseq btop"\').''') + parser_igblast.add_argument('-r', nargs='+', action='store', dest='repo', required=True, + 
help='''List of folders and/or fasta files containing + IMGT-gapped germline sequences corresponding to the + set of germlines used in the IgBLAST alignment.''') + parser_igblast.add_argument('-s', action='store', nargs='+', dest='seq_files', + required=True, + help='''List of input FASTA files (with .fasta, .fna or .fa + extension), containing sequences.''') + parser_igblast.add_argument('--noparse', action='store_true', dest='no_parse', + help='''Specify to prevent input sequence headers from being parsed + to add new columns to database. Parsing of sequence headers requires + headers to be in the pRESTO annotation format, so this should be specified + when sequence headers are incompatible with the pRESTO annotation scheme. + Note, unrecognized header formats will default to this behavior.''') + parser_igblast.add_argument('--partial', action='store_true', dest='partial', + help='''If specified, include incomplete V(D)J alignments in + the pass file instead of the fail file.''') + parser_igblast.add_argument('--scores', action='store_true', dest='parse_scores', + help='''Specify if alignment score metrics should be + included in the output. Adds the V_SCORE, V_IDENTITY, + V_EVALUE, V_BTOP, J_SCORE, J_IDENTITY, + J_BTOP, and J_EVALUE columns.''') + parser_igblast.add_argument('--regions', action='store_true', dest='parse_regions', + help='''Specify if IMGT FWR and CDRs should be + included in the output. Adds the FWR1_IMGT, FWR2_IMGT, + FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and + CDR3_IMGT columns.''') + parser_igblast.add_argument('--cdr3', action='store_true', + dest='parse_igblast_cdr3', + help='''Specify if the CDR3 sequences generated by IgBLAST + should be included in the output. Adds the columns + CDR3_IGBLAST_NT and CDR3_IGBLAST_AA. 
Requires IgBLAST + version 1.5 or greater.''') + parser_igblast.set_defaults(func=parseIgBLAST) + + # IMGT aligner + parser_imgt = subparsers.add_parser('imgt', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='''Process IMGT/HighV-Quest output + (does not work with V-QUEST).''', + description='''Process IMGT/HighV-Quest output + (does not work with V-QUEST).''') + parser_imgt.add_argument('-i', nargs='+', action='store', dest='aligner_outputs', + help='''Either zipped IMGT output files (.zip or .txz) or a + folder containing unzipped IMGT output files (which must + include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences, + and 6_Junction).''') + parser_imgt.add_argument('-s', nargs='*', action='store', dest='seq_files', + required=False, + help='''List of input FASTA files (with .fasta, .fna or .fa + extension) containing sequences.''') + parser_imgt.add_argument('--noparse', action='store_true', dest='no_parse', + help='''Specify to prevent input sequence headers from being parsed + to add new columns to database. Parsing of sequence headers requires + headers to be in the pRESTO annotation format, so this should be specified + when sequence headers are incompatible with the pRESTO annotation scheme. + Note, unrecognized header formats will default to this behavior.''') + parser_imgt.add_argument('--partial', action='store_true', dest='partial', + help='''If specified, include incomplete V(D)J alignments in + the pass file instead of the fail file.''') + parser_imgt.add_argument('--scores', action='store_true', dest='parse_scores', + help='''Specify if alignment score metrics should be + included in the output. Adds the V_SCORE, V_IDENTITY, + J_SCORE and J_IDENTITY.''') + parser_imgt.add_argument('--regions', action='store_true', dest='parse_regions', + help='''Specify if IMGT FWRs and CDRs should be + included in the output. 
Adds the FWR1_IMGT, FWR2_IMGT, + FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and + CDR3_IMGT columns.''') + parser_imgt.add_argument('--junction', action='store_true', dest='parse_junction', + help='''Specify if detailed junction fields should be + included in the output. Adds the columns + N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, + P5J_LENGTH, D_FRAME.''') + parser_imgt.set_defaults(func=parseIMGT) + + # iHMMuneAlign Aligner + parser_ihmm = subparsers.add_parser('ihmm', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Process iHMMune-Align output.', + description='Process iHMMune-Align output.') + parser_ihmm.add_argument('-i', nargs='+', action='store', dest='aligner_outputs', + required=True, + help='''iHMMune-Align output file.''') + parser_ihmm.add_argument('-r', nargs='+', action='store', dest='repo', required=True, + help='''List of folders and/or FASTA files containing + IMGT-gapped germline sequences corresponding to the + set of germlines used in the IgBLAST alignment.''') + parser_ihmm.add_argument('-s', action='store', nargs='+', dest='seq_files', + required=True, + help='''List of input FASTA files (with .fasta, .fna or .fa + extension) containing sequences.''') + parser_ihmm.add_argument('--noparse', action='store_true', dest='no_parse', + help='''Specify to prevent input sequence headers from being parsed + to add new columns to database. Parsing of sequence headers requires + headers to be in the pRESTO annotation format, so this should be specified + when sequence headers are incompatible with the pRESTO annotation scheme. 
+ Note, unrecognized header formats will default to this behavior.''') + parser_ihmm.add_argument('--partial', action='store_true', dest='partial', + help='''If specified, include incomplete V(D)J alignments in + the pass file instead of the fail file.''') + parser_ihmm.add_argument('--scores', action='store_true', dest='parse_scores', + help='''Specify if alignment score metrics should be + included in the output. Adds the path score of the + iHMMune-Align hidden Markov model to HMM_SCORE.''') + parser_ihmm.add_argument('--regions', action='store_true', dest='parse_regions', + help='''Specify if IMGT FWRs and CDRs should be + included in the output. Adds the FWR1_IMGT, FWR2_IMGT, + FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and + CDR3_IMGT columns.''') + parser_ihmm.set_defaults(func=parseIHMM) + + return parser + + +if __name__ == "__main__": + """ + Parses command line arguments and calls main + """ + parser = getArgParser() + checkArgs(parser) + args = parser.parse_args() + args_dict = parseCommonArgs(args, in_arg='aligner_outputs') + + # Set no ID parsing if sequence files are not provided + if 'seq_files' in args_dict and not args_dict['seq_files']: + args_dict['no_parse'] = True + + # Delete + if 'seq_files' in args_dict: del args_dict['seq_files'] + if 'aligner_outputs' in args_dict: del args_dict['aligner_outputs'] + if 'command' in args_dict: del args_dict['command'] + if 'func' in args_dict: del args_dict['func'] + + if args.command == 'imgt': + for i in range(len(args.__dict__['aligner_outputs'])): + args_dict['aligner_output'] = args.__dict__['aligner_outputs'][i] + args_dict['seq_file'] = args.__dict__['seq_files'][i] \ + if args.__dict__['seq_files'] else None + args.func(**args_dict) + elif args.command == 'igblast' or args.command == 'ihmm': + for i in range(len(args.__dict__['aligner_outputs'])): + args_dict['aligner_output'] = args.__dict__['aligner_outputs'][i] + args_dict['seq_file'] = args.__dict__['seq_files'][i] + args.func(**args_dict) 
diff -r 000000000000 -r 183edf446dcf ParseDb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ParseDb.py Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,1119 @@ +#!/usr/bin/env python3 +""" +Parses tab delimited database files +""" +# Info +__author__ = 'Jason Anthony Vander Heiden' +from changeo import __version__, __date__ + +# Imports +import csv +import os +import re +from argparse import ArgumentParser +from collections import OrderedDict + +from textwrap import dedent +from time import time +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from Bio.Alphabet import IUPAC + +# Presto and changeo imports +from presto.Defaults import default_delimiter, default_out_args +from presto.Annotation import flattenAnnotation +from presto.IO import getOutputHandle, printLog, printProgress, printMessage +from changeo.Defaults import default_csv_size +from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs +from changeo.IO import getDbWriter, readDbFile, countDbFile + +# System settings +csv.field_size_limit(default_csv_size) + +# Defaults +default_id_field = 'SEQUENCE_ID' +default_seq_field = 'SEQUENCE_IMGT' +default_germ_field = 'GERMLINE_IMGT_D_MASK' +default_index_field = 'INDEX' + +# TODO: convert SQL-ish operations to modify_func() as per ParseHeaders + +def getDbSeqRecord(db_record, id_field, seq_field, meta_fields=None, + delimiter=default_delimiter): + """ + Parses a database record into a SeqRecord + + Arguments: + db_record = a dictionary containing a database record + id_field = the field containing identifiers + seq_field = the field containing sequences + meta_fields = a list of fields to add to sequence annotations + delimiter = a tuple of delimiters for (fields, values, value lists) + + Returns: + a SeqRecord + """ + # Return None if ID or sequence fields are empty + if not db_record[id_field] or not db_record[seq_field]: + return None + + # Create description string + desc_dict = 
OrderedDict([('ID', db_record[id_field])]) + if meta_fields is not None: + desc_dict.update([(f, db_record[f]) for f in meta_fields if f in db_record]) + desc_str = flattenAnnotation(desc_dict, delimiter=delimiter) + + # Create SeqRecord + seq_record = SeqRecord(Seq(db_record[seq_field], IUPAC.ambiguous_dna), + id=desc_str, name=desc_str, description='') + + return seq_record + + +def splitDbFile(db_file, field, num_split=None, out_args=default_out_args): + """ + Divides a tab-delimited database file into segments by description tags + + Arguments: + db_file = filename of the tab-delimited database file to split + field = the field name by which to split db_file + num_split = the numerical threshold by which to group sequences; + if None treat field as textual + out_args = common output argument dictionary from parseCommonArgs + + Returns: + a list of output file names + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'split' + log['FILE'] = os.path.basename(db_file) + log['FIELD'] = field + log['NUM_SPLIT'] = num_split + printLog(log) + + # Open IgRecord reader iter object + reader = readDbFile(db_file, ig=False) + + # Determine total numbers of records + rec_count = countDbFile(db_file) + + start_time = time() + count = 0 + # Sort records into files based on textual field + if num_split is None: + # Create set of unique field tags + tmp_iter = readDbFile(db_file, ig=False) + tag_list = list(set([row[field] for row in tmp_iter])) + + # Forbidden characters in filename and replacements + noGood = {'\/':'f','\\':'b','?':'q','\%':'p','*':'s',':':'c', + '\|':'pi','\"':'dq','\'':'sq','<':'gt','>':'lt',' ':'_'} + # Replace forbidden characters in tag_list + tag_dict = {} + for tag in tag_list: + for c,r in noGood.items(): + tag_dict[tag] = (tag_dict.get(tag, tag).replace(c,r) \ + if c in tag else tag_dict.get(tag, tag)) + + # Create output handles + handles_dict = {tag:getOutputHandle(db_file, + '%s-%s' % (field, label), + out_type = 
out_args['out_type'], + out_name = out_args['out_name'], + out_dir = out_args['out_dir']) + for tag, label in tag_dict.items()} + + # Create Db writer instances + writers_dict = {tag:getDbWriter(handles_dict[tag], db_file) + for tag in tag_dict} + + # Iterate over IgRecords + for row in reader: + printProgress(count, rec_count, 0.05, start_time) + count += 1 + # Write row to appropriate file + tag = row[field] + writers_dict[tag].writerow(row) + + # Sort records into files based on numeric num_split + else: + num_split = float(num_split) + + # Create output handles + handles_dict = {'under':getOutputHandle(db_file, + 'under-%.1f' % num_split, + out_type = out_args['out_type'], + out_name = out_args['out_name'], + out_dir = out_args['out_dir']), + 'atleast':getOutputHandle(db_file, + 'atleast-%.1f' % num_split, + out_type = out_args['out_type'], + out_name = out_args['out_name'], + out_dir = out_args['out_dir'])} + + # Create Db writer instances + writers_dict = {'under':getDbWriter(handles_dict['under'], db_file), + 'atleast':getDbWriter(handles_dict['atleast'], db_file)} + + # Iterate over IgRecords + for row in reader: + printProgress(count, rec_count, 0.05, start_time) + count += 1 + tag = row[field] + tag = 'under' if float(tag) < num_split else 'atleast' + writers_dict[tag].writerow(row) + + # Write log + printProgress(count, rec_count, 0.05, start_time) + log = OrderedDict() + for i, k in enumerate(handles_dict): + log['OUTPUT%i' % (i + 1)] = os.path.basename(handles_dict[k].name) + log['RECORDS'] = rec_count + log['PARTS'] = len(handles_dict) + log['END'] = 'ParseDb' + printLog(log) + + # Close output file handles + for t in handles_dict: handles_dict[t].close() + + return [handles_dict[t].name for t in handles_dict] + + +# TODO: SHOULD ALLOW FOR UNSORTED CLUSTER COLUMN +# TODO: SHOULD ALLOW FOR GROUPING FIELDS +def convertDbBaseline(db_file, id_field=default_id_field, seq_field=default_seq_field, + germ_field=default_germ_field, cluster_field=None, + 
meta_fields=None, out_args=default_out_args): + """ + Builds a BASELINe formatted fasta file from database records, + writing each germline under a header that starts with ">>" + + Arguments: + db_file = the database file name + id_field = the field containing identifiers + seq_field = the field containing sample sequences + germ_field = the field containing germline sequences + cluster_field = the field containing clonal groupings + if None write the germline for each record + meta_fields = a list of fields to add to sequence annotations + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'baseline' + log['FILE'] = os.path.basename(db_file) + log['ID_FIELD'] = id_field + log['SEQ_FIELD'] = seq_field + log['GERM_FIELD'] = germ_field + log['CLUSTER_FIELD'] = cluster_field + if meta_fields is not None: log['META_FIELDS'] = ','.join(meta_fields) + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='clip') + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = germ_count = pass_count = fail_count = 0 + cluster_last = None + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Update cluster ID + cluster = rec.get(cluster_field, None) + + # Get germline SeqRecord when needed + # (the extra '>' prepended to the id gives the ">>" germline header once written as fasta) + if cluster_field is None: + germ = getDbSeqRecord(rec, id_field, germ_field, meta_fields, + delimiter=out_args['delimiter']) + germ.id = '>' + germ.id + elif cluster != cluster_last: + germ = getDbSeqRecord(rec, cluster_field, germ_field, + delimiter=out_args['delimiter']) + germ.id = '>' + germ.id + else: + germ = None + + # Get read SeqRecord + seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields, + delimiter=out_args['delimiter']) + + 
# Write germline + if germ is not None: + germ_count += 1 + SeqIO.write(germ, pass_handle, 'fasta') + + # Write sequences + if seq is not None: + pass_count += 1 + SeqIO.write(seq, pass_handle, 'fasta') + else: + fail_count += 1 + + # Set last cluster ID + cluster_last = cluster + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['GERMLINES'] = germ_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def convertDbFasta(db_file, id_field=default_id_field, seq_field=default_seq_field, + meta_fields=None, out_args=default_out_args): + """ + Builds fasta files from database records + + Arguments: + db_file = the database file name + id_field = the field containing identifiers + seq_field = the field containing sequences + meta_fields = a list of fields to add to sequence annotations + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'fasta' + log['FILE'] = os.path.basename(db_file) + log['ID_FIELD'] = id_field + log['SEQ_FIELD'] = seq_field + if meta_fields is not None: log['META_FIELDS'] = ','.join(meta_fields) + printLog(log) + + # Open file handles + out_type = 'fasta' + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type=out_type) + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = pass_count = fail_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Get SeqRecord + seq = getDbSeqRecord(rec, 
id_field, seq_field, meta_fields, out_args['delimiter']) + + # Write sequences + if seq is not None: + pass_count += 1 + SeqIO.write(seq, pass_handle, out_type) + else: + fail_count += 1 + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def addDbFile(db_file, fields, values, out_args=default_out_args): + """ + Adds field and value pairs to a database file + + Arguments: + db_file = the database file name + fields = a list of fields to add + values = a list of values to assign to all rows of each field + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'add' + log['FILE'] = os.path.basename(db_file) + log['FIELDS'] = ','.join(fields) + log['VALUES'] = ','.join(values) + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-add', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file, add_fields=fields) + # Count records + result_count = countDbFile(db_file) + + # Define fields and values to append + add_dict = {k:v for k,v in zip(fields, values) if k not in db_iter.fieldnames} + + # Iterate over records + start_time = time() + rec_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + # Write updated row + rec.update(add_dict) + pass_writer.writerow(rec) + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = 
os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def indexDbFile(db_file, field=default_index_field, out_args=default_out_args): + """ + Adds an index column to a database file + + Arguments: + db_file = the database file name + field = the name of the index field to add + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'index' + log['FILE'] = os.path.basename(db_file) + log['FIELD'] = field + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-index', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file, add_fields=field) + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Add count and write updated row + rec.update({field:rec_count}) + pass_writer.writerow(rec) + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def dropDbFile(db_file, fields, out_args=default_out_args): + """ + Deletes entire fields from a database file + + Arguments: + db_file = the database file name + fields = a list of fields to drop + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'drop' + 
log['FILE'] = os.path.basename(db_file) + log['FIELDS'] = ','.join(fields) + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-drop', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file, exclude_fields=fields) + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + # Write row + pass_writer.writerow(rec) + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def deleteDbFile(db_file, fields, values, logic='any', regex=False, + out_args=default_out_args): + """ + Deletes records from a database file + + Arguments: + db_file = the database file name + fields = a list of fields to check for deletion criteria + values = a list of values defining deletion targets + logic = one of 'any' or 'all' defining whether one or all fields must have a match. + regex = if False do exact full string matches; if True allow partial regex matches. 
+ out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + # Define string match function + # In regex mode a missing field (rec.get default False) or a None cell is a non-match + # rather than a TypeError from re.search, mirroring the exact-match branch + if regex: + def _match_func(x, patterns): return isinstance(x, str) and any([re.search(p, x) for p in patterns]) + else: + def _match_func(x, patterns): return x in patterns + + # Define logic function + if logic == 'any': + _logic_func = any + elif logic == 'all': + _logic_func = all + + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'delete' + log['FILE'] = os.path.basename(db_file) + log['FIELDS'] = ','.join(fields) + log['VALUES'] = ','.join(values) + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-delete', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file) + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = pass_count = fail_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Check for deletion values in all fields + delete = _logic_func([_match_func(rec.get(f, False), values) for f in fields]) + + # Write sequences + if not delete: + pass_count += 1 + pass_writer.writerow(rec) + else: + fail_count += 1 + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['KEPT'] = pass_count + log['DELETED'] = fail_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def renameDbFile(db_file, fields, names, out_args=default_out_args): + """ + Renames fields in a database file + + Arguments: + db_file = the database file name + fields = a list of fields to rename + names = a list of new names for 
fields + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'rename' + log['FILE'] = os.path.basename(db_file) + log['FIELDS'] = ','.join(fields) + log['NAMES'] = ','.join(names) + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-rename', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + + # Get header and rename fields + header = (readDbFile(db_file, ig=False)).fieldnames + for f, n in zip(fields, names): + i = header.index(f) + header[i] = n + + # Open writer and write new header + # TODO: should modify getDbWriter to take a list of fields + pass_writer = csv.DictWriter(pass_handle, fieldnames=header, dialect='excel-tab') + pass_writer.writeheader() + + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + # TODO: repeating renaming is unnecessary. 
should add a non-dict reader/writer to DbCore + # Rename fields + for f, n in zip(fields, names): + rec[n] = rec.pop(f) + # Write + pass_writer.writerow(rec) + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def selectDbFile(db_file, fields, values, logic='any', regex=False, + out_args=default_out_args): + """ + Selects records from a database file + + Arguments: + db_file = the database file name + fields = a list of fields to check for selection criteria + values = a list of values defining selection targets + logic = one of 'any' or 'all' defining whether one or all fields must have a match. + regex = if False do exact full string matches; if True allow partial regex matches. + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + # Define string match function + # In regex mode a missing field (rec.get default False) or a None cell is a non-match + # rather than a TypeError from re.search, mirroring the exact-match branch + if regex: + def _match_func(x, patterns): return isinstance(x, str) and any([re.search(p, x) for p in patterns]) + else: + def _match_func(x, patterns): return x in patterns + + # Define logic function + if logic == 'any': + _logic_func = any + elif logic == 'all': + _logic_func = all + + # Print console log + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'select' + log['FILE'] = os.path.basename(db_file) + log['FIELDS'] = ','.join(fields) + log['VALUES'] = ','.join(values) + log['REGEX'] = regex + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-select', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file) + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = pass_count = 
fail_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Check for selection values in all fields + select = _logic_func([_match_func(rec.get(f, False), values) for f in fields]) + + # Write sequences + if select: + pass_count += 1 + pass_writer.writerow(rec) + else: + fail_count += 1 + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['SELECTED'] = pass_count + log['DISCARDED'] = fail_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def sortDbFile(db_file, field, numeric=False, descend=False, + out_args=default_out_args): + """ + Sorts records by values in an annotation field + + Arguments: + db_file = the database filename + field = the field name to sort by + numeric = if True sort field numerically; + if False sort field alphabetically + descend = if True sort in descending order; + if False sort in ascending order + + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'sort' + log['FILE'] = os.path.basename(db_file) + log['FIELD'] = field + log['NUMERIC'] = numeric + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-sort', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file) + + + # Store all records in a dictionary + start_time = time() + printMessage("Indexing: Running", start_time=start_time) + db_dict = {i:r for i, r in enumerate(db_iter)} + result_count = len(db_dict) + + # Sort db_dict by field values + tag_dict = {k:v[field] for k, v in db_dict.items()} + if 
numeric: tag_dict = {k:float(v or 0) for k, v in tag_dict.items()} + sorted_keys = sorted(tag_dict, key=tag_dict.get, reverse=descend) + printMessage("Indexing: Done", start_time=start_time, end=True) + + # Iterate over records + start_time = time() + rec_count = 0 + for key in sorted_keys: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Write records + pass_writer.writerow(db_dict[key]) + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def updateDbFile(db_file, field, values, updates, out_args=default_out_args): + """ + Updates field and value pairs to a database file + + Arguments: + db_file = the database file name + field = the field to update + values = a list of values to specifying which rows to update + updates = a list of values to update each value with + out_args = common output argument dictionary from parseCommonArgs + + Returns: + the output file name + """ + log = OrderedDict() + log['START'] = 'ParseDb' + log['COMMAND'] = 'update' + log['FILE'] = os.path.basename(db_file) + log['FIELD'] = field + log['VALUES'] = ','.join(values) + log['UPDATES'] = ','.join(updates) + printLog(log) + + # Open file handles + db_iter = readDbFile(db_file, ig=False) + pass_handle = getOutputHandle(db_file, out_label='parse-update', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type='tab') + pass_writer = getDbWriter(pass_handle, db_file) + # Count records + result_count = countDbFile(db_file) + + # Iterate over records + start_time = time() + rec_count = pass_count = 0 + for rec in db_iter: + # Print progress for previous iteration + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += 1 + + # Updated values 
if found + for x, y in zip(values, updates): + if rec[field] == x: + rec[field] = y + pass_count += 1 + # Stop at the first matching pair so the newly written value is not + # matched again by a later pair (e.g. swapping A->B and B->A) + break + + # Write records + pass_writer.writerow(rec) + + # Print counts + printProgress(rec_count, result_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['UPDATED'] = pass_count + log['END'] = 'ParseDb' + printLog(log) + + # Close file handles + pass_handle.close() + + return pass_handle.name + + +def getArgParser(): + """ + Defines the ArgumentParser + + Arguments: + None + + Returns: + an ArgumentParser object + """ + # Define input and output field help message + fields = dedent( + ''' + output files: + sequences + FASTA formatted sequences output from the subcommands fasta and clip. + - + database files partitioned by annotation and . + parse- + output of the database modification functions where is one of + the subcommands add, index, drop, delete, rename, select, sort or update. + + required fields: + SEQUENCE_ID + + optional fields: + JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ, + GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK, + GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION + + output fields: + None + ''') + + # Define ArgumentParser + parser = ArgumentParser(description=__doc__, epilog=fields, + formatter_class=CommonHelpFormatter) + parser.add_argument('--version', action='version', + version='%(prog)s:' + ' %s-%s' %(__version__, __date__)) + subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='', + help='Database operation') + # TODO: This is a temporary fix for Python issue 9253 + subparsers.required = True + + # Define parent parser + parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, + failed=False, log=False) + + # Subparser to convert database entries to sequence file + parser_seq = subparsers.add_parser('fasta', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Creates a fasta 
file from database records.', + description='Creates a fasta file from database records.') + parser_seq.add_argument('--if', action='store', dest='id_field', + default=default_id_field, + help='The name of the field containing identifiers') + parser_seq.add_argument('--sf', action='store', dest='seq_field', + default=default_seq_field, + help='The name of the field containing sequences') + parser_seq.add_argument('--mf', nargs='+', action='store', dest='meta_fields', + help='List of annotation fields to add to the sequence description') + parser_seq.set_defaults(func=convertDbFasta) + + # Subparser to convert database entries to clip-fasta file + parser_baseln = subparsers.add_parser('baseline', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + description='Creates a BASELINe fasta file from database records.', + help='''Creates a specially formatted fasta file + from database records for input into the BASELINe + website. The format groups clonally related sequences + sequentially, with the germline sequence preceding + each clone and denoted by headers starting with ">>".''') + parser_baseln.add_argument('--if', action='store', dest='id_field', + default=default_id_field, + help='The name of the field containing identifiers') + parser_baseln.add_argument('--sf', action='store', dest='seq_field', + default=default_seq_field, + help='The name of the field containing reads') + parser_baseln.add_argument('--gf', action='store', dest='germ_field', + default=default_germ_field, + help='The name of the field containing germline sequences') + parser_baseln.add_argument('--cf', action='store', dest='cluster_field', default=None, + help='The name of the field containing containing sorted clone IDs') + parser_baseln.add_argument('--mf', nargs='+', action='store', dest='meta_fields', + help='List of annotation fields to add to the sequence description') + parser_baseln.set_defaults(func=convertDbBaseline) + + # Subparser to partition files by annotation 
values + parser_split = subparsers.add_parser('split', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Splits database files by field values.', + description='Splits database files by field values') + parser_split.add_argument('-f', action='store', dest='field', type=str, required=True, + help='Annotation field by which to split database files.') + parser_split.add_argument('--num', action='store', dest='num_split', type=float, default=None, + help='''Specify to define the field as numeric and group + records by whether they are less than or at least + (greater than or equal to) the specified value.''') + parser_split.set_defaults(func=splitDbFile) + + # Subparser to add records + parser_add = subparsers.add_parser('add', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Adds field and value pairs.', + description='Adds field and value pairs.') + parser_add.add_argument('-f', nargs='+', action='store', dest='fields', required=True, + help='The name of the fields to add.') + parser_add.add_argument('-u', nargs='+', action='store', dest='values', required=True, + help='The value to assign to all rows for each field.') + parser_add.set_defaults(func=addDbFile) + + # Subparser to delete records + parser_delete = subparsers.add_parser('delete', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Deletes specific records.', + description='Deletes specific records.') + parser_delete.add_argument('-f', nargs='+', action='store', dest='fields', required=True, + help='The name of the fields to check for deletion criteria.') + parser_delete.add_argument('-u', nargs='+', action='store', dest='values', default=['', 'NA'], + help='''The values defining which records to delete. 
A value + may appear in any of the fields specified with -f.''') + parser_delete.add_argument('--logic', action='store', dest='logic', + choices=('any', 'all'), default='any', + help='''Defines whether a value may appear in any field (any) + or whether it must appear in all fields (all).''') + parser_delete.add_argument('--regex', action='store_true', dest='regex', + help='''If specified, treat values as regular expressions + and allow partial string matches.''') + parser_delete.set_defaults(func=deleteDbFile) + + # Subparser to drop fields + parser_drop = subparsers.add_parser('drop', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Deletes entire fields.', + description='Deletes entire fields.') + parser_drop.add_argument('-f', nargs='+', action='store', dest='fields', required=True, + help='The name of the fields to delete from the database.') + parser_drop.set_defaults(func=dropDbFile) + + # Subparser to index fields + parser_index = subparsers.add_parser('index', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Adds a numeric index field.', + description='Adds a numeric index field.') + parser_index.add_argument('-f', action='store', dest='field', + default=default_index_field, + help='The name of the index field to add to the database.') + parser_index.set_defaults(func=indexDbFile) + + # Subparser to rename fields + parser_rename = subparsers.add_parser('rename', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Renames fields.', + description='Renames fields.') + parser_rename.add_argument('-f', nargs='+', action='store', dest='fields', required=True, + help='List of fields to rename.') + parser_rename.add_argument('-k', nargs='+', action='store', dest='names', required=True, + help='List of new names for each field.') + parser_rename.set_defaults(func=renameDbFile) + + # Subparser to select records + parser_select = subparsers.add_parser('select', parents=[parser_parent], + 
formatter_class=CommonHelpFormatter, + help='Selects specific records.', + description='Selects specific records.') + parser_select.add_argument('-f', nargs='+', action='store', dest='fields', required=True, + help='The name of the fields to check for selection criteria.') + parser_select.add_argument('-u', nargs='+', action='store', dest='values', required=True, + help='''The values defining with records to select. A value + may appear in any of the fields specified with -f.''') + parser_select.add_argument('--logic', action='store', dest='logic', + choices=('any', 'all'), default='any', + help='''Defines whether a value may appear in any field (any) + or whether it must appear in all fields (all).''') + parser_select.add_argument('--regex', action='store_true', dest='regex', + help='''If specified, treat values as regular expressions + and allow partial string matches.''') + parser_select.set_defaults(func=selectDbFile) + + # Subparser to sort file by records + parser_sort = subparsers.add_parser('sort', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Sorts records by field values.', + description='Sorts records by field values.') + parser_sort.add_argument('-f', action='store', dest='field', type=str, required=True, + help='The annotation field by which to sort records.') + parser_sort.add_argument('--num', action='store_true', dest='numeric', default=False, + help='''Specify to define the sort column as numeric rather + than textual.''') + parser_sort.add_argument('--descend', action='store_true', dest='descend', + help='''If specified, sort records in descending, rather + than ascending, order by values in the target field.''') + parser_sort.set_defaults(func=sortDbFile) + + # Subparser to update records + parser_update = subparsers.add_parser('update', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Updates field and value pairs.', + description='Updates field and value pairs.') + 
parser_update.add_argument('-f', action='store', dest='field', required=True, + help='The name of the field to update.') + parser_update.add_argument('-u', nargs='+', action='store', dest='values', required=True, + help='The values that will be replaced.') + parser_update.add_argument('-t', nargs='+', action='store', dest='updates', required=True, + help='''The new value to assign to each selected row.''') + parser_update.set_defaults(func=updateDbFile) + + return parser + + +if __name__ == '__main__': + """ + Parses command line arguments and calls main function + """ + # Parse arguments + parser = getArgParser() + checkArgs(parser) + args = parser.parse_args() + args_dict = parseCommonArgs(args) + # Convert case of fields + if 'id_field' in args_dict: + args_dict['id_field'] = args_dict['id_field'].upper() + if 'seq_field' in args_dict: + args_dict['seq_field'] = args_dict['seq_field'].upper() + if 'germ_field' in args_dict: + args_dict['germ_field'] = args_dict['germ_field'].upper() + if 'field' in args_dict: + args_dict['field'] = args_dict['field'].upper() + if 'cluster_field' in args_dict and args_dict['cluster_field'] is not None: + args_dict['cluster_field'] = args_dict['cluster_field'].upper() + if 'meta_fields' in args_dict and args_dict['meta_fields'] is not None: + args_dict['meta_fields'] = [f.upper() for f in args_dict['meta_fields']] + if 'fields' in args_dict: + args_dict['fields'] = [f.upper() for f in args_dict['fields']] + + # Check modify_args arguments + if args.command == 'add' and len(args_dict['fields']) != len(args_dict['values']): + parser.error('You must specify exactly one value (-u) per field (-f)') + elif args.command == 'rename' and len(args_dict['fields']) != len(args_dict['names']): + parser.error('You must specify exactly one new name (-k) per field (-f)') + elif args.command == 'update' and len(args_dict['values']) != len(args_dict['updates']): + parser.error('You must specify exactly one value (-u) per replacement (-t)') + + # 
Call parser function for each database file + del args_dict['command'] + del args_dict['func'] + del args_dict['db_files'] + for f in args.__dict__['db_files']: + args_dict['db_file'] = f + args.func(**args_dict) + diff -r 000000000000 -r 183edf446dcf create_germlines.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_germlines.sh Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,23 @@ +#!/bin/bash +dir="$(cd "$(dirname "$0")" && pwd)" + +input=$1 +type=$2 +cloned=$3 +output=$4 + +cp $input $PWD/input.tab #file has to have a ".tab" extension + +if [ "true" == "$cloned" ] ; then + cloned="--cloned" +else + cloned="" +fi + +mkdir $PWD/outdir + +#/home/galaxy/anaconda3/bin/python $dir/CreateGermlines.py -d $PWD/input.tab -r $dir/IMGT_Human_IGH[VDJ].fasta --outdir $PWD/outdir --outname output -g $type $cloned +#/data/users/david/anaconda3/bin/python $dir/CreateGermlines.py -d $PWD/input.tab -r $dir/IMGT_Human_IGH[VDJ].fasta --outdir $PWD/outdir --outname output -g $type $cloned +python3 $dir/CreateGermlines.py -d $PWD/input.tab -r $dir/IMGT_Human_IGH[VDJ].fasta --outdir $PWD/outdir --outname output -g $type $cloned + +mv $PWD/outdir/output_germ-pass.tab $output diff -r 000000000000 -r 183edf446dcf create_germlines.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_germlines.xml Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,28 @@ + + Change-O + + create_germlines.sh $input $type $cloned $out_file + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btv359 + + + + + + diff -r 000000000000 -r 183edf446dcf define_clones.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/define_clones.r Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,15 @@ +args <- commandArgs(trailingOnly = TRUE) + +input=args[1] +output=args[2] + +change.o = read.table(input, header=T, sep="\t", quote="", stringsAsFactors=F) + +freq = data.frame(table(change.o$CLONE)) +freq2 = data.frame(table(freq$Freq)) + +freq2$final = as.numeric(freq2$Freq) * as.numeric(as.character(freq2$Var1)) + 
+names(freq2) = c("Clone size", "Nr of clones", "Nr of sequences") + +write.table(x=freq2, file=output, sep="\t",quote=F,row.names=F,col.names=T)
diff -r 000000000000 -r 183edf446dcf define_clones.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/define_clones.sh Mon Jul 17 07:44:27 2017 -0400
@@ -0,0 +1,41 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+# Usage: define_clones.sh bygroup <input> <mode> <act> <model> <norm> <sym> <link> <dist> <out_file> <out_file2>
+#        define_clones.sh hclust <input> <method> <out_file> <out_file2>
+type=$1
+input=$2
+# Use the cores Galaxy allotted to the job (GALAXY_SLOTS); fall back to the previously fixed value of 4.
+nproc="${GALAXY_SLOTS:-4}"
+mkdir -p "$PWD/outdir"
+echo "defineclones: $PWD/outdir"
+
+cp "$input" "$PWD/input.tab" #file has to have a ".tab" extension
+
+if [ "bygroup" == "$type" ] ; then
+    mode=$3
+    act=$4
+    model=$5
+    norm=$6
+    sym=$7
+    link=$8
+    dist=$9
+    output=${10}
+    output2=${11}
+
+    python3 "$dir/DefineClones.py" bygroup -d "$PWD/input.tab" --nproc "$nproc" --outdir "$PWD/outdir" --outname output --mode "$mode" --act "$act" --model "$model" --dist "$dist" --norm "$norm" --sym "$sym" --link "$link"
+
+    # Tabulate the clone sizes of the DefineClones.py result into the second output file.
+    Rscript "$dir/define_clones.r" "$PWD/outdir/output_clone-pass.tab" "$output2" 2>&1
+else
+    method=$3
+    output=$4
+    output2=$5
+
+    python3 "$dir/DefineClones.py" hclust -d "$PWD/input.tab" --nproc "$nproc" --outdir "$PWD/outdir" --outname output --method "$method"
+
+    # Tabulate the clone sizes of the DefineClones.py result into the second output file.
+    Rscript "$dir/define_clones.r" "$PWD/outdir/output_clone-pass.tab" "$output2" 2>&1
+fi
+
+cp "$PWD/outdir/output_clone-pass.tab" "$output"
diff -r 000000000000 -r 183edf446dcf define_clones.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/define_clones.xml Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,68 @@ + + Change-O + + #if $input_type.input_type_select=="bygroup" + define_clones.sh bygroup $input $input_type.mode $input_type.act $input_type.model $input_type.norm
$input_type.sym $input_type.link $input_type.dist $out_file $out_file2 + #else + define_clones.sh hclust $input $input_type.method $out_file $out_file2 + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btv359 + + + + + +
diff -r 000000000000 -r 183edf446dcf makedb.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/makedb.sh Mon Jul 17 07:44:27 2017 -0400
@@ -0,0 +1,36 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+# Usage: makedb.sh <input> <noparse: true|false> <scores: true|false> <regions: true|false> <output>
+input=$1
+noparse=$2
+scores=$3
+regions=$4
+output=$5
+# Translate each "true" switch into the matching MakeDb.py flag; anything else becomes empty.
+if [ "true" == "$noparse" ] ; then
+    noparse="--noparse"
+else
+    noparse=""
+fi
+
+if [ "true" == "$scores" ] ; then
+    scores="--scores"
+else
+    scores=""
+fi
+
+if [ "true" == "$regions" ] ; then
+    regions="--regions"
+else
+    regions=""
+fi
+
+mkdir -p "$PWD/outdir"
+
+command -v python3  # log which interpreter is going to run MakeDb.py
+
+# The three flag variables are expanded unquoted on purpose so that an empty
+# value adds no argument to the command line.
+python3 "$dir/MakeDb.py" imgt -i "$input" --outdir "$PWD/outdir" --outname output $noparse $scores $regions
+
+mv "$PWD/outdir/output_db-pass.tab" "$output"
diff -r 000000000000 -r 183edf446dcf makedb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/makedb.xml Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,31 @@ + + Change-O + + makedb.sh $input $noparse $scores $regions $out_file + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btv359 + + + + + +
diff -r 000000000000 -r 183edf446dcf parsedb.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/parsedb.sh Mon Jul 17 07:44:27 2017 -0400
@@ -0,0 +1,92 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+# Usage: parsedb.sh <action> <input> <output> [action-specific arguments...]
+action=$1
+input=$2
+output=$3
+
+cp "$input" "$PWD/input.tab"
+
+input="$PWD/input.tab"
+# Optional flags ($regex, $tmp) are expanded unquoted on purpose so that an empty value adds no argument.
+mkdir -p "$PWD/outdir"
+# Except for "index" (where $4 is used as given), $4 is a 1-based column number resolved to a field name via the header row.
+if [ "fasta" == "$action" ] ; then
+    python3 "$dir/ParseDb.py" fasta -d "$input" --outdir "$PWD/outdir" --outname output
+    mv "$PWD/outdir/output_sequences.fasta" "$output"
+elif [ "clip" == "$action" ] ; then
+    python3 "$dir/ParseDb.py" clip -d "$input" --outdir "$PWD/outdir" --outname output
+    mv "$PWD/outdir/output_sequences.fasta" "$output"
+elif [ "split" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    label=$5
+    mkdir -p "$PWD/split"
+    python3 "$dir/ParseDb.py" split -d "$input" --outdir "$PWD/split" --outname output -f "$field"
+    #rename "s/output_${field}/$label/" $PWD/split/*
+elif [ "add" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    value=$5
+    python3 "$dir/ParseDb.py" add -d "$input" --outdir "$PWD/outdir" --outname output -f "$field" -u "$value"
+    mv "$PWD/outdir/output_parse-add.tab" "$output"
+elif [ "delete" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    value=$5
+    regex=$6
+    if [ "true" == "$regex" ] ; then
+        regex="--regex"
+    else
+        regex=""
+    fi
+    python3 "$dir/ParseDb.py" delete -d "$input" --outdir "$PWD/outdir" --outname output -f "$field" -u "$value" --logic any $regex
+    mv "$PWD/outdir/output_parse-delete.tab" "$output"
+elif [ "drop" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    python3 "$dir/ParseDb.py" drop -d "$input" --outdir "$PWD/outdir" --outname output -f "$field"
+    mv "$PWD/outdir/output_parse-drop.tab" "$output"
+elif [ "index" == "$action" ] ; then
+    field=$4
+    python3 "$dir/ParseDb.py" index -d "$input" --outdir "$PWD/outdir" --outname output -f "$field"
+    mv "$PWD/outdir/output_parse-index.tab" "$output"
+elif [ "rename" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    newname=$5
+    python3 "$dir/ParseDb.py" rename -d "$input" --outdir "$PWD/outdir" --outname output -f "$field" -k "$newname"
+    mv "$PWD/outdir/output_parse-rename.tab" "$output"
+elif [ "select" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    value=$5
+    regex=$6
+    if [ "true" == "$regex" ] ; then
+        regex="--regex"
+    else
+        regex=""
+    fi
+    python3 "$dir/ParseDb.py" select -d "$input" --outdir "$PWD/outdir" --outname output -f "$field" -u "$value" --logic any $regex
+    mv "$PWD/outdir/output_parse-select.tab" "$output"
+elif [ "sort" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    num=$5
+    tmp=""
+    if [ "true" == "$num" ] ; then
+        tmp="--num"
+    fi
+    desc=$6
+    if [ "true" == "$desc" ] ; then
+        tmp="--descend $tmp"
+    fi
+    python3 "$dir/ParseDb.py" sort -d "$input" --outdir "$PWD/outdir" --outname output -f "$field" $tmp
+    mv "$PWD/outdir/output_parse-sort.tab" "$output"
+elif [ "update" == "$action" ] ; then
+    field="$(head -n 1 "$input" 2> /dev/null | cut -f"$4" | tr -d '\n\r')"
+    value=$5
+    replace=$6
+    regex=$7
+    if [ "true" == "$regex" ] ; then
+        regex="--regex"
+    else
+        regex=""
+    fi
+    python3 "$dir/ParseDb.py" update -d "$input" --outdir "$PWD/outdir" --outname output -f "$field" -u "$value" -t "$replace" $regex
+    mv "$PWD/outdir/output_parse-update.tab" "$output"
+fi
+
diff -r 000000000000 -r 183edf446dcf parsedb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parsedb.xml Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,120 @@ + + Change-O + + #if $action.action_select=="fasta" + parsedb.sh fasta $input $out_file + #elif $action.action_select=="clip" + parsedb.sh clip $input $out_file + #elif $action.action_select=="split" + parsedb.sh split $input $out_file $action.column '$input.name' + #elif $action.action_select=="add" + parsedb.sh add $input $out_file $action.column $action.value + #elif $action.action_select=="delete" + parsedb.sh delete $input $out_file $action.column $action.value $action.regex + #elif $action.action_select=="drop" + parsedb.sh drop $input $out_file $action.column + #elif $action.action_select=="index" + parsedb.sh index $input $out_file $action.column + #elif $action.action_select=="rename" + parsedb.sh rename $input $out_file $action.column
$action.newname + #elif $action.action_select=="select" + parsedb.sh select $input $out_file $action.column $action.value $action.regex + #elif $action.action_select=="sort" + parsedb.sh sort $input $out_file $action.column $action.num $action.desc + #elif $action.action_select=="update" + parsedb.sh update $input $out_file $action.column $action.value $action.update $action.regex + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + l + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + action['action_select'] != "split" + + + + action['action_select'] == "split" + + + + 10.1093/bioinformatics/btv359 + + + + +