Mercurial > repos > davidvanzessen > change_o
diff CreateGermlines.py @ 0:183edf446dcf draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 17 Jul 2017 07:44:27 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CreateGermlines.py Mon Jul 17 07:44:27 2017 -0400 @@ -0,0 +1,707 @@ +#!/usr/bin/env python3 +""" +Reconstructs germline sequences from alignment data +""" +# Info +__author__ = 'Namita Gupta, Jason Anthony Vander Heiden' +from changeo import __version__, __date__ + +# Imports +import os +import sys +from argparse import ArgumentParser +from collections import OrderedDict +from textwrap import dedent +from time import time + +# Presto and change imports +from presto.Defaults import default_out_args +from presto.IO import getOutputHandle, printLog, printProgress +from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs +from changeo.IO import getDbWriter, readDbFile, countDbFile, readRepo +from changeo.Receptor import allele_regex, parseAllele + +# Defaults +default_germ_types = 'dmask' +default_v_field = 'V_CALL' +default_seq_field = 'SEQUENCE_IMGT' + + +def joinGermline(align, repo_dict, germ_types, v_field, seq_field): + """ + Join gapped germline sequences aligned with sample sequences + + Arguments: + align = iterable yielding dictionaries of sample sequence data + repo_dict = dictionary of IMGT gapped germline sequences + germ_types = types of germline sequences to be output + (full germline, D-region masked, only V-region germline) + v_field = field in which to look for V call + seq_field = field in which to look for sequence + + Returns: + dictionary of germline_type: germline_sequence + """ + j_field = 'J_CALL' + germlines = {'full': '', 'dmask': '', 'vonly': '', 'regions': ''} + result_log = OrderedDict() + result_log['ID'] = align['SEQUENCE_ID'] + + # Find germline V-region gene + if v_field == 'V_CALL_GENOTYPED': + vgene = parseAllele(align[v_field], allele_regex, 'list') + vkey = vgene + else: + vgene = parseAllele(align[v_field], allele_regex, 'first') + vkey = (vgene, ) + + try: + int(align['P3V_LENGTH']) + int(align['N1_LENGTH']) + int(align['P5D_LENGTH']) + int(align['P3D_LENGTH']) + int(align['N2_LENGTH']) + int(align['P5J_LENGTH']) + except: + regions_style = 'IgBLAST' + else: + regions_style = 'IMGT' + + # Build V-region germline + if vgene is not None: + result_log['V_CALL'] = ','.join(vkey) + if vkey in repo_dict: + vseq = repo_dict[vkey] + # Germline start + try: vstart = int(align['V_GERM_START_IMGT']) - 1 + except (TypeError, ValueError): vstart = 0 + # Germline length + try: vlen = int(align['V_GERM_LENGTH_IMGT']) + except (TypeError, ValueError): vlen = 0 + # TODO: not sure what this line is doing here. it no make no sense. + vpad = vlen - len(vseq[vstart:]) + if vpad < 0: vpad = 0 + germ_vseq = vseq[vstart:(vstart + vlen)] + ('N' * vpad) + else: + result_log['ERROR'] = 'Germline %s not in repertoire' % ','.join(vkey) + return result_log, germlines + else: + result_log['V_CALL'] = None + try: vlen = int(align['V_GERM_LENGTH_IMGT']) + except (TypeError, ValueError): vlen = 0 + germ_vseq = 'N' * vlen + + # Find germline D-region gene + dgene = parseAllele(align['D_CALL'], allele_regex, 'first') + + # Build D-region germline + if dgene is not None: + result_log['D_CALL'] = dgene + dkey = (dgene, ) + if dkey in repo_dict: + dseq = repo_dict[dkey] + # Germline start + try: dstart = int(align['D_GERM_START']) - 1 + except (TypeError, ValueError): dstart = 0 + # Germline length + try: dlen = int(align['D_GERM_LENGTH']) + except (TypeError, ValueError): dlen = 0 + germ_dseq = repo_dict[dkey][dstart:(dstart + dlen)] + else: + result_log['ERROR'] = 'Germline %s not in repertoire' % dgene + return result_log, germlines + else: + result_log['D_CALL'] = None + germ_dseq = '' + + # Find germline J-region gene + jgene = parseAllele(align[j_field], allele_regex, 'first') + + # Build D-region germline + if jgene is not None: + result_log['J_CALL'] = jgene + jkey = (jgene, ) + if jkey in repo_dict: + jseq = repo_dict[jkey] + # Germline start + try: jstart = int(align['J_GERM_START']) - 1 + except (TypeError, ValueError): jstart = 0 + # Germline length + try: jlen = int(align['J_GERM_LENGTH']) + except (TypeError, ValueError): jlen = 0 + # TODO: not sure what this line is doing either + jpad = jlen - len(jseq[jstart:]) + if jpad < 0: jpad = 0 + germ_jseq = jseq[jstart:(jstart + jlen)] + ('N' * jpad) + else: + result_log['ERROR'] = 'Germline %s not in repertoire' % jgene + return result_log, germlines + else: + result_log['J_CALL'] = None + try: jlen = int(align['J_GERM_LENGTH']) + except (TypeError, ValueError): jlen = 0 + germ_jseq = 'N' * jlen + + # Assemble pieces starting with V-region + germ_seq = germ_vseq + regions = 'V' * len(germ_vseq) + + try: + np1_len = int(align['NP1_LENGTH']) + except (TypeError, ValueError): + np1_len = 0 + + # NP nucleotide additions after V + if regions_style == 'IMGT': + # P nucleotide additions + try: + p3v_len = int(align['P3V_LENGTH']) + except (TypeError, ValueError): + p3v_len = 0 + if p3v_len < 0: + result_log['ERROR'] = 'P3V_LENGTH is negative' + return result_log, germlines + + regions += 'P' * p3v_len + + # N1 nucleotide additions + try: + n1_len = int(align['N1_LENGTH']) + except (TypeError, ValueError): + n1_len = 0 + if n1_len < 0: + result_log['ERROR'] = 'N1_LENGTH is negative' + return result_log, germlines + + regions += 'N' * n1_len + + # P nucleotide additions before D + try: p5d_len = int(align['P5D_LENGTH']) + except (TypeError, ValueError): p5d_len = 0 + if p5d_len < 0: + result_log['ERROR'] = 'P5D_LENGTH is negative' + return result_log, germlines + + regions += 'P' * p5d_len + else: + # IgBLAST style + # PNP nucleotide additions after V + if np1_len < 0: + result_log['ERROR'] = 'NP1_LENGTH is negative' + return result_log, germlines + + regions += 'N' * np1_len + + germ_seq += 'N' * np1_len + + # Add D-region + germ_seq += germ_dseq + regions += 'D' * len(germ_dseq) + + #print 'VD>', germ_seq, '\nVD>', regions + + try: + np2_len = int(align['NP2_LENGTH']) + except (TypeError, ValueError): + np2_len = 0 + + # NP nucleotide additions before J + if regions_style == 'IMGT': + # P nucleotide additions + try: + p3d_len = int(align['P3D_LENGTH']) + except (TypeError, ValueError): + p3d_len = 0 + if p3d_len < 0: + result_log['ERROR'] = 'P3D_LENGTH is negative' + return result_log, germlines + + regions += 'P' * p3d_len + + # N2 nucleotide additions + try: + n2_len = int(align['N2_LENGTH']) + except (TypeError, ValueError): + n2_len = 0 + if n2_len < 0: + result_log['ERROR'] = 'N2_LENGTH is negative' + return result_log, germlines + + regions += 'N' * n2_len + + # P nucleotide additions + try: + p5j_len = int(align['P5J_LENGTH']) + except (TypeError, ValueError): + p5j_len = 0 + if p5j_len < 0: + result_log['ERROR'] = 'P5J_LENGTH is negative' + return result_log, germlines + + regions += 'P' * p5j_len + else: + # IgBLAST style + # NP nucleotide additions + if np2_len < 0: + result_log['ERROR'] = 'NP2_LENGTH is negative' + return result_log, germlines + + regions += 'N' * np2_len + + germ_seq += 'N' * np2_len + + # Add J-region + germ_seq += germ_jseq + regions += 'J' * len(germ_jseq) + + #print('\nREGIONS>',regions,'\n') + + # Define return germlines + germlines['full'] = germ_seq + germlines['regions'] = regions + + if 'dmask' in germ_types: + germlines['dmask'] = germ_seq[:len(germ_vseq)] + \ + 'N' * (len(germ_seq) - len(germ_vseq) - len(germ_jseq)) + \ + germ_seq[-len(germ_jseq):] + if 'vonly' in germ_types: + germlines['vonly'] = germ_vseq + + # Check that input and germline sequence match + if len(align[seq_field]) == 0: + result_log['ERROR'] = 'Sequence is missing from %s column' % seq_field + elif len(germlines['full']) != len(align[seq_field]): + result_log['ERROR'] = 'Germline sequence is %d nucleotides longer than input sequence' % \ + (len(germlines['full']) - len(align[seq_field])) + + # Convert to uppercase + for k, v in germlines.items(): germlines[k] = v.upper() + + return result_log, germlines + + +def assembleEachGermline(db_file, repo, germ_types, v_field, seq_field, out_args=default_out_args): + """ + Write germline sequences to tab-delimited database file + + Arguments: + db_file = input tab-delimited database file + repo = folder with germline repertoire files + germ_types = types of germline sequences to be output + (full germline, D-region masked, only V-region germline) + v_field = field in which to look for V call + seq_field = field in which to look for sequence + out_args = arguments for output preferences + + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'CreateGermlines' + log['DB_FILE'] = os.path.basename(db_file) + log['GERM_TYPES'] = germ_types if isinstance(germ_types, str) else ','.join(germ_types) + log['CLONED'] = 'False' + log['V_FIELD'] = v_field + log['SEQ_FIELD'] = seq_field + printLog(log) + + # Get repertoire and open Db reader + repo_dict = readRepo(repo) + reader = readDbFile(db_file, ig=False) + + # Exit if V call field does not exist in reader + if v_field not in reader.fieldnames: + sys.exit('Error: V field does not exist in input database file.') + + # Define log handle + if out_args['log_file'] is None: + log_handle = None + else: + log_handle = open(out_args['log_file'], 'w') + + add_fields = [] + seq_type = seq_field.split('_')[-1] + if 'full' in germ_types: add_fields += ['GERMLINE_' + seq_type] + if 'dmask' in germ_types: add_fields += ['GERMLINE_' + seq_type + '_D_MASK'] + if 'vonly' in germ_types: add_fields += ['GERMLINE_' + seq_type + '_V_REGION'] + if 'regions' in germ_types: add_fields += ['GERMLINE_REGIONS'] + + # Create output file handle and Db writer + pass_handle = getOutputHandle(db_file, 'germ-pass', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_args['out_type']) + pass_writer = getDbWriter(pass_handle, db_file, add_fields=add_fields) + + if out_args['failed']: + fail_handle = getOutputHandle(db_file, 'germ-fail', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_args['out_type']) + fail_writer = getDbWriter(fail_handle, db_file, add_fields=add_fields) + else: + fail_handle = None + fail_writer = None + + # Initialize time and total count for progress bar + start_time = time() + rec_count = countDbFile(db_file) + pass_count = fail_count = 0 + # Iterate over rows + for i, row in enumerate(reader): + # Print progress + printProgress(i, rec_count, 0.05, start_time) + + result_log, germlines = joinGermline(row, repo_dict, germ_types, v_field, seq_field) + + # Add germline field(s) to dictionary + if 'full' in germ_types: row['GERMLINE_' + seq_type] = germlines['full'] + if 'dmask' in germ_types: row['GERMLINE_' + seq_type + '_D_MASK'] = germlines['dmask'] + if 'vonly' in germ_types: row['GERMLINE_' + seq_type + '_V_REGION'] = germlines['vonly'] + if 'regions' in germ_types: row['GERMLINE_REGIONS'] = germlines['regions'] + + # Write row to pass or fail file + if 'ERROR' in result_log: + fail_count += 1 + if fail_writer is not None: fail_writer.writerow(row) + else: + result_log['SEQUENCE'] = row[seq_field] + result_log['GERMLINE'] = germlines['full'] + result_log['REGIONS'] = germlines['regions'] + + pass_count += 1 + pass_writer.writerow(row) + printLog(result_log, handle=log_handle) + + # Print log + printProgress(i + 1, rec_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['RECORDS'] = rec_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + log['END'] = 'CreateGermlines' + printLog(log) + + # Close file handles + pass_handle.close() + if fail_handle is not None: fail_handle.close() + if log_handle is not None: log_handle.close() + + +def makeCloneGermline(clone, clone_dict, repo_dict, germ_types, v_field, + seq_field, counts, writers, out_args): + """ + Determine consensus clone sequence and create germline for clone + + Arguments: + clone = clone ID + clone_dict = iterable yielding dictionaries of sequence data from clone + repo_dict = dictionary of IMGT gapped germline sequences + germ_types = types of germline sequences to be output + (full germline, D-region masked, only V-region germline) + v_field = field in which to look for V call + seq_field = field in which to look for sequence + counts = dictionary of pass counter and fail counter + writers = dictionary with pass and fail DB writers + out_args = arguments for output preferences + + Returns: + None + """ + seq_type = seq_field.split('_')[-1] + j_field = 'J_CALL' + + # Create dictionaries to count observed V/J calls + v_dict = OrderedDict() + j_dict = OrderedDict() + + # Find longest sequence in clone + max_length = 0 + for val in clone_dict.values(): + v = val[v_field] + v_dict[v] = v_dict.get(v, 0) + 1 + j = val[j_field] + j_dict[j] = j_dict.get(j, 0) + 1 + if len(val[seq_field]) > max_length: + max_length = len(val[seq_field]) + + # Consensus V and J having most observations + v_cons = [k for k in list(v_dict.keys()) if v_dict[k] == max(v_dict.values())] + j_cons = [k for k in list(j_dict.keys()) if j_dict[k] == max(j_dict.values())] + + # Consensus sequence(s) with consensus V/J calls and longest sequence + cons = [val for val in list(clone_dict.values()) \ + if val.get(v_field, '') in v_cons and \ + val.get(j_field, '') in j_cons and \ + len(val[seq_field]) == max_length] + + # Sequence(s) with consensus V/J are not longest + if not cons: + # Sequence(s) with consensus V/J (not longest) + cons = [val for val in list(clone_dict.values()) \ + if val.get(v_field, '') in v_cons and val.get(j_field, '') in j_cons] + + # No sequence has both consensus V and J call + if not cons: + result_log = OrderedDict() + result_log['ID'] = clone + result_log['V_CALL'] = ','.join(v_cons) + result_log['J_CALL'] = ','.join(j_cons) + result_log['ERROR'] = 'No consensus sequence for clone found' + else: + # Pad end of consensus sequence with gaps to make it the max length + cons = cons[0] + cons['J_GERM_LENGTH'] = str(int(cons['J_GERM_LENGTH'] or 0) + max_length - len(cons[seq_field])) + cons[seq_field] += '.' * (max_length - len(cons[seq_field])) + result_log, germlines = joinGermline(cons, repo_dict, germ_types, v_field, seq_field) + result_log['ID'] = clone + result_log['CONSENSUS'] = cons['SEQUENCE_ID'] + else: + cons = cons[0] + result_log, germlines = joinGermline(cons, repo_dict, germ_types, v_field, seq_field) + result_log['ID'] = clone + result_log['CONSENSUS'] = cons['SEQUENCE_ID'] + + # Write sequences of clone + for val in clone_dict.values(): + if 'ERROR' not in result_log: + # Update lengths padded to longest sequence in clone + val['J_GERM_LENGTH'] = str(int(val['J_GERM_LENGTH'] or 0) + max_length - len(val[seq_field])) + val[seq_field] += '.' * (max_length - len(val[seq_field])) + + # Add column(s) to tab-delimited database file + if 'full' in germ_types: val['GERMLINE_' + seq_type] = germlines['full'] + if 'dmask' in germ_types: val['GERMLINE_' + seq_type + '_D_MASK'] = germlines['dmask'] + if 'vonly' in germ_types: val['GERMLINE_' + seq_type + '_V_REGION'] = germlines['vonly'] + if 'regions' in germ_types: val['GERMLINE_REGIONS'] = germlines['regions'] + + # Add field + val['GERMLINE_V_CALL'] = result_log['V_CALL'] + val['GERMLINE_D_CALL'] = result_log['D_CALL'] + val['GERMLINE_J_CALL'] = result_log['J_CALL'] + + result_log['SEQUENCE'] = cons[seq_field] + result_log['GERMLINE'] = germlines['full'] + result_log['REGIONS'] = germlines['regions'] + + # Write to pass file + counts['pass'] += 1 + writers['pass'].writerow(val) + else: + # Write to fail file + counts['fail'] += 1 + if writers['fail'] is not None: + writers['fail'].writerow(val) + # Return log + return result_log + + +def assembleCloneGermline(db_file, repo, germ_types, v_field, seq_field, out_args=default_out_args): + """ + Assemble one germline sequence for each clone in a tab-delimited database file + + Arguments: + db_file = input tab-delimited database file + repo = folder with germline repertoire files + germ_types = types of germline sequences to be output + (full germline, D-region masked, only V-region germline) + v_field = field in which to look for V call + seq_field = field in which to look for sequence + out_args = arguments for output preferences + + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'CreateGermlines' + log['DB_FILE'] = os.path.basename(db_file) + log['GERM_TYPES'] = germ_types if isinstance(germ_types, str) else ','.join(germ_types) + log['CLONED'] = 'True' + log['V_FIELD'] = v_field + log['SEQ_FIELD'] = seq_field + printLog(log) + + # Get repertoire and open Db reader + repo_dict = readRepo(repo) + reader = readDbFile(db_file, ig=False) + + # Exit if V call field does not exist in reader + if v_field not in reader.fieldnames: + sys.exit('Error: V field does not exist in input database file.') + + # Define log handle + if out_args['log_file'] is None: + log_handle = None + else: + log_handle = open(out_args['log_file'], 'w') + + add_fields = [] + seq_type = seq_field.split('_')[-1] + if 'full' in germ_types: add_fields += ['GERMLINE_' + seq_type] + if 'dmask' in germ_types: add_fields += ['GERMLINE_' + seq_type + '_D_MASK'] + if 'vonly' in germ_types: add_fields += ['GERMLINE_' + seq_type + '_V_REGION'] + if 'regions' in germ_types: add_fields += ['GERMLINE_REGIONS'] + + add_fields += ['GERMLINE_V_CALL'] + add_fields += ['GERMLINE_D_CALL'] + add_fields += ['GERMLINE_J_CALL'] + + # Create output file handle and Db writer + writers = {} + pass_handle = getOutputHandle(db_file, 'germ-pass', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type=out_args['out_type']) + writers['pass'] = getDbWriter(pass_handle, db_file, add_fields=add_fields) + + if out_args['failed']: + fail_handle = getOutputHandle(db_file, 'germ-fail', out_dir=out_args['out_dir'], + out_name=out_args['out_name'], out_type=out_args['out_type']) + writers['fail'] = getDbWriter(fail_handle, db_file, add_fields=add_fields) + else: + fail_handle = None + writers['fail'] = None + + # Initialize time and total count for progress bar + start_time = time() + rec_count = countDbFile(db_file) + counts = {} + clone_count = counts['pass'] = counts['fail'] = 0 + # Iterate over rows + clone = 'initial' + clone_dict = OrderedDict() + for i, row in enumerate(reader): + # Print progress + printProgress(i, rec_count, 0.05, start_time) + + # Clone isn't over yet + if row.get('CLONE', '') == clone: + clone_dict[i] = row + # Clone just finished + elif clone_dict: + clone_count += 1 + result_log = makeCloneGermline(clone, clone_dict, repo_dict, germ_types, + v_field, seq_field, counts, writers, out_args) + printLog(result_log, handle=log_handle) + # Now deal with current row (first of next clone) + clone = row['CLONE'] + clone_dict = OrderedDict([(i, row)]) + # Last case is only for first row of file + else: + clone = row['CLONE'] + clone_dict = OrderedDict([(i, row)]) + + clone_count += 1 + result_log = makeCloneGermline(clone, clone_dict, repo_dict, germ_types, v_field, + seq_field, counts, writers, out_args) + printLog(result_log, handle=log_handle) + + # Print log + printProgress(i + 1, rec_count, 0.05, start_time) + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['CLONES'] = clone_count + log['RECORDS'] = rec_count + log['PASS'] = counts['pass'] + log['FAIL'] = counts['fail'] + log['END'] = 'CreateGermlines' + printLog(log) + + # Close file handles + pass_handle.close() + if fail_handle is not None: fail_handle.close() + if log_handle is not None: log_handle.close() + + +def getArgParser(): + """ + Defines the ArgumentParser + + Arguments: + None + + Returns: + an ArgumentParser object + """ + # Define input and output field help message + fields = dedent( + ''' + output files: + germ-pass + database with assigned germline sequences. + germ-fail + database with records failing germline assignment. + + required fields: + SEQUENCE_ID, SEQUENCE_VDJ or SEQUENCE_IMGT, + V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, + V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_IMGT, V_GERM_LENGTH_IMGT, + D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH, + J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH, + NP1_LENGTH, NP2_LENGTH + + optional fields: + N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, P5J_LENGTH, + CLONE + + + output fields: + GERMLINE_VDJ, GERMLINE_VDJ_D_MASK, GERMLINE_VDJ_V_REGION, + GERMLINE_IMGT, GERMLINE_IMGT_D_MASK, GERMLINE_IMGT_V_REGION, + GERMLINE_V_CALL, GERMLINE_D_CALL, GERMLINE_J_CALL, + GERMLINE_REGIONS + ''') + + # Parent parser + parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, + annotation=False) + # Define argument parser + parser = ArgumentParser(description=__doc__, epilog=fields, + parents=[parser_parent], + formatter_class=CommonHelpFormatter) + parser.add_argument('--version', action='version', + version='%(prog)s:' + ' %s-%s' %(__version__, __date__)) + + parser.add_argument('-r', nargs='+', action='store', dest='repo', required=True, + help='''List of folders and/or fasta files (with .fasta, .fna or .fa + extension) with germline sequences.''') + parser.add_argument('-g', action='store', dest='germ_types', default=default_germ_types, + nargs='+', choices=('full', 'dmask', 'vonly', 'regions'), + help='''Specify type(s) of germlines to include full germline, + germline with D-region masked, or germline for V region only.''') + parser.add_argument('--cloned', action='store_true', dest='cloned', + help='''Specify to create only one germline per clone. Assumes input file is + sorted by clone column, and will not yield correct results if the data + is unsorted. Note, if allele calls are ambiguous within a clonal group, + this will place the germline call used for the entire clone within the + GERMLINE_V_CALL, GERMLINE_D_CALL and GERMLINE_J_CALL fields.''') + parser.add_argument('--vf', action='store', dest='v_field', default=default_v_field, + help='Specify field to use for germline V call') + parser.add_argument('--sf', action='store', dest='seq_field', default=default_seq_field, + help='Specify field to use for sequence') + + return parser + + +if __name__ == "__main__": + """ + Parses command line arguments and calls main + """ + + # Parse command line arguments + parser = getArgParser() + checkArgs(parser) + args = parser.parse_args() + args_dict = parseCommonArgs(args) + del args_dict['db_files'] + del args_dict['cloned'] + args_dict['v_field'] = args_dict['v_field'].upper() + args_dict['seq_field'] = args_dict['seq_field'].upper() + + for f in args.__dict__['db_files']: + args_dict['db_file'] = f + if args.__dict__['cloned']: + assembleCloneGermline(**args_dict) + else: + assembleEachGermline(**args_dict)