Galaxy |

Changeset 0:183edf446dcf (2017-07-17)

Commit message:
Uploaded

added:
CreateGermlines.py
DefineClones.py
IMGT_Human_IGHD.fasta
IMGT_Human_IGHJ.fasta
IMGT_Human_IGHV.fasta
LICENSE
MakeDb.py
ParseDb.py
create_germlines.sh
create_germlines.xml
define_clones.r
define_clones.sh
define_clones.xml
makedb.sh
makedb.xml
parsedb.sh
parsedb.xml

diff -r 000000000000 -r 183edf446dcf CreateGermlines.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CreateGermlines.py Mon Jul 17 07:44:27 2017 -0400

[

b'@@ -0,0 +1,707 @@\n+#!/usr/bin/env python3\n+"""\n+Reconstructs germline sequences from alignment data\n+"""\n+# Info\n+__author__ = \'Namita Gupta, Jason Anthony Vander Heiden\'\n+from changeo import __version__, __date__\n+\n+# Imports\n+import os\n+import sys\n+from argparse import ArgumentParser\n+from collections import OrderedDict\n+from textwrap import dedent\n+from time import time\n+\n+# Presto and change imports\n+from presto.Defaults import default_out_args\n+from presto.IO import getOutputHandle, printLog, printProgress\n+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs\n+from changeo.IO import getDbWriter, readDbFile, countDbFile, readRepo\n+from changeo.Receptor import allele_regex, parseAllele\n+\n+# Defaults\n+default_germ_types = \'dmask\'\n+default_v_field = \'V_CALL\'\n+default_seq_field = \'SEQUENCE_IMGT\'\n+\n+ \n+def joinGermline(align, repo_dict, germ_types, v_field, seq_field):\n+ """\n+ Join gapped germline sequences aligned with sample sequences\n+ \n+ Arguments:\n+ align = iterable yielding dictionaries of sample sequence data\n+ repo_dict = dictionary of IMGT gapped germline sequences\n+ germ_types = types of germline sequences to be output\n+ (full germline, D-region masked, only V-region germline)\n+ v_field = field in which to look for V call\n+ seq_field = field in which to look for sequence\n+ \n+ Returns:\n+ dictionary of germline_type: germline_sequence\n+ """\n+ j_field = \'J_CALL\'\n+ germlines = {\'full\': \'\', \'dmask\': \'\', \'vonly\': \'\', \'regions\': \'\'}\n+ result_log = OrderedDict()\n+ result_log[\'ID\'] = align[\'SEQUENCE_ID\']\n+\n+ # Find germline V-region gene\n+ if v_field == \'V_CALL_GENOTYPED\':\n+ vgene = parseAllele(align[v_field], allele_regex, \'list\')\n+ vkey = vgene\n+ else:\n+ vgene = parseAllele(align[v_field], allele_regex, \'first\')\n+ vkey = (vgene, )\n+\n+ try:\n+ int(align[\'P3V_LENGTH\'])\n+ int(align[\'N1_LENGTH\'])\n+ 
int(align[\'P5D_LENGTH\'])\n+ int(align[\'P3D_LENGTH\'])\n+ int(align[\'N2_LENGTH\'])\n+ int(align[\'P5J_LENGTH\'])\n+ except:\n+ regions_style = \'IgBLAST\'\n+ else:\n+ regions_style = \'IMGT\'\n+\n+ # Build V-region germline\n+ if vgene is not None:\n+ result_log[\'V_CALL\'] = \',\'.join(vkey)\n+ if vkey in repo_dict:\n+ vseq = repo_dict[vkey]\n+ # Germline start\n+ try: vstart = int(align[\'V_GERM_START_IMGT\']) - 1\n+ except (TypeError, ValueError): vstart = 0\n+ # Germline length\n+ try: vlen = int(align[\'V_GERM_LENGTH_IMGT\'])\n+ except (TypeError, ValueError): vlen = 0\n+ # TODO: not sure what this line is doing here. it no make no sense.\n+ vpad = vlen - len(vseq[vstart:])\n+ if vpad < 0: vpad = 0\n+ germ_vseq = vseq[vstart:(vstart + vlen)] + (\'N\' * vpad)\n+ else:\n+ result_log[\'ERROR\'] = \'Germline %s not in repertoire\' % \',\'.join(vkey)\n+ return result_log, germlines\n+ else:\n+ result_log[\'V_CALL\'] = None\n+ try: vlen = int(align[\'V_GERM_LENGTH_IMGT\'])\n+ except (TypeError, ValueError): vlen = 0\n+ germ_vseq = \'N\' * vlen\n+\n+ # Find germline D-region gene\n+ dgene = parseAllele(align[\'D_CALL\'], allele_regex, \'first\')\n+\n+ # Build D-region germline\n+ if dgene is not None:\n+ result_log[\'D_CALL\'] = dgene\n+ dkey = (dgene, )\n+ if dkey in repo_dict:\n+ dseq = repo_dict[dkey]\n+ # Germline start\n+ try: dstart = int(align[\'D_GERM_START\']) - 1\n+ except (TypeError, ValueError): dstart = 0\n+ # Germline length\n+ try: dlen = int(align[\'D_GERM_LENGTH\'])\n+ except (TypeError, ValueError): dlen = 0\n+ germ_dseq = repo_dict[dkey][dstart:(dstart + dlen)]\n+ else:\n+ result_log[\'ERROR\'] = \'Germline %s not in repertoire\' % dgene\n+ return '..b'tParser\n+\n+ Arguments:\n+ None\n+\n+ Returns:\n+ an ArgumentParser object\n+ """\n+ # Define input and output field help message\n+ fields = dedent(\n+ \'\'\'\n+ output files:\n+ germ-pass\n+ database with assigned germline sequences.\n+ germ-fail\n+ database with records failing germline 
assignment.\n+\n+ required fields:\n+ SEQUENCE_ID, SEQUENCE_VDJ or SEQUENCE_IMGT,\n+ V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL,\n+ V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_IMGT, V_GERM_LENGTH_IMGT,\n+ D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH,\n+ J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH,\n+ NP1_LENGTH, NP2_LENGTH\n+\n+ optional fields:\n+ N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, P5J_LENGTH,\n+ CLONE\n+\n+\n+ output fields:\n+ GERMLINE_VDJ, GERMLINE_VDJ_D_MASK, GERMLINE_VDJ_V_REGION,\n+ GERMLINE_IMGT, GERMLINE_IMGT_D_MASK, GERMLINE_IMGT_V_REGION,\n+ GERMLINE_V_CALL, GERMLINE_D_CALL, GERMLINE_J_CALL,\n+ GERMLINE_REGIONS\n+ \'\'\')\n+\n+ # Parent parser\n+ parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,\n+ annotation=False)\n+ # Define argument parser\n+ parser = ArgumentParser(description=__doc__, epilog=fields,\n+ parents=[parser_parent],\n+ formatter_class=CommonHelpFormatter)\n+ parser.add_argument(\'--version\', action=\'version\',\n+ version=\'%(prog)s:\' + \' %s-%s\' %(__version__, __date__))\n+\n+ parser.add_argument(\'-r\', nargs=\'+\', action=\'store\', dest=\'repo\', required=True,\n+ help=\'\'\'List of folders and/or fasta files (with .fasta, .fna or .fa\n+ extension) with germline sequences.\'\'\')\n+ parser.add_argument(\'-g\', action=\'store\', dest=\'germ_types\', default=default_germ_types,\n+ nargs=\'+\', choices=(\'full\', \'dmask\', \'vonly\', \'regions\'),\n+ help=\'\'\'Specify type(s) of germlines to include full germline,\n+ germline with D-region masked, or germline for V region only.\'\'\')\n+ parser.add_argument(\'--cloned\', action=\'store_true\', dest=\'cloned\',\n+ help=\'\'\'Specify to create only one germline per clone. Assumes input file is\n+ sorted by clone column, and will not yield correct results if the data\n+ is unsorted. 
Note, if allele calls are ambiguous within a clonal group,\n+ this will place the germline call used for the entire clone within the\n+ GERMLINE_V_CALL, GERMLINE_D_CALL and GERMLINE_J_CALL fields.\'\'\')\n+ parser.add_argument(\'--vf\', action=\'store\', dest=\'v_field\', default=default_v_field,\n+ help=\'Specify field to use for germline V call\')\n+ parser.add_argument(\'--sf\', action=\'store\', dest=\'seq_field\', default=default_seq_field,\n+ help=\'Specify field to use for sequence\')\n+\n+ return parser\n+\n+\n+if __name__ == "__main__":\n+ """\n+ Parses command line arguments and calls main\n+ """\n+\n+ # Parse command line arguments\n+ parser = getArgParser()\n+ checkArgs(parser)\n+ args = parser.parse_args()\n+ args_dict = parseCommonArgs(args)\n+ del args_dict[\'db_files\']\n+ del args_dict[\'cloned\']\n+ args_dict[\'v_field\'] = args_dict[\'v_field\'].upper()\n+ args_dict[\'seq_field\'] = args_dict[\'seq_field\'].upper()\n+ \n+ for f in args.__dict__[\'db_files\']:\n+ args_dict[\'db_file\'] = f\n+ if args.__dict__[\'cloned\']:\n+ assembleCloneGermline(**args_dict)\n+ else:\n+ assembleEachGermline(**args_dict)\n'

diff -r 000000000000 -r 183edf446dcf DefineClones.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DefineClones.py Mon Jul 17 07:44:27 2017 -0400

[

b'@@ -0,0 +1,1121 @@\n+#!/usr/bin/env python3\n+"""\n+Assign Ig sequences into clones\n+"""\n+# Info\n+__author__ = \'Namita Gupta, Jason Anthony Vander Heiden, Gur Yaari, Mohamed Uduman\'\n+from changeo import __version__, __date__\n+\n+# Imports\n+import os\n+import re\n+import sys\n+import csv\n+import numpy as np\n+from argparse import ArgumentParser\n+from collections import OrderedDict\n+from itertools import chain\n+from textwrap import dedent\n+from time import time\n+from Bio import pairwise2\n+from Bio.Seq import translate\n+\n+# Presto and changeo imports\n+from presto.Defaults import default_out_args\n+from presto.IO import getFileType, getOutputHandle, printLog, printProgress\n+from presto.Multiprocessing import manageProcesses\n+from presto.Sequence import getDNAScoreDict\n+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs\n+from changeo.Distance import distance_models, calcDistances, formClusters\n+from changeo.IO import getDbWriter, readDbFile, countDbFile\n+from changeo.Multiprocessing import DbData, DbResult\n+\n+# Defaults\n+default_translate = False\n+default_distance = 0.0\n+default_index_mode = \'gene\'\n+default_index_action = \'set\'\n+default_bygroup_model = \'ham\'\n+default_hclust_model = \'chen2010\'\n+default_seq_field = \'JUNCTION\'\n+default_norm = \'len\'\n+default_sym = \'avg\'\n+default_linkage = \'single\'\n+choices_bygroup_model = (\'ham\', \'aa\', \'hh_s1f\', \'hh_s5f\', \'mk_rs1nf\', \'mk_rs5nf\', \'hs1f_compat\', \'m1n_compat\')\n+\n+\n+def indexByIdentity(index, key, rec, fields=None):\n+ """\n+ Updates a preclone index with a simple key\n+\n+ Arguments:\n+ index = preclone index from indexJunctions\n+ key = index key\n+ rec = IgRecord to add to the index\n+ fields = additional annotation fields to use to group preclones;\n+ if None use only V, J and junction length\n+\n+ Returns:\n+ None. 
Updates index with new key and records.\n+ """\n+ index.setdefault(tuple(key), []).append(rec)\n+\n+\n+def indexByUnion(index, key, rec, fields=None):\n+ """\n+ Updates a preclone index with the union of nested keys\n+\n+ Arguments:\n+ index = preclone index from indexJunctions\n+ key = index key\n+ rec = IgRecord to add to the index\n+ fields = additional annotation fields to use to group preclones;\n+ if None use only V, J and junction length\n+\n+ Returns:\n+ None. Updates index with new key and records.\n+ """\n+ # List of values for this/new key\n+ val = [rec]\n+ f_range = list(range(2, 3 + (len(fields) if fields else 0)))\n+\n+ # See if field/junction length combination exists in index\n+ outer_dict = index\n+ for field in f_range:\n+ try:\n+ outer_dict = outer_dict[key[field]]\n+ except (KeyError):\n+ outer_dict = None\n+ break\n+ # If field combination exists, look through Js\n+ j_matches = []\n+ if outer_dict is not None:\n+ for j in outer_dict.keys():\n+ if not set(key[1]).isdisjoint(set(j)):\n+ key[1] = tuple(set(key[1]).union(set(j)))\n+ j_matches += [j]\n+ # If J overlap exists, look through Vs for each J\n+ for j in j_matches:\n+ v_matches = []\n+ # Collect V matches for this J\n+ for v in outer_dict[j].keys():\n+ if not set(key[0]).isdisjoint(set(v)):\n+ key[0] = tuple(set(key[0]).union(set(v)))\n+ v_matches += [v]\n+ # If there are V overlaps for this J, pop them out\n+ if v_matches:\n+ val += list(chain(*(outer_dict[j].pop(v) for v in v_matches)))\n+ # If the J dict is now empty, remove it\n+ if not outer_dict[j]:\n+ outer_dict.pop(j, None)\n+\n+ # Add value(s) into index nested dictionary\n+ # OMG Python pointers are the best!\n+ # Add field dictionaries into index\n+ outer_dict = index\n+ for field in f_range:\n+ outer_dict.setdefault(key[field], {})\n+ outer_dict = outer_dict[key[field]]\n+ # Add J, then V into index\n+ '..b'eq_field,\n+ help=\'\'\'The name of the field to be used to calculate\n+ distance between records\'\'\')\n+ 
parser_bygroup.set_defaults(feed_func=feedQueue)\n+ parser_bygroup.set_defaults(work_func=processQueue)\n+ parser_bygroup.set_defaults(collect_func=collectQueue) \n+ parser_bygroup.set_defaults(group_func=indexJunctions) \n+ parser_bygroup.set_defaults(clone_func=distanceClones)\n+ \n+ # Chen2010\n+ parser_chen = subparsers.add_parser(\'chen2010\', parents=[parser_parent],\n+ formatter_class=CommonHelpFormatter,\n+ help=\'\'\'Defines clones by method specified in Chen, 2010.\'\'\',\n+ description=\'\'\'Defines clones by method specified in Chen, 2010.\'\'\')\n+ parser_chen.set_defaults(feed_func=feedQueueClust)\n+ parser_chen.set_defaults(work_func=processQueueClust)\n+ parser_chen.set_defaults(collect_func=collectQueueClust)\n+ parser_chen.set_defaults(cluster_func=hierClust)\n+\n+ # Ademokun2011\n+ parser_ade = subparsers.add_parser(\'ademokun2011\', parents=[parser_parent],\n+ formatter_class=CommonHelpFormatter,\n+ help=\'\'\'Defines clones by method specified in Ademokun, 2011.\'\'\',\n+ description=\'\'\'Defines clones by method specified in Ademokun, 2011.\'\'\')\n+ parser_ade.set_defaults(feed_func=feedQueueClust)\n+ parser_ade.set_defaults(work_func=processQueueClust)\n+ parser_ade.set_defaults(collect_func=collectQueueClust)\n+ parser_ade.set_defaults(cluster_func=hierClust)\n+ \n+ return parser\n+\n+\n+if __name__ == \'__main__\':\n+ """\n+ Parses command line arguments and calls main function\n+ """\n+ # Parse arguments\n+ parser = getArgParser()\n+ checkArgs(parser)\n+ args = parser.parse_args()\n+ args_dict = parseCommonArgs(args)\n+ # Convert case of fields\n+ if \'seq_field\' in args_dict:\n+ args_dict[\'seq_field\'] = args_dict[\'seq_field\'].upper()\n+ if \'fields\' in args_dict and args_dict[\'fields\'] is not None: \n+ args_dict[\'fields\'] = [f.upper() for f in args_dict[\'fields\']]\n+ \n+ # Define clone_args\n+ if args.command == \'bygroup\':\n+ args_dict[\'group_args\'] = {\'fields\': args_dict[\'fields\'],\n+ \'action\': 
args_dict[\'action\'], \n+ \'mode\':args_dict[\'mode\']}\n+ args_dict[\'clone_args\'] = {\'model\': args_dict[\'model\'],\n+ \'distance\': args_dict[\'distance\'],\n+ \'norm\': args_dict[\'norm\'],\n+ \'sym\': args_dict[\'sym\'],\n+ \'linkage\': args_dict[\'linkage\'],\n+ \'seq_field\': args_dict[\'seq_field\']}\n+\n+ # Get distance matrix\n+ try:\n+ args_dict[\'clone_args\'][\'dist_mat\'] = distance_models[args_dict[\'model\']]\n+ except KeyError:\n+ sys.exit(\'Unrecognized distance model: %s\' % args_dict[\'model\'])\n+\n+ del args_dict[\'fields\']\n+ del args_dict[\'action\']\n+ del args_dict[\'mode\']\n+ del args_dict[\'model\']\n+ del args_dict[\'distance\']\n+ del args_dict[\'norm\']\n+ del args_dict[\'sym\']\n+ del args_dict[\'linkage\']\n+ del args_dict[\'seq_field\']\n+\n+ # Define clone_args\n+ if args.command == \'chen2010\':\n+ args_dict[\'clone_func\'] = distChen2010\n+ args_dict[\'cluster_args\'] = {\'method\': args.command }\n+\n+ if args.command == \'ademokun2011\':\n+ args_dict[\'clone_func\'] = distAdemokun2011\n+ args_dict[\'cluster_args\'] = {\'method\': args.command }\n+ \n+ # Call defineClones\n+ del args_dict[\'command\']\n+ del args_dict[\'db_files\']\n+ for f in args.__dict__[\'db_files\']:\n+ args_dict[\'db_file\'] = f\n+ defineClones(**args_dict)\n'

diff -r 000000000000 -r 183edf446dcf IMGT_Human_IGHD.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/IMGT_Human_IGHD.fasta Mon Jul 17 07:44:27 2017 -0400

@@ -0,0 +1,89 @@
+>X97051|IGHD1-1*01|Homo_sapiens|F|D-REGION|33714..33730|17 nt|1| | | | |17+0=17| | |
+ggtacaactggaacgac
+>X13972|IGHD1-14*01|Homo_sapiens|ORF|D-REGION|14518..14534|17 nt|1| | | | |17+0=17| | |
+ggtataaccggaaccac
+>X97051|IGHD1-20*01|Homo_sapiens|F|D-REGION|62015..62031|17 nt|1| | | | |17+0=17| | |
+ggtataactggaacgac
+>X97051|IGHD1-26*01|Homo_sapiens|F|D-REGION|72169..72188|20 nt|1| | | | |20+0=20| | |
+ggtatagtgggagctactac
+>X13972|IGHD1-7*01|Homo_sapiens|F|D-REGION|5266..5282|17 nt|1| | | | |17+0=17| | |
+ggtataactggaactac
+>X55575|IGHD1/OR15-1a*01|Homo_sapiens|ORF|D-REGION|63..79|17 nt|1| | | | |17+0=17| | |
+ggtataactggaacaac
+>X55576|IGHD1/OR15-1b*01|Homo_sapiens|ORF|D-REGION|63..79|17 nt|1| | | | |17+0=17| | |
+ggtataactggaacaac
+>J00234|IGHD2-15*01|Homo_sapiens|F|D-REGION|29..59|31 nt|1| | | | |31+0=31| | |
+aggatattgtagtggtggtagctgctactcc
+>J00232|IGHD2-2*01|Homo_sapiens|F|D-REGION|29..59|31 nt|1| | | | |31+0=31| | |
+aggatattgtagtagtaccagctgctatgcc
+>X97051|IGHD2-2*02|Homo_sapiens|F|D-REGION|36367..36397|31 nt|1| | | | |31+0=31| | |
+aggatattgtagtagtaccagctgctatacc
+>M35648|IGHD2-2*03|Homo_sapiens|F|D-REGION|70..100|31 nt|1| | | | |31+0=31| | |
+tggatattgtagtagtaccagctgctatgcc
+>J00235|IGHD2-21*01|Homo_sapiens|F|D-REGION|29..56|28 nt|1| | | | |28+0=28| | |
+agcatattgtggtggtgattgctattcc
+>X97051|IGHD2-21*02|Homo_sapiens|F|D-REGION|64644..64671|28 nt|1| | | | |28+0=28| | |
+agcatattgtggtggtgactgctattcc
+>X13972|IGHD2-8*01|Homo_sapiens|F|D-REGION|7949..7979|31 nt|1| | | | |31+0=31| | |
+aggatattgtactaatggtgtatgctatacc
+>J00233|IGHD2-8*02|Homo_sapiens|F|D-REGION|29..59|31 nt|1| | | | |31+0=31| | |
+aggatattgtactggtggtgtatgctatacc
+>X55577|IGHD2/OR15-2a*01|Homo_sapiens|ORF|D-REGION|68..98|31 nt|1| | | | |31+0=31| | |
+agaatattgtaatagtactactttctatgcc
+>X55578|IGHD2/OR15-2b*01|Homo_sapiens|ORF|D-REGION|68..98|31 nt|1| | | | |31+0=31| | |
+agaatattgtaatagtactactttctatgcc
+>X13972|IGHD3-10*01|Homo_sapiens|F|D-REGION|10659..10689|31 nt|1| | | | |31+0=31| | |
+gtattactatggttcggggagttattataac
+>X93615|IGHD3-10*02|Homo_sapiens|F|D-REGION|30..59|30 nt|1| | | | |30+0=30| | |
+gtattactatgttcggggagttattataac
+>X93614|IGHD3-16*01|Homo_sapiens|F|D-REGION|30..66|37 nt|1| | | | |37+0=37| | |
+gtattatgattacgtttgggggagttatgcttatacc
+>X97051|IGHD3-16*02|Homo_sapiens|F|D-REGION|57552..57588|37 nt|1| | | | |37+0=37| | |
+gtattatgattacgtttgggggagttatcgttatacc
+>X93616|IGHD3-22*01|Homo_sapiens|F|D-REGION|30..60|31 nt|1| | | | |31+0=31| | |
+gtattactatgatagtagtggttattactac
+>X13972|IGHD3-3*01|Homo_sapiens|F|D-REGION|804..834|31 nt|1| | | | |31+0=31| | |
+gtattacgatttttggagtggttattatacc
+>X93618|IGHD3-3*02|Homo_sapiens|F|D-REGION|30..60|31 nt|1| | | | |31+0=31| | |
+gtattagcatttttggagtggttattatacc
+>X13972|IGHD3-9*01|Homo_sapiens|F|D-REGION|10475..10505|31 nt|1| | | | |31+0=31| | |
+gtattacgatattttgactggttattataac
+>X55579|IGHD3/OR15-3a*01|Homo_sapiens|ORF|D-REGION|210..240|31 nt|1| | | | |31+0=31| | |
+gtattatgatttttggactggttattatacc
+>X55580|IGHD3/OR15-3b*01|Homo_sapiens|ORF|D-REGION|210..240|31 nt|1| | | | |31+0=31| | |
+gtattatgatttttggactggttattatacc
+>X13972|IGHD4-11*01|Homo_sapiens|ORF|D-REGION|11550..11565|16 nt|1| | | | |16+0=16| | |
+tgactacagtaactac
+>X97051|IGHD4-17*01|Homo_sapiens|F|D-REGION|58699..58714|16 nt|1| | | | |16+0=16| | |
+tgactacggtgactac
+>X97051|IGHD4-23*01|Homo_sapiens|ORF|D-REGION|68334..68352|19 nt|1| | | | |19+0=19| | |
+tgactacggtggtaactcc
+>X13972|IGHD4-4*01|Homo_sapiens|F|D-REGION|1952..1967|16 nt|1| | | | |16+0=16| | |
+tgactacagtaactac
+>X55581|IGHD4/OR15-4a*01|Homo_sapiens|ORF|D-REGION|83..101|19 nt|1| | | | |19+0=19| | |
+tgactatggtgctaactac
+>X55582|IGHD4/OR15-4b*01|Homo_sapiens|ORF|D-REGION|82..100|19 nt|1| | | | |19+0=19| | |
+tgactatggtgctaactac
+>X13972|IGHD5-12*01|Homo_sapiens|F|D-REGION|12506..12528|23 nt|1| | | | |23+0=23| | |
+gtggatatagtggctacgattac
+>X97051|IGHD5-18*01|Homo_sapiens|F|D-REGION|59661..59680|20 nt|1| | | | |20+0=20| | |
+gtggatacagctatggttac
+>X97051|IGHD5-24*01|Homo_sapiens|ORF|D-REGION|69300..69319|20 nt|1| | | | |20+0=20| | |
+gtagagatggctacaattac
+>X13972|IGHD5-5*01|Homo_sapiens|F|D-REGION|2913..2932|20 nt|1| | | | |20+0=20| | |
+gtggatacagctatggttac
+>X55583|IGHD5/OR15-5a*01|Homo_sapiens|ORF|D-REGION|94..116|23 nt|1| | | | |23+0=23| | |
+gtggatatagtgtctacgattac
+>X55584|IGHD5/OR15-5b*01|Homo_sapiens|ORF|D-REGION|94..116|23 nt|1| | | | |23+0=23| | |
+gtggatatagtgtctacgattac
+>X13972|IGHD6-13*01|Homo_sapiens|F|D-REGION|14011..14031|21 nt|1| | | | |21+0=21| | |
+gggtatagcagcagctggtac
+>X97051|IGHD6-19*01|Homo_sapiens|F|D-REGION|61503..61523|21 nt|1| | | | |21+0=21| | |
+gggtatagcagtggctggtac
+>X97051|IGHD6-25*01|Homo_sapiens|F|D-REGION|71666..71683|18 nt|1| | | | |18+0=18| | |
+gggtatagcagcggctac
+>X13972|IGHD6-6*01|Homo_sapiens|F|D-REGION|4762..4779|18 nt|1| | | | |18+0=18| | |
+gagtatagcagctcgtcc
+>J00256|IGHD7-27*01|Homo_sapiens|F|D-REGION|621..631|11 nt|1| | | | |11+0=11| | |
+ctaactgggga
+

diff -r 000000000000 -r 183edf446dcf IMGT_Human_IGHJ.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/IMGT_Human_IGHJ.fasta Mon Jul 17 07:44:27 2017 -0400

@@ -0,0 +1,31 @@
+>J00256|IGHJ1*01|Homo_sapiens|F|J-REGION|723..774|52 nt|1| | | | |52+0=52| | |
+gctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag
+>J00256|IGHJ2*01|Homo_sapiens|F|J-REGION|932..984|53 nt|2| | | | |53+0=53| | |
+ctactggtacttcgatctctggggccgtggcaccctggtcactgtctcctcag
+>J00256|IGHJ3*01|Homo_sapiens|F|J-REGION|1537..1586|50 nt|2| | | | |50+0=50| | |
+tgatgcttttgatgtctggggccaagggacaatggtcaccgtctcttcag
+>X86355|IGHJ3*02|Homo_sapiens|F|J-REGION|1107..1156|50 nt|2| | | | |50+0=50| | |
+tgatgcttttgatatctggggccaagggacaatggtcaccgtctcttcag
+>J00256|IGHJ4*01|Homo_sapiens|F|J-REGION|1912..1959|48 nt|3| | | | |48+0=48| | |
+actactttgactactggggccaaggaaccctggtcaccgtctcctcag
+>X86355|IGHJ4*02|Homo_sapiens|F|J-REGION|1480..1527|48 nt|3| | | | |48+0=48| | |
+actactttgactactggggccagggaaccctggtcaccgtctcctcag
+>M25625|IGHJ4*03|Homo_sapiens|F|J-REGION|446..493|48 nt|3| | | | |48+0=48| | |
+gctactttgactactggggccaagggaccctggtcaccgtctcctcag
+>J00256|IGHJ5*01|Homo_sapiens|F|J-REGION|2354..2404|51 nt|3| | | | |51+0=51| | |
+acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag
+>X86355|IGHJ5*02|Homo_sapiens|F|J-REGION|1878..1928|51 nt|3| | | | |51+0=51| | |
+acaactggttcgacccctggggccagggaaccctggtcaccgtctcctcag
+>J00256|IGHJ6*01|Homo_sapiens|F|J-REGION|2947..3009|63 nt|3| | | | |63+0=63| | |
+attactactactactacggtatggacgtctgggggcaagggaccacggtcaccgtctcct
+cag
+>X86355|IGHJ6*02|Homo_sapiens|F|J-REGION|2482..2543|62 nt|3| | | | |62+0=62| | |
+attactactactactacggtatggacgtctggggccaagggaccacggtcaccgtctcct
+ca
+>X86356|IGHJ6*03|Homo_sapiens|F|J-REGION|2482..2543|62 nt|3| | | | |62+0=62| | |
+attactactactactactacatggacgtctggggcaaagggaccacggtcaccgtctcct
+ca
+>AJ879487|IGHJ6*04|Homo_sapiens|F|J-REGION|39..101|63 nt|3| | | | |63+0=63| | |
+attactactactactacggtatggacgtctggggcaaagggaccacggtcaccgtctcct
+cag
+

diff -r 000000000000 -r 183edf446dcf IMGT_Human_IGHV.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/IMGT_Human_IGHV.fasta Mon Jul 17 07:44:27 2017 -0400

b"@@ -0,0 +1,2442 @@\n+>M99641|IGHV1-18*01|Homo_sapiens|F|V-REGION|188..483|296 nt|1| | | | |296+24=320| | |\n+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggttacaccttt............accagctatggtatcagc\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac...\n+...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca\n+gacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggcc\n+gtgtattactgtgcgagaga\n+>X60503|IGHV1-18*02|Homo_sapiens|F|V-REGION|142..417|276 nt|1| | | | |276+24=300|partial in 3'| |\n+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggttacaccttt............accagctatggtatcagc\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac...\n+...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca\n+gacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc\n+>HM855463|IGHV1-18*03|Homo_sapiens|F|V-REGION|21..316|296 nt|1| | | | |296+24=320| | |\n+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggttacaccttt............accagctatggtatcagc\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac...\n+...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca\n+gacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggcc\n+gtgtattactgtgcgagaga\n+>KC713938|IGHV1-18*04|Homo_sapiens|F|V-REGION|392..687|296 nt|1| | | | |296+24=320| | |\n+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggttacaccttt............accagctacggtatcagc\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac...\n+...aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccaca\n+gacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggcc\n+gtgtattactgtgcgagaga\n+>X07448|IGHV1-2*01|Homo_sapiens|F|V-REGION|269..564|296 nt|1| | | | |296+24=320| | 
|\n+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac...\n+...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagg\n+gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtc\n+gtgtattactgtgcgagaga\n+>X62106|IGHV1-2*02|Homo_sapiens|F|V-REGION|163..458|296 nt|1| | | | |296+24=320| | |\n+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac...\n+...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagg\n+gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggcc\n+gtgtattactgtgcgagaga\n+>X92208|IGHV1-2*03|Homo_sapiens|F|V-REGION|160..455|296 nt|1| | || |296+24=320| | |\n+caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaag\n+gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac\n+tgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac...\n+...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagg\n+gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggcc\n+gtgtattactgtgcgagaga\n+>KF698733|IGHV1-2*04|Homo_sapiens|F|V-REGION|393..688|296 nt|1| | | | |296+24=320| | |\n+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac...\n+...agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagg\n+gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggcc\n+gtgtattactgtgcgagaga\n+>HM855674|IGHV1-2*05|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| | 
|\n+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggatacaccttc............accggctactatatgcac\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac...\n+...agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagg\n+gacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtc\n+gtgtattactgtgcgagaga\n+>M99642|IGHV1-24*01|Homo_sapiens|F|V-REGION|210..505|296 nt|1| | | | |296+24=320| | |\n+caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctgcaaggtttccggatacaccctc............actgaattatccatgcac\n+tgggtgc"..b"tcgagaggccttgagtggctgggaaggacatactacaggtcc\n+...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaaccca\n+gacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggct\n+gtgtattactgtgcaagaga\n+>AB019439|IGHV7-34-1*01|Homo_sapiens|P|V-REGION|56018..56310|293 nt|1| | | | |293+27=320| | |\n+...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctataagtcttctggttacaccttc............accatctatggtatgaat\n+tgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac...\n+...actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatg\n+gacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggcc\n+gagtattactgtgcgaagta\n+>HM855644|IGHV7-34-1*02|Homo_sapiens|P|V-REGION|24..316|293 nt|1| | | | |293+27=320| |rev-compl|\n+...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaag\n+gtctcctataagtcttctggttacaccttc............accatctatggtatgaat\n+tgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac...\n+...aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatg\n+gacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggcc\n+gagtattactgtgcgaagta\n+>L10057|IGHV7-4-1*01|Homo_sapiens|F|V-REGION|95..388|294 nt|1| | | | |294+24=318| | 
|\n+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag\n+gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac...\n+...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg\n+gacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgcc\n+gtgtattactgtgcgaga\n+>X62110|IGHV7-4-1*02|Homo_sapiens|F|V-REGION|158..453|296 nt|1| | | | |296+24=320| | |\n+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag\n+gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac...\n+...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg\n+gacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgcc\n+gtgtattactgtgcgagaga\n+>X92290|IGHV7-4-1*03|Homo_sapiens|F|V-REGION|1..274|274 nt|1| | | | |274+24=298|partial in 3'| |\n+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag\n+gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac...\n+...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg\n+gacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg\n+>HM855485|IGHV7-4-1*04|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| |rev-compl|\n+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag\n+gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac...\n+...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg\n+gacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgcc\n+gtgtattactgtgcgagaga\n+>HM855361|IGHV7-4-1*05|Homo_sapiens|F|V-REGION|24..319|296 nt|1| | | | |296+24=320| 
|rev-compl|\n+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaag\n+gtttcctgcaaggcttctggatacaccttc............actagctatgctatgaat\n+tgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac...\n+...actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttg\n+gacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgcc\n+gtgtgttactgtgcgagaga\n+>AC241995|IGHV7-40*03|Homo_sapiens|P|V-REGION|10101..10396|296 nt|1| | | | |296+24=320| | |\n+ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaat\n+atgatatatgtaaatcatggaatactatgc............agccagtatggtatgaat\n+tcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac...\n+...actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatg\n+gacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggcc\n+gtgtatgactgtatgagaga\n+>AB019437|IGHV7-81*01|Homo_sapiens|ORF|V-REGION|6456..6751|296 nt|1| | | | |296+24=320| | |\n+caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaag\n+gtctcctgcaaggcttctggttacagtttc............accacctatggtatgaat\n+tgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac...\n+...actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatg\n+gacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggcc\n+atgtattactgtgcgagata\n+\n"

diff -r 000000000000 -r 183edf446dcf LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Mon Jul 17 07:44:27 2017 -0400

b'@@ -0,0 +1,437 @@\n+Attribution-NonCommercial-ShareAlike 4.0 International\n+\n+=======================================================================\n+\n+Creative Commons Corporation ("Creative Commons") is not a law firm and\n+does not provide legal services or legal advice. Distribution of\n+Creative Commons public licenses does not create a lawyer-client or\n+other relationship. Creative Commons makes its licenses and related\n+information available on an "as-is" basis. Creative Commons gives no\n+warranties regarding its licenses, any material licensed under their\n+terms and conditions, or any related information. Creative Commons\n+disclaims all liability for damages resulting from their use to the\n+fullest extent possible.\n+\n+Using Creative Commons Public Licenses\n+\n+Creative Commons public licenses provide a standard set of terms and\n+conditions that creators and other rights holders may use to share\n+original works of authorship and other material subject to copyright\n+and certain other rights specified in the public license below. The\n+following considerations are for informational purposes only, are not\n+exhaustive, and do not form part of our licenses.\n+\n+ Considerations for licensors: Our public licenses are\n+ intended for use by those authorized to give the public\n+ permission to use material in ways otherwise restricted by\n+ copyright and certain other rights. Our licenses are\n+ irrevocable. Licensors should read and understand the terms\n+ and conditions of the license they choose before applying it.\n+ Licensors should also secure all rights necessary before\n+ applying our licenses so that the public can reuse the\n+ material as expected. Licensors should clearly mark any\n+ material not subject to the license. This includes other CC-\n+ licensed material, or material used under an exception or\n+ limitation to copyright. 
More considerations for licensors:\n+\twiki.creativecommons.org/Considerations_for_licensors\n+\n+ Considerations for the public: By using one of our public\n+ licenses, a licensor grants the public permission to use the\n+ licensed material under specified terms and conditions. If\n+ the licensor\'s permission is not necessary for any reason--for\n+ example, because of any applicable exception or limitation to\n+ copyright--then that use is not regulated by the license. Our\n+ licenses grant only permissions under copyright and certain\n+ other rights that a licensor has authority to grant. Use of\n+ the licensed material may still be restricted for other\n+ reasons, including because others have copyright or other\n+ rights in the material. A licensor may make special requests,\n+ such as asking that all changes be marked or described.\n+ Although not required by our licenses, you are encouraged to\n+ respect those requests where reasonable. More_considerations\n+ for the public:\n+\twiki.creativecommons.org/Considerations_for_licensees\n+\n+=======================================================================\n+\n+Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International\n+Public License\n+\n+By exercising the Licensed Rights (defined below), You accept and agree\n+to be bound by the terms and conditions of this Creative Commons\n+Attribution-NonCommercial-ShareAlike 4.0 International Public License\n+("Public License"). To the extent this Public License may be\n+interpreted as a contract, You are granted the Licensed Rights in\n+consideration of Your acceptance of these terms and conditions, and the\n+Licensor grants You such rights in consideration of benefits the\n+Licensor receives from making the Licensed Material available under\n+these terms and conditions.\n+\n+\n+Section 1 -- Definitions.\n+\n+ a. 
Adapted Material means material subject to Copyright and Similar\n+ Rights that is derived from or based upon the Licensed Material\n+ and in which the Licensed Material is translated, altered,\n+ arranged, tr'..b'anties and limitation of liability provided\n+ above shall be interpreted in a manner that, to the extent\n+ possible, most closely approximates an absolute disclaimer and\n+ waiver of all liability.\n+\n+\n+Section 6 -- Term and Termination.\n+\n+ a. This Public License applies for the term of the Copyright and\n+ Similar Rights licensed here. However, if You fail to comply with\n+ this Public License, then Your rights under this Public License\n+ terminate automatically.\n+\n+ b. Where Your right to use the Licensed Material has terminated under\n+ Section 6(a), it reinstates:\n+\n+ 1. automatically as of the date the violation is cured, provided\n+ it is cured within 30 days of Your discovery of the\n+ violation; or\n+\n+ 2. upon express reinstatement by the Licensor.\n+\n+ For the avoidance of doubt, this Section 6(b) does not affect any\n+ right the Licensor may have to seek remedies for Your violations\n+ of this Public License.\n+\n+ c. For the avoidance of doubt, the Licensor may also offer the\n+ Licensed Material under separate terms or conditions or stop\n+ distributing the Licensed Material at any time; however, doing so\n+ will not terminate this Public License.\n+\n+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public\n+ License.\n+\n+\n+Section 7 -- Other Terms and Conditions.\n+\n+ a. The Licensor shall not be bound by any additional or different\n+ terms or conditions communicated by You unless expressly agreed.\n+\n+ b. Any arrangements, understandings, or agreements regarding the\n+ Licensed Material not stated herein are separate from and\n+ independent of the terms and conditions of this Public License.\n+\n+\n+Section 8 -- Interpretation.\n+\n+ a. 
For the avoidance of doubt, this Public License does not, and\n+ shall not be interpreted to, reduce, limit, restrict, or impose\n+ conditions on any use of the Licensed Material that could lawfully\n+ be made without permission under this Public License.\n+\n+ b. To the extent possible, if any provision of this Public License is\n+ deemed unenforceable, it shall be automatically reformed to the\n+ minimum extent necessary to make it enforceable. If the provision\n+ cannot be reformed, it shall be severed from this Public License\n+ without affecting the enforceability of the remaining terms and\n+ conditions.\n+\n+ c. No term or condition of this Public License will be waived and no\n+ failure to comply consented to unless expressly agreed to by the\n+ Licensor.\n+\n+ d. Nothing in this Public License constitutes or may be interpreted\n+ as a limitation upon, or waiver of, any privileges and immunities\n+ that apply to the Licensor or You, including from the legal\n+ processes of any jurisdiction or authority.\n+\n+=======================================================================\n+\n+Creative Commons is not a party to its public\n+licenses. Notwithstanding, Creative Commons may elect to apply one of\n+its public licenses to material it publishes and in those instances\n+will be considered the \xe2\x80\x9cLicensor.\xe2\x80\x9d The text of the Creative Commons\n+public licenses is dedicated to the public domain under the CC0 Public\n+Domain Dedication. 
Except for the limited purpose of indicating that\n+material is shared under a Creative Commons public license or as\n+otherwise permitted by the Creative Commons policies published at\n+creativecommons.org/policies, Creative Commons does not authorize the\n+use of the trademark "Creative Commons" or any other trademark or logo\n+of Creative Commons without its prior written consent including,\n+without limitation, in connection with any unauthorized modifications\n+to any of its public licenses or any other arrangements,\n+understandings, or agreements concerning use of licensed material. For\n+the avoidance of doubt, this paragraph does not form part of the\n+public licenses.\n+\n+Creative Commons may be contacted at creativecommons.org.\n'

diff -r 000000000000 -r 183edf446dcf MakeDb.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MakeDb.py Mon Jul 17 07:44:27 2017 -0400

[

b'@@ -0,0 +1,556 @@\n+#!/usr/bin/env python3\n+"""\n+Create tab-delimited database file to store sequence alignment information\n+"""\n+# Info\n+__author__ = \'Namita Gupta, Jason Anthony Vander Heiden\'\n+from changeo import __version__, __date__\n+\n+# Imports\n+import os\n+import sys\n+from argparse import ArgumentParser\n+from collections import OrderedDict\n+from textwrap import dedent\n+from time import time\n+from Bio import SeqIO\n+\n+# Presto and changeo imports\n+from presto.Defaults import default_out_args\n+from presto.Annotation import parseAnnotation\n+from presto.IO import countSeqFile, printLog, printMessage, printProgress, readSeqFile\n+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs\n+from changeo.IO import countDbFile, extractIMGT, getDbWriter, readRepo\n+from changeo.Parsers import IgBLASTReader, IMGTReader, IHMMuneReader, getIDforIMGT\n+\n+\n+def getFilePrefix(aligner_output, out_args):\n+ """\n+ Get file name prefix and create output directory\n+\n+ Arguments:\n+ aligner_output : aligner output file or directory.\n+ out_args : dictionary of output arguments.\n+\n+ Returns:\n+ str : file name prefix.\n+ """\n+ # Determine output directory\n+ if not out_args[\'out_dir\']:\n+ out_dir = os.path.dirname(os.path.abspath(aligner_output))\n+ else:\n+ out_dir = os.path.abspath(out_args[\'out_dir\'])\n+ if not os.path.exists(out_dir):\n+ os.mkdir(out_dir)\n+\n+ # Determine file prefix\n+ if out_args[\'out_name\']:\n+ file_prefix = out_args[\'out_name\']\n+ else:\n+ file_prefix = os.path.splitext(os.path.split(os.path.abspath(aligner_output))[1])[0]\n+\n+ return os.path.join(out_dir, file_prefix)\n+\n+\n+def getSeqDict(seq_file):\n+ """\n+ Create a dictionary from a sequence file.\n+\n+ Arguments:\n+ seq_file : sequence file.\n+\n+ Returns:\n+ dict : sequence description as keys with Bio.SeqRecords as values.\n+ """\n+ seq_dict = SeqIO.to_dict(readSeqFile(seq_file),\n+ key_function=lambda x: 
x.description)\n+\n+ return seq_dict\n+\n+\n+def writeDb(db, fields, file_prefix, total_count, id_dict=None, no_parse=True, partial=False,\n+ out_args=default_out_args):\n+ """\n+ Writes tab-delimited database file in output directory.\n+ \n+ Arguments:\n+ db : a iterator of IgRecord objects containing alignment data.\n+ fields : a list of ordered field names to write.\n+ file_prefix : directory and prefix for CLIP tab-delim file.\n+ total_count : number of records (for progress bar).\n+ id_dict : a dictionary of the truncated sequence ID mapped to the full sequence ID.\n+ no_parse : if ID is to be parsed for pRESTO output with default delimiters.\n+ partial : if True put incomplete alignments in the pass file.\n+ out_args : common output argument dictionary from parseCommonArgs.\n+\n+ Returns:\n+ None\n+ """\n+ # Function to check for valid records strictly\n+ def _pass_strict(rec):\n+ valid = [rec.v_call and rec.v_call != \'None\',\n+ rec.j_call and rec.j_call != \'None\',\n+ rec.functional is not None,\n+ rec.seq_vdj,\n+ rec.junction]\n+ return all(valid)\n+\n+ # Function to check for valid records loosely\n+ def _pass_gentle(rec):\n+ valid = [rec.v_call and rec.v_call != \'None\',\n+ rec.d_call and rec.d_call != \'None\',\n+ rec.j_call and rec.j_call != \'None\']\n+ return any(valid)\n+\n+ # Set pass criteria\n+ _pass = _pass_gentle if partial else _pass_strict\n+\n+ # Define output file names\n+ pass_file = \'%s_db-pass.tab\' % file_prefix\n+ fail_file = \'%s_db-fail.tab\' % file_prefix\n+\n+ # Initiate handles, writers and counters\n+ pass_handle = None\n+ fail_handle = None\n+ pass_writer = None\n+ fail_writer = None\n+ start_time = time()\n+ rec_count = pass_count = fail_count = 0\n+\n+ # Validate and write out'..b' help=\'Process iHMMune-Align output.\',\n+ description=\'Process iHMMune-Align output.\')\n+ parser_ihmm.add_argument(\'-i\', nargs=\'+\', action=\'store\', dest=\'aligner_outputs\',\n+ required=True,\n+ help=\'\'\'iHMMune-Align output 
file.\'\'\')\n+ parser_ihmm.add_argument(\'-r\', nargs=\'+\', action=\'store\', dest=\'repo\', required=True,\n+ help=\'\'\'List of folders and/or FASTA files containing\n+ IMGT-gapped germline sequences corresponding to the\n+ set of germlines used in the IgBLAST alignment.\'\'\')\n+ parser_ihmm.add_argument(\'-s\', action=\'store\', nargs=\'+\', dest=\'seq_files\',\n+ required=True,\n+ help=\'\'\'List of input FASTA files (with .fasta, .fna or .fa\n+ extension) containing sequences.\'\'\')\n+ parser_ihmm.add_argument(\'--noparse\', action=\'store_true\', dest=\'no_parse\',\n+ help=\'\'\'Specify to prevent input sequence headers from being parsed\n+ to add new columns to database. Parsing of sequence headers requires\n+ headers to be in the pRESTO annotation format, so this should be specified\n+ when sequence headers are incompatible with the pRESTO annotation scheme.\n+ Note, unrecognized header formats will default to this behavior.\'\'\')\n+ parser_ihmm.add_argument(\'--partial\', action=\'store_true\', dest=\'partial\',\n+ help=\'\'\'If specified, include incomplete V(D)J alignments in\n+ the pass file instead of the fail file.\'\'\')\n+ parser_ihmm.add_argument(\'--scores\', action=\'store_true\', dest=\'parse_scores\',\n+ help=\'\'\'Specify if alignment score metrics should be\n+ included in the output. Adds the path score of the\n+ iHMMune-Align hidden Markov model to HMM_SCORE.\'\'\')\n+ parser_ihmm.add_argument(\'--regions\', action=\'store_true\', dest=\'parse_regions\',\n+ help=\'\'\'Specify if IMGT FWRs and CDRs should be\n+ included in the output. 
Adds the FWR1_IMGT, FWR2_IMGT,\n+ FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and\n+ CDR3_IMGT columns.\'\'\')\n+ parser_ihmm.set_defaults(func=parseIHMM)\n+\n+ return parser\n+ \n+ \n+if __name__ == "__main__":\n+ """\n+ Parses command line arguments and calls main\n+ """\n+ parser = getArgParser()\n+ checkArgs(parser)\n+ args = parser.parse_args()\n+ args_dict = parseCommonArgs(args, in_arg=\'aligner_outputs\')\n+\n+ # Set no ID parsing if sequence files are not provided\n+ if \'seq_files\' in args_dict and not args_dict[\'seq_files\']:\n+ args_dict[\'no_parse\'] = True\n+\n+ # Delete\n+ if \'seq_files\' in args_dict: del args_dict[\'seq_files\']\n+ if \'aligner_outputs\' in args_dict: del args_dict[\'aligner_outputs\']\n+ if \'command\' in args_dict: del args_dict[\'command\']\n+ if \'func\' in args_dict: del args_dict[\'func\'] \n+ \n+ if args.command == \'imgt\':\n+ for i in range(len(args.__dict__[\'aligner_outputs\'])):\n+ args_dict[\'aligner_output\'] = args.__dict__[\'aligner_outputs\'][i]\n+ args_dict[\'seq_file\'] = args.__dict__[\'seq_files\'][i] \\\n+ if args.__dict__[\'seq_files\'] else None\n+ args.func(**args_dict)\n+ elif args.command == \'igblast\' or args.command == \'ihmm\':\n+ for i in range(len(args.__dict__[\'aligner_outputs\'])):\n+ args_dict[\'aligner_output\'] = args.__dict__[\'aligner_outputs\'][i]\n+ args_dict[\'seq_file\'] = args.__dict__[\'seq_files\'][i]\n+ args.func(**args_dict)\n'

diff -r 000000000000 -r 183edf446dcf ParseDb.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ParseDb.py Mon Jul 17 07:44:27 2017 -0400

[

b'@@ -0,0 +1,1119 @@\n+#!/usr/bin/env python3\n+"""\n+Parses tab delimited database files\n+"""\n+# Info\n+__author__ = \'Jason Anthony Vander Heiden\'\n+from changeo import __version__, __date__\n+\n+# Imports\n+import csv\n+import os\n+import re\n+from argparse import ArgumentParser\n+from collections import OrderedDict\n+\n+from textwrap import dedent\n+from time import time\n+from Bio import SeqIO\n+from Bio.Seq import Seq\n+from Bio.SeqRecord import SeqRecord\n+from Bio.Alphabet import IUPAC\n+\n+# Presto and changeo imports\n+from presto.Defaults import default_delimiter, default_out_args\n+from presto.Annotation import flattenAnnotation\n+from presto.IO import getOutputHandle, printLog, printProgress, printMessage\n+from changeo.Defaults import default_csv_size\n+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs\n+from changeo.IO import getDbWriter, readDbFile, countDbFile\n+\n+# System settings\n+csv.field_size_limit(default_csv_size)\n+\n+# Defaults\n+default_id_field = \'SEQUENCE_ID\'\n+default_seq_field = \'SEQUENCE_IMGT\'\n+default_germ_field = \'GERMLINE_IMGT_D_MASK\'\n+default_index_field = \'INDEX\'\n+\n+# TODO: convert SQL-ish operations to modify_func() as per ParseHeaders\n+\n+def getDbSeqRecord(db_record, id_field, seq_field, meta_fields=None, \n+ delimiter=default_delimiter):\n+ """\n+ Parses a database record into a SeqRecord\n+\n+ Arguments: \n+ db_record = a dictionary containing a database record\n+ id_field = the field containing identifiers\n+ seq_field = the field containing sequences\n+ meta_fields = a list of fields to add to sequence annotations\n+ delimiter = a tuple of delimiters for (fields, values, value lists) \n+\n+ Returns: \n+ a SeqRecord\n+ """\n+ # Return None if ID or sequence fields are empty\n+ if not db_record[id_field] or not db_record[seq_field]:\n+ return None\n+ \n+ # Create description string\n+ desc_dict = OrderedDict([(\'ID\', db_record[id_field])])\n+ if 
meta_fields is not None:\n+ desc_dict.update([(f, db_record[f]) for f in meta_fields if f in db_record]) \n+ desc_str = flattenAnnotation(desc_dict, delimiter=delimiter)\n+ \n+ # Create SeqRecord\n+ seq_record = SeqRecord(Seq(db_record[seq_field], IUPAC.ambiguous_dna),\n+ id=desc_str, name=desc_str, description=\'\')\n+ \n+ return seq_record\n+\n+\n+def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):\n+ """\n+ Divides a tab-delimited database file into segments by description tags\n+\n+ Arguments:\n+ db_file = filename of the tab-delimited database file to split\n+ field = the field name by which to split db_file\n+ num_split = the numerical threshold by which to group sequences;\n+ if None treat field as textual\n+ out_args = common output argument dictionary from parseCommonArgs\n+\n+ Returns:\n+ a list of output file names\n+ """\n+ log = OrderedDict()\n+ log[\'START\'] = \'ParseDb\'\n+ log[\'COMMAND\'] = \'split\'\n+ log[\'FILE\'] = os.path.basename(db_file)\n+ log[\'FIELD\'] = field\n+ log[\'NUM_SPLIT\'] = num_split\n+ printLog(log)\n+\n+ # Open IgRecord reader iter object\n+ reader = readDbFile(db_file, ig=False)\n+\n+ # Determine total numbers of records\n+ rec_count = countDbFile(db_file)\n+\n+ start_time = time()\n+ count = 0\n+ # Sort records into files based on textual field\n+ if num_split is None:\n+ # Create set of unique field tags\n+ tmp_iter = readDbFile(db_file, ig=False)\n+ tag_list = list(set([row[field] for row in tmp_iter]))\n+\n+ # Forbidden characters in filename and replacements\n+ noGood = {\'\\/\':\'f\',\'\\\\\':\'b\',\'?\':\'q\',\'\\%\':\'p\',\'*\':\'s\',\':\':\'c\',\n+ \'\\|\':\'pi\',\'\\"\':\'dq\',\'\\\'\':\'sq\',\'<\':\'gt\',\'>\':\'lt\',\' \':\'_\'}\n+ # Replace forbidden characters in tag_list\n+ tag_dict = {}\n+ for tag in tag_list:\n+ for c,r in noGood.items():\n+ tag_dict[tag] = (tag_dict.get(tag, tag'..b'w partial string matches.\'\'\')\n+ parser_select.set_defaults(func=selectDbFile)\n+\n+ # Subparser to 
sort file by records\n+ parser_sort = subparsers.add_parser(\'sort\', parents=[parser_parent],\n+ formatter_class=CommonHelpFormatter,\n+ help=\'Sorts records by field values.\',\n+ description=\'Sorts records by field values.\')\n+ parser_sort.add_argument(\'-f\', action=\'store\', dest=\'field\', type=str, required=True,\n+ help=\'The annotation field by which to sort records.\')\n+ parser_sort.add_argument(\'--num\', action=\'store_true\', dest=\'numeric\', default=False,\n+ help=\'\'\'Specify to define the sort column as numeric rather\n+ than textual.\'\'\')\n+ parser_sort.add_argument(\'--descend\', action=\'store_true\', dest=\'descend\',\n+ help=\'\'\'If specified, sort records in descending, rather\n+ than ascending, order by values in the target field.\'\'\')\n+ parser_sort.set_defaults(func=sortDbFile)\n+\n+ # Subparser to update records\n+ parser_update = subparsers.add_parser(\'update\', parents=[parser_parent],\n+ formatter_class=CommonHelpFormatter,\n+ help=\'Updates field and value pairs.\',\n+ description=\'Updates field and value pairs.\')\n+ parser_update.add_argument(\'-f\', action=\'store\', dest=\'field\', required=True,\n+ help=\'The name of the field to update.\')\n+ parser_update.add_argument(\'-u\', nargs=\'+\', action=\'store\', dest=\'values\', required=True,\n+ help=\'The values that will be replaced.\')\n+ parser_update.add_argument(\'-t\', nargs=\'+\', action=\'store\', dest=\'updates\', required=True,\n+ help=\'\'\'The new value to assign to each selected row.\'\'\')\n+ parser_update.set_defaults(func=updateDbFile)\n+\n+ return parser\n+\n+\n+if __name__ == \'__main__\':\n+ """\n+ Parses command line arguments and calls main function\n+ """\n+ # Parse arguments\n+ parser = getArgParser()\n+ checkArgs(parser)\n+ args = parser.parse_args()\n+ args_dict = parseCommonArgs(args)\n+ # Convert case of fields\n+ if \'id_field\' in args_dict:\n+ args_dict[\'id_field\'] = args_dict[\'id_field\'].upper()\n+ if \'seq_field\' in args_dict:\n+ 
args_dict[\'seq_field\'] = args_dict[\'seq_field\'].upper()\n+ if \'germ_field\' in args_dict:\n+ args_dict[\'germ_field\'] = args_dict[\'germ_field\'].upper()\n+ if \'field\' in args_dict:\n+ args_dict[\'field\'] = args_dict[\'field\'].upper()\n+ if \'cluster_field\' in args_dict and args_dict[\'cluster_field\'] is not None:\n+ args_dict[\'cluster_field\'] = args_dict[\'cluster_field\'].upper()\n+ if \'meta_fields\' in args_dict and args_dict[\'meta_fields\'] is not None:\n+ args_dict[\'meta_fields\'] = [f.upper() for f in args_dict[\'meta_fields\']]\n+ if \'fields\' in args_dict:\n+ args_dict[\'fields\'] = [f.upper() for f in args_dict[\'fields\']]\n+\n+ # Check modify_args arguments\n+ if args.command == \'add\' and len(args_dict[\'fields\']) != len(args_dict[\'values\']):\n+ parser.error(\'You must specify exactly one value (-u) per field (-f)\')\n+ elif args.command == \'rename\' and len(args_dict[\'fields\']) != len(args_dict[\'names\']):\n+ parser.error(\'You must specify exactly one new name (-k) per field (-f)\')\n+ elif args.command == \'update\' and len(args_dict[\'values\']) != len(args_dict[\'updates\']):\n+ parser.error(\'You must specify exactly one value (-u) per replacement (-t)\')\n+\n+ # Call parser function for each database file\n+ del args_dict[\'command\']\n+ del args_dict[\'func\']\n+ del args_dict[\'db_files\']\n+ for f in args.__dict__[\'db_files\']:\n+ args_dict[\'db_file\'] = f\n+ args.func(**args_dict)\n+ \n'

diff -r 000000000000 -r 183edf446dcf create_germlines.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/create_germlines.sh Mon Jul 17 07:44:27 2017 -0400

[

@@ -0,0 +1,23 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+input=$1
+type=$2
+cloned=$3
+output=$4
+
+cp $input $PWD/input.tab #file has to have a ".tab" extension
+
+if [ "true" == "$cloned" ] ; then
+ cloned="--cloned"
+else
+ cloned=""
+fi
+
+mkdir $PWD/outdir
+
+#/home/galaxy/anaconda3/bin/python $dir/CreateGermlines.py -d $PWD/input.tab -r $dir/IMGT_Human_IGH[VDJ].fasta --outdir $PWD/outdir --outname output -g $type $cloned
+#/data/users/david/anaconda3/bin/python $dir/CreateGermlines.py -d $PWD/input.tab -r $dir/IMGT_Human_IGH[VDJ].fasta --outdir $PWD/outdir --outname output -g $type $cloned
+python3 $dir/CreateGermlines.py -d $PWD/input.tab -r $dir/IMGT_Human_IGH[VDJ].fasta --outdir $PWD/outdir --outname output -g $type $cloned
+
+mv $PWD/outdir/output_germ-pass.tab $output

diff -r 000000000000 -r 183edf446dcf create_germlines.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/create_germlines.xml Mon Jul 17 07:44:27 2017 -0400

@@ -0,0 +1,33 @@
+<tool id="change_o_create_germlines_galaxy" name="Create Germlines" version="1.0">
+    <description>Change-O</description>
+    <!-- Arguments are positional and must match create_germlines.sh: input, germline type, cloned flag, output file. -->
+    <command interpreter="bash">
+        create_germlines.sh $input $type $cloned $out_file
+    </command>
+    <inputs>
+        <!-- The wrapper hands this dataset to CreateGermlines.py as a Change-O database (option -d). -->
+        <param name="input" type="data" label="A Change-O DB file" />
+        <param name="type" type="select" label="Type" help="Specify type(s) of germlines to include full germline, germline with D-region masked, or germline for V-region only." >
+            <option value="full" selected="true">Full germline</option>
+            <option value="dmask">Masked D-region</option>
+            <option value="vonly" >V-region only</option>
+        </param>
+        <param name="cloned" type="select" label="Cloned" help="Create one germline per clone" >
+            <option value="true">True</option>
+            <option value="false" selected="true">False</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="out_file" label = "Change-O Germline on ${on_string}"/>
+    </outputs>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv359</citation>
+    </citations>
+    <help>
+Reconstructs germline sequences for a Change-O database file by running Change-O CreateGermlines.py against the IMGT human IGHV, IGHD and IGHJ FASTA files shipped with this tool.
+
+**Type** selects the germline to include: the full germline, the germline with the D-region masked, or the germline for the V-region only.
+
+**Cloned** creates one germline per clone when set to True.
+    </help>
+</tool>

diff -r 000000000000 -r 183edf446dcf define_clones.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/define_clones.r Mon Jul 17 07:44:27 2017 -0400

[

@@ -0,0 +1,39 @@
+# Summarise the clone size distribution of a Change-O database file.
+#
+# Usage: Rscript define_clones.r <input> <output>
+#   input   tab-delimited file with a header row and a CLONE column
+#   output  tab-delimited table with the columns "Clone size",
+#           "Nr of clones" and "Nr of sequences"
+args <- commandArgs(trailingOnly = TRUE)
+
+if (length(args) < 2) {
+    stop("usage: Rscript define_clones.r <input> <output>")
+}
+
+input <- args[1]
+output <- args[2]
+
+# TRUE/FALSE are spelled out because T and F are plain variables that can be
+# reassigned. comment.char is disabled so that a "#" inside a field does not
+# cut off the rest of the line.
+change.o <- read.table(input, header = TRUE, sep = "\t", quote = "",
+                       comment.char = "", stringsAsFactors = FALSE)
+
+# A missing column yields NULL; fail with a clear message instead of carrying
+# on with no data.
+if (is.null(change.o$CLONE)) {
+    stop("input file has no CLONE column")
+}
+
+# Number of sequences per clone.
+freq <- data.frame(table(change.o$CLONE))
+# Number of clones per clone size.
+freq2 <- data.frame(table(freq$Freq))
+
+# Var1 is a factor; as.character() recovers the clone size itself before the
+# numeric conversion.
+freq2$final <- as.numeric(freq2$Freq) * as.numeric(as.character(freq2$Var1))
+
+names(freq2) <- c("Clone size", "Nr of clones", "Nr of sequences")
+
+write.table(x = freq2, file = output, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

diff -r 000000000000 -r 183edf446dcf define_clones.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/define_clones.sh Mon Jul 17 07:44:27 2017 -0400

[

@@ -0,0 +1,41 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+#define_clones.sh $input $noparse $scores $regions $out_file
+
+type=$1
+input=$2
+
+mkdir -p $PWD/outdir
+
+echo "defineclones: $PWD/outdir"
+
+cp $input $PWD/input.tab #file has to have a ".tab" extension
+
+if [ "bygroup" == "$type" ] ; then
+ mode=$3
+ act=$4
+ model=$5
+ norm=$6
+ sym=$7
+ link=$8
+ dist=$9
+ output=${10}
+ output2=${11}
+
+ python3 $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
+ #/home/galaxy/anaconda3/bin/python $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
+
+ Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1
+else
+ method=$3
+ output=$4
+ output2=$5
+
+ python3 $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
+ #/home/galaxy/anaconda3/bin/python $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
+
+ Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1
+fi
+
+cp $PWD/outdir/output_clone-pass.tab $output

diff -r 000000000000 -r 183edf446dcf define_clones.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/define_clones.xml Mon Jul 17 07:44:27 2017 -0400

@@ -0,0 +1,72 @@
+<tool id="change_o_define_clones_galaxy" name="Define Clones" version="1.0">
+    <description>Change-O</description>
+    <!-- Arguments are positional; their order must match what define_clones.sh reads for each mode. -->
+    <command interpreter="bash">
+        #if $input_type.input_type_select=="bygroup"
+            define_clones.sh bygroup $input $input_type.mode $input_type.act $input_type.model $input_type.norm $input_type.sym $input_type.link $input_type.dist $out_file $out_file2
+        #else
+            define_clones.sh hclust $input $input_type.method $out_file $out_file2
+        #end if
+    </command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="A Change-O DB file" />
+        <conditional name="input_type">
+            <param name="input_type_select" type="select" label="Input type">
+                <option value="bygroup" selected="true">Define clones by V assignment, J assignment and junction length</option>
+                <option value="hclust">Define clones by specified distance metric on CDR3s and cutting of hierarchical clustering tree</option>
+            </param>
+            <when value="bygroup">
+                <param name="mode" type="select" label="Specifies whether to use the V(D)J allele or gene for initial grouping.">
+                    <option value="allele">Allele</option>
+                    <option value="gene" selected="true">Gene</option>
+                </param>
+                <param name="act" type="select" label="Specifies how to handle multiple V(D)J assignments for initial grouping.">
+                    <option value="first" selected="true">First</option>
+                    <option value="set">Set</option>
+                </param>
+                <param name="model" type="select" label="Specifies which substitution model to use for calculating distance between sequences.">
+                    <option value="aa">AA hamming distance</option>
+                    <option value="ham" selected="true">Nucleotide hamming distance</option>
+                    <option value="m1n">Mouse single nucleotide (Smith et al, 1996)</option>
+                    <option value="hs1f">Human single nucleotide (Yaari et al, 2013)</option>
+                    <option value="hs5f">Human S5F (Yaari et al, 2013)</option>
+                </param>
+                <param name="norm" type="select" label="Specifies how to normalize distances.">
+                    <option value="none" selected="true">Do not normalize</option>
+                    <option value="mut">Normalize by number of mutations</option>
+                    <option value="len">Normalize by length</option>
+                </param>
+                <param name="sym" type="select" label="Specifies how to combine asymmetric distances.">
+                    <option value="avg">Average</option>
+                    <option value="min" selected="true">Minimum</option>
+                </param>
+                <param name="link" type="select" label="Type of linkage to use for hierarchical clustering.">
+                    <option value="single">Single</option>
+                    <option value="average">Average</option>
+                    <option value="complete" selected="true">Complete</option>
+                </param>
+                <param name="dist" size="4" type="float" value="0.0" label="The distance threshold for clonal grouping" />
+            </when>
+            <when value="hclust">
+                <param name="method" type="select" label="Specifies which cloning method to use for calculating distance between CDR3s">
+                    <option value="chen2010" selected="true">Chen et al 2010</option>
+                    <option value="ademokun2011">Ademokun et al 2011</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="out_file" label = "Change-o DB clones ${input.name}"/>
+        <data format="tabular" name="out_file2" label = "Change-o DB clones info ${input.name}"/>
+    </outputs>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv359</citation>
+    </citations>
+    <help>
+Assigns the sequences of a Change-O database file to clones by running Change-O DefineClones.py.
+
+**Input type** selects the method: grouping by V assignment, J assignment and junction length, or hierarchical clustering on a CDR3 distance metric. The remaining parameters depend on the selected method.
+
+Two datasets are produced: the Change-O database written by DefineClones.py, and a summary table that lists, for every clone size, the number of clones of that size and the number of sequences they contain.
+    </help>
+</tool>

diff -r 000000000000 -r 183edf446dcf makedb.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/makedb.sh Mon Jul 17 07:44:27 2017 -0400

[

@@ -0,0 +1,36 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+input=$1
+noparse=$2
+scores=$3
+regions=$4
+output=$5
+
+if [ "true" == "$noparse" ] ; then
+ noparse="--noparse"
+else
+ noparse=""
+fi
+
+if [ "true" == "$scores" ] ; then
+ scores="--scores"
+else
+ scores=""
+fi
+
+if [ "true" == "$regions" ] ; then
+ regions="--regions"
+else
+ regions=""
+fi
+
+mkdir $PWD/outdir
+
+echo "`which python3`"
+
+python3 $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
+#/data/users/david/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
+#/home/galaxy/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
+
+mv $PWD/outdir/output_db-pass.tab $output

diff -r 000000000000 -r 183edf446dcf makedb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/makedb.xml Mon Jul 17 07:44:27 2017 -0400

@@ -0,0 +1,33 @@
+<tool id="change_o_makdedb_galaxy" name="MakeDB" version="1.0">
+    <!-- NOTE(review): "makdedb" in the id looks like a typo for "makedb"; left unchanged because the id is the tool's identifier. -->
+    <description>Change-O</description>
+    <command interpreter="bash">
+        makedb.sh $input $noparse $scores $regions $out_file
+    </command>
+    <inputs>
+        <param name="input" type="data" label="Input IMGT zip file" />
+        <param name="noparse" type="select" label="No parse" help="Specify if input IDs should not be parsed to add new columns to database." >
+            <option value="true">True</option>
+            <option value="false" selected="true">False</option>
+        </param>
+        <param name="scores" type="select" label="Scores" help="Specify if alignment score metrics should be included in the output." >
+            <option value="true">True</option>
+            <option value="false" selected="true">False</option>
+        </param>
+        <param name="regions" type="select" label="Regions" help="Specify if IMGT framework and CDR regions should be included in the output." >
+            <option value="true">True</option>
+            <option value="false" selected="true">False</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="out_file" label = "Change-o DB ${input.name}"/>
+    </outputs>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv359</citation>
+    </citations>
+    <help>
+Creates a tab-delimited Change-O database file from IMGT output by running the imgt sub-command of Change-O MakeDb.py.
+
+**No parse**, **Scores** and **Regions** switch on the MakeDb.py options of the same name when set to True.
+    </help>
+</tool>

diff -r 000000000000 -r 183edf446dcf parsedb.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/parsedb.sh Mon Jul 17 07:44:27 2017 -0400

[

@@ -0,0 +1,92 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+action=$1
+input=$2
+output=$3
+
+cp $input $PWD/input.tab
+
+input="$PWD/input.tab"
+
+mkdir $PWD/outdir
+
+if [ "fasta" == "$action" ] ; then
+ python3 $dir/ParseDb.py fasta -d $input --outdir $PWD/outdir --outname output
+ mv $PWD/outdir/output_sequences.fasta $output
+elif [ "clip" == "$action" ] ; then
+ python3 $dir/ParseDb.py clip -d $input --outdir $PWD/outdir --outname output
+ mv $PWD/outdir/output_sequences.fasta $output
+elif [ "split" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ label=$5
+ mkdir $PWD/split
+ python3 $dir/ParseDb.py split -d $input --outdir $PWD/split --outname output -f $field
+ #rename "s/output_${field}/$label/" $PWD/split/*
+elif [ "add" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ value=$5
+ python3 $dir/ParseDb.py add -d $input --outdir $PWD/outdir --outname output -f $field -u $value
+ mv $PWD/outdir/output_parse-add.tab $output
+elif [ "delete" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ value=$5
+ regex=$6
+ if [ "true" == "$regex" ] ; then
+ regex="--regex"
+ else
+ regex=""
+ fi
+ python3 $dir/ParseDb.py delete -d $input --outdir $PWD/outdir --outname output -f $field -u $value --logic any $regex
+ mv $PWD/outdir/output_parse-delete.tab $output
+elif [ "drop" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ python3 $dir/ParseDb.py drop -d $input --outdir $PWD/outdir --outname output -f $field
+ mv $PWD/outdir/output_parse-drop.tab $output
+elif [ "index" == "$action" ] ; then
+ field=$4
+ python3 $dir/ParseDb.py index -d $input --outdir $PWD/outdir --outname output -f $field
+ mv $PWD/outdir/output_parse-index.tab $output
+elif [ "rename" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ newname=$5
+ python3 $dir/ParseDb.py rename -d $input --outdir $PWD/outdir --outname output -f $field -k $newname
+ mv $PWD/outdir/output_parse-rename.tab $output
+elif [ "select" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ value=$5
+ regex=$6
+ if [ "true" == "$regex" ] ; then
+ regex="--regex"
+ else
+ regex=""
+ fi
+ python3 $dir/ParseDb.py select -d $input --outdir $PWD/outdir --outname output -f $field -u $value --logic any $regex
+ mv $PWD/outdir/output_parse-select.tab $output
+elif [ "sort" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ num=$5
+ tmp=""
+ if [ "true" == "$num" ] ; then
+ tmp="--num"
+ fi
+ desc=$6
+ if [ "true" == "$desc" ] ; then
+ tmp="--descend $tmp"
+ fi
+ python3 $dir/ParseDb.py sort -d $input --outdir $PWD/outdir --outname output -f $field $tmp
+ mv $PWD/outdir/output_parse-sort.tab $output
+elif [ "update" == "$action" ] ; then
+ field="`cat $input 2> /dev/null | head -n 1 | cut -f$4 | tr '\n\r' ' '`"
+ value=$5
+ replace=$6
+ regex=$7
+ if [ "true" == "$regex" ] ; then
+ regex="--regex"
+ else
+ regex=""
+ fi
+ python3 $dir/ParseDb.py update -d $input --outdir $PWD/outdir --outname output -f $field -u $value -t $replace $regex
+ mv $PWD/outdir/output_parse-update.tab $output
+fi
+

diff -r 000000000000 -r 183edf446dcf parsedb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/parsedb.xml Mon Jul 17 07:44:27 2017 -0400

[

@@ -0,0 +1,134 @@
+<tool id="change_o_parsedb_galaxy" name="ParseDB" version="1.0">
+    <description>Change-O</description>
+    <!-- Every branch runs: parsedb.sh ACTION INPUT OUTPUT [action-specific arguments].
+         Paths and free-text values are single-quoted so spaces cannot split them into several arguments. -->
+    <command interpreter="bash">
+        #if $action.action_select=="fasta"
+            parsedb.sh fasta '$input' '$out_file'
+        #elif $action.action_select=="clip"
+            parsedb.sh clip '$input' '$out_file'
+        #elif $action.action_select=="split"
+            parsedb.sh split '$input' '$out_file' $action.column '$input.name'
+        #elif $action.action_select=="add"
+            parsedb.sh add '$input' '$out_file' '$action.column' '$action.value'
+        #elif $action.action_select=="delete"
+            parsedb.sh delete '$input' '$out_file' $action.column '$action.value' $action.regex
+        #elif $action.action_select=="drop"
+            parsedb.sh drop '$input' '$out_file' $action.column
+        #elif $action.action_select=="index"
+            parsedb.sh index '$input' '$out_file' '$action.column'
+        #elif $action.action_select=="rename"
+            parsedb.sh rename '$input' '$out_file' $action.column '$action.newname'
+        #elif $action.action_select=="select"
+            parsedb.sh select '$input' '$out_file' $action.column '$action.value' $action.regex
+        #elif $action.action_select=="sort"
+            parsedb.sh sort '$input' '$out_file' $action.column $action.num $action.desc
+        #elif $action.action_select=="update"
+            parsedb.sh update '$input' '$out_file' $action.column '$action.value' '$action.update' $action.regex
+        #end if
+    </command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Change-o DB file" />
+        <conditional name="action">
+            <param name="action_select" type="select" label="Action">
+                <option value="fasta">Create a fasta file from database records</option>
+                <option value="clip">Create a clip-fasta file from database records</option>
+                <option value="split">Split database files by field values</option>
+                <option value="add">Add field and value pairs</option>
+                <option value="delete">Delete specific records</option>
+                <option value="drop">Delete entire fields</option>
+                <option value="index">Add a numeric index field</option>
+                <option value="rename">Renames fields</option>
+                <option value="select">Select specific records</option>
+                <option value="sort">Sort records by field values</option>
+                <option value="update">Update field and value pairs</option>
+            </param>
+            <when value="fasta" />
+            <when value="clip" />
+            <when value="split">
+                <param name="column" label="Select the column to split on" type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+            </when>
+            <when value="add">
+                <param name="column" type="text" size="20" label="The new column name." />
+                <param name="value" type="text" size="20" label="The value that will be put in the new column" />
+            </when>
+            <when value="delete">
+                <param name="column" label="Select the column to search on." type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+                <param name="value" type="text" size="20" label="The value that will be used" />
+                <!-- parsedb.sh turns on regex matching only when it receives "true". -->
+                <param name="regex" type="select" label="Regex" help="Treat values as regular expressions and allow partial string matches.">
+                    <option value="false" selected="true">False</option>
+                    <option value="true">True</option>
+                </param>
+            </when>
+            <when value="drop">
+                <param name="column" label="Select the column to remove" type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+            </when>
+            <when value="index">
+                <param name="column" type="text" size="20" value="INDEX" label="The index column name" />
+            </when>
+            <when value="rename">
+                <param name="column" label="Select the column to rename" type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+                <param name="newname" type="text" size="20" value="newname" label="The new column name" />
+            </when>
+            <when value="select">
+                <param name="column" label="Select the column to search on" type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+                <param name="value" type="text" size="20" label="The value that will be used" />
+                <param name="regex" type="select" label="Regex" help="Treat values as regular expressions and allow partial string matches">
+                    <option value="false" selected="true">False</option>
+                    <option value="true">True</option>
+                </param>
+            </when>
+            <when value="sort">
+                <param name="column" label="Select the column to sort on" type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+                <param name="num" type="select" label="Numerical" help="Define the sort column as numeric rather than textual.">
+                    <option value="false" selected="true">False</option>
+                    <option value="true">True</option>
+                </param>
+                <param name="desc" type="select" label="Descending" help="Sort records in descending order.">
+                    <option value="false" selected="true">False</option>
+                    <option value="true">True</option>
+                </param>
+            </when>
+            <when value="update">
+                <param name="column" label="Select the column to search on" type="data_column" data_ref="input" numerical="False" use_header_names="True" force_select="True" />
+                <param name="value" type="text" size="20" label="The value that will be replaced" />
+                <param name="update" type="text" size="20" label="The value that will replace the original" />
+                <param name="regex" type="select" label="Regex" help="Treat values as regular expressions and allow partial string matches">
+                    <option value="false" selected="true">False</option>
+                    <option value="true">True</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="out_file" label = "Change-o DB ${input.name}">
+            <filter>action['action_select'] != "split"</filter>
+        </data>
+        <!-- For "split", result files are collected from the job's "split" directory. -->
+        <data format="txt" name="split">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tab" ext="tabular" directory="split" visible="true" />
+            <filter>action['action_select'] == "split"</filter>
+        </data>
+    </outputs>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv359</citation>
+    </citations>
+    <help>
+**What it does**
+
+Runs one Change-O ``ParseDb.py`` subcommand on a tab-delimited Change-O database.
+
+**Actions**
+
+- *fasta* / *clip*: create a fasta or clip-fasta file from database records.
+- *split*: split the database into separate files by the values of a field.
+- *add*: add a new field with a fixed value.
+- *delete* / *select*: delete or keep the records whose field matches a value.
+- *drop*: delete an entire field.
+- *index*: add a numeric index field.
+- *rename*: rename a field.
+- *sort*: sort records by the values of a field.
+- *update*: replace a value in a field with another value.
+    </help>
+</tool>