Previous changeset 77:58d2377b507d (2019-06-19) Next changeset 79:98e3fecedd2b (2020-09-01) |
Commit message:
Uploaded |
modified:
LICENSE README.md aa_histogram.r baseline/Baseline_Functions.r baseline/Baseline_Main.r baseline/FiveS_Mutability.RData baseline/FiveS_Substitution.RData baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa baseline/IMGTVHreferencedataset20161215.fa baseline/IMGTVHreferencedataset20161215.fasta baseline/baseline_url.txt baseline/comparePDFs.r baseline/filter.r baseline/script_imgt.py baseline/script_xlsx.py baseline/wrapper.sh change_o/change_o_url.txt change_o/define_clones.r change_o/define_clones.sh change_o/makedb.sh change_o/select_first_in_clone.r check_unique_id.r datatypes_conf.xml gene_identification.py imgt_loader.r merge.r merge_and_filter.r naive_output.r new_imgt.r pattern_plots.r plot_pdf.r sequence_overview.r shm_clonality.htm shm_csr.htm shm_csr.py shm_csr.r shm_csr.xml shm_downloads.htm shm_first.htm shm_frequency.htm shm_overview.htm shm_selection.htm shm_transition.htm style.tar.gz subclass_definition.db.nhr subclass_definition.db.nin subclass_definition.db.nsq summary_to_fasta.py wrapper.sh |
added:
.gitattributes .gitignore change_o/DefineClones.py change_o/MakeDb.py mutation_column_checker.py |
b |
diff -r 58d2377b507d -r aff3ba86ef7a .gitattributes --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitattributes Mon Aug 31 11:20:08 2020 -0400 |
b |
@@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto |
b |
diff -r 58d2377b507d -r aff3ba86ef7a .gitignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitignore Mon Aug 31 11:20:08 2020 -0400 |
b |
@@ -0,0 +1,4 @@ + +shm_csr\.tar\.gz + +\.vscode/settings\.json |
b |
diff -r 58d2377b507d -r aff3ba86ef7a change_o/DefineClones.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/DefineClones.py Mon Aug 31 11:20:08 2020 -0400 |
[ |
b'@@ -0,0 +1,739 @@\n+#!/usr/bin/env python3\n+"""\n+Assign Ig sequences into clones\n+"""\n+\n+# Info\n+__author__ = \'Namita Gupta, Jason Anthony Vander Heiden, Gur Yaari, Mohamed Uduman\'\n+from changeo import __version__, __date__\n+\n+# Imports\n+import os\n+import re\n+import sys\n+from argparse import ArgumentParser\n+from collections import OrderedDict\n+from itertools import chain\n+from textwrap import dedent\n+from time import time\n+from Bio.Seq import translate\n+\n+# Presto and changeo imports\n+from presto.Defaults import default_out_args\n+from presto.IO import printLog, printProgress, printCount, printWarning, printError\n+from presto.Multiprocessing import manageProcesses\n+from changeo.Defaults import default_format, default_v_field, default_j_field, default_junction_field\n+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs\n+from changeo.Distance import distance_models, calcDistances, formClusters\n+from changeo.IO import countDbFile, getDbFields, getFormatOperators, getOutputHandle, \\\n+ AIRRWriter, ChangeoWriter\n+from changeo.Multiprocessing import DbResult, feedDbQueue, processDbQueue\n+\n+# Defaults\n+default_translate = False\n+default_distance = 0.0\n+default_index_mode = \'gene\'\n+default_index_action = \'set\'\n+default_distance_model = \'ham\'\n+default_norm = \'len\'\n+default_sym = \'avg\'\n+default_linkage = \'single\'\n+default_max_missing=0\n+choices_distance_model = (\'ham\', \'aa\', \'hh_s1f\', \'hh_s5f\',\n+ \'mk_rs1nf\', \'mk_rs5nf\',\n+ \'hs1f_compat\', \'m1n_compat\')\n+\n+\n+def filterMissing(data, seq_field=default_junction_field, v_field=default_v_field,\n+ j_field=default_j_field, max_missing=default_max_missing):\n+ """\n+ Splits a set of sequence into passed and failed groups based on the number\n+ of missing characters in the sequence\n+\n+ Arguments:\n+ data : changeo.Multiprocessing.DbData object.\n+ seq_field : sequence field to filter on.\n+ v_field : field containing the V call.\n+ j_field : field containing the J call.\n+ max_missing : maximum number of missing characters (non-ACGT) to permit before failing the record.\n+\n+ Returns:\n+ changeo.Multiprocessing.DbResult : objected containing filtered records.\n+ """\n+ # Function to validate the sequence string\n+ def _pass(seq):\n+ if len(seq) > 0 and len(re.findall(r\'[^ACGT]\', seq)) <= max_missing:\n+ return True\n+ else:\n+ return False\n+\n+ # Define result object for iteration and get data records\n+ result = DbResult(data.id, data.data)\n+\n+ if not data:\n+ result.data_pass = []\n+ result.data_fail = data.data\n+ return result\n+\n+ result.data_pass = []\n+ result.data_fail = []\n+ for rec in data.data:\n+ seq = rec.getField(seq_field)\n+ if _pass(seq): result.data_pass.append(rec)\n+ else: result.data_fail.append(rec)\n+\n+ # Add V(D)J to log\n+ result.log[\'ID\'] = \',\'.join([str(x) for x in data.id])\n+ result.log[\'VCALL\'] = \',\'.join(set([(r.getVAllele(field=v_field) or \'\') for r in data.data]))\n+ result.log[\'JCALL\'] = \',\'.join(set([(r.getJAllele(field=j_field) or \'\') for r in data.data]))\n+ result.log[\'JUNCLEN\'] = \',\'.join(set([(str(len(r.junction)) or \'0\') for r in data.data]))\n+ result.log[\'CLONED\'] = len(result.data_pass)\n+ result.log[\'FILTERED\'] = len(result.data_fail)\n+\n+ return result\n+\n+\n+def indexByIdentity(index, key, rec, group_fields=None):\n+ """\n+ Updates a preclone index with a simple key\n+\n+ Arguments:\n+ index : preclone index from groupByGene\n+ key : index key\n+ rec : Receptor to add to the index\n+ group_fields : additional annotation fields to use to group preclones;\n+ if None use only V, J and junction length\n+\n+ Returns:\n+ None : Updates index with new key and records.\n+ """\n+ index.setdefault(tuple(key), []).append(rec)\n+\n+\n+def i'..b'.add_argument(\'--norm\', action=\'store\', dest=\'norm\',\n+ choices=(\'len\', \'mut\', \'none\'), default=default_norm,\n+ help=\'\'\'Specifies how to normalize distances. One of none\n+ (do not normalize), len (normalize by length),\n+ or mut (normalize by number of mutations between sequences).\'\'\')\n+ group.add_argument(\'--sym\', action=\'store\', dest=\'sym\',\n+ choices=(\'avg\', \'min\'), default=default_sym,\n+ help=\'\'\'Specifies how to combine asymmetric distances. One of avg\n+ (average of A->B and B->A) or min (minimum of A->B and B->A).\'\'\')\n+ group.add_argument(\'--link\', action=\'store\', dest=\'linkage\',\n+ choices=(\'single\', \'average\', \'complete\'), default=default_linkage,\n+ help=\'\'\'Type of linkage to use for hierarchical clustering.\'\'\')\n+ group.add_argument(\'--maxmiss\', action=\'store\', dest=\'max_missing\', type=int,\n+ default=default_max_missing,\n+ help=\'\'\'The maximum number of non-ACGT characters (gaps or Ns) to \n+ permit in the junction sequence before excluding the record \n+ from clonal assignment. Note, under single linkage \n+ non-informative positions can create artifactual links \n+ between unrelated sequences. Use with caution.\'\'\')\n+ parser.set_defaults(group_func=groupByGene)\n+ parser.set_defaults(clone_func=distanceClones)\n+ \n+ return parser\n+\n+\n+if __name__ == \'__main__\':\n+ """\n+ Parses command line arguments and calls main function\n+ """\n+ # Parse arguments\n+ parser = getArgParser()\n+ checkArgs(parser)\n+ args = parser.parse_args()\n+ args_dict = parseCommonArgs(args)\n+\n+ # # Set default fields if not specified.\n+ # default_fields = {\'seq_field\': default_junction_field,\n+ # \'v_field\': default_v_field,\n+ # \'j_field\': default_j_field}\n+ #\n+ # # Default Change-O fields\n+ # if args_dict[\'format\'] == \'changeo\':\n+ # for f in default_fields:\n+ # if args_dict[f] is None: args_dict[f] = default_fields[f]\n+ # else: args_dict[f] = args_dict[f].upper()\n+ #\n+ # # Default AIRR fields\n+ # if args_dict[\'format\'] == \'airr\':\n+ # for f in default_fields:\n+ # if args_dict[f] is None: args_dict[f] = ChangeoSchema.toAIRR(default_fields[f])\n+ # else: args_dict[f] = args_dict[f].lower()\n+\n+ # Define grouping and cloning function arguments\n+ args_dict[\'group_args\'] = {\'action\': args_dict[\'action\'],\n+ \'mode\':args_dict[\'mode\']}\n+ args_dict[\'clone_args\'] = {\'model\': args_dict[\'model\'],\n+ \'distance\': args_dict[\'distance\'],\n+ \'norm\': args_dict[\'norm\'],\n+ \'sym\': args_dict[\'sym\'],\n+ \'linkage\': args_dict[\'linkage\']}\n+\n+ # Get distance matrix\n+ try:\n+ args_dict[\'clone_args\'][\'dist_mat\'] = distance_models[args_dict[\'model\']]\n+ except KeyError:\n+ printError(\'Unrecognized distance model: %s\' % args_dict[\'model\'])\n+\n+ # Clean argument dictionary\n+ del args_dict[\'action\']\n+ del args_dict[\'mode\']\n+ del args_dict[\'model\']\n+ del args_dict[\'distance\']\n+ del args_dict[\'norm\']\n+ del args_dict[\'sym\']\n+ del args_dict[\'linkage\']\n+\n+ # Clean arguments dictionary\n+ del args_dict[\'db_files\']\n+ if \'out_files\' in args_dict: del args_dict[\'out_files\']\n+\n+ # Call main function for each input file\n+ for i, f in enumerate(args.__dict__[\'db_files\']):\n+ args_dict[\'db_file\'] = f\n+ args_dict[\'out_file\'] = args.__dict__[\'out_files\'][i] \\\n+ if args.__dict__[\'out_files\'] else None\n+ defineClones(**args_dict)\n' |
b |
diff -r 58d2377b507d -r aff3ba86ef7a change_o/MakeDb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/MakeDb.py Mon Aug 31 11:20:08 2020 -0400 |
[ |
b'@@ -0,0 +1,885 @@\n+#!/usr/bin/env python3\n+"""\n+Create tab-delimited database file to store sequence alignment information\n+"""\n+\n+# Info\n+__author__ = \'Namita Gupta, Jason Anthony Vander Heiden\'\n+from changeo import __version__, __date__\n+\n+# Imports\n+import os\n+import re\n+import csv\n+from argparse import ArgumentParser\n+from collections import OrderedDict\n+from textwrap import dedent\n+from time import time\n+from Bio import SeqIO\n+\n+# Presto and changeo imports\n+from presto.Annotation import parseAnnotation\n+from presto.IO import countSeqFile, printLog, printMessage, printProgress, printError, printWarning, readSeqFile\n+from changeo.Defaults import default_format, default_out_args\n+from changeo.Commandline import CommonHelpFormatter, checkArgs, getCommonArgParser, parseCommonArgs\n+from changeo.Alignment import RegionDefinition\n+from changeo.Gene import buildGermline\n+from changeo.IO import countDbFile, extractIMGT, readGermlines, getFormatOperators, getOutputHandle, \\\n+ AIRRWriter, ChangeoWriter, IgBLASTReader, IgBLASTReaderAA, IMGTReader, IHMMuneReader\n+from changeo.Receptor import ChangeoSchema, AIRRSchema\n+\n+# 10X Receptor attributes\n+cellranger_base = [\'cell\', \'c_call\', \'conscount\', \'umicount\']\n+cellranger_extended = [\'cell\', \'c_call\', \'conscount\', \'umicount\',\n+ \'v_call_10x\', \'d_call_10x\', \'j_call_10x\',\n+ \'junction_10x\', \'junction_10x_aa\']\n+\n+def readCellRanger(cellranger_file, fields=cellranger_base):\n+ """\n+ Load a Cell Ranger annotation table\n+\n+ Arguments:\n+ cellranger_file (str): path to the annotation file.\n+ fields (list): list of fields to keep.\n+\n+ Returns:\n+ dict: dict of dicts with contig_id as the primary key.\n+ """\n+ # Mapping of 10X annotations to Receptor attributes\n+ cellranger_map = {\'cell\': \'barcode\',\n+ \'c_call\': \'c_gene\',\n+ \'locus\': \'chain\',\n+ \'conscount\': \'reads\',\n+ \'umicount\': \'umis\',\n+ \'v_call_10x\': \'v_gene\',\n+ \'d_call_10x\': \'d_gene\',\n+ \'j_call_10x\': \'j_gene\',\n+ \'junction_10x\': \'cdr3_nt\',\n+ \'junction_10x_aa\': \'cdr3\'}\n+\n+ # Function to parse individual fields\n+ def _parse(x):\n+ return \'\' if x == \'None\' else x\n+\n+ # Generate annotation dictionary\n+ ann_dict = {}\n+ with open(cellranger_file) as csv_file:\n+ # Detect delimiters\n+ dialect = csv.Sniffer().sniff(csv_file.readline())\n+ csv_file.seek(0)\n+ # Read in annotation file\n+ csv_reader = csv.DictReader(csv_file, dialect=dialect)\n+\n+ # Generate annotation dictionary\n+ for row in csv_reader:\n+ ann_dict[row[\'contig_id\']] = {f: _parse(row[cellranger_map[f]]) for f in fields}\n+\n+ return ann_dict\n+\n+\n+def addGermline(receptor, references, amino_acid=False):\n+ """\n+ Add full length germline to Receptor object\n+\n+ Arguments:\n+ receptor (changeo.Receptor.Receptor): Receptor object to modify.\n+ references (dict): dictionary of IMGT-gapped references sequences.\n+ amino_acid (bool): if True build amino acid germline, otherwise build nucleotide germline\n+\n+ Returns:\n+ changeo.Receptor.Receptor: modified Receptor with the germline sequence added.\n+ """\n+ if amino_acid:\n+ __, germlines, __ = buildGermline(receptor, references, seq_field=\'sequence_aa_imgt\',\n+ amino_acid=True)\n+ germline_seq = None if germlines is None else germlines[\'full\']\n+ receptor.setField(\'germline_aa_imgt\', germline_seq)\n+ else:\n+ __, germlines, __ = buildGermline(receptor, references, amino_acid=False)\n+ germline_seq = None if germlines is None else germlines[\'full\']\n+ receptor.setField(\'germline_imgt\', germline_seq)\n+\n+ return receptor\n+\n+\n+def getIDforIMGT(seq_file):\n+ """\n+ Create a sequenc'..b' required=True,\n+ help=\'\'\'iHMMune-Align output file.\'\'\')\n+ group_ihmm.add_argument(\'-r\', nargs=\'+\', action=\'store\', dest=\'repo\', required=True,\n+ help=\'\'\'List of folders and/or FASTA files containing\n+ the set of germline sequences used by iHMMune-Align. These\n+ reference sequences must contain IMGT-numbering spacers (gaps)\n+ in the V segment.\'\'\')\n+ group_ihmm.add_argument(\'-s\', action=\'store\', nargs=\'+\', dest=\'seq_files\',\n+ required=True,\n+ help=\'\'\'List of input FASTA files (with .fasta, .fna or .fa\n+ extension) containing sequences.\'\'\')\n+ group_ihmm.add_argument(\'--10x\', action=\'store\', nargs=\'+\', dest=\'cellranger_file\',\n+ help=\'\'\'Table file containing 10X annotations (with .csv or .tsv\n+ extension).\'\'\')\n+ group_ihmm.add_argument(\'--asis-id\', action=\'store_true\', dest=\'asis_id\',\n+ help=\'\'\'Specify to prevent input sequence headers from being parsed\n+ to add new columns to database. Parsing of sequence headers requires\n+ headers to be in the pRESTO annotation format, so this should be specified\n+ when sequence headers are incompatible with the pRESTO annotation scheme.\n+ Note, unrecognized header formats will default to this behavior.\'\'\')\n+ group_ihmm.add_argument(\'--partial\', action=\'store_true\', dest=\'partial\',\n+ help=\'\'\'If specified, include incomplete V(D)J alignments in\n+ the pass file instead of the fail file. An incomplete alignment\n+ is defined as a record for which a valid IMGT-gapped sequence \n+ cannot be built or that is missing a V gene assignment, \n+ J gene assignment, junction region, or productivity call.\'\'\')\n+ group_ihmm.add_argument(\'--extended\', action=\'store_true\', dest=\'extended\',\n+ help=\'\'\'Specify to include additional aligner specific fields in the output. \n+ Adds the path score of the iHMMune-Align hidden Markov model as vdj_score;\n+ adds fwr1, fwr2, fwr3, fwr4, cdr1, cdr2 and cdr3.\'\'\')\n+ parser_ihmm.set_defaults(func=parseIHMM)\n+\n+ return parser\n+ \n+ \n+if __name__ == "__main__":\n+ """\n+ Parses command line arguments and calls main\n+ """\n+ parser = getArgParser()\n+ checkArgs(parser)\n+ args = parser.parse_args()\n+ args_dict = parseCommonArgs(args, in_arg=\'aligner_files\')\n+\n+ # Set no ID parsing if sequence files are not provided\n+ if \'seq_files\' in args_dict and not args_dict[\'seq_files\']:\n+ args_dict[\'asis_id\'] = True\n+\n+ # Delete\n+ if \'aligner_files\' in args_dict: del args_dict[\'aligner_files\']\n+ if \'seq_files\' in args_dict: del args_dict[\'seq_files\']\n+ if \'out_files\' in args_dict: del args_dict[\'out_files\']\n+ if \'command\' in args_dict: del args_dict[\'command\']\n+ if \'func\' in args_dict: del args_dict[\'func\'] \n+\n+ # Call main\n+ for i, f in enumerate(args.__dict__[\'aligner_files\']):\n+ args_dict[\'aligner_file\'] = f\n+ args_dict[\'seq_file\'] = args.__dict__[\'seq_files\'][i] \\\n+ if args.__dict__[\'seq_files\'] else None\n+ args_dict[\'out_file\'] = args.__dict__[\'out_files\'][i] \\\n+ if args.__dict__[\'out_files\'] else None\n+ args_dict[\'cellranger_file\'] = args.__dict__[\'cellranger_file\'][i] \\\n+ if args.__dict__[\'cellranger_file\'] else None\n+ args.func(**args_dict)\n' |
b |
diff -r 58d2377b507d -r aff3ba86ef7a change_o/define_clones.sh --- a/change_o/define_clones.sh Wed Jun 19 04:31:44 2019 -0400 +++ b/change_o/define_clones.sh Mon Aug 31 11:20:08 2020 -0400 |
b |
@@ -21,7 +21,7 @@ output=${10} output2=${11} - DefineClones.py -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link + python3 $dir/DefineClones.py -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 else @@ -29,7 +29,7 @@ output=$4 output2=$5 - DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method + python3 $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 fi |
b |
diff -r 58d2377b507d -r aff3ba86ef7a change_o/makedb.sh --- a/change_o/makedb.sh Wed Jun 19 04:31:44 2019 -0400 +++ b/change_o/makedb.sh Mon Aug 31 11:20:08 2020 -0400 |
b |
@@ -29,7 +29,7 @@ echo "makedb: $PWD/outdir" -MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions +python3 $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions mv $PWD/outdir/output_db-pass.tab $output |
b |
diff -r 58d2377b507d -r aff3ba86ef7a merge_and_filter.r --- a/merge_and_filter.r Wed Jun 19 04:31:44 2019 -0400 +++ b/merge_and_filter.r Mon Aug 31 11:20:08 2020 -0400 |
[ |
@@ -53,10 +53,6 @@ hotspots = fix_column_names(hotspots) AAs = fix_column_names(AAs) -if(!("Sequence.number" %in% names(summ))){ - summ["Sequence.number"] = 1:nrow(summ) -} - if(method == "blastn"){ #"qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" gene_identification = gene_identification[!duplicated(gene_identification$qseqid),] @@ -173,17 +169,10 @@ write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) -missing.FR1 = result$FR1.IMGT.seq == "" | is.na(result$FR1.IMGT.seq) -missing.CDR1 = result$CDR1.IMGT.seq == "" | is.na(result$CDR1.IMGT.seq) -missing.FR2 = result$FR2.IMGT.seq == "" | is.na(result$FR2.IMGT.seq) -missing.CDR2 = result$CDR2.IMGT.seq == "" | is.na(result$CDR2.IMGT.seq) -missing.FR3 = result$FR3.IMGT.seq == "" | is.na(result$FR3.IMGT.seq) - -print(paste("Number of empty CDR1 sequences:", sum(missing.FR1))) -print(paste("Number of empty FR2 sequences:", sum(missing.CDR1))) -print(paste("Number of empty CDR2 sequences:", sum(missing.FR2))) -print(paste("Number of empty FR3 sequences:", sum(missing.CDR2))) -print(paste("Number of empty FR3 sequences:", sum(missing.FR3))) +print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "", na.rm=T))) +print(paste("Number of empty FR2 sequences:", sum(result$FR2.IMGT.seq == "", na.rm=T))) +print(paste("Number of empty CDR2 sequences:", sum(result$CDR2.IMGT.seq == "", na.rm=T))) +print(paste("Number of empty FR3 sequences:", sum(result$FR3.IMGT.seq == "", na.rm=T))) if(empty.region.filter == "leader"){ result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] |
b |
diff -r 58d2377b507d -r aff3ba86ef7a mutation_column_checker.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mutation_column_checker.py Mon Aug 31 11:20:08 2020 -0400 |
[ |
@@ -0,0 +1,27 @@ +import re + +mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") + +with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle: + first = True + fr3_index = -1 + for i, line in enumerate(file_handle): + line_split = line.split("\t") + if first: + fr3_index = line_split.index("FR3-IMGT") + first = False + continue + + if len(line_split) < fr3_index: + continue + + fr3_data = line_split[fr3_index] + if len(fr3_data) > 5: + try: + test = [mutationMatcher.match(x).groups() for x in fr3_data.split("|") if x] + except: + print(line_split[1]) + print("Something went wrong at line {line} with:".format(line=line_split[0])) + #print([x for x in fr3_data.split("|") if not mutationMatcher.match(x)]) + if i % 100000 == 0: + print(i) |
b |
diff -r 58d2377b507d -r aff3ba86ef7a shm_csr.xml --- a/shm_csr.xml Wed Jun 19 04:31:44 2019 -0400 +++ b/shm_csr.xml Mon Aug 31 11:20:08 2020 -0400 |
b |
@@ -2,13 +2,13 @@ <description></description> <requirements> <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.16.0">numpy</requirement> <requirement type="package" version="1.2.0">xlrd</requirement> <requirement type="package" version="3.0.0">r-ggplot2</requirement> <requirement type="package" version="1.4.3">r-reshape2</requirement> <requirement type="package" version="0.5.0">r-scales</requirement> <requirement type="package" version="3.4_5">r-seqinr</requirement> <requirement type="package" version="1.11.4">r-data.table</requirement> - <!--<requirement type="package" version="0.4.5">changeo</requirement>--> </requirements> <command interpreter="bash"> #if str ( $filter_unique.filter_unique_select ) == "remove": |