comparison barcode_splitter_multi/barcode_splitter.py @ 1:e3f91eee8c75 draft

Uploaded
author hepcat72
date Mon, 29 Aug 2016 15:21:20 -0400
parents 50df2d629d51
children 5e0fd61660b7
comparison
equal deleted inserted replaced
0:50df2d629d51 1:e3f91eee8c75
8 import re 8 import re
9 import sys 9 import sys
10 from collections import defaultdict 10 from collections import defaultdict
11 import subprocess 11 import subprocess
12 12
13 __version__ = "0.12" 13 __version__ = "0.13"
14 __author__ = "Lance Parsons & Robert Leach" 14 __author__ = "Lance Parsons & Robert Leach"
15 __author_email__ = "lparsons@princeton.edu,rleach@princeton.edu" 15 __author_email__ = "lparsons@princeton.edu,rleach@princeton.edu"
16 __copyright__ = "Copyright 2011, Lance Parsons & Robert leach" 16 __copyright__ = "Copyright 2011, Lance Parsons & Robert leach"
17 __license__ = ("BSD 2-Clause License " 17 __license__ = ("BSD 2-Clause License "
18 "http://www.opensource.org/licenses/BSD-2-Clause") 18 "http://www.opensource.org/licenses/BSD-2-Clause")
242 242
243 if(barcode_path is None): 243 if(barcode_path is None):
244 cur_outputs = unmatchedOutputs 244 cur_outputs = unmatchedOutputs
245 unmatched_path = getBarcodeMatchPath(approx_bc_dict,index_seqs) 245 unmatched_path = getBarcodeMatchPath(approx_bc_dict,index_seqs)
246 incrementNDDictInt(unmatched_counts,unmatched_path) 246 incrementNDDictInt(unmatched_counts,unmatched_path)
247 if (not UNMATCHED in unmatched_path and
248 not MULTIMATCHED in unmatched_path):
249 sys.stderr.write('WARNING: Sequences match barcodes on '
250 'different rows: %s for sequence ID: %s\n'
251 %(index_seqs, prim_index_read['seq_id']))
247 elif(MULTIMATCHED in barcode_path): 252 elif(MULTIMATCHED in barcode_path):
248 cur_outputs = multimatchedOutputs 253 cur_outputs = multimatchedOutputs
249 unmatched_path = getBarcodeMatchPath(approx_bc_dict,barcode_path) 254 unmatched_path = getBarcodeMatchPath(approx_bc_dict,barcode_path)
250 incrementNDDictInt(unmatched_counts,unmatched_path) 255 incrementNDDictInt(unmatched_counts,unmatched_path)
251 sys.stderr.write('WARNING: More than one barcode matches %s, ' 256 sys.stderr.write('WARNING: More than one barcode matches %s, '
636 "UNMATCHED", and "MULTIMATCHED" values) to use in the summary output. The 641 "UNMATCHED", and "MULTIMATCHED" values) to use in the summary output. The
637 intent is to give the user useful feedback about which level(s) of barcodes 642 intent is to give the user useful feedback about which level(s) of barcodes
638 are not matching instead of listing all sequences as simply "UNMATCHED". 643 are not matching instead of listing all sequences as simply "UNMATCHED".
639 It also reduces complexity of the unmatched output by not including 644 It also reduces complexity of the unmatched output by not including
640 specific barcodes, which do not matter in unmatched/multimatched cases.''' 645 specific barcodes, which do not matter in unmatched/multimatched cases.'''
641 cur_dict = in_dict
642 dim = 0 646 dim = 0
643 path = [] 647 path = []
644 for cur_key in keys_list: 648 for cur_key in keys_list:
645 dim += 1 649 dim += 1
646 if cur_key is MATCHED or cur_key is MULTIMATCHED: 650 if cur_key is MATCHED or cur_key is MULTIMATCHED:
647 path.append(cur_key) 651 path.append(cur_key)
648 elif cur_key in cur_dict: 652 elif cur_key in in_dict[ANY][str(dim)]:
649 path.append(MATCHED) 653 path.append(MATCHED)
650 else: 654 else:
651 path.append(UNMATCHED) 655 path.append(UNMATCHED)
652 cur_dict = in_dict[ANY][str(dim)]
653 return path 656 return path
654 657
655 def setNDApproxDictVal(in_dict, keys_list, mismatches): 658 def setNDApproxDictVal(in_dict, keys_list, mismatches):
656 '''Creates (or builds upon) an N-Dimensional ("ND") dictionary (in_dict) 659 '''Creates (or builds upon) an N-Dimensional ("ND") dictionary (in_dict)
657 whose keys are mismatch variants (constructed using the keys_list and the 660 whose keys are mismatch variants (constructed using the keys_list and the