Mercurial > repos > hepcat72 > barcode_splitter_multi
comparison barcode_splitter_multi/barcode_splitter.py @ 1:e3f91eee8c75 draft
Uploaded
author | hepcat72 |
---|---|
date | Mon, 29 Aug 2016 15:21:20 -0400 |
parents | 50df2d629d51 |
children | 5e0fd61660b7 |
comparison
equal
deleted
inserted
replaced
0:50df2d629d51 | 1:e3f91eee8c75 |
---|---|
8 import re | 8 import re |
9 import sys | 9 import sys |
10 from collections import defaultdict | 10 from collections import defaultdict |
11 import subprocess | 11 import subprocess |
12 | 12 |
13 __version__ = "0.12" | 13 __version__ = "0.13" |
14 __author__ = "Lance Parsons & Robert Leach" | 14 __author__ = "Lance Parsons & Robert Leach" |
15 __author_email__ = "lparsons@princeton.edu,rleach@princeton.edu" | 15 __author_email__ = "lparsons@princeton.edu,rleach@princeton.edu" |
16 __copyright__ = "Copyright 2011, Lance Parsons & Robert leach" | 16 __copyright__ = "Copyright 2011, Lance Parsons & Robert leach" |
17 __license__ = ("BSD 2-Clause License " | 17 __license__ = ("BSD 2-Clause License " |
18 "http://www.opensource.org/licenses/BSD-2-Clause") | 18 "http://www.opensource.org/licenses/BSD-2-Clause") |
242 | 242 |
243 if(barcode_path is None): | 243 if(barcode_path is None): |
244 cur_outputs = unmatchedOutputs | 244 cur_outputs = unmatchedOutputs |
245 unmatched_path = getBarcodeMatchPath(approx_bc_dict,index_seqs) | 245 unmatched_path = getBarcodeMatchPath(approx_bc_dict,index_seqs) |
246 incrementNDDictInt(unmatched_counts,unmatched_path) | 246 incrementNDDictInt(unmatched_counts,unmatched_path) |
247 if (not UNMATCHED in unmatched_path and | |
248 not MULTIMATCHED in unmatched_path): | |
249 sys.stderr.write('WARNING: Sequences match barcodes on ' | |
250 'different rows: %s for sequence ID: %s\n' | |
251 %(index_seqs, prim_index_read['seq_id'])) | |
247 elif(MULTIMATCHED in barcode_path): | 252 elif(MULTIMATCHED in barcode_path): |
248 cur_outputs = multimatchedOutputs | 253 cur_outputs = multimatchedOutputs |
249 unmatched_path = getBarcodeMatchPath(approx_bc_dict,barcode_path) | 254 unmatched_path = getBarcodeMatchPath(approx_bc_dict,barcode_path) |
250 incrementNDDictInt(unmatched_counts,unmatched_path) | 255 incrementNDDictInt(unmatched_counts,unmatched_path) |
251 sys.stderr.write('WARNING: More than one barcode matches %s, ' | 256 sys.stderr.write('WARNING: More than one barcode matches %s, ' |
636 "UNMATCHED", and "MULTIMATCHED" values) to use in the summary output. The | 641 "UNMATCHED", and "MULTIMATCHED" values) to use in the summary output. The |
637 intent is to give the user useful feedback about which level(s) of barcodes | 642 intent is to give the user useful feedback about which level(s) of barcodes |
638 are not matching instead of listing all sequences as simply "UNMATCHED". | 643 are not matching instead of listing all sequences as simply "UNMATCHED". |
639 It also reduces complexity of the unmatched output by not including | 644 It also reduces complexity of the unmatched output by not including |
640 specific barcodes, which do not matter in unmatched/multimatched cases.''' | 645 specific barcodes, which do not matter in unmatched/multimatched cases.''' |
641 cur_dict = in_dict | |
642 dim = 0 | 646 dim = 0 |
643 path = [] | 647 path = [] |
644 for cur_key in keys_list: | 648 for cur_key in keys_list: |
645 dim += 1 | 649 dim += 1 |
646 if cur_key is MATCHED or cur_key is MULTIMATCHED: | 650 if cur_key is MATCHED or cur_key is MULTIMATCHED: |
647 path.append(cur_key) | 651 path.append(cur_key) |
648 elif cur_key in cur_dict: | 652 elif cur_key in in_dict[ANY][str(dim)]: |
649 path.append(MATCHED) | 653 path.append(MATCHED) |
650 else: | 654 else: |
651 path.append(UNMATCHED) | 655 path.append(UNMATCHED) |
652 cur_dict = in_dict[ANY][str(dim)] | |
653 return path | 656 return path |
654 | 657 |
655 def setNDApproxDictVal(in_dict, keys_list, mismatches): | 658 def setNDApproxDictVal(in_dict, keys_list, mismatches): |
656 '''Creates (or builds upon) an N-Dimensional ("ND") dictionary (in_dict) | 659 '''Creates (or builds upon) an N-Dimensional ("ND") dictionary (in_dict) |
657 whose keys are mismatch variants (constructed using the keys_list and the | 660 whose keys are mismatch variants (constructed using the keys_list and the |