Mercurial > repos > davidvanzessen > shm_csr
diff baseline/script_imgt.py @ 63:8728284105ee draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 06 Dec 2017 08:04:52 -0500 |
parents | 4c5ba6b5d10d |
children | ba33b94637ca |
line wrap: on
line diff
--- a/baseline/script_imgt.py Tue Dec 05 10:57:13 2017 -0500
+++ b/baseline/script_imgt.py Wed Dec 06 08:04:52 2017 -0500
@@ -10,12 +10,18 @@
 
 args = parser.parse_args()
 
+print "script_imgt.py"
+print "input:", args.input
+print "ref:", args.ref
+print "output:", args.output
+print "id:", args.id
+
 refdic = dict()
 with open(args.ref, 'rU') as ref:
     currentSeq = ""
     currentId = ""
     for line in ref:
-        if line[0] is ">":
+        if line.startswith(">"):
             if currentSeq is not "" and currentId is not "":
                 refdic[currentId[1:]] = currentSeq
             currentId = line.rstrip()
@@ -23,7 +29,8 @@
         else:
             currentSeq += line.rstrip()
     refdic[currentId[1:]] = currentSeq
-    
+
+print "Have", str(len(refdic)), "reference sequences"
 
 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
 #                        r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
@@ -37,16 +44,13 @@
 vPattern = re.compile("|".join(vPattern))
 
 def filterGene(s, pattern):
-    s1 = s[s.find(" ") + 1:]
-    return s1[:s1.find(" ")]
-    """
     if type(s) is not str:
         return None
     res = pattern.search(s)
     if res:
         return res.group(0)
     return None
-    """
+
 
 
 currentSeq = ""