Mercurial > repos > davidvanzessen > shm_csr
comparison baseline/script_imgt.py @ 63:8728284105ee draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 06 Dec 2017 08:04:52 -0500 |
parents | 4c5ba6b5d10d |
children | ba33b94637ca |
comparison
equal
deleted
inserted
replaced
62:aa8d37bd1930 | 63:8728284105ee |
---|---|
8 parser.add_argument("--output", help="Output file") | 8 parser.add_argument("--output", help="Output file") |
9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") | 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") |
10 | 10 |
11 args = parser.parse_args() | 11 args = parser.parse_args() |
12 | 12 |
13 print "script_imgt.py" | |
14 print "input:", args.input | |
15 print "ref:", args.ref | |
16 print "output:", args.output | |
17 print "id:", args.id | |
18 | |
13 refdic = dict() | 19 refdic = dict() |
14 with open(args.ref, 'rU') as ref: | 20 with open(args.ref, 'rU') as ref: |
15 currentSeq = "" | 21 currentSeq = "" |
16 currentId = "" | 22 currentId = "" |
17 for line in ref: | 23 for line in ref: |
18 if line[0] is ">": | 24 if line.startswith(">"): |
19 if currentSeq is not "" and currentId is not "": | 25 if currentSeq is not "" and currentId is not "": |
20 refdic[currentId[1:]] = currentSeq | 26 refdic[currentId[1:]] = currentSeq |
21 currentId = line.rstrip() | 27 currentId = line.rstrip() |
22 currentSeq = "" | 28 currentSeq = "" |
23 else: | 29 else: |
24 currentSeq += line.rstrip() | 30 currentSeq += line.rstrip() |
25 refdic[currentId[1:]] = currentSeq | 31 refdic[currentId[1:]] = currentSeq |
26 | 32 |
33 print "Have", str(len(refdic)), "reference sequences" | |
27 | 34 |
28 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, | 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, |
29 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", | 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", |
30 # r"(IGKV[0-3]D?-[0-9]{1,2})", | 37 # r"(IGKV[0-3]D?-[0-9]{1,2})", |
31 # r"(IGLV[0-9]-[0-9]{1,2})", | 38 # r"(IGLV[0-9]-[0-9]{1,2})", |
35 | 42 |
36 #vPattern = re.compile(r"|".join(vPattern)) | 43 #vPattern = re.compile(r"|".join(vPattern)) |
37 vPattern = re.compile("|".join(vPattern)) | 44 vPattern = re.compile("|".join(vPattern)) |
38 | 45 |
39 def filterGene(s, pattern): | 46 def filterGene(s, pattern): |
40 s1 = s[s.find(" ") + 1:] | |
41 return s1[:s1.find(" ")] | |
42 """ | |
43 if type(s) is not str: | 47 if type(s) is not str: |
44 return None | 48 return None |
45 res = pattern.search(s) | 49 res = pattern.search(s) |
46 if res: | 50 if res: |
47 return res.group(0) | 51 return res.group(0) |
48 return None | 52 return None |
49 """ | 53 |
50 | 54 |
51 | 55 |
52 currentSeq = "" | 56 currentSeq = "" |
53 currentId = "" | 57 currentId = "" |
54 first=True | 58 first=True |