Mercurial > repos > davidvanzessen > shm_csr
comparison baseline/script_imgt.py @ 63:8728284105ee draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 06 Dec 2017 08:04:52 -0500 |
| parents | 4c5ba6b5d10d |
| children | ba33b94637ca |
comparison
equal
deleted
inserted
replaced
| 62:aa8d37bd1930 | 63:8728284105ee |
|---|---|
| 8 parser.add_argument("--output", help="Output file") | 8 parser.add_argument("--output", help="Output file") |
| 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") | 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") |
| 10 | 10 |
| 11 args = parser.parse_args() | 11 args = parser.parse_args() |
| 12 | 12 |
| | 13 print "script_imgt.py" |
| | 14 print "input:", args.input |
| | 15 print "ref:", args.ref |
| | 16 print "output:", args.output |
| | 17 print "id:", args.id |
| | 18 |
| 13 refdic = dict() | 19 refdic = dict() |
| 14 with open(args.ref, 'rU') as ref: | 20 with open(args.ref, 'rU') as ref: |
| 15 currentSeq = "" | 21 currentSeq = "" |
| 16 currentId = "" | 22 currentId = "" |
| 17 for line in ref: | 23 for line in ref: |
| 18 if line[0] is ">": | 24 if line.startswith(">"): |
| 19 if currentSeq is not "" and currentId is not "": | 25 if currentSeq is not "" and currentId is not "": |
| 20 refdic[currentId[1:]] = currentSeq | 26 refdic[currentId[1:]] = currentSeq |
| 21 currentId = line.rstrip() | 27 currentId = line.rstrip() |
| 22 currentSeq = "" | 28 currentSeq = "" |
| 23 else: | 29 else: |
| 24 currentSeq += line.rstrip() | 30 currentSeq += line.rstrip() |
| 25 refdic[currentId[1:]] = currentSeq | 31 refdic[currentId[1:]] = currentSeq |
| 26 | 32 |
| | 33 print "Have", str(len(refdic)), "reference sequences" |
| 27 | 34 |
| 28 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, | 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, |
| 29 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", | 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", |
| 30 # r"(IGKV[0-3]D?-[0-9]{1,2})", | 37 # r"(IGKV[0-3]D?-[0-9]{1,2})", |
| 31 # r"(IGLV[0-9]-[0-9]{1,2})", | 38 # r"(IGLV[0-9]-[0-9]{1,2})", |
| 35 | 42 |
| 36 #vPattern = re.compile(r"|".join(vPattern)) | 43 #vPattern = re.compile(r"|".join(vPattern)) |
| 37 vPattern = re.compile("|".join(vPattern)) | 44 vPattern = re.compile("|".join(vPattern)) |
| 38 | 45 |
| 39 def filterGene(s, pattern): | 46 def filterGene(s, pattern): |
| 40 s1 = s[s.find(" ") + 1:] | |
| 41 return s1[:s1.find(" ")] | |
| 42 """ | |
| 43 if type(s) is not str: | 47 if type(s) is not str: |
| 44 return None | 48 return None |
| 45 res = pattern.search(s) | 49 res = pattern.search(s) |
| 46 if res: | 50 if res: |
| 47 return res.group(0) | 51 return res.group(0) |
| 48 return None | 52 return None |
| 49 """ | 53 |
| 50 | 54 |
| 51 | 55 |
| 52 currentSeq = "" | 56 currentSeq = "" |
| 53 currentId = "" | 57 currentId = "" |
| 54 first=True | 58 first=True |
