Mercurial > repos > davidvanzessen > shm_csr
annotate mutation_column_checker.py @ 86:be36df6dd589 draft
"planemo upload commit 78ace939ed7437b8b360588032449a99aad949eb"
author | rhpvorderman |
---|---|
date | Wed, 27 Oct 2021 10:02:33 +0000 |
parents | 729738462297 |
children |
rev | line source |
---|---|
83
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
"""Sanity-check the FR3-IMGT column of an IMGT mutation/AA-change table.

Reads the tab-separated table, locates the "FR3-IMGT" column from the header
row, and reports every data row whose FR3-IMGT cell contains a "|"-separated
entry that ``mutationMatcher`` cannot parse.
"""
import re

# Raw string so that ``\d`` reaches the regex engine untouched instead of
# being treated as an (invalid) string escape.
# NOTE(review): the unescaped "." between the position and the second
# nucleotide matches any character; presumably it stands for ">" -- confirm
# before tightening.
mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")

MUTATION_TABLE_PATH = "7_V-REGION-mutation-and-AA-change-table.txt"
FR3_COLUMN = "FR3-IMGT"
# Cells of this length or shorter are not checked (threshold kept from the
# original script).
MIN_FR3_LENGTH = 5
# A progress counter is printed every this many input lines.
PROGRESS_INTERVAL = 100000


def unmatched_fr3_entries(fr3_data):
    """Return the non-empty "|"-separated entries that mutationMatcher rejects."""
    return [
        entry
        for entry in fr3_data.split("|")
        if entry and not mutationMatcher.match(entry)
    ]


def check_mutation_table(lines):
    """Check the FR3-IMGT column of every data row in *lines*.

    *lines* is an iterable of tab-separated text lines whose first item is
    the header row. For each row whose FR3-IMGT cell is longer than
    ``MIN_FR3_LENGTH`` and holds at least one unparsable entry, the second
    column and a diagnostic message naming the first column are printed.
    A progress counter is printed every ``PROGRESS_INTERVAL`` lines.

    Returns a list of ``(first_column, second_column)`` tuples, one per
    failing row. Raises ``ValueError`` when the header has no FR3-IMGT column.
    """
    rows = iter(lines)
    header = next(rows, None)
    if header is None:
        # Empty input: nothing to check.
        return []

    # Strip the line terminator so the last column name/value is not polluted
    # by a trailing newline (which made the header lookup fail when FR3-IMGT
    # was the final column).
    fr3_index = header.rstrip("\n").split("\t").index(FR3_COLUMN)

    failures = []
    # start=1 keeps the progress numbers identical to counting the header as
    # line 0.
    for i, line in enumerate(rows, start=1):
        line_split = line.rstrip("\n").split("\t")

        # "<=": a row with exactly fr3_index columns has no FR3-IMGT cell;
        # indexing it would raise IndexError.
        if len(line_split) <= fr3_index:
            continue

        fr3_data = line_split[fr3_index]
        if len(fr3_data) > MIN_FR3_LENGTH and unmatched_fr3_entries(fr3_data):
            print((line_split[1]))
            print(("Something went wrong at line {line} with:".format(line=line_split[0])))
            failures.append((line_split[0], line_split[1]))

        if i % PROGRESS_INTERVAL == 0:
            print(i)

    return failures


def main():
    """Run the check against the table file in the current directory."""
    with open(MUTATION_TABLE_PATH, 'r') as file_handle:
        check_mutation_table(file_handle)


if __name__ == "__main__":
    main()