Mercurial > repos > davidvanzessen > shm_csr
diff mutation_column_checker.py @ 83:729738462297 draft
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
author | rhpvorderman |
---|---|
date | Wed, 15 Sep 2021 12:24:06 +0000 |
parents | b6f9a640e098 |
children |
line wrap: on
line diff
"""Check that every entry in the FR3-IMGT column matches the expected pattern.

Reads the tab-separated table ``7_V-REGION-mutation-and-AA-change-table.txt``
from the current working directory, locates the ``FR3-IMGT`` column via the
header row, and reports each data row whose column value contains at least
one ``|``-separated entry that ``mutationMatcher`` does not match.

The check runs only when the file is executed as a script; importing the
module just defines the names below.
"""
import re

INPUT_PATH = "7_V-REGION-mutation-and-AA-change-table.txt"
FR3_COLUMN = "FR3-IMGT"

# Column values of this length or shorter are not checked at all.
MIN_CHECKED_LENGTH = 5

# A progress marker (the row index) is printed every this many rows.
PROGRESS_INTERVAL = 100000

# One entry: a nucleotide letter, a position, any single character, a
# nucleotide letter, then an optional tail (comma, space, upper-case letter,
# digits, '>', upper-case letter or ';', anything else).  For example
# "a88>g,Q30>R(+ - -)" matches.  A raw string keeps "\d" a regex escape
# rather than an (invalid) string escape.
mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")


def _split_row(line):
    """Split one table line into its tab-separated fields, without the line ending."""
    return line.rstrip("\r\n").split("\t")


def malformed_entries(fr3_data):
    """Return the non-empty ``|``-separated entries that the pattern rejects.

    An empty list means every entry in ``fr3_data`` matched.
    """
    return [
        entry
        for entry in fr3_data.split("|")
        if entry and mutationMatcher.match(entry) is None
    ]


def check_table(lines):
    """Validate the FR3-IMGT column of a table given as an iterable of lines.

    The first line is the header and must contain an ``FR3-IMGT`` field;
    otherwise ``ValueError`` is raised.  For each failing data row, the row's
    second field and a message naming its first field are printed.

    Returns the number of failing rows (0 for empty input).
    """
    numbered_lines = enumerate(lines)
    header = next(numbered_lines, None)
    if header is None:
        return 0
    fr3_index = _split_row(header[1]).index(FR3_COLUMN)

    failures = 0
    for i, line in numbered_lines:
        fields = _split_row(line)

        # A row needs more than fr3_index fields for fields[fr3_index] to exist.
        if len(fields) <= fr3_index:
            continue

        fr3_data = fields[fr3_index]
        if len(fr3_data) > MIN_CHECKED_LENGTH and malformed_entries(fr3_data):
            failures += 1
            # Guard the second field so reporting can never fail on a
            # single-field row.
            print(fields[1] if len(fields) > 1 else "")
            print("Something went wrong at line {line} with:".format(line=fields[0]))

        if i % PROGRESS_INTERVAL == 0:
            print(i)
    return failures


def main():
    """Run the check against the table in the current working directory."""
    with open(INPUT_PATH, 'r') as file_handle:
        check_table(file_handle)


if __name__ == "__main__":
    main()