Mercurial > repos > davidvanzessen > shm_csr
comparison shm_csr.py @ 96:385dea3c6cb5 draft
planemo upload commit 423a48569c69301fdbf893ac3a649128404dfff5
author | rhpvorderman |
---|---|
date | Fri, 05 Jan 2024 08:53:22 +0000 |
parents | 6809c63d9161 |
children |
comparison
equal
deleted
inserted
replaced
95:d63eff357515 | 96:385dea3c6cb5 |
---|---|
1 import argparse | 1 import argparse |
2 import logging | 2 import logging |
3 import sys | 3 import sys |
4 import os | 4 import os |
| 5 import traceback |
5 import typing | 6 import typing |
6 from typing import Optional | 7 from typing import Optional |
7 | 8 |
8 from collections import defaultdict | 9 from collections import defaultdict |
9 | 10 |
10 REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2") | 11 REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2", "None") |
11 | 12 |
| 13 |
| 14 def int_or_zero(value: typing.Any): |
| 15 try: |
| 16 return int(value) |
| 17 except ValueError: |
| 18 return 0 |
12 | 19 |
13 class Mutation(typing.NamedTuple): | 20 class Mutation(typing.NamedTuple): |
14 """Represent a mutation type as a tuple""" | 21 """Represent a mutation type as a tuple""" |
15 frm: str # 'from' is a reserved python keyword. | 22 frm: str # 'from' is a reserved python keyword. |
16 where: int | 23 where: int |
175 mutationdic[ID + "_FR3"] = [Mutation.from_string(x) for x in linesplt[fr3Index].split("|") if x] | 182 mutationdic[ID + "_FR3"] = [Mutation.from_string(x) for x in linesplt[fr3Index].split("|") if x] |
176 | 183 |
177 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 184 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
178 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 185 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
179 | 186 |
180 fr1Length = int(linesplt[fr1LengthIndex]) | 187 fr1Length = int_or_zero(linesplt[fr1LengthIndex]) |
181 fr2Length = int(linesplt[fr2LengthIndex]) | 188 fr2Length = int_or_zero(linesplt[fr2LengthIndex]) |
182 fr3Length = int(linesplt[fr3LengthIndex]) | 189 fr3Length = int_or_zero(linesplt[fr3LengthIndex]) |
183 cdr1Length = int(linesplt[cdr1LengthIndex]) | 190 cdr1Length = int_or_zero(linesplt[cdr1LengthIndex]) |
184 cdr2Length = int(linesplt[cdr2LengthIndex]) | 191 cdr2Length = int_or_zero(linesplt[cdr2LengthIndex]) |
185 LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length) | 192 LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length) |
186 | 193 |
187 cdr1AALengthDic[ID] = int(linesplt[cdr1AALengthIndex]) | 194 cdr1AALengthDic[ID] = int_or_zero(linesplt[cdr1AALengthIndex]) |
188 cdr2AALengthDic[ID] = int(linesplt[cdr2AALengthIndex]) | 195 cdr2AALengthDic[ID] = int_or_zero(linesplt[cdr2AALengthIndex]) |
189 | 196 |
190 IDlist += [ID] | 197 IDlist += [ID] |
191 print("len(mutationdic) =", len(mutationdic)) | 198 print("len(mutationdic) =", len(mutationdic)) |
192 | 199 |
193 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle: | 200 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle: |
220 # sum(region_lengths[0:]) (Equivalent to everything) | 227 # sum(region_lengths[0:]) (Equivalent to everything) |
221 # sum(region_lengths[1:]) Gets everything except FR1 etc. | 228 # sum(region_lengths[1:]) Gets everything except FR1 etc. |
222 # We determine the position to start summing below. | 229 # We determine the position to start summing below. |
223 # This returns 0 for leader, 1 for FR1 etc. | 230 # This returns 0 for leader, 1 for FR1 etc. |
224 length_start_pos = REGION_FILTERS.index(empty_region_filter) | 231 length_start_pos = REGION_FILTERS.index(empty_region_filter) |
| 232 if empty_region_filter == "None": |
| 233 length_start_pos = 0 |
225 | 234 |
226 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") | 235 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") |
227 for ID in IDlist: | 236 for ID in IDlist: |
228 mutations = mutationListByID[ID] | 237 mutations = mutationListByID[ID] |
229 region_length = sum(LengthDic[ID][length_start_pos:]) | 238 region_length = sum(LengthDic[ID][length_start_pos:]) |