annotate cravat_convert/vcf_converter.py @ 14:45b91fdd18ce draft

Uploaded
author in_silico
date Mon, 30 Jul 2018 13:31:57 -0400
parents 18982667bd10
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
18982667bd10 Uploaded
in_silico
parents:
diff changeset
1 """
18982667bd10 Uploaded
in_silico
parents:
diff changeset
2 A module originally obtained from the cravat package. Modified to use in the vcf
18982667bd10 Uploaded
in_silico
parents:
diff changeset
3 converter galaxy tool.
18982667bd10 Uploaded
in_silico
parents:
diff changeset
4
18982667bd10 Uploaded
in_silico
parents:
diff changeset
5
18982667bd10 Uploaded
in_silico
parents:
diff changeset
6 Register of changes made (Chris Jacoby):
18982667bd10 Uploaded
in_silico
parents:
diff changeset
7 1) Changed imports as galaxy tool won't have access to complete cravat python package
18982667bd10 Uploaded
in_silico
parents:
diff changeset
8 2) Defined BadFormatError in BaseConverter file, as I didn't have the BadFormatError module
18982667bd10 Uploaded
in_silico
parents:
diff changeset
9 """
18982667bd10 Uploaded
in_silico
parents:
diff changeset
10
18982667bd10 Uploaded
in_silico
parents:
diff changeset
11 from base_converter import BaseConverter, BadFormatError
18982667bd10 Uploaded
in_silico
parents:
diff changeset
12 import re
18982667bd10 Uploaded
in_silico
parents:
diff changeset
13
18982667bd10 Uploaded
in_silico
parents:
diff changeset
14 class CravatConverter(BaseConverter):
18982667bd10 Uploaded
in_silico
parents:
diff changeset
15
18982667bd10 Uploaded
in_silico
parents:
diff changeset
16 def __init__(self):
18982667bd10 Uploaded
in_silico
parents:
diff changeset
17 self.format_name = 'vcf'
18982667bd10 Uploaded
in_silico
parents:
diff changeset
18 self.samples = []
18982667bd10 Uploaded
in_silico
parents:
diff changeset
19 self.var_counter = 0
18982667bd10 Uploaded
in_silico
parents:
diff changeset
20 self.addl_cols = [{'name':'phred',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
21 'title':'Phred',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
22 'type':'string'},
18982667bd10 Uploaded
in_silico
parents:
diff changeset
23 {'name':'filter',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
24 'title':'VCF filter',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
25 'type':'string'},
18982667bd10 Uploaded
in_silico
parents:
diff changeset
26 {'name':'zygosity',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
27 'title':'Zygosity',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
28 'type':'string'},
18982667bd10 Uploaded
in_silico
parents:
diff changeset
29 {'name':'alt_reads',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
30 'title':'Alternate reads',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
31 'type':'int'},
18982667bd10 Uploaded
in_silico
parents:
diff changeset
32 {'name':'tot_reads',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
33 'title':'Total reads',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
34 'type':'int'},
18982667bd10 Uploaded
in_silico
parents:
diff changeset
35 {'name':'af',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
36 'title':'Variant allele frequency',
18982667bd10 Uploaded
in_silico
parents:
diff changeset
37 'type':'float'}]
18982667bd10 Uploaded
in_silico
parents:
diff changeset
38
18982667bd10 Uploaded
in_silico
parents:
diff changeset
def check_format(self, f):
    """Tell whether the open file *f* starts like a VCF file.

    Reads exactly one line from *f* (advancing its position) and returns
    True when that line begins with the '##fileformat=VCF' marker.
    """
    first_line = f.readline()
    return first_line.startswith('##fileformat=VCF')
18982667bd10 Uploaded
in_silico
parents:
diff changeset
41
18982667bd10 Uploaded
in_silico
parents:
diff changeset
42 def setup(self, f):
18982667bd10 Uploaded
in_silico
parents:
diff changeset
43
18982667bd10 Uploaded
in_silico
parents:
diff changeset
44 vcf_line_no = 0
18982667bd10 Uploaded
in_silico
parents:
diff changeset
45 for line in f:
18982667bd10 Uploaded
in_silico
parents:
diff changeset
46 vcf_line_no += 1
18982667bd10 Uploaded
in_silico
parents:
diff changeset
47 if len(line) < 6:
18982667bd10 Uploaded
in_silico
parents:
diff changeset
48 continue
18982667bd10 Uploaded
in_silico
parents:
diff changeset
49 if line[:6] == '#CHROM':
18982667bd10 Uploaded
in_silico
parents:
diff changeset
50 toks = re.split('\s+', line.rstrip())
18982667bd10 Uploaded
in_silico
parents:
diff changeset
51 if len(toks) > 8:
18982667bd10 Uploaded
in_silico
parents:
diff changeset
52 self.samples = toks[9:]
18982667bd10 Uploaded
in_silico
parents:
diff changeset
53 break
18982667bd10 Uploaded
in_silico
parents:
diff changeset
54
18982667bd10 Uploaded
in_silico
parents:
diff changeset
def convert_line(self, l):
    """Convert one line of a VCF file into a list of variant dicts.

    Header / metadata lines (starting with '#') yield None. Every other
    line increments self.var_counter and is split on tabs.

    * With exactly eight columns (no genotype data) one dict is produced
      per ALT allele, with 'sample_id' set to 'no_sample'.
    * With more than eight columns, column nine is read as the FORMAT
      key list and one dict is produced per sample per distinct non-zero
      allele index in that sample's GT value. These dicts also carry
      'zygosity', 'tot_reads', 'alt_reads' and 'af'.

    Raises BadFormatError when the line has fewer than eight columns,
    when the ID column is empty, or when FORMAT has no 'GT' key.
    """
    if l.startswith('#'):
        return None

    self.var_counter += 1
    columns = l.strip('\r\n').split('\t')
    if len(columns) < 8:
        raise BadFormatError('Wrong VCF format')

    # The eighth column (INFO) is not used by the conversion.
    chrom, pos, tag, ref, alt_field, qual, filter_value = columns[:7]

    if tag == '':
        raise BadFormatError('ID column is blank')
    if tag == '.':
        # No ID given: synthesise one from the running variant count.
        tag = 'VAR' + str(self.var_counter)
    if chrom[:3] != 'chr':
        chrom = 'chr' + chrom

    alt_alleles = alt_field.split(',')
    records = []

    if len(columns) == 8:
        # Sites-only line: emit every ALT allele without sample details.
        for alt in alt_alleles:
            newpos, newref, newalt = self.extract_vcf_variant('+', pos, ref, alt)
            records.append({'tags': tag,
                            'chrom': chrom,
                            'pos': newpos,
                            'ref_base': newref,
                            'alt_base': newalt,
                            'sample_id': 'no_sample',
                            'phred': qual,
                            'filter': filter_value})
        return records

    # Map each FORMAT key to its position inside a sample column.
    genotype_fields = {
        key: index for index, key in enumerate(columns[8].split(':'))
    }
    if 'GT' not in genotype_fields:
        raise BadFormatError('No GT Field')
    gt_field_no = genotype_fields['GT']

    for sample_no, raw_sample in enumerate(columns[9:]):
        sample = self.samples[sample_no]
        sample_data = raw_sample.split(':')
        gt_text = sample_data[gt_field_no]

        # Distinct allele indexes called for this sample; '.' calls are
        # skipped. Kept as a dict because extract_read_info() uses .keys().
        gts = {}
        for call in gt_text.replace('/', '|').split('|'):
            if call != '.':
                gts[int(call)] = True

        for gt in sorted(gts.keys()):
            if gt == 0:
                # Index 0 is the reference allele, not a variant.
                continue
            alt = alt_alleles[gt - 1]
            newpos, newref, newalt = self.extract_vcf_variant('+', pos, ref, alt)
            zyg = self.homo_hetro(gt_text)
            depth, alt_reads, af = self.extract_read_info(sample_data, gt, gts, genotype_fields)
            records.append({'tags': tag,
                            'chrom': chrom,
                            'pos': newpos,
                            'ref_base': newref,
                            'alt_base': newalt,
                            'sample_id': sample,
                            'phred': qual,
                            'filter': filter_value,
                            'zygosity': zyg,
                            'tot_reads': depth,
                            'alt_reads': alt_reads,
                            'af': af,
                            })
    return records
18982667bd10 Uploaded
in_silico
parents:
diff changeset
129
18982667bd10 Uploaded
in_silico
parents:
diff changeset
130 #The vcf genotype string has a call for each allele separated by '|' or '/'
18982667bd10 Uploaded
in_silico
parents:
diff changeset
131 #If the call is the same for all alleles, return 'hom' otherwise 'het'
18982667bd10 Uploaded
in_silico
parents:
diff changeset
def homo_hetro(self, gt_str):
    """Classify a VCF genotype string as homozygous or heterozygous.

    gt_str holds one allele call per chromosome copy, separated by '/' or
    '|'. Returns '' when any call is missing ('.' appears in the string),
    'hom' when all calls are identical, and 'het' otherwise.
    """
    if '.' in gt_str:
        return ''

    calls = gt_str.strip().replace('/', '|').split('|')
    # All calls equal <=> exactly one distinct value among them.
    return 'hom' if len(set(calls)) == 1 else 'het'
18982667bd10 Uploaded
in_silico
parents:
diff changeset
141
18982667bd10 Uploaded
in_silico
parents:
diff changeset
142 #Extract read depth, allele count, and allele frequency from optional VCF information
18982667bd10 Uploaded
in_silico
parents:
diff changeset
def extract_read_info(self, sample_data, gt, gts, genotype_fields):
    """Extract read depth, alt read count and allele frequency for one allele.

    Parameters:
      sample_data     -- the ':'-split values of one sample column.
      gt              -- the allele index (from the GT value) being reported.
      gts             -- the distinct allele indexes called for this sample.
      genotype_fields -- maps each FORMAT key to its index in sample_data.

    Returns (depth, alt_reads, af). Any item that cannot be determined is
    the empty string. depth is the DP string when DP is present, otherwise
    the int sum of ref and alt reads; alt_reads is a string taken from AD;
    af is a float taken from AF or computed as alt_reads / depth.

    A FORMAT key whose value is absent from sample_data (trailing fields
    dropped), or whose value is the missing marker '.', is treated as not
    provided instead of raising.
    """
    def field_value(key):
        # Value of a FORMAT key for this sample, '' when unavailable.
        index = genotype_fields.get(key)
        if index is None or index >= len(sample_data):
            return ''
        value = sample_data[index]
        return '' if value == '.' else value

    def count_at(counts, index):
        # One entry of the AD list, '' when absent or missing.
        if index >= len(counts) or counts[index] == '.':
            return ''
        return counts[index]

    depth = ''
    alt_reads = ''
    ref_reads = ''
    af = ''

    # NOTE(review): the index selection below assumes AD holds only two
    # values (ref,alt or alt1,alt2). The VCF specification defines AD as
    # one value per allele (ref,alt1,alt2,...), which would make the alt
    # count simply AD[gt]. Kept as-is for backward compatibility -- confirm
    # against the callers producing the input before changing.
    ad_value = field_value('AD')
    if ad_value != '':
        counts = ad_value.split(',')
        if 0 in gts:
            # Part of the genotype is reference: AD is read as ref, alt.
            ref_reads = count_at(counts, 0)
            alt_reads = count_at(counts, 1)
        elif gt == max(gts):
            # Genotype made of alt alleles only: highest index -> 2nd value.
            alt_reads = count_at(counts, 1)
        else:
            alt_reads = count_at(counts, 0)

    dp_value = field_value('DP')
    if dp_value != '':
        depth = dp_value
    elif alt_reads != '' and ref_reads != '':
        # DP not available but both counts are: depth = ref + alt.
        depth = int(alt_reads) + int(ref_reads)

    af_value = field_value('AF')
    if af_value != '':
        af = float(af_value)
    elif depth != '' and alt_reads != '':
        # AF not available: derive it, unless the depth is zero.
        total = float(depth)
        if total != 0:
            af = float(alt_reads) / total

    return depth, alt_reads, af
18982667bd10 Uploaded
in_silico
parents:
diff changeset
175
18982667bd10 Uploaded
in_silico
parents:
diff changeset
def extract_vcf_variant(self, strand, pos, ref, alt):
    """Reduce a VCF ref/alt pair to its minimal representation.

    When ref and alt are the same single nucleotide the inputs are
    returned unchanged as (pos, ref, alt). Otherwise the shared bases are
    removed via trimming_vcf_input() and (new_pos, new_ref, new_alt) is
    returned, where an allele that became empty is written as '-'.
    """
    if len(ref) == 1 and len(alt) == 1 and ref == alt:
        return pos, ref, alt

    # Strip the bases that ref and alt have in common at either end.
    trimmed_ref, trimmed_alt, shifted_pos = self.trimming_vcf_input(
        ref, alt, pos, strand)

    # An empty allele (pure insertion / deletion) is represented by '-'.
    return shifted_pos, trimmed_ref or '-', trimmed_alt or '-'
18982667bd10 Uploaded
in_silico
parents:
diff changeset
196
18982667bd10 Uploaded
in_silico
parents:
diff changeset
197 # This function looks at the ref and alt sequences and removes
18982667bd10 Uploaded
in_silico
parents:
diff changeset
198 # where the overlapping sequences contain the same nucleotide.
18982667bd10 Uploaded
in_silico
parents:
diff changeset
199 # This trims from the end first but does not remove the first nucleotide
18982667bd10 Uploaded
in_silico
parents:
diff changeset
200 # because based on the format of VCF input the
18982667bd10 Uploaded
in_silico
parents:
diff changeset
201 # first nucleotide of the ref and alt sequence occur
18982667bd10 Uploaded
in_silico
parents:
diff changeset
202 # at the position specified.
18982667bd10 Uploaded
in_silico
parents:
diff changeset
203 # End removed first, not the first nucleotide
18982667bd10 Uploaded
in_silico
parents:
diff changeset
204 # Front removed and position changed
18982667bd10 Uploaded
in_silico
parents:
diff changeset
def trimming_vcf_input(self, ref, alt, pos, strand):
    """Strip the nucleotides shared by ref and alt and adjust the position.

    The common suffix is removed first, but never so far that the shorter
    allele loses its first nucleotide (e.g. CTCA -> GTCTCA becomes
    C -> GTC). The common prefix is removed next; each removed leading
    base moves the position by +1 on strand '+', by -1 on strand '-', and
    leaves it unchanged for any other strand value (e.g. G -> GT at
    6530968 becomes '' -> T at 6530969).

    Returns (new_ref, new_alt, new_pos) with new_pos as an int. Either
    allele may come back as an empty string.
    """
    position = int(pos)

    # Length of the shared tail, capped so one base of the shorter allele
    # always survives this step.
    tail_cap = min(len(ref), len(alt)) - 1
    tail = 0
    while tail < tail_cap and ref[-1 - tail] == alt[-1 - tail]:
        tail += 1
    core_ref = ref[:len(ref) - tail]
    core_alt = alt[:len(alt) - tail]

    # Length of the shared head of what is left.
    head_cap = min(len(core_ref), len(core_alt))
    head = 0
    while head < head_cap and core_ref[head] == core_alt[head]:
        head += 1

    # Each trimmed leading base shifts the coordinate along the strand.
    if strand == '+':
        position += head
    elif strand == '-':
        position -= head

    return core_ref[head:], core_alt[head:], position
18982667bd10 Uploaded
in_silico
parents:
diff changeset
240
18982667bd10 Uploaded
in_silico
parents:
diff changeset
241
18982667bd10 Uploaded
in_silico
parents:
diff changeset
242 if __name__ == "__main__":
18982667bd10 Uploaded
in_silico
parents:
diff changeset
243 c = CravatConverter()