Mercurial > repos > john-mccallum > pcr_markers
comparison find_CAPS.py @ 0:21053f7f9ed1 draft
First upload of PCR Marker tools
author | john-mccallum |
---|---|
date | Thu, 14 Jun 2012 19:29:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21053f7f9ed1 |
---|---|
1 #!/usr/bin/python2.6 | |
2 ##find snps that condition CAPS | |
3 ##usage find_CAPS.py <reference file> <gff file> | |
4 | |
5 | |
6 #Copyright 2012 John McCallum & Leshi Chen | |
7 #New Zealand Institute for Plant and Food Research | |
8 #This program is free software: you can redistribute it and/or modify | |
9 # it under the terms of the GNU General Public License as published by | |
10 # the Free Software Foundation, either version 3 of the License, or | |
11 # (at your option) any later version. | |
12 # | |
13 # This program is distributed in the hope that it will be useful, | |
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 # GNU General Public License for more details. | |
17 # | |
18 # You should have received a copy of the GNU General Public License | |
19 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | |
21 | |
22 | |
23 import sys | |
24 | |
25 from Bio import SeqIO | |
26 from BCBio import GFF | |
27 from Bio.Restriction import * | |
28 | |
29 ###This list is limited to economical enzymes performing well in PCR buffer | |
30 rest_batch = RestrictionBatch( | |
31 [AluI, ApaI, BamHI, BbrPI, BfrI, ClaI, DdeI, DpnII, DraI, EcoRI, | |
32 HaeIII, HindII, HinfI, HpaI, PvuII, RsaI, SacI, Sau3AI, SmaI, TaqI]) | |
33 | |
34 in_file=sys.argv[1] | |
35 gff_file=sys.argv[2] | |
36 | |
37 in_seq_handle = open(in_file) | |
38 in_gff_handle=open(gff_file) | |
39 | |
40 ##use iterator | |
41 for myrec in SeqIO.parse(in_seq_handle, "fasta"): | |
42 ##create single-entry dictionary to accept gff annotations from parser | |
43 seq_dict = {myrec.id:myrec} | |
44 | |
45 ##note that this filters out only SNP features | |
46 limit_info = dict(gff_id = [myrec.id] ,gff_type = ['SNP']) | |
47 in_gff_handle.seek(0) | |
48 | |
49 ##parse annotations into | |
50 annotations = [r for r | |
51 in GFF.parse(in_gff_handle, | |
52 base_dict=seq_dict, | |
53 limit_info=limit_info)] | |
54 | |
55 ##if there are any for this sequence, proceed | |
56 if annotations: | |
57 rec=annotations[0] | |
58 for feat in rec.features: | |
59 fstart=feat.location.start.position | |
60 fend=feat.location.end.position | |
61 | |
62 if 20 < fstart < len(rec) - 20: | |
63 #just work with +/- 20 bp, ignoring SNPS within this | |
64 #distance from ends | |
65 fseq=rec.seq[fstart-20:fstart+20] | |
66 ref_seq = rec.seq[fstart-20:fstart+20] | |
67 variant_seq = ref_seq.tomutable() | |
68 | |
69 #mutate the variant | |
70 variant_seq[20]= feat.qualifiers['Variant_seq'][0] | |
71 variant_seq = variant_seq.toseq() | |
72 | |
73 #digest the sequences | |
74 ref_cuts = rest_batch.search(ref_seq) | |
75 var_cuts = rest_batch.search(variant_seq) | |
76 | |
77 #print | |
78 for enz in ref_cuts: | |
79 kr = set(ref_cuts[enz]) | |
80 km = set(var_cuts[enz]) | |
81 outputstr=[rec.id, fstart +1,fend+1,feat.id,enz] | |
82 if len(kr) > len(km): | |
83 outputstr.append("reference") | |
84 print('\t'.join(map(str,outputstr))) | |
85 elif len(kr) < len(km): | |
86 outputstr.append("variant") | |
87 print('\t'.join(map(str,outputstr))) | |
88 | |
89 in_gff_handle.close() | |
90 in_seq_handle.close() | |
91 | |
92 | |
93 |