Mercurial > repos > peterjc > seq_primer_clip
comparison tools/seq_primer_clip/seq_primer_clip.py @ 6:b9dc7c967ee6 draft default tip
v0.0.16 Python 3 compatible print function
| field | value |
|---|---|
| author | peterjc |
| date | Tue, 16 May 2017 09:36:50 -0400 |
| parents | 530c8d6fedd8 |
| children | |
comparison
Legend: equal, deleted, inserted, replaced
5:530c8d6fedd8 | 6:b9dc7c967ee6 |
---|---|
31 """ | 31 """ |
32 | 32 |
33 import re | 33 import re |
34 import sys | 34 import sys |
35 | 35 |
36 if "-v" in sys.argv or "--version" in sys.argv: | |
37 print("v0.0.16") | |
38 sys.exit(0) | |
39 | |
36 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter | 40 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter |
37 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter | 41 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter |
38 | |
39 if "-v" in sys.argv or "--version" in sys.argv: | |
40 print "v0.0.12" | |
41 sys.exit(0) | |
42 | 42 |
43 try: | 43 try: |
44 from Bio.Seq import reverse_complement | 44 from Bio.Seq import reverse_complement |
45 from Bio.SeqIO.SffIO import SffIterator, SffWriter | 45 from Bio.SeqIO.SffIO import SffIterator, SffWriter |
46 except ImportError: | 46 except ImportError: |
128 else: | 128 else: |
129 ambiguous_dna_re[letter] = "[%s]" % values | 129 ambiguous_dna_re[letter] = "[%s]" % values |
130 | 130 |
131 | 131 |
132 def make_reg_ex(seq): | 132 def make_reg_ex(seq): |
133 """Make regular expression for ambiguous DNA.""" | |
133 return "".join(ambiguous_dna_re[letter] for letter in seq) | 134 return "".join(ambiguous_dna_re[letter] for letter in seq) |
134 | 135 |
135 | 136 |
136 def make_reg_ex_mm(seq, mm): | 137 def make_reg_ex_mm(seq, mm): |
138 """Make regular expression for mis-matches.""" | |
137 if mm > 2: | 139 if mm > 2: |
138 raise NotImplementedError("At most 2 mismatches allowed!") | 140 raise NotImplementedError("At most 2 mismatches allowed!") |
139 seq = seq.upper() | 141 seq = seq.upper() |
140 yield make_reg_ex(seq) | 142 yield make_reg_ex(seq) |
141 for i in range(1, mm + 1): | 143 for i in range(1, mm + 1): |
164 % (pattern, len(pattern), seq, len(seq))) | 166 % (pattern, len(pattern), seq, len(seq))) |
165 yield make_reg_ex(pattern) | 167 yield make_reg_ex(pattern) |
166 | 168 |
167 | 169 |
168 def load_primers_as_re(primer_fasta, mm, rc=False): | 170 def load_primers_as_re(primer_fasta, mm, rc=False): |
169 # Read primer file and record all specified sequences | 171 """Load primers as regular expressions. |
172 | |
173 Read primer file and record all specified sequences. | |
174 """ | |
170 primers = set() | 175 primers = set() |
171 in_handle = open(primer_fasta, "rU") | 176 in_handle = open(primer_fasta, "rU") |
172 reader = fastaReader(in_handle) | 177 reader = fastaReader(in_handle) |
173 count = 0 | 178 count = 0 |
174 for record in reader: | 179 for record in reader: |
187 return count, re.compile("|".join(primers)) # make one monster re! | 192 return count, re.compile("|".join(primers)) # make one monster re! |
188 | 193 |
189 | 194 |
190 # Read primer file and record all specified sequences | 195 # Read primer file and record all specified sequences |
191 count, primer = load_primers_as_re(primer_fasta, mm, rc) | 196 count, primer = load_primers_as_re(primer_fasta, mm, rc) |
192 print "%i primer sequences" % count | 197 print("%i primer sequences" % count) |
193 | 198 |
194 short_neg = 0 | 199 short_neg = 0 |
195 short_clipped = 0 | 200 short_clipped = 0 |
196 clipped = 0 | 201 clipped = 0 |
197 negs = 0 | 202 negs = 0 |
348 else: | 353 else: |
349 sys.exit("Unsupported file type %r" % seq_format) | 354 sys.exit("Unsupported file type %r" % seq_format) |
350 in_handle.close() | 355 in_handle.close() |
351 out_handle.close() | 356 out_handle.close() |
352 | 357 |
353 print "Kept %i clipped reads," % clipped | 358 print("Kept %i clipped reads," % clipped) |
354 print "discarded %i short." % short_clipped | 359 print("discarded %i short." % short_clipped) |
355 if keep_negatives: | 360 if keep_negatives: |
356 print "Kept %i non-matching reads," % negs | 361 print("Kept %i non-matching reads," % negs) |
357 print "discarded %i short." % short_neg | 362 print("discarded %i short." % short_neg) |