comparison tools/seq_primer_clip/seq_primer_clip.py @ 6:b9dc7c967ee6 draft default tip

v0.0.16 Python 3 compatible print function
author peterjc
date Tue, 16 May 2017 09:36:50 -0400
parents 530c8d6fedd8
children
comparison
equal deleted inserted replaced
5:530c8d6fedd8 6:b9dc7c967ee6
31 """ 31 """
32 32
33 import re 33 import re
34 import sys 34 import sys
35 35
36 if "-v" in sys.argv or "--version" in sys.argv:
37 print("v0.0.16")
38 sys.exit(0)
39
36 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter 40 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter
37 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter 41 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
38
39 if "-v" in sys.argv or "--version" in sys.argv:
40 print "v0.0.12"
41 sys.exit(0)
42 42
43 try: 43 try:
44 from Bio.Seq import reverse_complement 44 from Bio.Seq import reverse_complement
45 from Bio.SeqIO.SffIO import SffIterator, SffWriter 45 from Bio.SeqIO.SffIO import SffIterator, SffWriter
46 except ImportError: 46 except ImportError:
128 else: 128 else:
129 ambiguous_dna_re[letter] = "[%s]" % values 129 ambiguous_dna_re[letter] = "[%s]" % values
130 130
131 131
132 def make_reg_ex(seq): 132 def make_reg_ex(seq):
133 """Make regular expression for ambiguous DNA."""
133 return "".join(ambiguous_dna_re[letter] for letter in seq) 134 return "".join(ambiguous_dna_re[letter] for letter in seq)
134 135
135 136
136 def make_reg_ex_mm(seq, mm): 137 def make_reg_ex_mm(seq, mm):
138 """Make regular expression for mis-matches."""
137 if mm > 2: 139 if mm > 2:
138 raise NotImplementedError("At most 2 mismatches allowed!") 140 raise NotImplementedError("At most 2 mismatches allowed!")
139 seq = seq.upper() 141 seq = seq.upper()
140 yield make_reg_ex(seq) 142 yield make_reg_ex(seq)
141 for i in range(1, mm + 1): 143 for i in range(1, mm + 1):
164 % (pattern, len(pattern), seq, len(seq))) 166 % (pattern, len(pattern), seq, len(seq)))
165 yield make_reg_ex(pattern) 167 yield make_reg_ex(pattern)
166 168
167 169
168 def load_primers_as_re(primer_fasta, mm, rc=False): 170 def load_primers_as_re(primer_fasta, mm, rc=False):
169 # Read primer file and record all specified sequences 171 """Load primers as regular expressions.
172
173 Read primer file and record all specified sequences.
174 """
170 primers = set() 175 primers = set()
171 in_handle = open(primer_fasta, "rU") 176 in_handle = open(primer_fasta, "rU")
172 reader = fastaReader(in_handle) 177 reader = fastaReader(in_handle)
173 count = 0 178 count = 0
174 for record in reader: 179 for record in reader:
187 return count, re.compile("|".join(primers)) # make one monster re! 192 return count, re.compile("|".join(primers)) # make one monster re!
188 193
189 194
190 # Read primer file and record all specified sequences 195 # Read primer file and record all specified sequences
191 count, primer = load_primers_as_re(primer_fasta, mm, rc) 196 count, primer = load_primers_as_re(primer_fasta, mm, rc)
192 print "%i primer sequences" % count 197 print("%i primer sequences" % count)
193 198
194 short_neg = 0 199 short_neg = 0
195 short_clipped = 0 200 short_clipped = 0
196 clipped = 0 201 clipped = 0
197 negs = 0 202 negs = 0
348 else: 353 else:
349 sys.exit("Unsupported file type %r" % seq_format) 354 sys.exit("Unsupported file type %r" % seq_format)
350 in_handle.close() 355 in_handle.close()
351 out_handle.close() 356 out_handle.close()
352 357
353 print "Kept %i clipped reads," % clipped 358 print("Kept %i clipped reads," % clipped)
354 print "discarded %i short." % short_clipped 359 print("discarded %i short." % short_clipped)
355 if keep_negatives: 360 if keep_negatives:
356 print "Kept %i non-matching reads," % negs 361 print("Kept %i non-matching reads," % negs)
357 print "discarded %i short." % short_neg 362 print("discarded %i short." % short_neg)