comparison tools/seq_primer_clip/seq_primer_clip.py @ 5:530c8d6fedd8 draft

v0.0.15 - internal changes
author peterjc
date Wed, 10 May 2017 13:09:52 -0400
parents 9b074c1db68e
children b9dc7c967ee6
comparison
equal deleted inserted replaced
4:9b074c1db68e 5:530c8d6fedd8
27 See accompanying text file for licence details (MIT/BSD style). 27 See accompanying text file for licence details (MIT/BSD style).
28 28
29 NOTE: Currently it uses Python's regular expression engine for finding the 29 NOTE: Currently it uses Python's regular expression engine for finding the
30 primers, which for my needs is fast enough. 30 primers, which for my needs is fast enough.
31 """ 31 """
32
33 import re
32 import sys 34 import sys
33 import re 35
34 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter 36 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter
35 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter 37 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
36 38
37 if "-v" in sys.argv or "--version" in sys.argv: 39 if "-v" in sys.argv or "--version" in sys.argv:
38 print "v0.0.12" 40 print "v0.0.12"
145 if mm >= 1: 147 if mm >= 1:
146 for i, letter in enumerate(seq): 148 for i, letter in enumerate(seq):
147 # We'll use a set to remove any duplicate patterns 149 # We'll use a set to remove any duplicate patterns
148 # if letter not in "NX": 150 # if letter not in "NX":
149 pattern = seq[:i] + "N" + seq[i + 1:] 151 pattern = seq[:i] + "N" + seq[i + 1:]
150 assert len(pattern) == len(seq), "Len %s is %i, len %s is %i" \ 152 assert len(pattern) == len(seq), ("Len %s is %i, len %s is %i"
151 % (pattern, len(pattern), seq, len(seq)) 153 % (pattern, len(pattern), seq, len(seq)))
152 yield make_reg_ex(pattern) 154 yield make_reg_ex(pattern)
153 if mm >= 2: 155 if mm >= 2:
154 for i, letter in enumerate(seq): 156 for i, letter in enumerate(seq):
155 # We'll use a set to remove any duplicate patterns 157 # We'll use a set to remove any duplicate patterns
156 # if letter not in "NX": 158 # if letter not in "NX":
157 for k, letter in enumerate(seq[i + 1:]): 159 for k, letter in enumerate(seq[i + 1:]):
158 # We'll use a set to remove any duplicate patterns 160 # We'll use a set to remove any duplicate patterns
159 # if letter not in "NX": 161 # if letter not in "NX":
160 pattern = seq[:i] + "N" + seq[i + 1:i + 1 + k] + "N" + seq[i + k + 2:] 162 pattern = seq[:i] + "N" + seq[i + 1:i + 1 + k] + "N" + seq[i + k + 2:]
161 assert len(pattern) == len(seq), "Len %s is %i, len %s is %i" \ 163 assert len(pattern) == len(seq), ("Len %s is %i, len %s is %i"
162 % (pattern, len(pattern), seq, len(seq)) 164 % (pattern, len(pattern), seq, len(seq)))
163 yield make_reg_ex(pattern) 165 yield make_reg_ex(pattern)
164 166
165 167
166 def load_primers_as_re(primer_fasta, mm, rc=False): 168 def load_primers_as_re(primer_fasta, mm, rc=False):
167 # Read primer file and record all specified sequences 169 # Read primer file and record all specified sequences