Mercurial > repos > peterjc > seq_primer_clip
comparison tools/seq_primer_clip/seq_primer_clip.py @ 5:530c8d6fedd8 draft
v0.0.15 - internal changes
author | peterjc |
---|---|
date | Wed, 10 May 2017 13:09:52 -0400 |
parents | 9b074c1db68e |
children | b9dc7c967ee6 |
comparison legend:
- equal
- deleted
- inserted
- replaced
4:9b074c1db68e | 5:530c8d6fedd8 |
---|---|
27 See accompanying text file for licence details (MIT/BSD style). | 27 See accompanying text file for licence details (MIT/BSD style). |
28 | 28 |
29 NOTE: Currently it uses Python's regular expression engine for finding the | 29 NOTE: Currently it uses Python's regular expression engine for finding the |
30 primers, which for my needs is fast enough. | 30 primers, which for my needs is fast enough. |
31 """ | 31 """ |
32 | |
33 import re | |
32 import sys | 34 import sys |
33 import re | 35 |
34 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter | 36 from galaxy_utils.sequence.fasta import fastaReader, fastaWriter |
35 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter | 37 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter |
36 | 38 |
37 if "-v" in sys.argv or "--version" in sys.argv: | 39 if "-v" in sys.argv or "--version" in sys.argv: |
38 print "v0.0.12" | 40 print "v0.0.12" |
145 if mm >= 1: | 147 if mm >= 1: |
146 for i, letter in enumerate(seq): | 148 for i, letter in enumerate(seq): |
147 # We'll use a set to remove any duplicate patterns | 149 # We'll use a set to remove any duplicate patterns |
148 # if letter not in "NX": | 150 # if letter not in "NX": |
149 pattern = seq[:i] + "N" + seq[i + 1:] | 151 pattern = seq[:i] + "N" + seq[i + 1:] |
150 assert len(pattern) == len(seq), "Len %s is %i, len %s is %i" \ | 152 assert len(pattern) == len(seq), ("Len %s is %i, len %s is %i" |
151 % (pattern, len(pattern), seq, len(seq)) | 153 % (pattern, len(pattern), seq, len(seq))) |
152 yield make_reg_ex(pattern) | 154 yield make_reg_ex(pattern) |
153 if mm >= 2: | 155 if mm >= 2: |
154 for i, letter in enumerate(seq): | 156 for i, letter in enumerate(seq): |
155 # We'll use a set to remove any duplicate patterns | 157 # We'll use a set to remove any duplicate patterns |
156 # if letter not in "NX": | 158 # if letter not in "NX": |
157 for k, letter in enumerate(seq[i + 1:]): | 159 for k, letter in enumerate(seq[i + 1:]): |
158 # We'll use a set to remove any duplicate patterns | 160 # We'll use a set to remove any duplicate patterns |
159 # if letter not in "NX": | 161 # if letter not in "NX": |
160 pattern = seq[:i] + "N" + seq[i + 1:i + 1 + k] + "N" + seq[i + k + 2:] | 162 pattern = seq[:i] + "N" + seq[i + 1:i + 1 + k] + "N" + seq[i + k + 2:] |
161 assert len(pattern) == len(seq), "Len %s is %i, len %s is %i" \ | 163 assert len(pattern) == len(seq), ("Len %s is %i, len %s is %i" |
162 % (pattern, len(pattern), seq, len(seq)) | 164 % (pattern, len(pattern), seq, len(seq))) |
163 yield make_reg_ex(pattern) | 165 yield make_reg_ex(pattern) |
164 | 166 |
165 | 167 |
166 def load_primers_as_re(primer_fasta, mm, rc=False): | 168 def load_primers_as_re(primer_fasta, mm, rc=False): |
167 # Read primer file and record all specified sequences | 169 # Read primer file and record all specified sequences |