comparison yac.py @ 0:ad6b978daa2e draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
author artbio
date Wed, 26 Jul 2017 13:35:39 -0400
parents
children 7c913274e22a
comparison
equal deleted inserted replaced
-1:000000000000 0:ad6b978daa2e
1 #!/usr/bin/python
2 # yac = yet another clipper
3 # v 1.2.1 - 23-08-2014 - Support FastQ output
4 # v 1.1.0 - 23-08-2014 - argparse implementation
5 # Usage: yac.py --input $input [$input ...] --output $output --output_format $format --adapter_to_clip $adapter --min $min --max $max --Nmode $Nmode
6 # Christophe Antoniewski <drosofff@gmail.com>
7
8 import sys
9 import string
10 import argparse
11 from itertools import islice
12
13
def Parser():
    """Parse the command line and return the argparse namespace.

    The options --input, --output, --adapter_to_clip, --min and --max are
    mandatory: the rest of the script cannot run without them, so argparse
    reports a usage error instead of letting a None value raise a traceback
    further down.  --output_format and --Nmode stay optional and are None
    when omitted.

    Returns:
        The parsed namespace, with ``adapter_to_clip`` converted to upper
        case.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", nargs='+', required=True,
        help="input fastq files")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output, clipped fasta file")
    the_parser.add_argument(
        '--output_format', action="store", type=str,
        help="output format, fasta or fastq")
    the_parser.add_argument(
        '--adapter_to_clip', action="store", type=str, required=True,
        help="adapter sequence to clip")
    the_parser.add_argument(
        '--min', action="store", type=int, required=True,
        help="minimal size of clipped sequence to keep")
    the_parser.add_argument(
        '--max', action="store", type=int, required=True,
        help="maximal size of clipped sequence to keep")
    the_parser.add_argument(
        '--Nmode', action="store", type=str, choices=["accept", "reject"],
        help="accept or reject sequences with N for clipping")
    args = the_parser.parse_args()
    # Motif matching is case-sensitive and the substitution table only knows
    # upper-case bases, so normalise the adapter once here.
    args.adapter_to_clip = args.adapter_to_clip.upper()
    return args
33
34
class Clip:
    """Clip an adapter from the reads of one input file and write the result.

    The adapter is searched through a list of short motifs derived from its
    first bases (see ``_motives``).  A read is kept when its length after
    clipping lies between ``minsize`` and ``maxsize`` (both inclusive).
    """

    def __init__(self, inputfile, outputfile, output_format, adapter, minsize, maxsize, Nmode):
        """Store the settings and precompute the adapter search motifs.

        Args:
            inputfile: path of the file to read.
            outputfile: path of the file the kept reads are appended to.
            output_format: "fasta" selects FASTA records; any other value
                selects the FASTQ-style record.
            adapter: adapter sequence to clip.
            minsize: minimal clipped length to keep (converted with int()).
            maxsize: maximal clipped length to keep (converted with int()).
            Nmode: "reject" drops clipped reads that contain "N"; any other
                value keeps them.

        Raises:
            KeyError: if one of the first six adapter characters is not one
                of A, T, G, C.
        """
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.output_format = output_format
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.Nmode = Nmode
        self.adaptmotifs = self._motives(self.adapter)

    @staticmethod
    def _motives(sequence):
        """Return the search motifs for an adapter sequence.

        The first item is the perfect match on the first 6 nt.  It is
        followed by the first 7 nt of the adapter carrying one substitution
        at each of the first six positions (the three other bases, then N).
        """
        variants = [sequence[0:6]]
        substitutions = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}
        for position, base in enumerate(sequence[:6]):
            for subst in substitutions[base]:
                variants.append(
                    sequence[:position] + subst + sequence[position + 1:7])
        return variants

    def scanadapt(self, adaptmotives=None, sequence="", qscore=""):
        """Scan a sequence for adapter motifs and clip at the first hit.

        Motifs are tried in list order; for the first motif found, the
        sequence and the quality string are cut at its right-most occurrence.

        Args:
            adaptmotives: ordered motifs to look for; None or an empty list
                leaves the read untouched.
            sequence: read sequence.
            qscore: quality string, cut at the same position as the sequence.

        Returns:
            A (sequence, qscore) tuple, clipped when a motif was found and
            unchanged otherwise.
        """
        for motif in adaptmotives or ():
            match_position = sequence.rfind(motif)
            if match_position != -1:
                return sequence[:match_position], qscore[:match_position]
        return sequence, qscore

    def write_output(self, id, read, qscore, output):
        """Write one record to ``output`` in the configured format.

        Args:
            id: record number used as the record name.
            read: clipped sequence.
            qscore: clipped quality string (unused for FASTA output).
            output: writable file object.
        """
        if self.output_format == "fasta":
            block = ">{0}\n{1}\n".format(id, read)
        else:
            block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore)
        output.write(block)

    def handle_io(self):
        """Clip every read of the input file and append the kept ones.

        The output is opened in append mode, so existing content is
        preserved.  Record numbers start at 1 on every call.
        """
        kept = 0
        # Both handles are managed by the with statement so that they are
        # closed even when reading or clipping raises.
        with open(self.outputfile, "a") as output, \
                open(self.inputfile, "r") as fastq:
            # Keep every second line starting with the second one; this
            # assumes four-line FASTQ records, where those lines alternate
            # between the sequence and its quality string.
            seq_and_qual = islice(fastq, 1, None, 2)
            for index, line in enumerate(seq_and_qual):
                if index % 2 == 0:
                    read = line.rstrip()
                    continue
                qscore = line.rstrip()
                trimmed_read, trimmed_qscore = self.scanadapt(
                    self.adaptmotifs, read, qscore)
                if not self.minsize <= len(trimmed_read) <= self.maxsize:
                    continue
                if self.Nmode == "reject" and "N" in trimmed_read:
                    continue
                kept += 1
                self.write_output(kept, trimmed_read, trimmed_qscore, output)
97
98
def main(*argv):
    """Build a Clip from the positional arguments and process its input file.

    The arguments are forwarded unchanged to the Clip constructor.
    """
    Clip(*argv).handle_io()
102
if __name__ == "__main__":
    args = Parser()
    # Each input file is clipped in turn with the same settings.  Clip opens
    # the output in append mode, so the results of all inputs end up in the
    # single --output file.
    for inputfile in args.input:
        main(inputfile, args.output, args.output_format,
             args.adapter_to_clip, args.min, args.max, args.Nmode)
108 args.adapter_to_clip, args.min, args.max, args.Nmode)