Mercurial > repos > artbio > yac_clipper
comparison yac.py @ 0:ad6b978daa2e draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/yac_clipper commit e9cf2978954c546bb90eb11931f9cfd6562156f3
author | artbio |
---|---|
date | Wed, 26 Jul 2017 13:35:39 -0400 |
parents | |
children | 7c913274e22a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ad6b978daa2e |
---|---|
1 #!/usr/bin/python | |
2 # yac = yet another clipper | |
3 # v 1.2.1 - 23-08-2014 - Support FastQ output | |
4 # v 1.1.0 - 23-08-2014 - argparse implementation | |
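5 # Usage: yac.py --input $input [$input ...] --output $output --output_format $output_format --adapter_to_clip $adapter_to_clip --min $min --max $max --Nmode $Nmode | |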
6 # Christophe Antoniewski <drosofff@gmail.com> | |
7 | |
8 import sys | |
9 import string | |
10 import argparse | |
11 from itertools import islice | |
12 | |
13 | |
def Parser():
    """Parse the command line and return the populated argparse namespace.

    The options the rest of the script cannot run without (--input,
    --output, --adapter_to_clip, --min, --max) are declared as required, so
    that a missing one is reported by argparse as a usage error instead of
    surfacing later as an AttributeError/TypeError on a None value.

    --output_format and --Nmode stay optional: when they are omitted the
    clipper writes FASTQ-style records and keeps reads containing N.

    Returns:
        The argparse namespace, with ``adapter_to_clip`` converted to upper
        case.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", nargs='+', required=True,
        help="input fastq files")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output file for the clipped reads (fasta or fastq)")
    the_parser.add_argument(
        '--output_format', action="store", type=str,
        help="output format, fasta or fastq")
    the_parser.add_argument(
        '--adapter_to_clip', action="store", type=str, required=True,
        help="adapter sequence to clip")
    the_parser.add_argument(
        '--min', action="store", type=int, required=True,
        help="minimal size of clipped sequence to keep")
    the_parser.add_argument(
        '--max', action="store", type=int, required=True,
        help="maximal size of clipped sequence to keep")
    the_parser.add_argument(
        '--Nmode', action="store", type=str, choices=["accept", "reject"],
        help="accept or reject sequences with N for clipping")
    args = the_parser.parse_args()
    # Adapter motifs are built from upper-case bases only.
    args.adapter_to_clip = args.adapter_to_clip.upper()
    return args
33 | |
34 | |
class Clip:
    """Clip an adapter from the reads of one FASTQ file.

    A read is cut at the position where an adapter motif is found; the
    clipped read is written to the output file when its length lies within
    [minsize, maxsize] and, in "reject" mode, when it contains no "N".
    """

    def __init__(self, inputfile, outputfile, output_format, adapter, minsize, maxsize, Nmode):
        """Store the clipping settings and precompute the adapter motifs.

        Args:
            inputfile: path of the FASTQ file to read.
            outputfile: path of the file clipped reads are appended to.
            output_format: "fasta" for FASTA records; any other value
                produces FASTQ-style records.
            adapter: adapter sequence; its first six characters must be
                upper-case A, T, G or C (otherwise KeyError is raised).
            minsize: smallest clipped-read length to keep (int-convertible).
            maxsize: largest clipped-read length to keep (int-convertible).
            Nmode: "reject" drops clipped reads containing "N"; any other
                value keeps them.
        """
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.output_format = output_format
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.Nmode = Nmode
        self.adaptmotifs = self._motives(self.adapter)

    @staticmethod
    def _motives(sequence):
        """Return the search motifs derived from the start of `sequence`.

        The first motif is the perfect 6-mer (sequence[0:6]).  It is
        followed by the 7-mers obtained from sequence[0:7] by replacing, one
        position at a time, each of the first six bases with every other
        base and with "N".

        Raises:
            KeyError: if one of the first six characters is not A, T, G or C.
        """
        substitutions = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}
        variants = [sequence[0:6]]
        for position, base in enumerate(sequence[:6]):
            for replacement in substitutions[base]:
                variants.append(
                    sequence[:position] + replacement + sequence[position + 1:7])
        return variants

    def scanadapt(self, adaptmotives=None, sequence="", qscore=""):
        """Clip `sequence` and `qscore` at the first adapter motif that matches.

        Motifs are tried in list order, so the perfect 6-mer (first item)
        takes precedence over the mismatch variants.  For the motif that
        matches, the cut is made at its rightmost occurrence (str.rfind).

        Args:
            adaptmotives: motifs to search for; None means the motifs
                computed from this instance's adapter.  An empty list leaves
                the read untouched.
            sequence: read sequence.
            qscore: quality string, clipped at the same position.

        Returns:
            A (sequence, qscore) tuple, unchanged when no motif is found.
        """
        if adaptmotives is None:
            adaptmotives = self.adaptmotifs
        for motif in adaptmotives:
            match_position = sequence.rfind(motif)
            if match_position != -1:
                return sequence[:match_position], qscore[:match_position]
        return sequence, qscore

    def write_output(self, id, read, qscore, output):
        """Write one record to the open file object `output`.

        A FASTA record (">id") is written when output_format is "fasta";
        otherwise a four-line FASTQ record with an "@HWI-id" header.
        """
        if self.output_format == "fasta":
            block = ">{0}\n{1}\n".format(id, read)
        else:
            block = "@HWI-{0}\n{1}\n+\n{2}\n".format(id, read, qscore)
        output.write(block)

    def handle_io(self):
        """Clip every read of the input file and append the kept ones to the output.

        The output file is opened in append mode, so successive calls on
        different inputs accumulate in the same file.  Record identifiers
        restart at 1 on every call.
        """
        read_count = 0
        # Context managers guarantee both files are closed even if clipping
        # or writing raises.
        with open(self.outputfile, "a") as output:
            with open(self.inputfile, "r") as fastq:
                # Lines 2 and 4 of each 4-line FASTQ record: sequence, quality.
                seq_and_qual = islice(fastq, 1, None, 2)
                for read_line in seq_and_qual:
                    qscore_line = next(seq_and_qual, None)
                    if qscore_line is None:
                        # Truncated last record: sequence without quality.
                        break
                    trimmed_read, trimmed_qscore = self.scanadapt(
                        self.adaptmotifs, read_line.rstrip(),
                        qscore_line.rstrip())
                    if not self.minsize <= len(trimmed_read) <= self.maxsize:
                        continue
                    if self.Nmode == "reject" and "N" in trimmed_read:
                        continue
                    read_count += 1
                    self.write_output(
                        read_count, trimmed_read, trimmed_qscore, output)
97 | |
98 | |
def main(*argv):
    """Build a Clip from the positional arguments and process its input file.

    The arguments are forwarded unchanged to the Clip constructor.
    """
    Clip(*argv).handle_io()
102 | |
if __name__ == "__main__":
    args = Parser()
    # Each input file is clipped with the same settings; the output file is
    # opened in append mode, so the results of all inputs end up in it.
    for inputfile in args.input:
        main(inputfile, args.output, args.output_format,
             args.adapter_to_clip, args.min, args.max, args.Nmode)