Mercurial > repos > iuc > rcorrector
comparison FilterUncorrectabledPEfastq.py @ 1:6703b98884a2 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rcorrector commit 65ada0f9589f3ffebad1db6636ccb50d58082606"
author | iuc |
---|---|
date | Thu, 26 Dec 2019 05:21:50 -0500 |
parents | 9a0b65ad3c84 |
children |
comparison
equal
deleted
inserted
replaced
0:9a0b65ad3c84 | 1:6703b98884a2 |
---|---|
14 read pairs containing at least one corrected read. | 14 read pairs containing at least one corrected read. |
15 Currently, this script only handles paired-end data, and handles either unzipped | 15 Currently, this script only handles paired-end data, and handles either unzipped |
16 or gzipped files on the fly, so long as the gzipped files end with 'gz'. | 16 or gzipped files on the fly, so long as the gzipped files end with 'gz'. |
17 """ | 17 """ |
18 | 18 |
19 # import sys | |
20 import argparse | 19 import argparse |
21 import gzip | 20 import gzip |
22 from itertools import izip_longest | 21 from itertools import zip_longest |
23 # izip | |
24 from os.path import basename | 22 from os.path import basename |
25 | 23 |
26 | 24 |
27 def get_input_streams(r1file, r2file): | 25 def get_input_streams(r1file, r2file): |
28 if r1file[-2:] == 'gz': | 26 if r1file[-2:] == 'gz': |
36 | 34 |
37 def grouper(iterable, n, fillvalue=None): | 35 def grouper(iterable, n, fillvalue=None): |
38 "Collect data into fixed-length chunks or blocks" | 36 "Collect data into fixed-length chunks or blocks" |
39 # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx | 37 # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx |
40 args = [iter(iterable)] * n | 38 args = [iter(iterable)] * n |
41 return izip_longest(fillvalue=fillvalue, * args) | 39 return zip_longest(fillvalue=fillvalue, * args) |
42 | 40 |
43 | 41 |
44 if __name__ == "__main__": | 42 if __name__ == "__main__": |
45 parser = argparse.ArgumentParser(description="options for filtering and logging rCorrector fastq outputs") | 43 parser = argparse.ArgumentParser(description="options for filtering and logging rCorrector fastq outputs") |
46 parser.add_argument('-1', '--left_reads', dest='leftreads', type=str, help='R1 fastq file') | 44 parser.add_argument('-1', '--left_reads', dest='leftreads', type=str, help='R1 fastq file') |
59 R2 = grouper(f2, 4) | 57 R2 = grouper(f2, 4) |
60 counter = 0 | 58 counter = 0 |
61 for entry in R1: | 59 for entry in R1: |
62 counter += 1 | 60 counter += 1 |
63 if counter % 100000 == 0: | 61 if counter % 100000 == 0: |
64 print "%s reads processed" % counter | 62 print("%s reads processed" % counter) |
65 head1, seq1, placeholder1, qual1 = [i.strip() for i in entry] | 63 head1, seq1, placeholder1, qual1 = [i.strip() for i in entry] |
66 head2, seq2, placeholder2, qual2 = [j.strip() for j in R2.next()] | 64 head2, seq2, placeholder2, qual2 = [j.strip() for j in next(R2)] |
67 if 'unfixable' in head1 or 'unfixable' in head2: | 65 if 'unfixable' in head1 or 'unfixable' in head2: |
68 unfix_count += 1 | 66 unfix_count += 1 |
69 else: | 67 else: |
70 if 'cor' in head1: | 68 if 'cor' in head1: |
71 r1_cor_count += 1 | 69 r1_cor_count += 1 |