comparison FilterUncorrectabledPEfastq.py @ 1:6703b98884a2 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rcorrector commit 65ada0f9589f3ffebad1db6636ccb50d58082606"
author iuc
date Thu, 26 Dec 2019 05:21:50 -0500
parents 9a0b65ad3c84
children
comparison
equal deleted inserted replaced
0:9a0b65ad3c84 1:6703b98884a2
14 read pairs containing at least one corrected read. 14 read pairs containing at least one corrected read.
15 Currently, this script only handles paired-end data, and handle either unzipped 15 Currently, this script only handles paired-end data, and handle either unzipped
16 or gzipped files on the fly, so long as the gzipped files end with 'gz'. 16 or gzipped files on the fly, so long as the gzipped files end with 'gz'.
17 """ 17 """
18 18
19 # import sys
20 import argparse 19 import argparse
21 import gzip 20 import gzip
22 from itertools import izip_longest 21 from itertools import zip_longest
23 # izip
24 from os.path import basename 22 from os.path import basename
25 23
26 24
27 def get_input_streams(r1file, r2file): 25 def get_input_streams(r1file, r2file):
28 if r1file[-2:] == 'gz': 26 if r1file[-2:] == 'gz':
36 34
37 def grouper(iterable, n, fillvalue=None): 35 def grouper(iterable, n, fillvalue=None):
38 "Collect data into fixed-length chunks or blocks" 36 "Collect data into fixed-length chunks or blocks"
39 # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx 37 # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
40 args = [iter(iterable)] * n 38 args = [iter(iterable)] * n
41 return izip_longest(fillvalue=fillvalue, * args) 39 return zip_longest(fillvalue=fillvalue, * args)
42 40
43 41
44 if __name__ == "__main__": 42 if __name__ == "__main__":
45 parser = argparse.ArgumentParser(description="options for filtering and logging rCorrector fastq outputs") 43 parser = argparse.ArgumentParser(description="options for filtering and logging rCorrector fastq outputs")
46 parser.add_argument('-1', '--left_reads', dest='leftreads', type=str, help='R1 fastq file') 44 parser.add_argument('-1', '--left_reads', dest='leftreads', type=str, help='R1 fastq file')
59 R2 = grouper(f2, 4) 57 R2 = grouper(f2, 4)
60 counter = 0 58 counter = 0
61 for entry in R1: 59 for entry in R1:
62 counter += 1 60 counter += 1
63 if counter % 100000 == 0: 61 if counter % 100000 == 0:
64 print "%s reads processed" % counter 62 print("%s reads processed" % counter)
65 head1, seq1, placeholder1, qual1 = [i.strip() for i in entry] 63 head1, seq1, placeholder1, qual1 = [i.strip() for i in entry]
66 head2, seq2, placeholder2, qual2 = [j.strip() for j in R2.next()] 64 head2, seq2, placeholder2, qual2 = [j.strip() for j in next(R2)]
67 if 'unfixable' in head1 or 'unfixable' in head2: 65 if 'unfixable' in head1 or 'unfixable' in head2:
68 unfix_count += 1 66 unfix_count += 1
69 else: 67 else:
70 if 'cor' in head1: 68 if 'cor' in head1:
71 r1_cor_count += 1 69 r1_cor_count += 1