annotate deinterlacer.py @ 7:89c5ba120b21 draft

Uploaded
author petr-novak
date Mon, 02 Dec 2019 08:41:43 -0500
parents e320ef2d105a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python3
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
2 '''very simple deinterlacer - fasta and fastq'''
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
3 import sys
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
4 import itertools
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
5
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
6
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
def is_header(line, counter, fasta):
    '''Return True if line is a record header.

    line    -- a single line of the input file
    counter -- line counter maintained by the caller; only consulted for
               FASTQ input, where a header is accepted only when the
               counter equals 4
    fasta   -- True to apply the FASTA rule, False to apply the FASTQ rule

    FASTA: any line starting with ">" is a header.
    FASTQ: a line starting with "@" is a header only when counter == 4
    (presumably so that a quality line which happens to begin with "@"
    is not mistaken for a header).

    An empty line is never a header.
    '''
    # startswith() is used instead of line[0] so that an empty string
    # yields False rather than raising IndexError.
    if fasta:
        return line.startswith(">")
    return counter == 4 and line.startswith("@")
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
16
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
17
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
def main():
    '''Deinterlace a FASTA or FASTQ file into two output files.

    Command line arguments (taken from sys.argv):
        1 -- interlaced input file
        2 -- first output file
        3 -- second output file

    The format is detected from the first character of the input: ">"
    selects FASTA, anything else is handled as FASTQ.  Each time a header
    line is found the output switches to the other file, so records are
    written alternately to the first and the second output.

    An empty input produces two empty output files.

    Raises ValueError if the input does not begin with a header line.
    '''
    infile = sys.argv[1]
    file_a = sys.argv[2]
    file_b = sys.argv[3]
    with open(infile) as f, open(file_a, 'w') as A, open(file_b, 'w') as B:
        first_line = f.readline()
        if not first_line:
            # Empty input: both outputs already exist (empty), nothing to do.
            return
        is_fasta = first_line.startswith(">")
        ABiter = itertools.cycle([A, B])
        fout = None  # no output selected until the first header is seen
        # Starts at 3 so that the first line is seen with counter == 4,
        # which is what is_header() requires of a FASTQ header
        # (four lines per record in fastq).
        counter = 3
        # The first line was consumed for format detection; chain it back
        # in front of the remaining lines instead of seeking, so the input
        # does not have to be seekable.
        for line in itertools.chain([first_line], f):
            counter += 1
            if is_header(line, counter, is_fasta):
                fout = next(ABiter)
                counter = 0
            elif fout is None:
                raise ValueError(
                    "input must start with a FASTA ('>') or FASTQ ('@') "
                    "header line")
            fout.write(line)
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
35
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
36
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
# Run the deinterlacer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()