comparison UMI_riboseq_processing/UMI.py @ 0:ef98c6fad2a2 draft

Uploaded
author triasteran
date Sun, 19 Jun 2022 11:29:41 +0000
parents
children 5d0d5933d370
comparison
equal deleted inserted replaced
-1:000000000000 0:ef98c6fad2a2
1 import itertools
2 from sys import argv, exit
3 from itertools import zip_longest
4
def grouper(iterable, n, fillvalue=None):
    """Yield successive ``n``-item tuples taken from ``iterable``.

    The last tuple is padded with ``fillvalue`` when the input runs out
    before it is full.
    """
    # Every slot refers to the SAME iterator, so filling one tuple
    # consumes n consecutive items from the input.
    source = iter(iterable)
    slots = (source,) * n
    return zip_longest(*slots, fillvalue=fillvalue)
8
9
# Lines consumed per read record: header, sequence, separator, quality.
chunk_size = 4
11
12
def _split_umi(line):
    """Return ``(umi, kept)`` for a sequence or quality line without its newline.

    The slicing assumes this layout: 2 leading UMI characters, the footprint,
    5 trailing UMI characters and finally a 5-character barcode.  ``umi`` is
    the two UMI pieces joined (7 characters); ``kept`` is footprint + barcode.
    """
    umi = line[:2] + line[-10:-5]
    kept = line[2:-10] + line[-5:]
    return umi, kept


def trimandpaste(pathToFastaFile, output):
    """Move the 7-nt UMI of every read from the sequence into the read name.

    The input is read in four-line records (header, sequence, separator,
    quality).  For each record the UMI (first 2 nt plus the 5 nt preceding
    the last 5 nt) is removed from the sequence and quality lines and
    appended to the first header field as ``<name>_<UMI>``; the footprint
    followed by the barcode is written as the new sequence.

    Args:
        pathToFastaFile: Path of the input file (four lines per read).
        output: Path of the file to write; an existing file is overwritten.

    Raises:
        ValueError: If the input ends in the middle of a record.
    """
    # Both handles are managed by ``with`` so they are closed even when a
    # malformed record raises part-way through the file.
    with open(pathToFastaFile) as reads, open(output, "w") as trimmed:
        # "" pads a final record that has fewer lines than a full chunk.
        records = grouper(reads, chunk_size, "")
        for record_number, lines in enumerate(records, start=1):
            if not any(line.strip() for line in lines):
                continue  # tolerate blank lines padding the end of the file
            if "" in lines:
                raise ValueError(
                    "Incomplete record {} in {}: expected {} lines per read".format(
                        record_number, pathToFastaFile, chunk_size
                    )
                )
            header, seq, sep, qual = lines[0], lines[1], lines[2], lines[3]

            umi, trimmed_seq = _split_umi(seq.rstrip("\n"))

            # Strip the newline before slicing so the same offsets apply
            # whether or not the last line of the file is newline-terminated;
            # the original terminator (if any) is written back unchanged.
            qual_body = qual.rstrip("\n")
            qual_ending = qual[len(qual_body):]
            _, trimmed_qual = _split_umi(qual_body)

            # Split on the FIRST space only: everything after it (including
            # the trailing newline) is kept verbatim as the description.
            name, space, description = header.partition(" ")
            if space:
                new_header = name + "_" + umi + " " + description
            else:
                # Header without a description: the name carries the newline.
                new_header = name.rstrip("\n") + "_" + umi + "\n"

            trimmed.write(new_header)
            trimmed.write(trimmed_seq + "\n")
            trimmed.write(sep)
            trimmed.write(trimmed_qual + qual_ending)
36
def main():
    """Command-line entry point.

    Expects exactly two arguments after the script name (input path, output
    path) and hands them to ``trimandpaste``; otherwise exits with a usage
    message.
    """
    arguments = argv[1:]
    if len(arguments) != 2:
        exit("Usage: 2 arguments required\n1: Path to fasta file \n2: name of output file")

    # Positional arguments: where to read the reads from, where to write them.
    source_path, destination_path = arguments
    trimandpaste(source_path, destination_path)
46
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()