0
|
1 import itertools
|
|
2 from sys import argv, exit
|
|
3 from itertools import zip_longest
|
|
4
|
|
def grouper(iterable, n, fillvalue=None):
    """Collect the items of *iterable* into consecutive tuples of length *n*.

    When the input runs out before the last tuple is full, the missing
    positions are padded with *fillvalue*; for example
    ``grouper("ABCDEFG", 3, "x")`` yields ``ABC``, ``DEF`` and ``Gxx``.
    """
    # Every slot of the tuple refers to the very same iterator, so
    # zip_longest pulls n successive items for each tuple it produces.
    shared = iter(iterable)
    slots = (shared,) * n
    return zip_longest(*slots, fillvalue=fillvalue)
|
|
8
|
|
9
|
|
# Number of input lines that make up one record
# (header, sequence, separator, quality).
chunk_size = 4
|
|
11
|
|
12
|
|
def trimandpaste(pathToFastaFile, output):
    """Move the UMI of every read from its sequence into its header.

    The input is consumed in records of ``chunk_size`` lines laid out as
    header, sequence, separator, quality.  For each record the first 2 and
    the last 5 bases of the sequence are cut off and joined into a 7-nt
    UMI.  The UMI is appended to the first header field as ``_<UMI>`` and
    the same positions are removed from the quality line, so sequence and
    quality keep equal length.  Reads shorter than 7 bases are not
    special-cased.

    Args:
        pathToFastaFile: Path of the input file.
        output: Path of the file to write; created or overwritten.

    Raises:
        ValueError: If the last record has fewer lines than a full record.
    """
    umi_5p_len = 2  # leading bases that belong to the UMI
    umi_3p_len = 5  # trailing bases that belong to the UMI

    # The input is opened first so that a missing input file does not leave
    # an empty output file behind; `with` closes both files even on errors.
    with open(pathToFastaFile) as reads, open(output, "w") as trimmed:
        records = grouper(reads, chunk_size, "")
        for number, record in enumerate(records, start=1):
            if not any(line.strip() for line in record):
                # Only trailing blank lines / padding: nothing to convert.
                continue
            if "" in record:
                # grouper pads a short final chunk with "" entries.
                raise ValueError(
                    "record {} in {!r} is incomplete".format(
                        number, pathToFastaFile
                    )
                )
            header, seq, sep, qual = record[:4]

            # Slice the bare content: with the newline still attached, the
            # 3' slice would count it as a base and take only 4 nucleotides.
            bases = seq.rstrip("\n")
            scores = qual.rstrip("\n")
            umi = bases[:umi_5p_len] + bases[-umi_3p_len:]

            # Split on the first space only, so the complete description
            # (and the line ending) survives; headers without a description
            # are accepted as well.
            header_text = header.rstrip("\n")
            header_eol = header[len(header_text):]
            read_id, space, description = header_text.partition(" ")

            trimmed.write(read_id + "_" + umi + space + description + header_eol)
            trimmed.write(bases[umi_5p_len:-umi_3p_len] + "\n")
            trimmed.write(sep)
            # The quality line keeps whatever line ending it had (the last
            # line of a file may come without one).
            trimmed.write(scores[umi_5p_len:-umi_3p_len] + qual[len(scores):])
|
|
36
|
|
def main():
    """Command-line entry point.

    Expects exactly two arguments after the script name (the input path
    and the output file name).  With any other argument count the script
    exits with a usage message; otherwise both values are handed to
    ``trimandpaste``.
    """
    if len(argv) == 3:
        source_path, target_name = argv[1], argv[2]
        trimandpaste(source_path, target_name)
        return

    exit("Usage: 2 arguments required\n1: Path to fasta file \n2: name of output file")
|
|
46
|
|
# Run the command-line interface only when this file is executed as a
# script; importing it as a module must not start any processing.
if __name__ == "__main__":
    main()
|