annotate fasta2fastq.py @ 0:8ab09593f2eb draft default tip

planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Wed, 07 Oct 2020 18:48:40 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
1 #!/usr/bin/env python
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
2
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
3 """fasta2fastq.py
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
4
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
5 Author -- Monika Heinzl
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
6 Contact -- monika.heinzl@edumail.at
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
7
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
8 Takes a FASTA file and converts it to a FASTQ file with a fake quality score.
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
9
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
10 ======= ========== ================= ================================
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
11 Version Date Author Description
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
12 1.0.0 2020-10-07 Monika Heinzl
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
13 ======= ========== ================= ================================
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
14
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
15 USAGE: python fasta2fastq.py -i in.fasta -o out.fastq -s 40
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
16 """
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
17
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
18 import argparse
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
19 import sys, os
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
20 from Bio import SeqIO
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
21
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
22
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
def make_argparser():
    """Build the command-line parser for the FASTA-to-FASTQ converter.

    Options:
        -i / --infile   path of the input FASTA file (required).
        -o / --outfile  path of the output FASTQ file (required).
        -s / --score    integer quality score given to every base
                        (default 40).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description='Takes a fasta file and converts it to fastq file with a fake quality score.')
    parser.add_argument('-i', '--infile', required=True,
                        help='Input FASTA file.')
    parser.add_argument('-o', '--outfile', required=True,
                        help='Output FASTQ file.')
    parser.add_argument('-s', '--score', type=int, default=40,
                        help='Quality score added to each base in the read. Default 40.')
    return parser
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
32
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
def fasta2fastq(argv):
    """Convert a FASTA file to FASTQ, giving every base the same quality score.

    Args:
        argv: Full argument vector including the program name at index 0
            (i.e. ``sys.argv``); ``argv[1:]`` is handed to the parser built
            by ``make_argparser``.

    Returns:
        None.

    Exits (via ``sys.exit`` with an error message) when the input file does
    not exist, when the directory of the output file does not exist, or when
    the score is negative.
    """
    parser = make_argparser()
    args = parser.parse_args(argv[1:])

    infasta = args.infile
    outfastq = args.outfile
    score = args.score

    if not os.path.isfile(infasta):
        sys.exit("Error: Could not find '{}'".format(infasta))
    # The output file is created by open(..., "w") below, so it must not be
    # required to exist beforehand; only its parent directory has to exist.
    out_dir = os.path.dirname(os.path.abspath(outfastq))
    if not os.path.isdir(out_dir):
        sys.exit("Error: Could not find output directory '{}'".format(out_dir))
    if score < 0:
        sys.exit("Error: score is '{}', but only non-negative integers allowed".format(score))

    # make fastq
    with open(infasta, "r") as fasta, open(outfastq, "w") as fastq:
        for record in SeqIO.parse(fasta, "fasta"):
            # FASTA carries no qualities; attach one score per base so the
            # record can be serialised as FASTQ.
            record.letter_annotations["phred_quality"] = [score] * len(record)
            SeqIO.write(record, fastq, "fastq")
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
53
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
54
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
if __name__ == '__main__':
    # fasta2fastq returns None on success, which sys.exit maps to status 0;
    # error paths call sys.exit with a message inside the function.
    sys.exit(fasta2fastq(sys.argv))
8ab09593f2eb planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff changeset
57