Mercurial > repos > mheinzl > fasta2fastq
annotate fasta2fastq.py @ 0:8ab09593f2eb draft default tip
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Wed, 07 Oct 2020 18:48:40 +0000 |
parents | |
children |
rev | line source |
---|---|
0
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
2 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
3 """fasta2fastq.py |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
4 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
5 Author -- Monika Heinzl |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
6 Contact -- monika.heinzl@edumail.at |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
7 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
8 Takes a FASTA file and converts it to a FASTQ file with a fake quality score. |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
9 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
10 ======= ========== ================= ================================ |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
11 Version Date Author Description |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
12 1.0.0 2020-10-07 Monika Heinzl |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
13 ======= ========== ================= ================================ |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
14 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
15 USAGE: python fasta2fastq.py -i in.fasta -o out.fastq -s 40 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
16 """ |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
17 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
18 import argparse |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
19 import sys, os |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
20 from Bio import SeqIO |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
21 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
22 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
def make_argparser():
    """Build the command-line parser for the FASTA-to-FASTQ converter.

    Options defined on the parser:
        -i / --infile   (required) path of the input FASTA file.
        -o / --outfile  (required) path of the output FASTQ file.
        -s / --score    integer quality score for each base; default 40.

    Returns:
        argparse.ArgumentParser: the configured parser. Nothing is parsed
        here; the caller invokes ``parse_args`` itself.
    """
    parser = argparse.ArgumentParser(
        description='Takes a fasta file and converts it to fastq file with a fake quality score.')
    parser.add_argument('-i', '--infile', required=True,
                        help='Input FASTA file.')
    parser.add_argument('-o', '--outfile', required=True,
                        help='Output FASTQ file.')
    parser.add_argument('-s', '--score', type=int, default=40,
                        help='Quality score added to each base in the read. Default 40.')
    return parser
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
32 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
def fasta2fastq(argv):
    """Convert a FASTA file to FASTQ, giving every base the same quality score.

    Args:
        argv: Argument vector laid out like ``sys.argv``. Element 0 (the
            program name) is skipped; the remainder is parsed with the
            parser returned by ``make_argparser()``.

    Returns:
        None. The FASTQ records are written to the path given by
        ``-o/--outfile``, which is created or overwritten.

    Exits:
        Calls ``sys.exit`` with an error message when the input file does
        not exist, the output directory does not exist, or the score is
        outside the range 0-93.
    """
    parser = make_argparser()
    args = parser.parse_args(argv[1:])

    infasta = args.infile
    outfastq = args.outfile
    score = args.score

    # Sanger-style FASTQ stores each score as a single printable ASCII
    # character (score + 33), so 93 ('~') is the largest encodable value.
    max_score = 93

    if not os.path.isfile(infasta):
        sys.exit("Error: Could not find '{}'".format(infasta))
    # The output file is opened in write mode below, so it does not have to
    # exist beforehand; only the directory it lives in must be present.
    out_dir = os.path.dirname(os.path.abspath(outfastq))
    if not os.path.isdir(out_dir):
        sys.exit("Error: Could not find output directory '{}'".format(out_dir))
    if score < 0:
        sys.exit("Error: score is '{}', but only non-negative integers allowed".format(score))
    if score > max_score:
        sys.exit("Error: score is '{}', but FASTQ can only encode scores up to {}".format(score, max_score))

    def with_fake_quality(records):
        """Yield each record after attaching the constant per-base quality."""
        for record in records:
            record.letter_annotations["phred_quality"] = [score] * len(record)
            yield record

    # make fastq: records are streamed one at a time, so the whole input is
    # never held in memory.
    with open(infasta, "r") as fasta, open(outfastq, "w") as fastq:
        SeqIO.write(with_fake_quality(SeqIO.parse(fasta, "fasta")), fastq, "fastq")
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
53 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
54 |
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
if __name__ == '__main__':
    # Run the converter only when executed as a script. fasta2fastq()
    # returns None on success, which sys.exit() turns into exit status 0.
    sys.exit(fasta2fastq(sys.argv))
8ab09593f2eb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
mheinzl
parents:
diff
changeset
|
57 |