annotate fasta_to_fastq.py @ 6:0649236c92ee draft default tip

Uploaded
author p.lucas
date Wed, 24 Jul 2024 13:57:37 +0000
parents 2f317786c4e7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
1 #!/usr/bin/python
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
2 #-*- coding: utf-8 -*-
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
3
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
4 """
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
5
3
2f317786c4e7 Uploaded
p.lucas
parents: 1
diff changeset
6 Convert fasta file to fastq file with given score
2f317786c4e7 Uploaded
p.lucas
parents: 1
diff changeset
7 Written by Pierrick Lucas.
6
0649236c92ee Uploaded
p.lucas
parents: 3
diff changeset
8 Usage : python fasta_to_fastq.py -i sequences.fasta -s 40 -o output_file.fastq
1
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
9
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
10 """
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
11
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
12 # Import
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
13 import argparse
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
14 import sys
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
15 import os
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
16 from Bio import SeqIO
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
17
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
18 ##### MAIN
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
def __main__():
    """Convert a FASTA file to FASTQ, giving every base the same quality score.

    Options are read from ``sys.argv``:

    * ``-i/--infile``: input fasta file.
    * ``-s/--score``: integer stored as the ``phred_quality`` of every base
      of every record (default: 40).
    * ``-o/--outfile``: fastq file to write.

    Prints the help text and exits with status 1 when the input or output
    path is not supplied. Exits with an error message when the score is
    negative. Neither case creates or truncates the output file.
    """
    # Options :
    parser = argparse.ArgumentParser(description="""Take a fasta file and converts it to fastq file with a given quality score.""",
                                     epilog="""This script need few options, use -h to see it.""")
    parser.add_argument("-i", "--infile", dest="infile", help="Input fasta file.")
    parser.add_argument("-s", "--score", type=int, default=40, dest="score", help="Quality score you wanted for each base in all reads. (default: 40)")
    parser.add_argument("-o", "--outfile", dest="outfile", help="Output file in fastq format.")

    # Get options :
    options = parser.parse_args()
    infile = options.infile
    score = options.score
    outfile = options.outfile

    # Validate what was parsed instead of counting sys.argv entries: this
    # accepts equivalent spellings such as "--infile=x" and catches a missing
    # path before it reaches open().
    if infile is None or outfile is None:
        parser.print_help()
        sys.exit(1)

    # Check score before any file is opened, so a rejected score cannot leave
    # an empty or truncated output file behind.
    if score < 0:
        sys.exit("Error: only positive integers for the score (got {0})".format(score))

    # Create fastq output file. The input is opened first so that an
    # unreadable input fails before the output path is touched; both handles
    # are closed by the context manager even if parsing or writing raises.
    with open(infile, "r") as inf, open(outfile, "w") as outputfile:
        for record in SeqIO.parse(inf, "fasta"):
            record.letter_annotations["phred_quality"] = [score] * len(record)
            SeqIO.write(record, outputfile, "fastq")
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
51
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
52 #### MAIN END
60e768cf5101 Uploaded
p.lucas
parents:
diff changeset
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    __main__()