view fasta_to_fastq.py @ 1:60e768cf5101 draft

Uploaded
author p.lucas
date Tue, 23 Jul 2024 14:28:53 +0000
parents
children 2f317786c4e7
line wrap: on
line source

#!/usr/bin/python
#-*- coding: utf-8 -*-

"""

Script that converts a FASTA file into a FASTQ file, giving every base of every read the same fake quality score.
Written by Pierrick Lucas.
Usage : python fasta_to_fastq.py -i sequences.fasta -s 40 -o output_file.fastq

"""

# Import
import argparse
import sys
import os
from Bio import SeqIO

##### MAIN
def __main__():
  """Convert a FASTA file to FASTQ, giving every base the same quality score.

  Command-line options (read from sys.argv):
    -i / --infile   input FASTA file (required)
    -s / --score    quality score assigned to every base (default: 40)
    -o / --outfile  output FASTQ file (required)

  Exit behaviour:
    * no argument at all: prints the help and exits with status 1;
    * missing -i or -o, or a non-integer score: argparse reports the error;
    * negative score: exits with an error message (status 1) without
      creating or truncating the output file.
  """
  # Options :
  parser = argparse.ArgumentParser(description="""Takes a fasta file and converts it to fastq file with a fake quality score.""",
                                   epilog="""This script need few options, use -h to see it.""")
  parser.add_argument("-i", "--infile", dest="infile", required=True, help="Input fasta file.")
  parser.add_argument("-s", "--score", type=int, default=40, dest="score", help="Quality score you wanted for each base in all reads. (default: 40)")
  parser.add_argument("-o", "--outfile", dest="outfile", required=True, help="Output file in fastq format.")

  # Bare invocation: show the full help instead of a terse argparse error.
  # Required options are enforced by argparse itself, so every valid spelling
  # ("-i x", "--infile x", "--infile=x") is accepted.
  if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

  # Get options :
  options = parser.parse_args()
  infile = options.infile
  score = options.score
  outfile = options.outfile

  # Check score before touching any file, so that a bad value never leaves
  # an empty (or truncated) output file behind.
  if score < 0:
    sys.exit("Error: only positive integers for the score (got {0})".format(score))

  # Create fastq output file. The input is opened first so that a missing
  # input does not create the output; both handles are closed even when
  # parsing or writing fails.
  with open(infile, "r") as inf, open(outfile, "w") as outputfile:
    for record in SeqIO.parse(inf, "fasta"):
      # One identical quality value per base of the record.
      record.letter_annotations["phred_quality"] = [score] * len(record)
      SeqIO.write(record, outputfile, "fastq")

#### MAIN END
if __name__ == "__main__":
  __main__()