# HG changeset patch
# User mheinzl
# Date 1602096520 0
# Node ID 8ab09593f2ebb67e75482c62f0b13a9886d61dcb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8

diff -r 000000000000 -r 8ab09593f2eb fasta2fastq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2fastq.py Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+"""fasta2fastq.py
+
+Author -- Monika Heinzl
+Contact -- monika.heinzl@edumail.at
+
+Takes a fasta file and converts it to fastq file with a fake quality score.
+
+=======  ==========  =================  ================================
+Version  Date        Author             Description
+1.0.0    2020-10-07  Monika Heinzl
+=======  ==========  =================  ================================
+
+USAGE: python fasta2fastq.py -i in.fasta -o out.fastq -s 40
+"""
+
+import argparse
+import os
+import sys
+
+from Bio import SeqIO
+
+
+def make_argparser():
+    """Build the command-line parser for the FASTA-to-FASTQ converter.
+
+    Returns an argparse.ArgumentParser with the required -i/--infile and
+    -o/--outfile options and the optional integer -s/--score (default 40).
+    """
+    parser = argparse.ArgumentParser(description='Takes a fasta file and converts it to fastq file with a fake quality score.')
+    parser.add_argument('-i', '--infile', required=True,
+                        help='Input FASTA file.')
+    parser.add_argument('-o', '--outfile', required=True,
+                        help='Output FASTQ file.')
+    parser.add_argument('-s', '--score', type=int, default=40,
+                        help='Quality score added to each base in the read. Default 40.')
+    return parser
+
+
+def fasta2fastq(argv):
+    """Convert a FASTA file to FASTQ, giving every base the same quality.
+
+    argv is the full argument vector including the program name (as in
+    sys.argv); everything after the first element is parsed with
+    make_argparser(). Exits through sys.exit() with an error message when
+    the input file does not exist or the score is negative. Returns None.
+    """
+    args = make_argparser().parse_args(argv[1:])
+
+    infasta = args.infile
+    outfastq = args.outfile
+    score = args.score
+
+    if not os.path.isfile(infasta):
+        sys.exit("Error: Could not find '{}'".format(infasta))
+    # The output file is deliberately not required to exist beforehand:
+    # opening it in "w" mode below creates it (or truncates an existing one).
+    if score < 0:
+        sys.exit("Error: score is '{}', but only non-negative integers allowed".format(score))
+
+    # make fastq
+    with open(infasta, "r") as fasta, open(outfastq, "w") as fastq:
+        for record in SeqIO.parse(fasta, "fasta"):
+            # One identical PHRED value per base of the record.
+            record.letter_annotations["phred_quality"] = [score] * len(record)
+            SeqIO.write(record, fastq, "fastq")
+
+
+if __name__ == '__main__':
+    sys.exit(fasta2fastq(sys.argv))
+
diff -r 000000000000 -r 8ab09593f2eb fasta2fastq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fasta2fastq.xml Wed Oct 07 18:48:40 2020 +0000 @@ -0,0 +1,36 @@ + + + Convert a FASTA file to a FASTQ file + + macros.xml + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 8ab09593f2eb macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Oct 07 18:48:40 2020 +0000 @@ -0,0 +1,12 @@ + + + + +@misc{duplex, + author = {Heinzl Monika, monika.heinzl@edumail.at}, + year = {2020}, + } + + + + diff -r 000000000000 -r 8ab09593f2eb test-data/Reads_in.fasta Binary file test-data/Reads_in.fasta has changed diff -r 000000000000 -r 8ab09593f2eb test-data/Reads_out.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Reads_out.fastq Wed Oct 07 18:48:40 2020 +0000 @@ -0,0 +1,40 @@ +@AATTTTTTAAAAGTGTGTAGACTC 3-1 +TGGAGTGAGTTTGGATGGGGTGGCCAGGTCTGAGAAGGTCCCCCGCCAGTGTCCTCTGACCCATCTGCTCTCTCCTGCCAGTGTGCACCGGCACAGACATGAAGCTGCGGCTCCCTGCCAGAGTCTACACACA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@ACAGATTGTTCCGCAAATACAATT 2-6 
+CTGGTAGATGAGCTGCGGTGCCTGTGGTGGACCATGCCCCCAGCGCCCGGGGCAGGGTCTGGACAGAAGAAGCCCTGCTGGGGTACCAGATACTCCTCAGCATCCACCAGGTCCCCCATGTCATCGTCCTCCAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@ACCTAAGCTCACGAACCGATTGAA 6-3 +CACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGAGATGCAGTAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@ACTTCTGGTAATTAATAGGATATA 1-3 +GCTGTTTGTGCCTCTCTCTGTTACTAACCCGTCCTCTCGCTGTTAGACATCTCTCTCACTGCCTGTCTCTGGTTCTGTCCTCAGGCCACCCCTGTTAGTCATATATCCTATTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@AGTGCTTATTCATAAGTAACCTTA 4-8 +TAGACTCATGTAGATTGGCTGGCGGGAGCGGAAGTGATTCAGAGCGCCCCCAGAGCAGTTCTGCTCTTCGCACTGCAGTACGCAGTCGCGGTACACCGGCTCACGGTCGCCCTGGGAGCCGCTCGCCAGCGCCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@CGCAGAATAATCGATTGCTCAAAC 6-3 +AGAAACCACAACAAGAGAGGAAACAGAAGGGCAGGGCACCTTCTTCTGCCACCCACCTGTAAACAGAGGGCTCAGCCCAGCTGGAGGCAGGGCCTGGCTGGGTTGCCCACGGGCCTCACCTGCAGGAAGGACAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GAAAAAATGACATTGAACGATTTT 17-2 +GTCTTTCCCTAATCCTGGGAAGTGCACAGACCCTGCAAGGTGGGGCACAGGCAGCCCCTTCCCTCCCTTCACATGCTGAGGTGGCCCCGTAATTCTCCCCATCCCAGCTCTCATCCNCCCTCCNGCCNNNGCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GAAAATTCAATTGTGGTGATTAGT 7-4 +CCCTGACCTGCTGGAAAAGGGGGAGCGGCTGCCCCAGCCCCCCATCTGCACCATTGATGTCTACATGATCATGGTCAAATGTGCGTGGCTGAGCTGTGCTGGCTGCCTGGAGGAGGGTGGGAGGTCCTGGGTGG ++ 
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GTACAGGGATAATTAACGCATGAT 1-12 +AAGGTTTCAATGACGGTGAAGGCCACCTGTGAGGCTTCGAAGCTGCAGCTCCCGCAGGCCTCCTGGGGAGGCCCCTGTGACAGNGGTGGTANTGTTCAGCGGGNCNCCANNGTCTAGCACGGCCAGGGCANAGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GTAGAACTACACGTATTTGTCGAA 9-7 +TTTGCCGTGCCACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII