Mercurial > repos > mheinzl > fasta2fastq
changeset 0:8ab09593f2eb draft default tip
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Wed, 07 Oct 2020 18:48:40 +0000 |
parents | |
children | |
files | fasta2fastq.py fasta2fastq.xml macros.xml test-data/Reads_in.fasta test-data/Reads_out.fastq |
diffstat | 5 files changed, 145 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# fasta2fastq.py -- added as a new file in this changeset (/dev/null -> b/fasta2fastq.py)

"""fasta2fastq.py

Author -- Monika Heinzl
Contact -- monika.heinzl@edumail.at

Takes a fasta file and converts it to fastq file with a fake quality score.

=======  ==========  =================  ================================
Version  Date        Author             Description
1.0.0    2020-10-07  Monika Heinzl
=======  ==========  =================  ================================

USAGE: python fasta2fastq.py -i in.fasta -o out.fastq -s 40
"""

import argparse
import os
import sys


def make_argparser():
    """Build the command-line parser.

    Returns:
        argparse.ArgumentParser with the required ``-i/--infile`` and
        ``-o/--outfile`` options and the optional integer ``-s/--score``
        option (default 40).
    """
    parser = argparse.ArgumentParser(description='Takes a fasta file and converts it to fastq file with a fake quality score.')
    parser.add_argument('-i', '--infile', required=True,
                        help='Input FASTA file.')
    parser.add_argument('-o', '--outfile', required=True,
                        help='Output FASTQ file.')
    parser.add_argument('-s', '--score', type=int, default=40,
                        help='Quality score added to each base in the read. Default 40.')
    return parser


def _constant_quality_records(records, score):
    """Yield each record with ``score`` attached as the quality of every base."""
    for record in records:
        record.letter_annotations["phred_quality"] = [score] * len(record)
        yield record


def fasta2fastq(argv):
    """Convert a FASTA file to FASTQ, giving every base the same quality score.

    Args:
        argv: Full argument vector including the program name at index 0
            (i.e. ``sys.argv``); only ``argv[1:]`` is parsed.

    Raises:
        SystemExit: If the input file does not exist, the directory of the
            output file does not exist, or the score is negative. Also raised
            by argparse on malformed arguments.
    """
    args = make_argparser().parse_args(argv[1:])

    infasta = args.infile
    outfastq = args.outfile
    score = args.score

    if not os.path.isfile(infasta):
        sys.exit("Error: Could not find '{}'".format(infasta))
    # The output file is created (or overwritten) below, so it need not exist
    # beforehand; only the directory it goes into has to be present.
    out_dir = os.path.dirname(os.path.abspath(outfastq))
    if not os.path.isdir(out_dir):
        sys.exit("Error: Could not find output directory '{}'".format(out_dir))
    if score < 0:
        sys.exit("Error: score is '{}', but only non-negative integers allowed".format(score))

    # Imported lazily so that --help and argument validation work without
    # loading Biopython.
    from Bio import SeqIO

    # make fastq: stream records through, so the whole input is never held in memory
    with open(infasta, "r") as fasta, open(outfastq, "w") as fastq:
        records = _constant_quality_records(SeqIO.parse(fasta, "fasta"), score)
        SeqIO.write(records, fastq, "fastq")


if __name__ == '__main__':
    sys.exit(fasta2fastq(sys.argv))
<?xml version="1.0" encoding="UTF-8"?>
<!-- fasta2fastq.xml: added as a new file in this changeset (/dev/null -> b/fasta2fastq.xml) -->
<!-- Galaxy wrapper for fasta2fastq.py: converts a FASTA dataset to FASTQ with one constant quality score. -->
<tool id="fasta2fastq" name="FASTA-to-FASTQ" version="1.0.0" profile="19.01">
    <description>Convert a FASTA file to a FASTQ file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <!-- fasta2fastq.py imports Bio.SeqIO, so Biopython must be resolved for the job. -->
        <!-- NOTE(review): pin version="..." to the release the tool was tested with. -->
        <requirement type="package">biopython</requirement>
    </requirements>
    <command><![CDATA[
        python '$__tool_directory__/fasta2fastq.py'
        -i '$file1'
        -o '$file2'
        -s '$score'
    ]]>
    </command>
    <inputs>
        <param name="file1" type="data" format="fasta" label="FASTA file" optional="false"/>
        <!-- min="0" mirrors the script, which exits with an error on negative scores. -->
        <param name="score" type="integer" label="Quality score" value="40" min="0" help="Quality score for each base in all reads. Default = 40."/>
    </inputs>
    <outputs>
        <data name="file2" format="fastq" label="${tool.name} on ${on_string}: FASTQ"/>
    </outputs>
    <tests>
        <test>
            <param name="file1" value="Reads_in.fasta"/>
            <output name="file2" file="Reads_out.fastq"/>
            <param name="score" value="40"/>
        </test>
    </tests>
    <help> <![CDATA[
**What it does**

Takes a FASTA file and converts it to a FASTQ file by adding a static quality score. The default quality score for each base in all reads is 40.

    ]]>
    </help>
    <expand macro="citation" />
</tool>
<!-- macros.xml: added as a new file in this changeset (/dev/null -> b/macros.xml) -->
<macros>
    <!-- Citation block shared via <expand macro="citation" /> in the tool wrapper. -->
    <xml name="citation">
        <citations>
            <citation type="bibtex">
@misc{duplex,
  author = {Heinzl, Monika},
  note = {Contact: monika.heinzl@edumail.at},
  year = {2020},
}
            </citation>
        </citations>
    </xml>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Reads_out.fastq Wed Oct 07 18:48:40 2020 +0000 @@ -0,0 +1,40 @@ +@AATTTTTTAAAAGTGTGTAGACTC 3-1 +TGGAGTGAGTTTGGATGGGGTGGCCAGGTCTGAGAAGGTCCCCCGCCAGTGTCCTCTGACCCATCTGCTCTCTCCTGCCAGTGTGCACCGGCACAGACATGAAGCTGCGGCTCCCTGCCAGAGTCTACACACA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@ACAGATTGTTCCGCAAATACAATT 2-6 +CTGGTAGATGAGCTGCGGTGCCTGTGGTGGACCATGCCCCCAGCGCCCGGGGCAGGGTCTGGACAGAAGAAGCCCTGCTGGGGTACCAGATACTCCTCAGCATCCACCAGGTCCCCCATGTCATCGTCCTCCAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@ACCTAAGCTCACGAACCGATTGAA 6-3 +CACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGAGATGCAGTAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@ACTTCTGGTAATTAATAGGATATA 1-3 +GCTGTTTGTGCCTCTCTCTGTTACTAACCCGTCCTCTCGCTGTTAGACATCTCTCTCACTGCCTGTCTCTGGTTCTGTCCTCAGGCCACCCCTGTTAGTCATATATCCTATTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@AGTGCTTATTCATAAGTAACCTTA 4-8 +TAGACTCATGTAGATTGGCTGGCGGGAGCGGAAGTGATTCAGAGCGCCCCCAGAGCAGTTCTGCTCTTCGCACTGCAGTACGCAGTCGCGGTACACCGGCTCACGGTCGCCCTGGGAGCCGCTCGCCAGCGCCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@CGCAGAATAATCGATTGCTCAAAC 6-3 +AGAAACCACAACAAGAGAGGAAACAGAAGGGCAGGGCACCTTCTTCTGCCACCCACCTGTAAACAGAGGGCTCAGCCCAGCTGGAGGCAGGGCCTGGCTGGGTTGCCCACGGGCCTCACCTGCAGGAAGGACAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GAAAAAATGACATTGAACGATTTT 17-2 
+GTCTTTCCCTAATCCTGGGAAGTGCACAGACCCTGCAAGGTGGGGCACAGGCAGCCCCTTCCCTCCCTTCACATGCTGAGGTGGCCCCGTAATTCTCCCCATCCCAGCTCTCATCCNCCCTCCNGCCNNNGCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GAAAATTCAATTGTGGTGATTAGT 7-4 +CCCTGACCTGCTGGAAAAGGGGGAGCGGCTGCCCCAGCCCCCCATCTGCACCATTGATGTCTACATGATCATGGTCAAATGTGCGTGGCTGAGCTGTGCTGGCTGCCTGGAGGAGGGTGGGAGGTCCTGGGTGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GTACAGGGATAATTAACGCATGAT 1-12 +AAGGTTTCAATGACGGTGAAGGCCACCTGTGAGGCTTCGAAGCTGCAGCTCCCGCAGGCCTCCTGGGGAGGCCCCTGTGACAGNGGTGGTANTGTTCAGCGGGNCNCCANNGTCTAGCACGGCCAGGGCANAGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@GTAGAACTACACGTATTTGTCGAA 9-7 +TTTGCCGTGCCACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII