# HG changeset patch
# User mheinzl
# Date 1602096520 0
# Node ID 8ab09593f2ebb67e75482c62f0b13a9886d61dcb
planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
diff -r 000000000000 -r 8ab09593f2eb fasta2fastq.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2fastq.py	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+"""fasta2fastq.py
+
+Author -- Monika Heinzl
+Contact -- monika.heinzl@edumail.at
+
+Takes a FASTA file and converts it to a FASTQ file with a fake quality score.
+
+=======  ==========  =================  ================================
+Version  Date        Author             Description
+1.0.0    2020-10-07  Monika Heinzl      Initial version
+=======  ==========  =================  ================================
+
+USAGE: python fasta2fastq.py -i in.fasta -o out.fastq -s 40
+"""
+
+import argparse
+import sys, os
+from Bio import SeqIO
+
+
+def make_argparser():
+    """Return the argparse parser for -i/--infile, -o/--outfile and -s/--score."""
+    cli = argparse.ArgumentParser(
+        description='Takes a fasta file and converts it to fastq file with a fake quality score.')
+    cli.add_argument('-i', '--infile', required=True, help='Input FASTA file.')
+    cli.add_argument('-o', '--outfile', required=True, help='Output FASTQ file.')
+    cli.add_argument('-s', '--score', type=int, default=40,
+                     help='Quality score added to each base in the read. Default 40.')
+    return cli
+
+def fasta2fastq(argv):
+    """Write every record of the -i FASTA to the -o FASTQ with a constant score.
+
+    argv is the full argument vector (argv[0] is skipped). Returns None; calls
+    sys.exit with a message if the input file is missing or the score is negative.
+    """
+    args = make_argparser().parse_args(argv[1:])
+    infasta, outfastq, score = args.infile, args.outfile, args.score
+
+    if not os.path.isfile(infasta):
+        sys.exit("Error: Could not find '{}'".format(infasta))
+    # The output file is created by open(..., "w"), so it need not exist yet.
+    if score < 0:
+        sys.exit("Error: score is '{}', but only non-negative integers allowed".format(score))
+
+    # Every base of every read gets the same PHRED quality value.
+    with open(infasta, "r") as fasta, open(outfastq, "w") as fastq:
+        for record in SeqIO.parse(fasta, "fasta"):
+            record.letter_annotations["phred_quality"] = [score] * len(record)
+            SeqIO.write(record, fastq, "fastq")
+
+
+# Script entry point: the converter's return value becomes the exit status.
+if __name__ == '__main__':
+    sys.exit(fasta2fastq(argv=sys.argv))
diff -r 000000000000 -r 8ab09593f2eb fasta2fastq.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2fastq.xml	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,36 @@
+
+
+    Convert a FASTA file to a FASTQ file
+    
+        macros.xml
+    
+    
+    
+    
+        
+        
+    
+    
+        
+    
+    
+        
+            
+            
+            
+        
+    
+      
+    
+    
+
diff -r 000000000000 -r 8ab09593f2eb macros.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,12 @@
+
+    
+        
+            
+@misc{duplex,
+    author = {Heinzl Monika, monika.heinzl@edumail.at},
+    year = {2020},
+ }
+           
+        
+    
+
diff -r 000000000000 -r 8ab09593f2eb test-data/Reads_in.fasta
Binary file test-data/Reads_in.fasta has changed
diff -r 000000000000 -r 8ab09593f2eb test-data/Reads_out.fastq
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Reads_out.fastq	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,40 @@
+@AATTTTTTAAAAGTGTGTAGACTC 3-1
+TGGAGTGAGTTTGGATGGGGTGGCCAGGTCTGAGAAGGTCCCCCGCCAGTGTCCTCTGACCCATCTGCTCTCTCCTGCCAGTGTGCACCGGCACAGACATGAAGCTGCGGCTCCCTGCCAGAGTCTACACACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ACAGATTGTTCCGCAAATACAATT 2-6
+CTGGTAGATGAGCTGCGGTGCCTGTGGTGGACCATGCCCCCAGCGCCCGGGGCAGGGTCTGGACAGAAGAAGCCCTGCTGGGGTACCAGATACTCCTCAGCATCCACCAGGTCCCCCATGTCATCGTCCTCCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ACCTAAGCTCACGAACCGATTGAA 6-3
+CACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGAGATGCAGTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ACTTCTGGTAATTAATAGGATATA 1-3
+GCTGTTTGTGCCTCTCTCTGTTACTAACCCGTCCTCTCGCTGTTAGACATCTCTCTCACTGCCTGTCTCTGGTTCTGTCCTCAGGCCACCCCTGTTAGTCATATATCCTATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@AGTGCTTATTCATAAGTAACCTTA 4-8
+TAGACTCATGTAGATTGGCTGGCGGGAGCGGAAGTGATTCAGAGCGCCCCCAGAGCAGTTCTGCTCTTCGCACTGCAGTACGCAGTCGCGGTACACCGGCTCACGGTCGCCCTGGGAGCCGCTCGCCAGCGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@CGCAGAATAATCGATTGCTCAAAC 6-3
+AGAAACCACAACAAGAGAGGAAACAGAAGGGCAGGGCACCTTCTTCTGCCACCCACCTGTAAACAGAGGGCTCAGCCCAGCTGGAGGCAGGGCCTGGCTGGGTTGCCCACGGGCCTCACCTGCAGGAAGGACAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GAAAAAATGACATTGAACGATTTT 17-2
+GTCTTTCCCTAATCCTGGGAAGTGCACAGACCCTGCAAGGTGGGGCACAGGCAGCCCCTTCCCTCCCTTCACATGCTGAGGTGGCCCCGTAATTCTCCCCATCCCAGCTCTCATCCNCCCTCCNGCCNNNGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GAAAATTCAATTGTGGTGATTAGT 7-4
+CCCTGACCTGCTGGAAAAGGGGGAGCGGCTGCCCCAGCCCCCCATCTGCACCATTGATGTCTACATGATCATGGTCAAATGTGCGTGGCTGAGCTGTGCTGGCTGCCTGGAGGAGGGTGGGAGGTCCTGGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GTACAGGGATAATTAACGCATGAT 1-12
+AAGGTTTCAATGACGGTGAAGGCCACCTGTGAGGCTTCGAAGCTGCAGCTCCCGCAGGCCTCCTGGGGAGGCCCCTGTGACAGNGGTGGTANTGTTCAGCGGGNCNCCANNGTCTAGCACGGCCAGGGCANAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GTAGAACTACACGTATTTGTCGAA 9-7
+TTTGCCGTGCCACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII