changeset 0:8ab09593f2eb draft default tip

planemo upload commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Wed, 07 Oct 2020 18:48:40 +0000
parents
children
files fasta2fastq.py fasta2fastq.xml macros.xml test-data/Reads_in.fasta test-data/Reads_out.fastq
diffstat 5 files changed, 145 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2fastq.py	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+"""fasta2fastq.py
+
+Author -- Monika Heinzl
+Contact -- monika.heinzl@edumail.at
+
+Takes a fasta file and converts it to a fastq file with a fake quality score.
+
+=======  ==========  =================  ================================
+Version  Date        Author             Description
+1.0.0    2020-10-07  Monika Heinzl    
+=======  ==========  =================  ================================
+
+USAGE: python fasta2fastq.py -i in.fasta -o out.fastq -s 40
+"""
+
+import argparse
+import sys, os
+from Bio import SeqIO
+
+
+def make_argparser():
+    """Return the argparse parser for -i/--infile, -o/--outfile (both required) and -s/--score."""
+    summary = 'Takes a fasta file and converts it to fastq file with a fake quality score.'
+    cli = argparse.ArgumentParser(description=summary)
+    cli.add_argument('-i', '--infile', help='Input FASTA file.', required=True)
+    cli.add_argument('-o', '--outfile', help='Output FASTQ file.', required=True)
+    cli.add_argument('-s', '--score', default=40, type=int,
+                     help='Quality score added to each base in the read. Default 40.')
+    return cli
+
+def fasta2fastq(argv):
+    """Convert the FASTA named in argv to FASTQ with a constant per-base quality score.
+
+    argv[0] is skipped. Exits on a missing input file or a negative score.
+    """
+    args = make_argparser().parse_args(argv[1:])
+    infasta = args.infile
+    outfastq = args.outfile
+    score = args.score
+
+    if not os.path.isfile(infasta):
+        sys.exit("Error: Could not find '{}'".format(infasta))
+    if score < 0:
+        sys.exit("Error: score is '{}', but only non-negative integers allowed".format(score))
+
+    # open(..., "w") creates the output file, so it is not required to exist beforehand
+    with open(infasta, "r") as fasta, open(outfastq, "w") as fastq:
+        for record in SeqIO.parse(fasta, "fasta"):
+            record.letter_annotations["phred_quality"] = [score] * len(record)
+            SeqIO.write(record, fastq, "fastq")
+
+
+if __name__ == '__main__':
+    sys.exit(fasta2fastq(sys.argv))  # fasta2fastq has no return value, so a successful run exits with status 0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2fastq.xml	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="fasta2fastq" name="FASTA-to-FASTQ" version="1.0.0" profile="19.01">
+    <description>Convert a FASTA file to a FASTQ file</description>
+    <macros> <!-- NOTE(review): no <requirements> element declares Biopython although fasta2fastq.py imports Bio; confirm how the dependency is provided -->
+        <import>macros.xml</import>
+    </macros>
+    <command><![CDATA[
+        python '$__tool_directory__/fasta2fastq.py' 
+        -i '$file1'
+        -o '$file2'
+        -s '$score'
+    ]]>
+    </command>
+    <inputs> <!-- score carries min="0" because fasta2fastq.py exits with an error on negative scores -->
+        <param name="file1" type="data" format="fasta" label="FASTA file" optional="false"/>
+        <param name="score" type="integer" min="0" label="Quality score" value="40" help="Quality score for each base in all reads. Default = 40."/>
+    </inputs>
+    <outputs>
+        <data name="file2" format="fastq" label="${tool.name} on ${on_string}: FASTQ"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="file1" value="Reads_in.fasta"/>
+            <output name="file2" file="Reads_out.fastq"/>
+            <param name="score" value="40"/>
+        </test>
+    </tests>
+    <help> <![CDATA[
+**What it does**
+
+Takes a FASTA file and converts it to a FASTQ file by adding a static quality score. The default quality score for each base in all reads is 40.
+
+    ]]> 
+    </help>
+    <expand macro="citation" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,12 @@
+<macros> <!-- defines the "citation" macro that fasta2fastq.xml pulls in with <expand macro="citation" /> -->
+    <xml name="citation">
+        <citations>
+            <citation type="bibtex">
+@misc{duplex,
+    author = {Heinzl Monika, monika.heinzl@edumail.at},
+    year = {2020},
+ }
+           </citation>
+        </citations>
+    </xml>
+</macros>
Binary file test-data/Reads_in.fasta has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Reads_out.fastq	Wed Oct 07 18:48:40 2020 +0000
@@ -0,0 +1,40 @@
+@AATTTTTTAAAAGTGTGTAGACTC 3-1
+TGGAGTGAGTTTGGATGGGGTGGCCAGGTCTGAGAAGGTCCCCCGCCAGTGTCCTCTGACCCATCTGCTCTCTCCTGCCAGTGTGCACCGGCACAGACATGAAGCTGCGGCTCCCTGCCAGAGTCTACACACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ACAGATTGTTCCGCAAATACAATT 2-6
+CTGGTAGATGAGCTGCGGTGCCTGTGGTGGACCATGCCCCCAGCGCCCGGGGCAGGGTCTGGACAGAAGAAGCCCTGCTGGGGTACCAGATACTCCTCAGCATCCACCAGGTCCCCCATGTCATCGTCCTCCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ACCTAAGCTCACGAACCGATTGAA 6-3
+CACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGAGATGCAGTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@ACTTCTGGTAATTAATAGGATATA 1-3
+GCTGTTTGTGCCTCTCTCTGTTACTAACCCGTCCTCTCGCTGTTAGACATCTCTCTCACTGCCTGTCTCTGGTTCTGTCCTCAGGCCACCCCTGTTAGTCATATATCCTATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@AGTGCTTATTCATAAGTAACCTTA 4-8
+TAGACTCATGTAGATTGGCTGGCGGGAGCGGAAGTGATTCAGAGCGCCCCCAGAGCAGTTCTGCTCTTCGCACTGCAGTACGCAGTCGCGGTACACCGGCTCACGGTCGCCCTGGGAGCCGCTCGCCAGCGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@CGCAGAATAATCGATTGCTCAAAC 6-3
+AGAAACCACAACAAGAGAGGAAACAGAAGGGCAGGGCACCTTCTTCTGCCACCCACCTGTAAACAGAGGGCTCAGCCCAGCTGGAGGCAGGGCCTGGCTGGGTTGCCCACGGGCCTCACCTGCAGGAAGGACAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GAAAAAATGACATTGAACGATTTT 17-2
+GTCTTTCCCTAATCCTGGGAAGTGCACAGACCCTGCAAGGTGGGGCACAGGCAGCCCCTTCCCTCCCTTCACATGCTGAGGTGGCCCCGTAATTCTCCCCATCCCAGCTCTCATCCNCCCTCCNGCCNNNGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GAAAATTCAATTGTGGTGATTAGT 7-4
+CCCTGACCTGCTGGAAAAGGGGGAGCGGCTGCCCCAGCCCCCCATCTGCACCATTGATGTCTACATGATCATGGTCAAATGTGCGTGGCTGAGCTGTGCTGGCTGCCTGGAGGAGGGTGGGAGGTCCTGGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GTACAGGGATAATTAACGCATGAT 1-12
+AAGGTTTCAATGACGGTGAAGGCCACCTGTGAGGCTTCGAAGCTGCAGCTCCCGCAGGCCTCCTGGGGAGGCCCCTGTGACAGNGGTGGTANTGTTCAGCGGGNCNCCANNGTCTAGCACGGCCAGGGCANAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@GTAGAACTACACGTATTTGTCGAA 9-7
+TTTGCCGTGCCACCCTGAGTGTCAGCCCCAGAATGGCTCAGTGACCTGTTTTGGACCGGTGAGCTGCTGGCGGGCTCAGAGCTGGGTGGAGGGGGGCAGCGAGGGGGATTGCCAGGGACTTGGCAGGATGGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII