Repository 'get_hrun'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/get_hrun

Changeset 0:84f70ce0b830 (2021-03-02)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
added:
get_hrun.py
get_hrun.xml
macros.xml
test-data/in.vcf
test-data/out.vcf
test-data/reference.fasta
b
diff -r 000000000000 -r 84f70ce0b830 get_hrun.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_hrun.py Tue Mar 02 21:35:40 2021 +0000
[
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+import argparse
+
+import vcf
+from pyfaidx import Fasta
+from vcf.parser import _Info as VcfInfo
+
+
+parser = argparse.ArgumentParser(
+    description='Annotate VCF indel records with homopolymer run length')
+parser.add_argument("--reference",
+                    required=True,
+                    help="Filepath to reference FASTA file")
+parser.add_argument("--in-vcf",
+                    required=True,
+                    help="Filepath to vcf file to be analyzed")
+parser.add_argument("--out-vcf",
+                    required=True,
+                    help="Filepath to vcf file to be output")
+
+
+def homopolymer_run(contig, pos):
+    """Return the homopolymer run length next to 0-based index ``pos``.
+
+    The base at ``pos + 1`` seeds the run (length 1).  The run is extended
+    to the right up to the end of ``contig`` and then to the left starting
+    at ``pos`` itself, in case the variant is not left aligned.
+    """
+    base = contig[pos + 1]
+    hrun = 1
+    # Stop at the end of this contig rather than of the whole reference.
+    for i in range(pos + 2, len(contig)):
+        if contig[i] == base:
+            hrun += 1
+        else:
+            break
+    # Extend to left in case not left aligned
+    for i in range(pos, -1, -1):
+        if contig[i] == base:
+            hrun += 1
+        else:
+            break
+    return hrun
+
+
+args = parser.parse_args()
+reference = Fasta(args.reference, sequence_always_upper=True, read_ahead=1000)
+# The with-block closes both VCF files even if a record fails to process.
+with open(args.in_vcf) as in_vcf_handle, \
+        open(args.out_vcf, 'w') as out_vcf_handle:
+    in_vcf = vcf.Reader(in_vcf_handle)
+    # Declare HRUN so the writer emits its ##INFO header line.
+    in_vcf.infos['HRUN'] = VcfInfo(
+        'HRUN', 1, 'Integer',
+        'Homopolymer length to the right of report indel position',
+        "get_hrun", "1.0")
+    out_vcf = vcf.Writer(out_vcf_handle, in_vcf)
+    for record in in_vcf:
+        # Only records with an ALT whose length differs from REF get HRUN.
+        if any(len(record.REF) != len(alt) for alt in record.ALT):
+            # record.POS is 1-based; homopolymer_run expects a 0-based index.
+            hrun = homopolymer_run(reference[record.CHROM], record.POS - 1)
+            record.add_info('HRUN', [hrun])
+        out_vcf.write_record(record)
b
diff -r 000000000000 -r 84f70ce0b830 get_hrun.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_hrun.xml Tue Mar 02 21:35:40 2021 +0000
[
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<tool id="get_hrun" name="Get homopolymer run length" version="@VERSION@+galaxy0" profile="@PROFILE@" license="MIT">
+    <description>Annotate indel variants with homopolymer context</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ln -s '${reference}' ref.fa &&
+        samtools faidx ref.fa &&
+        python '${__tool_directory__}/get_hrun.py' --reference ref.fa --in-vcf '${in_vcf}' --out-vcf '${out_vcf}'
+]]></command>
+    <inputs>
+        <param argument="--in-vcf" type="data" format="vcf" label="Input VCF"/>
+        <param argument="--reference" type="data" format="fasta" label="Input Reference FASTA"/>
+    </inputs>
+    <outputs>
+        <data name="out_vcf" format="vcf" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="in_vcf" value="in.vcf"/>
+            <param name="reference" value="reference.fasta"/>
+            <output name="out_vcf" value="out.vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+**Inputs**
+- VCF file
+- Reference file for that VCF
+**Outputs**
+- VCF with annotated homopolymers
+]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 84f70ce0b830 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Mar 02 21:35:40 2021 +0000
[
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@VERSION@">1.0</token>
+    <token name="@PROFILE@">20.01</token>
+    <token name="@TOOL_VERSION@">0.5.9.2</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">pyfaidx</requirement>
+            <requirement type="package" version="0.6.8">pyvcf</requirement>
+            <requirement type="package" version="1.11">samtools</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.7287/peerj.preprints.970v1</citation>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+        </citations>
+    </xml>
+    <!--
+        Help
+    -->
+    <token name="@WID@"><![CDATA[
+This tool determines the homopolymer context of indels and annotates each indel record with the homopolymer run length (HRUN).
+]]></token>
+</macros>
b
diff -r 000000000000 -r 84f70ce0b830 test-data/in.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.vcf Tue Mar 02 21:35:40 2021 +0000
b
@@ -0,0 +1,10 @@
+##fileformat=VCFv4.1
+##medaka_version=1.0.3
+##contig=<ID=MN908947.3>
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Medaka genotype.">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Medaka genotype quality score">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
+MN908947.3 29748 . TAAAAAAAAAAA TA 243.965 PASS GT:GQ 1:244
+MN908947.3 29749 . A ATTT 243.965 PASS GT:GQ 1:244
+MN908947.3 29765 . TGGGGAACTCG A 243.965 PASS GT:GQ 1:244
+MN908947.3 29789 . TATATGGAAGA A 243.965 PASS GT:GQ 1:244
\ No newline at end of file
b
diff -r 000000000000 -r 84f70ce0b830 test-data/out.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out.vcf Tue Mar 02 21:35:40 2021 +0000
b
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##medaka_version=1.0.3
+##INFO=<ID=HRUN,Number=1,Type=Integer,Description="Homopolymer length to the right of report indel position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Medaka genotype.">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Medaka genotype quality score">
+##contig=<ID=MN908947.3>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
+MN908947.3 29748 . TAAAAAAAAAAA TA 243.965 PASS HRUN=11; GT:GQ 1:244
+MN908947.3 29749 . A ATTT 243.965 PASS HRUN=11; GT:GQ 1:244
+MN908947.3 29765 . TGGGGAACTCG A 243.965 PASS HRUN=4; GT:GQ 1:244
+MN908947.3 29789 . TATATGGAAGA A 243.965 PASS HRUN=1; GT:GQ 1:244
b
diff -r 000000000000 -r 84f70ce0b830 test-data/reference.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fasta Tue Mar 02 21:35:40 2021 +0000
b
b'@@ -0,0 +1,500 @@\n+>MN908947.3\n+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT\n+GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT\n+CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC\n+TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT\n+CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC\n+ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG\n+AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG\n+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA\n+ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT\n+CGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG\n+CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGG\n+TGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA\n+TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGA\n+ACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG\n+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC\n+ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG\n+TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCA\n+GACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAA\n+TTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAA\n+GCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG\n+CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA\n+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGA\n+AGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGC\n+ATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG\n+CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC\n+TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGG\n+TTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGA\n+AATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA\n+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAA\n+AGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC\n+AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCC\n+TCTTTATGCAT
TTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT\n+TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGG\n+AATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTAC\n+TAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG\n+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGA\n+AGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTAT\n+CTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAA\n+GGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTC\n+TATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA\n+CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCC\n+TCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT\n+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGA\n+AGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGA\n+AATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC\n+CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGA\n+AGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGT\n+ACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGC\n+CTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC\n+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG\n+TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGA\n+AGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGA\n+AGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGA\n+AGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGA\n+CGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT\n+AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT\n+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGT\n+AAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGC\n+AGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGC\n+TACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA\n+ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAA\n+GAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGG\n+TATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACA
AA\n+TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGA\n'..b'GGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATT\n+GTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTT\n+AATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAA\n+GCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTA\n+ATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCC\n+ATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA\n+AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT\n+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAG\n+CCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAAT\n+GGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATG\n+CCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAG\n+TAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAA\n+TTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTT\n+TCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC\n+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAA\n+TCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTG\n+ACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACA\n+AATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACA\n+GGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGC\n+TTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAG\n+ATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA\n+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGAT\n+GAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTG\n+ATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTA\n+CTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTA\n+GCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC\n+GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGA\n+CAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT\n+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACT\n+TCTATTTGTGCTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTT\n+GGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAA
T\n+TTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTAC\n+AGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATT\n+CTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGG\n+ATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT\n+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTT\n+CGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAA\n+CGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTAC\n+GTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGCAGTGGGGCGCG\n+ATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCT\n+CACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACAC\n+CAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG\n+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGG\n+GCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA\n+GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGC\n+AATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAG\n+CAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAA\n+TTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGA\n+TGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG\n+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAA\n+GAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAG\n+ACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAAC\n+TGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGG\n+AATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC\n+CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCA\n+TATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC\n+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCC\n+TGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTC\n+AACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC\n+TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGC\n+ACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTA\n+GGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAAAAAAAAAAAT\n+ACAGTGGGGAACTCGAGGGAGAGCTGCCTATATGGAAGAGCCC
TAATGTGTAAAATTAAT\n+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAA\n+AAAAAAAAAAAAAAAAAAAAAAA\n\\ No newline at end of file\n'