Repository 'small_rna_signatures'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/small_rna_signatures

Changeset 1:6f1378738798 (2017-08-29)
Previous changeset 0:a35e6f9c1d34 (2017-08-28) Next changeset 2:320e06bf99b9 (2017-08-30)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 6133bb114c76a795fa12a4a11edb1a8b80fd104d
added:
overlapping_reads.py
overlapping_reads.xml
test-data/paired.fa
b
diff -r a35e6f9c1d34 -r 6f1378738798 overlapping_reads.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapping_reads.py Tue Aug 29 20:02:15 2017 -0400
[
@@ -0,0 +1,140 @@
+import argparse
+from collections import defaultdict
+
+import pysam
+
+
+def Parser():
+    """Parse the command line of the overlapping-reads extractor.
+
+    Returns the argparse namespace with the attributes ``input``,
+    ``minquery``, ``maxquery``, ``mintarget``, ``maxtarget``, ``overlap``
+    and ``output``.
+
+    ``--overlap`` defaults to 10, the same default used by ``main`` and by
+    the ``Map`` methods. All other options are mandatory so that a missing
+    one is reported by argparse instead of failing later with a traceback.
+    """
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument(
+        '--input', action="store", type=str, required=True,
+        help="bam alignment file")
+    the_parser.add_argument(
+        '--minquery', type=int, required=True,
+        help="Minimum readsize of query reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--maxquery', type=int, required=True,
+        help="Maximum readsize of query reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--mintarget', type=int, required=True,
+        help="Minimum readsize of target reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--maxtarget', type=int, required=True,
+        help="Maximum readsize of target reads (nt) - must be an integer")
+    the_parser.add_argument(
+        '--overlap', type=int, default=10,
+        help="Overlap analyzed (nt) - must be an integer (default: 10)")
+    the_parser.add_argument(
+        '--output', action="store", type=str, required=True,
+        help="Pairable sequences")
+    return the_parser.parse_args()
+
+
+class Map:
+    """Index the 5' ends of the reads of a BAM file and extract the reads
+    that carry an overlap signature ("pairable" reads).
+
+    Attributes:
+        bam_object: the ``pysam.AlignmentFile`` opened on the input BAM.
+        chromosomes: dict {reference name: reference length}.
+        map_dict: dict built by ``create_map``.
+    """
+
+    # Complement of the IUPAC nucleotide codes, in both cases. The original
+    # table only knew A, T, G, C and N and raised KeyError on anything else.
+    _COMPLEMENT = {
+        "A": "T", "T": "A", "G": "C", "C": "G", "N": "N", "U": "A",
+        "R": "Y", "Y": "R", "S": "S", "W": "W", "K": "M", "M": "K",
+        "B": "V", "V": "B", "D": "H", "H": "D",
+        "a": "t", "t": "a", "g": "c", "c": "g", "n": "n", "u": "a",
+        "r": "y", "y": "r", "s": "s", "w": "w", "k": "m", "m": "k",
+        "b": "v", "v": "b", "d": "h", "h": "d",
+    }
+
+    def __init__(self, bam_file):
+        """Open ``bam_file`` (binary mode) and build the read map."""
+        self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
+        self.chromosomes = dict(zip(self.bam_object.references,
+                                self.bam_object.lengths))
+        self.map_dict = self.create_map(self.bam_object)
+
+    def create_map(self, bam_object):
+        '''
+        Returns a map_dictionary {(chromosome,read_position,polarity):
+                                                    [read_length, ...]}
+
+        read_position is the 1-based coordinate of the 5' end of the read:
+        the first covered position for forward reads ('F'), the last
+        covered position for reverse reads ('R').
+        '''
+        map_dictionary = defaultdict(list)
+        # get empty value for start and end of each chromosome
+        for chrom in self.chromosomes:
+            map_dictionary[(chrom, 1, 'F')] = []
+            map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
+        for chrom in self.chromosomes:
+            for read in bam_object.fetch(chrom):
+                positions = read.positions  # a list of covered positions
+                if read.is_reverse:
+                    key = (chrom, positions[-1] + 1, 'R')
+                else:
+                    key = (chrom, positions[0] + 1, 'F')
+                map_dictionary[key].append(read.query_alignment_length)
+        return map_dictionary
+
+    def signature_tables(self, minquery, maxquery, mintarget, maxtarget):
+        """Count read 5' ends per signed coordinate for both size classes.
+
+        Returns ``(Query_table, Target_table)``, two dicts of the form
+        {chromosome: {coordinate: number_of_reads}}. The coordinate is the
+        1-based 5' end position, positive for forward reads and negative
+        for reverse reads. Size bounds are inclusive.
+        """
+        Query_table = defaultdict(dict)
+        Target_table = defaultdict(dict)
+        for (chrom, position, polarity), sizes in self.map_dict.items():
+            # The signed coordinate only depends on the key, not on the size.
+            coordinate = position if polarity == 'F' else -position
+            for size in sizes:
+                # Plain comparisons instead of "size in range(...)", which
+                # scans a list on Python 2.
+                if minquery <= size <= maxquery:
+                    Query_table[chrom][coordinate] = \
+                        Query_table[chrom].get(coordinate, 0) + 1
+                if mintarget <= size <= maxtarget:
+                    Target_table[chrom][coordinate] = \
+                        Target_table[chrom].get(coordinate, 0) + 1
+        return Query_table, Target_table
+
+    def search_overlaps(self, minquery, maxquery, mintarget, maxtarget,
+                        overlap=10):
+        """Find the query coordinates that have a target read on the
+        opposite strand at the position giving ``overlap`` nt of overlap.
+
+        Returns a dict {chromosome: [signed query coordinate, ...]}.
+        """
+        Query_table, Target_table = self.signature_tables(minquery, maxquery,
+                                                          mintarget, maxtarget)
+        overlap_groups = defaultdict(list)
+        for chrom, query_coords in Query_table.items():
+            target_coords = Target_table.get(chrom, {})
+            for coord in query_coords:
+                # The partner 5' end lies on the opposite strand (sign flip),
+                # overlap - 1 nt further along the reference.
+                if target_coords.get(-coord - overlap + 1, 0):
+                    overlap_groups[chrom].append(coord)
+        return overlap_groups
+
+    def feed_overlaps(self, overlap_groups, minquery, output, overlap=10):
+        """Write the reads found at the given coordinates as FASTA.
+
+        Headers are ``>chromosome|position|strand|size``, where position is
+        the 1-based leftmost covered position of the read. Reverse reads
+        are written reverse-complemented.
+
+        Args:
+            overlap_groups: dict {chromosome: [signed coordinate, ...]} as
+                returned by ``search_overlaps``.
+            minquery: reads with a shorter aligned length are skipped.
+            output: path of the FASTA file to write.
+            overlap: kept for backward compatibility; not needed to locate
+                the reads, see the comment on the fetch window below.
+
+        NOTE(review): no upper size bound is applied here, so reads longer
+        than the query maximum that share the same 5' end are also written.
+        """
+        record = '>%s|%s|%s|%s\n%s\n'
+        # "with" guarantees the file is closed even if pysam raises.
+        with open(output, 'w') as fasta:
+            for chrom in sorted(overlap_groups):
+                for pos in sorted(overlap_groups[chrom]):
+                    # Any read whose 5' end is at the anchor covers the
+                    # anchor, so a 1-nt window retrieves the same reads as
+                    # an overlap-sized one, and stays valid for overlap <= 1.
+                    anchor = abs(pos) - 1  # 0-based 5' end position
+                    reads = self.bam_object.fetch(chrom, start=anchor,
+                                                  end=anchor + 1)
+                    for read in reads:
+                        readsize = read.query_alignment_length
+                        if readsize < minquery:
+                            continue
+                        positions = read.positions
+                        if pos > 0:  # reads are forward
+                            # Skip reverse reads that merely share the
+                            # leftmost coordinate: they are not the query.
+                            if read.is_reverse or positions[0] != anchor:
+                                continue
+                            fasta.write(record % (
+                                chrom, pos, 'F', readsize,
+                                read.query_sequence))
+                        else:  # reads are reverse
+                            # Skip forward reads that merely share the
+                            # rightmost coordinate.
+                            if not read.is_reverse or \
+                                    positions[-1] != anchor:
+                                continue
+                            fasta.write(record % (
+                                chrom, positions[0] + 1, 'R', readsize,
+                                self.revcomp(read.query_sequence)))
+
+    def revcomp(self, sequence):
+        """Return the reverse complement of ``sequence``.
+
+        IUPAC codes are complemented and case is preserved; any other
+        character is returned as "N".
+        """
+        complement = self._COMPLEMENT
+        return "".join([complement.get(base, "N")
+                        for base in reversed(sequence)])
+
+
+def main(input, minquery, maxquery, mintarget, maxtarget, output, overlap=10):
+    """Write to ``output`` the reads of the BAM file ``input`` that show an
+    overlap signature.
+
+    Query coordinates are searched with the given inclusive size ranges and
+    ``overlap`` (nt), then the matching reads are written as FASTA. The same
+    ``overlap`` value is now passed to both steps; previously the writing
+    step always fell back to its own default.
+    """
+    mapobj = Map(input)
+    overlap_groups = mapobj.search_overlaps(minquery, maxquery,
+                                            mintarget, maxtarget, overlap)
+    mapobj.feed_overlaps(overlap_groups, minquery, output, overlap)
+
+
+if __name__ == "__main__":
+    args = Parser()
+    # Forward --overlap to main(); it used to be parsed but never passed on,
+    # so the analysis always ran with the default overlap. Fall back to 10
+    # (main's default) when the option is not given.
+    overlap = 10 if args.overlap is None else args.overlap
+    main(args.input, args.minquery, args.maxquery, args.mintarget,
+         args.maxtarget, args.output, overlap)
b
diff -r a35e6f9c1d34 -r 6f1378738798 overlapping_reads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapping_reads.xml Tue Aug 29 20:02:15 2017 -0400
[
@@ -0,0 +1,78 @@
+<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.0">
+    <description />
+    <requirements>
+        <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Tool exception" />
+    </stdio>
+      <command detect_errors="exit_code"><![CDATA[
+        samtools index '$input' &&
+        python '$__tool_directory__'/overlapping_reads.py
+           --input '$input'
+           --minquery '$minquery'
+           --maxquery '$maxquery'
+           --mintarget '$mintarget'
+           --maxtarget '$maxtarget'
+           --overlap '$overlap'
+           --output '$output'
+    ]]></command>
+    <inputs>
+        <param format="bam" label="Compute signature from this bowtie standard output" name="input" type="data" />
+        <param help="'23' = 23 nucleotides" label="Min size of query small RNAs" name="minquery" size="3" type="integer" value="23" />
+        <param help="'29' = 29 nucleotides" label="Max size of query small RNAs" name="maxquery" size="3" type="integer" value="29" />
+        <param help="'23' = 23 nucleotides" label="Min size of target small RNAs" name="mintarget" size="3" type="integer" value="23" />
+        <param help="'29' = 29 nucleotides" label="Max size of target small RNAs" name="maxtarget" size="3" type="integer" value="29" />
+        <param help="'10' = 10 nucleotides overlap" label="Overlap (in nt)" name="overlap" size="3" type="integer" value="10" />
+    </inputs>
+    <outputs>
+        <data format="fasta" label="pairable reads" name="output" />
+    </outputs>
+    <tests>
+        <test>
+            <param ftype="bam" name="input" value="sr_bowtie.bam" />
+            <param name="minquery" value="23" />
+            <param name="maxquery" value="29" />
+            <param name="mintarget" value="23" />
+            <param name="maxtarget" value="29" />
+            <param name="overlap" value="10" />
+            <output file="paired.fa" ftype="fasta" name="output" />
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Extracts the reads that show an overlap signature of the specified size (in nt)
+and returns a fasta file of these "pairable" reads.
+
+See `Antoniewski (2014)`_ for background and details
+
+.. _Antoniewski (2014): https://link.springer.com/protocol/10.1007%2F978-1-4939-0931-5_12
+
+**Input**
+
+A **sorted** BAM alignment file.
+
+**Outputs**
+
+A fasta file of pairable reads, such as:
+
+>FBgn0000004_17.6|5839|R|26
+
+TTTTCGTCAATTGTGCCAAATAGGTA
+
+>FBgn0000004_17.6|5855|F|23
+
+TTGACGAAAATGATCGAGTGGAT
+
+where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position,
+R stands for the reverse strand (F for the forward strand) and 26 stands for the size of the read.
+
+The second sequence in this example is a read that overlaps the first read by 10 nt.
+
+        </help>
+    <citations>
+            <citation type="doi">10.1007/978-1-4939-0931-5_12</citation>
+    </citations>
+</tool>
b
diff -r a35e6f9c1d34 -r 6f1378738798 test-data/paired.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/paired.fa Tue Aug 29 20:02:15 2017 -0400
b
b'@@ -0,0 +1,2182 @@\n+>FBgn0000004_17.6|5839|R|26\n+TTTTCGTCAATTGTGCCAAATAGGTA\n+>FBgn0000004_17.6|5855|F|23\n+TTGACGAAAATGATCGAGTGGAT\n+>FBgn0000006_412|744|R|26\n+TTTCGACCAAACCGGTGGCGTTTGCT\n+>FBgn0000006_412|744|R|26\n+TTTCGACCAAACCGGTGGCGTTTGCT\n+>FBgn0000006_412|745|R|25\n+TTTCGACCAAACCGGTGGCGTTTGC\n+>FBgn0000006_412|742|R|27\n+TTCGACCAAACCGGTGGCGTTTGCTGA\n+>FBgn0000006_412|744|R|25\n+TTCGACCAAACCGGTGGCGTTTGCT\n+>FBgn0000006_412|744|R|25\n+TTCGACCAAACCGGTGGCGTTTGCT\n+>FBgn0000006_412|745|R|24\n+TTCGACCAAACCGGTGGCGTTTGC\n+>FBgn0000006_412|745|R|24\n+TTCGACCAAACCGGTGGCGTTTGC\n+>FBgn0000006_412|759|F|24\n+TTTGGTCGAAAGCTCTAAAGCTAC\n+>FBgn0000006_412|759|F|24\n+TTTGGTCGAAAGCTCTAAAGCTAC\n+>FBgn0000006_412|759|F|24\n+TTTGGTCGAAAGCTCTAAAGCTAC\n+>FBgn0000006_412|759|F|25\n+TTTGGTCGAAAGCTCTAAAGCTACA\n+>FBgn0000006_412|759|F|27\n+TTTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|759|F|27\n+TTTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|759|F|27\n+TTTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|760|F|26\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|760|F|26\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|760|F|26\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|760|F|26\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|760|F|26\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000006_412|760|F|26\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000007_1731|4539|R|26\n+TTGAGAGCAAAGGCCGAATGAGTAAA\n+>FBgn0000007_1731|4555|F|24\n+TTGCTCTCAATGCGCTGAGTTTGG\n+>FBgn0000155_roo|8639|R|26\n+TTCTGCCAAAGGGCCAGCAAAGCTGA\n+>FBgn0000155_roo|8639|R|26\n+TTCTGCCAAAGTGCCAGCAAAGCTGA\n+>FBgn0000155_roo|3225|R|26\n+TTTTGCCCAAGGAGACCGTCTATTTT\n+>FBgn0000155_roo|3226|R|25\n+TTTTGCCCAAGGAGACCGTCTATTT\n+>FBgn0000155_roo|3227|R|24\n+TTTTGCCCAAGGAGACCGTCTATT\n+>FBgn0000155_roo|3227|R|24\n+TTTTGCCCAAGGAGACCGTCTATT\n+>FBgn0000155_roo|3228|R|23\n+TTTTGCCCAAGGAGACCGTCTAT\n+>FBgn0000155_roo|3132|R|27\n+TTTATTAAAATCGGGGTCGGCTAATTT\n+>FBgn0000155_roo|2675|R|27\n+TTTTTACCAAACGGATGCCTCAGACAT\n+>FBgn0000155_roo|2676|R|26\n+TTTTTACCAAACGG
ATGCCTCAGACA\n+>FBgn0000155_roo|2676|R|26\n+TTTTTACCAAACGGATGCCTCAGACA\n+>FBgn0000155_roo|2676|R|26\n+TTTTTACCAAACGGATGCCTCAGACA\n+>FBgn0000155_roo|2676|R|26\n+TTTTTACCAAACGGATGCCTCAGACA\n+>FBgn0000155_roo|2676|R|26\n+TTTTTACCAAACGGATGCCTCAGACA\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2677|R|25\n+TTTTTACCAAACGGATGCCTCAGAC\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2678|R|24\n+TTTTTACCAAACGGATGCCTCAGA\n+>FBgn0000155_roo|2679|R|23\n+TTTTTACCAAACGGATGCCTCAG\n+>FBgn0000155_roo|2679|R|23\n+TTTTTACCAAACGGATGCCTCAG\n+>FBgn0000155_roo|2159|R|27\n+TTGGTCAAAAACTCCCAAGTGGCTTCA\n+>F
Bgn0000155_roo|2161|R|25\n+TTGGTCAAAAACTCCCAAGTGGCTT\n+>FBgn0000155_roo|2176|F|24\n+TTTTGACCAAGCGGTATGAGAATA\n+>FBgn0000155_roo|2692|F|24\n+TTGGTAA'..b'08|R|25\n+TTCCGTGGGATCGCCTGAAGCCAAA\n+>FBgn0063507_G2|1923|F|29\n+TTCCACGGAACACCCGAGCAATGCCACCG\n+>FBgn0063534_Doc2-element|3605|R|28\n+TTTTTGCGAAAGCCAAACTGATGCGATG\n+>FBgn0063534_Doc2-element|3606|R|27\n+TTTTTGCGAAAGCCAAACTGATGCGAT\n+>FBgn0063534_Doc2-element|3606|R|27\n+TTTTTGCGAAAGCCAAACTGATGCGAT\n+>FBgn0063534_Doc2-element|3607|R|26\n+TTTTTGCGAAAGCCAAACTGATGCGA\n+>FBgn0063534_Doc2-element|3607|R|26\n+TTTTTGCGAAAGCCAAACTGATGCGA\n+>FBgn0063534_Doc2-element|3607|R|26\n+TTTTTGCGAAAGCCAAACTGATGCGA\n+>FBgn0063534_Doc2-element|3607|R|26\n+TTTTTGCGAAAGCCAAACTGATGCGA\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3608|R|25\n+TTTTTGCGAAAGCCAAACTGATGCG\n+>FBgn0063534_Doc2-element|3609|R|24\n+TTTTTGCGAAAGCCAAACTGATGC\n+>FBgn0063534_Doc2-element|3610|R|23\n+TTTTTGCGAAAGCCAAACTGATG\n+>FBgn0063534_Doc2-element|3610|R|23\n+TTTTTGCGAAAGCCAAACTGATG\n+>FBgn0063534_Doc2-element|3610|R|23\n+TTTTTGCGAAAGCCAAACTGATG\n+>FBgn0063534_Doc2-element|327|R|25\n+TTCGTTGCAATGAGAGCCGGCGATC\n+>FBgn0063534_Doc2-element|342|F|25\n+TTGCAACGAAACAACGCGTACTTCT\n+>FBgn0063534_Doc2-element|3623|F|23\n+TTCGCAAAAATCACGGAACGATC\n+>FBgn0063534_Doc2-element|3623|F|26\n+TTCGCAAAAATCACGGAACGATCGAA\n+>FBgn0063594_Cr1a|2037|R|27\n+TTGTTACAAGACATAGATCCAACAGTC\n+>FBgn0063594_Cr1a|2039|R|25\n+TTGTTACAAAACATAGATCCAACAG\n+>FBgn0063594_Cr1a|2054|F|24\n+TTTGTAACAAGTCCTGAAAGTGTG\n+>FBgn0063594_Cr1a|2054|F|24\n+TTTGTAACAAGTCCTGAAAGTGTG\n+>FBgn0063919_Max-element|3879|R|29\n+TTGCTGAGAAGCGTGTTGAGCGAATCAG
G\n+>FBgn0063919_Max-element|3880|R|28\n+TTGCTGAGAAGCGTGTCGAGCGAATCAG\n+>FBgn0063919_Max-element|3880|R|28\n+TTGCTGAGAAGCGTGTTGAGCGAATCAG\n+>FBgn0063919_Max-element|3882|R|26\n+TTGCTGAGAAGCGTGTCGAGCGAATC\n+>FBgn0063919_Max-element|3883|R|25\n+TTGCTGAGAAGCGTGTCGAGCGAAT\n+>FBgn0063919_Max-element|3884|R|24\n+TTGCTGAGAAGCGTGTTGAGCGAA\n+>FBgn0063919_Max-element|3898|F|23\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|3898|F|24\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|3898|F|24\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|3898|F|25\n+TTCTCAGCAAGTTCTGGGAGGTGGA\n+>FBgn0063919_Max-element|3898|F|24\n+TTCTCAGCAAGTTCTGGGAGGTGT\n+>FBgn0067385_invader6|3007|R|27\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|3007|R|27\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|3007|R|27\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|3007|R|27\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|3008|R|26\n+TTCTAGTCAAAGTCGAAGGACTGCAT\n+>FBgn0067385_invader6|3008|R|26\n+TTCTAGTCAAAGTCGAAGGACTGCAT\n+>FBgn0067385_invader6|3008|R|26\n+TTCTAGTCAAAGTCGAAGGACTGCAT\n+>FBgn0067385_invader6|3009|R|25\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3009|R|25\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3009|R|25\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3009|R|25\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3009|R|25\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3010|R|24\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|3010|R|24\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|3010|R|24\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|3010|R|24\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|3011|R|23\n+TTCTAGTCAAAGTCGAAGGACTG\n+>FBgn0067385_invader6|3024|F|26\n+TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067624_BS3|1011|R|25\n+TTGATGCCAATGTTCCAGCGTTTTG\n+>FBgn0067624_BS3|1011|R|25\n+TTGATGCCAATGTTCCAGCGTTTTG\n+>FBgn0067624_BS3|1011|R|25\n+TTGATGCCAATGTTCCAGCGTTTTG\n+>FBgn0067624_BS3|1013|R|23\n+TTGATGCCAATGTTCCAACGTCT
\n+>FBgn0067624_BS3|1013|R|23\n+TTGATGCCAATGTTCCAACGTCT\n+>FBgn0067624_BS3|1013|R|23\n+TTGATGCCAATGTTCCAACGTCT\n+>FBgn0067624_BS3|1026|F|26\n+TTGGCATCAATGGTGACAAATCAGCG\n+>FBgn0067624_BS3|1026|F|24\n+TTGGCATCAATGGTGACAAATCTG\n+>FBgn0067624_BS3|1026|F|25\n+TTGGCATCAATGGTGACAAATCTGC\n'