Repository 'small_rna_signatures'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/small_rna_signatures

Changeset 5:a7fd04208764 (2017-09-09)
Previous changeset 4:20d28cfdeefe (2017-09-08) Next changeset 6:4da23f009c9e (2017-09-10)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 24d44a9b7ec9db4dce3d839b597eea2b1be34adb
modified:
overlapping_reads.py
overlapping_reads.xml
test-data/paired.fa
test-data/paired_2.fa
added:
test-data/paired_3.fa
test-data/paired_4.fa
b
diff -r 20d28cfdeefe -r a7fd04208764 overlapping_reads.py
--- a/overlapping_reads.py Fri Sep 08 04:44:22 2017 -0400
+++ b/overlapping_reads.py Sat Sep 09 11:57:39 2017 -0400
[
b"@@ -32,12 +32,20 @@\n \n class Map:\n \n-    def __init__(self, bam_file):\n+    def __init__(self, bam_file, output, minquery=23, maxquery=29,\n+                 mintarget=23, maxtarget=29, overlap=10):\n         self.bam_object = pysam.AlignmentFile(bam_file, 'rb')\n+        self.output = output\n+        self.query_range = range(minquery, maxquery + 1)\n+        self.target_range = range(mintarget, maxtarget + 1)\n+        self.overlap = overlap\n         self.chromosomes = dict(zip(self.bam_object.references,\n                                 self.bam_object.lengths))\n-        self.all_query_positions = self.query_positions(self.bam_object)\n+        self.alignement_dic = self.index_alignments(self.bam_object)\n+        self.all_query_positions = self.query_positions(self.bam_object,\n+                                                        overlap=self.overlap)\n         self.readdic = self.make_readdic(self.bam_object)\n+        self.pairing()\n \n     def make_readdic(self, bam_object):\n         readdic = defaultdict(int)\n@@ -45,90 +53,67 @@\n             readdic[read.query_sequence] += 1\n         return readdic\n \n-    def query_positions(self, bam_object):\n-        all_query_positions = defaultdict(list)\n+    def index_alignments(self, bam_object):\n+        '''\n+        dic[(chrom, pos, polarity)]: [readseq1, readseq2, ...]\n+        the list value is further converted in set\n+        '''\n+        dic = defaultdict(list)\n         for chrom in self.chromosomes:\n             for read in bam_object.fetch(chrom):\n-                if not read.is_reverse:\n-                    all_query_positions[chrom].append(\n-                        read.reference_start)\n+                if read.is_reverse:\n+                    coord = read.reference_end-1\n+                    pol = 'R'\n                 else:\n-                    all_query_positions[chrom].append(\n-                        read.reference_end)\n+                    coord = 
read.reference_start\n+                    pol = 'F'\n+                dic[(chrom, coord, pol)].append(read.query_sequence)\n+        for key in dic:\n+            dic[key] = set(dic[key])\n+        return dic\n+\n+    def query_positions(self, bam_object, overlap):\n+        all_query_positions = defaultdict(list)\n+        for genomicKey in self.alignement_dic.keys():\n+            chrom, coord, pol = genomicKey\n+            if pol == 'F' and len(self.alignement_dic[(chrom,\n+                                                      coord+overlap-1,\n+                                                      'R')]) > 0:\n+                all_query_positions[chrom].append(coord)\n+        for chrom in all_query_positions:\n             all_query_positions[chrom] = sorted(\n                 list(set(all_query_positions[chrom])))\n         return all_query_positions\n \n-    def direct_pairing(self, minquery, maxquery, mintarget, maxtarget,\n-                       file, overlap=10):\n-        F = open(file, 'w')\n-        query_range = range(minquery, maxquery + 1)\n-        target_range = range(mintarget, maxtarget + 1)\n+    def pairing(self):\n+        F = open(self.output, 'w')\n+        query_range = self.query_range\n+        target_range = self.target_range\n+        overlap = self.overlap\n         stringresult = []\n+        header_template = '>%s|coord=%s|strand %s|size=%s|nreads=%s\\n%s\\n'\n         for chrom in sorted(self.chromosomes):\n-            for pos in (self.all_query_positions[chrom]):\n-                iterreads_1 = self.bam_object.fetch(chrom,\n-                                                    start=pos, end=pos+overlap-1)\n-                iterreads_2 = self.bam_object.fetch(chrom,\n-                                                    start=pos, end=pos+overlap-1)\n-                iterreads_3 = self.bam_object.fetch(chrom,\n-                                                    start=pos, end=pos+overlap-1)\n-                iterreads_4 = 
self.bam_object.fetch(chrom,\n-                                                    start=pos, end=pos+overlap-1)"..b'                       (chrom, targetread.reference_start+1,\n-                                     \'R\', targetread.query_alignment_length,\n-                                     self.readdic[targetread.query_sequence],\n-                                     targetreadseq))\n-                #  2\n-                for queryread in iterreads_3:\n-                    if queryread.reference_end-1 == pos+overlap-1 and \\\n-                        queryread.query_alignment_length in query_range \\\n-                            and queryread.is_reverse:\n-                        for targetread in iterreads_4:\n-                            if (targetread.\n-                                reference_start\n-                                == pos and targetread.query_alignment_length\n-                                    in target_range and not\n-                                    targetread.is_reverse):\n-                                queryreadseq = self.revcomp(\n-                                    queryread.query_sequence)\n-                                targetreadseq = targetread.query_sequence\n-                                stringresult.append(\n-                                    \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n-                                    (chrom, queryread.reference_start+1, \'R\',\n-                                     queryread.query_alignment_length,\n-                                     self.readdic[queryread.query_sequence],\n-                                     queryreadseq))\n-                                stringresult.append(\n-                                    \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n-                                    (chrom, targetread.reference_start+1,\n-                                     \'F\', targetread.query_alignment_length,\n-                                     
self.readdic[targetread.query_sequence],\n-                                     targetreadseq))\n-        stringresult = sorted(set(stringresult),\n-                              key=lambda x: stringresult.index(x))\n+            for pos in self.all_query_positions[chrom]:\n+                stringbuffer = []\n+                uppers = self.alignement_dic[chrom, pos, \'F\']\n+                lowers = self.alignement_dic[chrom, pos+overlap-1, \'R\']\n+                if uppers and lowers:\n+                    for upread in uppers:\n+                        for downread in lowers:\n+                            if (len(upread) in query_range and len(downread) in\n+                                target_range) or (len(upread) in target_range\n+                                                  and len(downread) in\n+                                                  query_range):\n+                                stringbuffer.append(\n+                                    header_template %\n+                                    (chrom, pos+1, \'+\', len(upread),\n+                                     self.readdic[upread], upread))\n+                                stringbuffer.append(\n+                                    header_template %\n+                                    (chrom, pos+overlap-len(downread)+1, \'-\',\n+                                     len(downread), self.readdic[downread],\n+                                     self.revcomp(downread)))\n+                stringresult.extend(sorted(set(stringbuffer)))\n         F.write(\'\'.join(stringresult))\n \n     def revcomp(self, sequence):\n@@ -137,13 +122,7 @@\n         return "".join([antidict[i] for i in revseq])\n \n \n-def main(input, minquery, maxquery, mintarget, maxtarget, output, overlap=10):\n-    mapobj = Map(input)\n-    mapobj.direct_pairing(minquery, maxquery, mintarget, maxtarget,\n-                          output, overlap)\n-\n-\n if __name__ == "__main__":\n     args = Parser()\n-    
main(args.input, args.minquery, args.maxquery, args.mintarget,\n-         args.maxtarget, args.output, args.overlap)\n+    mapobj = Map(args.input, args.output, args.minquery, args.maxquery,\n+                 args.mintarget, args.maxtarget, args.overlap)\n'
b
diff -r 20d28cfdeefe -r a7fd04208764 overlapping_reads.xml
--- a/overlapping_reads.xml Fri Sep 08 04:44:22 2017 -0400
+++ b/overlapping_reads.xml Sat Sep 09 11:57:39 2017 -0400
b
@@ -1,4 +1,4 @@
-<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.3">
+<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.4">
     <description />
     <requirements>
         <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
@@ -47,6 +47,24 @@
             <param name="overlap" value="10" />
             <output file="paired_2.fa" ftype="fasta" name="output" />
         </test>
+        <test>
+            <param ftype="bam" name="input" value="sr_bowtie.bam" />
+            <param name="minquery" value="23" />
+            <param name="maxquery" value="29" />
+            <param name="mintarget" value="20" />
+            <param name="maxtarget" value="22" />
+            <param name="overlap" value="10" />
+            <output file="paired_3.fa" ftype="fasta" name="output" />
+        </test>
+        <test>
+            <param ftype="bam" name="input" value="sr_bowtie.bam" />
+            <param name="minquery" value="20" />
+            <param name="maxquery" value="22" />
+            <param name="mintarget" value="20" />
+            <param name="maxtarget" value="22" />
+            <param name="overlap" value="10" />
+            <output file="paired_4.fa" ftype="fasta" name="output" />
+        </test>
     </tests>
     <help>
 
@@ -70,10 +88,12 @@
 overlap.
 
 Searching query reads of 20-22 nt that overlap by 10 nt with target
-reads of 23-29 nt is different from searching query reads of 23-29 nt that overlap by 10 nt
-with target reads of 20-22 nt. i.e, searching for siRNAs that pair with piRNAs is distinct
-from searching for siRNAs that pairs with piRNAs, although of course the number of possibly
-formed piRNA/siRNA pairs is the same as the number of possibly formed siRNA/piRNA pairs.
+reads of 23-29 nt is equivalent to searching query reads of 23-29 nt that overlap by 10 nt
+with target reads of 20-22 nt, i.e., searching for siRNAs that pair with piRNAs is equivalent
+to searching for piRNAs that pair with siRNAs. In contrast, searching query reads of 20-22 nt
+that overlap by 10 nt with target reads of 23-29 nt is different from searching query reads of
+23-29 nt that overlap by 10 nt with target reads of 23-29 nt, since the number of "heterotypic"
+pairs of reads is likely to be different from the number of "homotypic" pairs of reads.
 
 *Overlap*
 The number of nucleotides by which the pairs of sequences will overlap
@@ -84,17 +104,18 @@
 
 a fasta file of pairable reads such as :
 
->FBgn0000004_17.6|5855|F|23|n=1
+>FBgn0000004_17.6|coord=5839|strand -|size=26|nreads=1
+
+TTTTCGTCAATTGTGCCAAATAGGTA
+
+>FBgn0000004_17.6|coord=5855|strand +|size=23|nreads=1
 
 TTGACGAAAATGATCGAGTGGAT
 
->FBgn0000004_17.6|5839|R|26|n=1
-
-TTTTCGTCAATTGTGCCAAATAGGTA
 
 where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position, 
-R stand for reverse strand (F forward strand), 26 stands for the size of the sequence and
-n=1 stands for the number of reads of the sequence in the dataset.
+'strand -' stands for the lower strand of the chromosome ('strand +' for the upper strand), 26 stands for the size of the sequence and
+nreads=1 stands for the number of reads of the sequence in the dataset.
 
 the second sequence in this example corresponds to 1 read that overlaps by 10 nt with
 1 read of the first sequence.
b
diff -r 20d28cfdeefe -r a7fd04208764 test-data/paired.fa
--- a/test-data/paired.fa Fri Sep 08 04:44:22 2017 -0400
+++ b/test-data/paired.fa Sat Sep 09 11:57:39 2017 -0400
b
b'@@ -1,668 +1,668 @@\n->FBgn0000004_17.6|5855|F|23|n=1\n-TTGACGAAAATGATCGAGTGGAT\n->FBgn0000004_17.6|5839|R|26|n=1\n+>FBgn0000004_17.6|coord=5839|strand -|size=26|nreads=1\n TTTTCGTCAATTGTGCCAAATAGGTA\n->FBgn0000006_412|759|F|24|n=3\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|742|R|27|n=1\n+>FBgn0000004_17.6|coord=5855|strand +|size=23|nreads=1\n+TTGACGAAAATGATCGAGTGGAT\n+>FBgn0000006_412|coord=742|strand -|size=27|nreads=1\n TTCGACCAAACCGGTGGCGTTTGCTGA\n->FBgn0000006_412|744|R|25|n=2\n+>FBgn0000006_412|coord=744|strand -|size=25|nreads=2\n TTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|745|R|24|n=2\n+>FBgn0000006_412|coord=745|strand -|size=24|nreads=2\n TTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|759|F|25|n=1\n+>FBgn0000006_412|coord=759|strand +|size=24|nreads=3\n+TTTGGTCGAAAGCTCTAAAGCTAC\n+>FBgn0000006_412|coord=759|strand +|size=25|nreads=1\n TTTGGTCGAAAGCTCTAAAGCTACA\n->FBgn0000006_412|759|F|27|n=3\n+>FBgn0000006_412|coord=759|strand +|size=27|nreads=3\n TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26|n=6\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|744|R|26|n=2\n+>FBgn0000006_412|coord=744|strand -|size=26|nreads=2\n TTTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|745|R|25|n=1\n+>FBgn0000006_412|coord=745|strand -|size=25|nreads=1\n TTTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000007_1731|4555|F|24|n=1\n+>FBgn0000006_412|coord=760|strand +|size=26|nreads=6\n+TTGGTCGAAAGCTCTAAAGCTACATG\n+>FBgn0000007_1731|coord=4539|strand -|size=26|nreads=1\n+TTGAGAGCAAAGGCCGAATGAGTAAA\n+>FBgn0000007_1731|coord=4555|strand +|size=24|nreads=1\n TTGCTCTCAATGCGCTGAGTTTGG\n->FBgn0000007_1731|4539|R|26|n=1\n-TTGAGAGCAAAGGCCGAATGAGTAAA\n->FBgn0000155_roo|2176|F|24|n=1\n-TTTTGACCAAGCGGTATGAGAATA\n->FBgn0000155_roo|2159|R|27|n=1\n+>FBgn0000155_roo|coord=2159|strand -|size=27|nreads=1\n TTGGTCAAAAACTCCCAAGTGGCTTCA\n->FBgn0000155_roo|2161|R|25|n=1\n+>FBgn0000155_roo|coord=2161|strand -|size=25|nreads=1\n 
TTGGTCAAAAACTCCCAAGTGGCTT\n->FBgn0000155_roo|2692|F|24|n=1\n-TTGGTAAAAAATGTATAAGTGAGC\n->FBgn0000155_roo|2675|R|27|n=1\n+>FBgn0000155_roo|coord=2176|strand +|size=24|nreads=1\n+TTTTGACCAAGCGGTATGAGAATA\n+>FBgn0000155_roo|coord=2675|strand -|size=27|nreads=1\n TTTTTACCAAACGGATGCCTCAGACAT\n->FBgn0000155_roo|2676|R|26|n=5\n+>FBgn0000155_roo|coord=2676|strand -|size=26|nreads=5\n TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2677|R|25|n=14\n+>FBgn0000155_roo|coord=2677|strand -|size=25|nreads=14\n TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2678|R|24|n=14\n+>FBgn0000155_roo|coord=2678|strand -|size=24|nreads=14\n TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2679|R|23|n=2\n+>FBgn0000155_roo|coord=2679|strand -|size=23|nreads=2\n TTTTTACCAAACGGATGCCTCAG\n->FBgn0000155_roo|2692|F|27|n=1\n+>FBgn0000155_roo|coord=2692|strand +|size=24|nreads=1\n+TTGGTAAAAAATGTATAAGTGAGC\n+>FBgn0000155_roo|coord=2692|strand +|size=27|nreads=1\n TTGGTAAAAAATGTATAAGTGAGCAGC\n->FBgn0000155_roo|3149|F|26|n=1\n-TTTTAATAAACCGGGTCGCATTGATT\n->FBgn0000155_roo|3132|R|27|n=1\n+>FBgn0000155_roo|coord=3132|strand -|size=27|nreads=1\n TTTATTAAAATCGGGGTCGGCTAATTT\n->FBgn0000155_roo|3241|F|24|n=1\n-TTGGGCAAAAAACTGATTTCGGGT\n->FBgn0000155_roo|3225|R|26|n=1\n+>FBgn0000155_roo|coord=3149|strand +|size=26|nreads=1\n+TTTTAATAAACCGGGTCGCATTGATT\n+>FBgn0000155_roo|coord=3225|strand -|size=26|nreads=1\n TTTTGCCCAAGGAGACCGTCTATTTT\n->FBgn0000155_roo|3226|R|25|n=1\n+>FBgn0000155_roo|coord=3226|strand -|size=25|nreads=1\n TTTTGCCCAAGGAGACCGTCTATTT\n->FBgn0000155_roo|3227|R|24|n=2\n+>FBgn0000155_roo|coord=3227|strand -|size=24|nreads=2\n TTTTGCCCAAGGAGACCGTCTATT\n->FBgn0000155_roo|3228|R|23|n=1\n+>FBgn0000155_roo|coord=3228|strand -|size=23|nreads=1\n TTTTGCCCAAGGAGACCGTCTAT\n->FBgn0000155_roo|3241|F|27|n=1\n+>FBgn0000155_roo|coord=3241|strand +|size=24|nreads=1\n+TTGGGCAAAAAACTGATTTCGGGT\n+>FBgn0000155_roo|coord=3241|strand +|size=27|nreads=1\n 
TTGGGCAAAAAACTGATTTCGGGTGGA\n->FBgn0000155_roo|3241|F|28|n=1\n+>FBgn0000155_roo|coord=3241|strand +|size=28|nreads=1\n TTGGGCAAAAAACTGATTTCGGGTGGAT\n->FBgn0000155_roo|8655|F|25|n=1\n-TTTGGCAGAATGTTCACACATGAAA\n->FBgn0000155_roo|8639|R|26|n=1\n+>FBgn0000155_roo|coord'..b'34_Doc2-element|3606|R|27|n=2\n+>FBgn0063534_Doc2-element|coord=3606|strand -|size=27|nreads=2\n TTTTTGCGAAAGCCAAACTGATGCGAT\n->FBgn0063534_Doc2-element|3607|R|26|n=4\n+>FBgn0063534_Doc2-element|coord=3607|strand -|size=26|nreads=4\n TTTTTGCGAAAGCCAAACTGATGCGA\n->FBgn0063534_Doc2-element|3608|R|25|n=7\n+>FBgn0063534_Doc2-element|coord=3608|strand -|size=25|nreads=7\n TTTTTGCGAAAGCCAAACTGATGCG\n->FBgn0063534_Doc2-element|3609|R|24|n=1\n+>FBgn0063534_Doc2-element|coord=3609|strand -|size=24|nreads=1\n TTTTTGCGAAAGCCAAACTGATGC\n->FBgn0063534_Doc2-element|3610|R|23|n=3\n+>FBgn0063534_Doc2-element|coord=3610|strand -|size=23|nreads=3\n TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|3623|F|26|n=1\n+>FBgn0063534_Doc2-element|coord=3623|strand +|size=23|nreads=1\n+TTCGCAAAAATCACGGAACGATC\n+>FBgn0063534_Doc2-element|coord=3623|strand +|size=26|nreads=1\n TTCGCAAAAATCACGGAACGATCGAA\n->FBgn0063594_Cr1a|2054|F|24|n=2\n-TTTGTAACAAGTCCTGAAAGTGTG\n->FBgn0063594_Cr1a|2037|R|27|n=1\n+>FBgn0063594_Cr1a|coord=2037|strand -|size=27|nreads=1\n TTGTTACAAGACATAGATCCAACAGTC\n->FBgn0063594_Cr1a|2039|R|25|n=1\n+>FBgn0063594_Cr1a|coord=2039|strand -|size=25|nreads=1\n TTGTTACAAAACATAGATCCAACAG\n->FBgn0063919_Max-element|3898|F|23|n=1\n-TTCTCAGCAAGTTCTGGGAGGTG\n->FBgn0063919_Max-element|3879|R|29|n=1\n+>FBgn0063594_Cr1a|coord=2054|strand +|size=24|nreads=2\n+TTTGTAACAAGTCCTGAAAGTGTG\n+>FBgn0063919_Max-element|coord=3879|strand -|size=29|nreads=1\n TTGCTGAGAAGCGTGTTGAGCGAATCAGG\n->FBgn0063919_Max-element|3880|R|28|n=1\n+>FBgn0063919_Max-element|coord=3880|strand -|size=28|nreads=1\n TTGCTGAGAAGCGTGTCGAGCGAATCAG\n->FBgn0063919_Max-element|3880|R|28|n=1\n+>FBgn0063919_Max-element|coord=3880|strand -|size=28|nreads=1\n 
TTGCTGAGAAGCGTGTTGAGCGAATCAG\n->FBgn0063919_Max-element|3882|R|26|n=1\n+>FBgn0063919_Max-element|coord=3882|strand -|size=26|nreads=1\n TTGCTGAGAAGCGTGTCGAGCGAATC\n->FBgn0063919_Max-element|3883|R|25|n=1\n+>FBgn0063919_Max-element|coord=3883|strand -|size=25|nreads=1\n TTGCTGAGAAGCGTGTCGAGCGAAT\n->FBgn0063919_Max-element|3884|R|24|n=1\n+>FBgn0063919_Max-element|coord=3884|strand -|size=24|nreads=1\n TTGCTGAGAAGCGTGTTGAGCGAA\n->FBgn0063919_Max-element|3898|F|24|n=2\n-TTCTCAGCAAGTTCTGGGAGGTGG\n->FBgn0063919_Max-element|3898|F|25|n=1\n-TTCTCAGCAAGTTCTGGGAGGTGGA\n->FBgn0063919_Max-element|3898|F|24|n=1\n+>FBgn0063919_Max-element|coord=3898|strand +|size=23|nreads=1\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|coord=3898|strand +|size=24|nreads=1\n TTCTCAGCAAGTTCTGGGAGGTGT\n->FBgn0067385_invader6|3024|F|26|n=1\n-TTGACTAGAATGACTTAGACTTAGAA\n->FBgn0067385_invader6|3007|R|27|n=4\n+>FBgn0063919_Max-element|coord=3898|strand +|size=24|nreads=2\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|coord=3898|strand +|size=25|nreads=1\n+TTCTCAGCAAGTTCTGGGAGGTGGA\n+>FBgn0067385_invader6|coord=3007|strand -|size=27|nreads=4\n TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3008|R|26|n=3\n+>FBgn0067385_invader6|coord=3008|strand -|size=26|nreads=3\n TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3009|R|25|n=5\n+>FBgn0067385_invader6|coord=3009|strand -|size=25|nreads=5\n TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3010|R|24|n=4\n+>FBgn0067385_invader6|coord=3010|strand -|size=24|nreads=4\n TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3011|R|23|n=1\n+>FBgn0067385_invader6|coord=3011|strand -|size=23|nreads=1\n TTCTAGTCAAAGTCGAAGGACTG\n->FBgn0067624_BS3|1026|F|26|n=1\n-TTGGCATCAATGGTGACAAATCAGCG\n->FBgn0067624_BS3|1011|R|25|n=3\n+>FBgn0067385_invader6|coord=3024|strand +|size=26|nreads=1\n+TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067624_BS3|coord=1011|strand -|size=25|nreads=3\n 
TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1013|R|23|n=3\n+>FBgn0067624_BS3|coord=1013|strand -|size=23|nreads=3\n TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1026|F|24|n=1\n+>FBgn0067624_BS3|coord=1026|strand +|size=24|nreads=1\n TTGGCATCAATGGTGACAAATCTG\n->FBgn0067624_BS3|1026|F|25|n=1\n+>FBgn0067624_BS3|coord=1026|strand +|size=25|nreads=1\n TTGGCATCAATGGTGACAAATCTGC\n+>FBgn0067624_BS3|coord=1026|strand +|size=26|nreads=1\n+TTGGCATCAATGGTGACAAATCAGCG\n'
b
diff -r 20d28cfdeefe -r a7fd04208764 test-data/paired_2.fa
--- a/test-data/paired_2.fa Fri Sep 08 04:44:22 2017 -0400
+++ b/test-data/paired_2.fa Sat Sep 09 11:57:39 2017 -0400
b
b'@@ -1,370 +1,416 @@\n->FBgn0000004_17.6|5844|R|21|n=1\n+>FBgn0000004_17.6|coord=5844|strand -|size=21|nreads=1\n TTTTCGTCAAGTGTGCTAAAT\n->FBgn0000004_17.6|5855|F|23|n=1\n+>FBgn0000004_17.6|coord=5855|strand +|size=23|nreads=1\n TTGACGAAAATGATCGAGTGGAT\n->FBgn0000005_297|1347|F|21|n=1\n+>FBgn0000005_297|coord=1334|strand -|size=23|nreads=1\n+TTTTGCGCAATGGTAATTAAGGA\n+>FBgn0000005_297|coord=1347|strand +|size=21|nreads=1\n TTGCACAAAATGAGGGAATTT\n->FBgn0000005_297|1334|R|23|n=1\n-TTTTGCGCAATGGTAATTAAGGA\n->FBgn0000007_1731|4545|R|20|n=1\n+>FBgn0000007_1731|coord=4545|strand -|size=20|nreads=1\n TTGAGAGCAAAGGCCGAATG\n->FBgn0000007_1731|4555|F|24|n=1\n+>FBgn0000007_1731|coord=4555|strand +|size=24|nreads=1\n TTGCTCTCAATGCGCTGAGTTTGG\n->FBgn0000155_roo|2682|R|20|n=2\n+>FBgn0000155_roo|coord=2682|strand -|size=20|nreads=2\n TTTTTACCAAACGGATGCCT\n->FBgn0000155_roo|2692|F|24|n=1\n+>FBgn0000155_roo|coord=2692|strand +|size=24|nreads=1\n TTGGTAAAAAATGTATAAGTGAGC\n->FBgn0000155_roo|2692|F|27|n=1\n+>FBgn0000155_roo|coord=2692|strand +|size=27|nreads=1\n TTGGTAAAAAATGTATAAGTGAGCAGC\n->FBgn0000155_roo|3241|F|20|n=1\n-TTGGGCAAAAAACTGATTTC\n->FBgn0000155_roo|3225|R|26|n=1\n+>FBgn0000155_roo|coord=3225|strand -|size=26|nreads=1\n TTTTGCCCAAGGAGACCGTCTATTTT\n->FBgn0000155_roo|3226|R|25|n=1\n+>FBgn0000155_roo|coord=3226|strand -|size=25|nreads=1\n TTTTGCCCAAGGAGACCGTCTATTT\n->FBgn0000155_roo|3227|R|24|n=2\n+>FBgn0000155_roo|coord=3227|strand -|size=24|nreads=2\n TTTTGCCCAAGGAGACCGTCTATT\n->FBgn0000155_roo|3228|R|23|n=1\n+>FBgn0000155_roo|coord=3228|strand -|size=23|nreads=1\n TTTTGCCCAAGGAGACCGTCTAT\n->FBgn0000155_roo|3229|R|22|n=2\n+>FBgn0000155_roo|coord=3229|strand -|size=22|nreads=2\n TTTTGCCCAAGGAGACCGTCTA\n->FBgn0000155_roo|3241|F|24|n=1\n+>FBgn0000155_roo|coord=3230|strand -|size=21|nreads=1\n+TTTTGCCCAAGGAGACCGTCT\n+>FBgn0000155_roo|coord=3231|strand -|size=20|nreads=2\n+TTTTGCCCAAGGAGACCGTC\n+>FBgn0000155_roo|coord=3241|strand 
+|size=20|nreads=1\n+TTGGGCAAAAAACTGATTTC\n+>FBgn0000155_roo|coord=3241|strand +|size=24|nreads=1\n TTGGGCAAAAAACTGATTTCGGGT\n->FBgn0000155_roo|3241|F|27|n=1\n+>FBgn0000155_roo|coord=3241|strand +|size=27|nreads=1\n TTGGGCAAAAAACTGATTTCGGGTGGA\n->FBgn0000155_roo|3241|F|28|n=1\n+>FBgn0000155_roo|coord=3241|strand +|size=28|nreads=1\n TTGGGCAAAAAACTGATTTCGGGTGGAT\n->FBgn0000155_roo|8644|R|21|n=1\n+>FBgn0000155_roo|coord=8644|strand -|size=21|nreads=1\n TTCTGCCAAAGGGCCAGCAAG\n->FBgn0000155_roo|8655|F|25|n=1\n+>FBgn0000155_roo|coord=8645|strand -|size=20|nreads=1\n+TTCTGCCAAAGGGCCAGCAA\n+>FBgn0000155_roo|coord=8655|strand +|size=25|nreads=1\n TTTGGCAGAATGTTCACACATGAAA\n->FBgn0000349_copia|658|R|22|n=1\n+>FBgn0000349_copia|coord=658|strand -|size=22|nreads=1\n TTCTCAAGAATCTGACGCGCCG\n->FBgn0000349_copia|670|F|25|n=1\n+>FBgn0000349_copia|coord=670|strand +|size=25|nreads=1\n TTCTTGAGAATTTGGACGCCGTTTA\n->FBgn0000349_copia|4628|F|20|n=1\n+>FBgn0000349_copia|coord=4614|strand -|size=24|nreads=1\n+TTGCAGCAAACCCAATTTGTCTCG\n+>FBgn0000349_copia|coord=4628|strand +|size=20|nreads=1\n TTTGCTGCAAGACGACCAAT\n->FBgn0000349_copia|4614|R|24|n=1\n-TTGCAGCAAACCCAATTTGTCTCG\n->FBgn0000652_F-element|1564|F|20|n=2\n-TTTTCTCGAAAGCAGCAAGT\n->FBgn0000652_F-element|1546|R|28|n=1\n+>FBgn0000652_F-element|coord=1546|strand -|size=28|nreads=1\n TTCGAGAAAATTACTTCAGGATTTGTCT\n->FBgn0000652_F-element|1546|R|28|n=1\n+>FBgn0000652_F-element|coord=1546|strand -|size=28|nreads=1\n TTCGGGAAAATTACTTCAGGATTTGTCT\n->FBgn0000652_F-element|1547|R|27|n=74\n+>FBgn0000652_F-element|coord=1547|strand -|size=27|nreads=1\n+TTCGGGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|coord=1547|strand -|size=27|nreads=1\n+TTTGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|coord=1547|strand -|size=27|nreads=74\n 
TTCGAGAAAATTACTTCAGGATTTGTC\n->FBgn0000652_F-element|1547|R|27|n=1\n-TTCGGGAAAATTACTTCAGGATTTGTC\n->FBgn0000652_F-element|1547|R|27|n=1\n-TTTGAGAAAATTACTTCAGGATTTGTC\n->FBgn0000652_F-element|1548|R|26|n=3\n+>FBgn0000652_F-element|coord=1548|strand -|size=26|nreads=3\n TTCGAGAAAATTACTTCAGGATTTGT\n->FBgn0000652_F-element|1549|R|25|n=22\n+>FBgn0000652_F-element|coord=1'..b'4221|strand +|size=24|nreads=1\n TTCTAACCAAGAATTTGAATAGAT\n->FBgn0063427_invader4|707|F|22|n=2\n+>FBgn0063427_invader4|coord=691|strand -|size=26|nreads=15\n+TTCCTACGAATCGCTGTATGAACAGT\n+>FBgn0063427_invader4|coord=707|strand +|size=22|nreads=2\n TTCGTAGGAATGGAGACGTCGG\n->FBgn0063427_invader4|691|R|26|n=15\n-TTCCTACGAATCGCTGTATGAACAGT\n->FBgn0063428_invader3|440|F|22|n=2\n+>FBgn0063428_invader3|coord=425|strand -|size=25|nreads=1\n+TTCTGGGGAAATGGCCTGCAGACGC\n+>FBgn0063428_invader3|coord=427|strand -|size=23|nreads=1\n+TTCTGGGGAAATGGCCTGCAGAC\n+>FBgn0063428_invader3|coord=440|strand +|size=22|nreads=2\n TTCCCCAGAAACGCGTGGCGAT\n->FBgn0063428_invader3|425|R|25|n=1\n-TTCTGGGGAAATGGCCTGCAGACGC\n->FBgn0063428_invader3|427|R|23|n=1\n-TTCTGGGGAAATGGCCTGCAGAC\n->FBgn0063430_invader1|2082|R|20|n=1\n+>FBgn0063430_invader1|coord=2082|strand -|size=20|nreads=1\n TTTTATCGAACCAATAGAAC\n->FBgn0063430_invader1|2092|F|24|n=1\n+>FBgn0063430_invader1|coord=2092|strand +|size=24|nreads=1\n TTCGATAAAATGTCTAAGTATGTT\n->FBgn0063430_invader1|2092|F|26|n=1\n+>FBgn0063430_invader1|coord=2092|strand +|size=26|nreads=1\n TTCGATAAAATGTCTAAGTATGTTCG\n->FBgn0063433_gypsy4|3058|F|22|n=1\n+>FBgn0063433_gypsy4|coord=3040|strand -|size=28|nreads=1\n+TTTTGAAGAACTTGGATTTTTCTTGAGA\n+>FBgn0063433_gypsy4|coord=3044|strand -|size=24|nreads=1\n+TTTTGAAGAACTTGGATTTTTCTT\n+>FBgn0063433_gypsy4|coord=3058|strand +|size=22|nreads=1\n 
TTCTTCAAAAAGAGCGTGGAAT\n->FBgn0063433_gypsy4|3040|R|28|n=1\n-TTTTGAAGAACTTGGATTTTTCTTGAGA\n->FBgn0063433_gypsy4|3044|R|24|n=1\n-TTTTGAAGAACTTGGATTTTTCTT\n->FBgn0063440_baggins|5170|F|21|n=1\n+>FBgn0063440_baggins|coord=5154|strand -|size=26|nreads=1\n+TTTTGTGCAAATGGCTGTGAAGTCGG\n+>FBgn0063440_baggins|coord=5170|strand +|size=21|nreads=1\n TTGCACAAAATTGGCATTGCA\n->FBgn0063440_baggins|5154|R|26|n=1\n-TTTTGTGCAAATGGCTGTGAAGTCGG\n->FBgn0063919_Max-element|3887|R|21|n=2\n+>FBgn0063919_Max-element|coord=3887|strand -|size=21|nreads=2\n TTGCTGAGAAGCGTGTTGAGC\n->FBgn0063919_Max-element|3898|F|23|n=1\n+>FBgn0063919_Max-element|coord=3898|strand +|size=23|nreads=1\n TTCTCAGCAAGTTCTGGGAGGTG\n->FBgn0063919_Max-element|3898|F|24|n=2\n-TTCTCAGCAAGTTCTGGGAGGTGG\n->FBgn0063919_Max-element|3898|F|25|n=1\n-TTCTCAGCAAGTTCTGGGAGGTGGA\n->FBgn0063919_Max-element|3898|F|24|n=1\n+>FBgn0063919_Max-element|coord=3898|strand +|size=24|nreads=1\n TTCTCAGCAAGTTCTGGGAGGTGT\n->FBgn0067385_invader6|3024|F|21|n=1\n-TTGACTAGAATGACTTAGACT\n->FBgn0067385_invader6|3007|R|27|n=4\n+>FBgn0063919_Max-element|coord=3898|strand +|size=24|nreads=2\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|coord=3898|strand +|size=25|nreads=1\n+TTCTCAGCAAGTTCTGGGAGGTGGA\n+>FBgn0067385_invader6|coord=3007|strand -|size=27|nreads=4\n TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3008|R|26|n=3\n+>FBgn0067385_invader6|coord=3008|strand -|size=26|nreads=3\n TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3009|R|25|n=5\n+>FBgn0067385_invader6|coord=3009|strand -|size=25|nreads=5\n TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3010|R|24|n=4\n+>FBgn0067385_invader6|coord=3010|strand -|size=24|nreads=4\n TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3011|R|23|n=1\n+>FBgn0067385_invader6|coord=3011|strand -|size=23|nreads=1\n TTCTAGTCAAAGTCGAAGGACTG\n->FBgn0067385_invader6|3014|R|20|n=1\n+>FBgn0067385_invader6|coord=3014|strand -|size=20|nreads=1\n 
TTCTAGTCAAAGTCGAAGGA\n->FBgn0067385_invader6|3024|F|26|n=1\n+>FBgn0067385_invader6|coord=3024|strand +|size=21|nreads=1\n+TTGACTAGAATGACTTAGACT\n+>FBgn0067385_invader6|coord=3024|strand +|size=26|nreads=1\n TTGACTAGAATGACTTAGACTTAGAA\n->FBgn0067624_BS3|1016|R|20|n=1\n+>FBgn0067624_BS3|coord=1016|strand -|size=20|nreads=1\n TTGATGCCAATGTTCCAACG\n->FBgn0067624_BS3|1026|F|26|n=1\n+>FBgn0067624_BS3|coord=1026|strand +|size=24|nreads=1\n+TTGGCATCAATGGTGACAAATCTG\n+>FBgn0067624_BS3|coord=1026|strand +|size=25|nreads=1\n+TTGGCATCAATGGTGACAAATCTGC\n+>FBgn0067624_BS3|coord=1026|strand +|size=26|nreads=1\n TTGGCATCAATGGTGACAAATCAGCG\n->FBgn0067624_BS3|1026|F|24|n=1\n-TTGGCATCAATGGTGACAAATCTG\n->FBgn0067624_BS3|1026|F|25|n=1\n-TTGGCATCAATGGTGACAAATCTGC\n'
b
diff -r 20d28cfdeefe -r a7fd04208764 test-data/paired_3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/paired_3.fa Sat Sep 09 11:57:39 2017 -0400
b
b'@@ -0,0 +1,416 @@\n+>FBgn0000004_17.6|coord=5844|strand -|size=21|nreads=1\n+TTTTCGTCAAGTGTGCTAAAT\n+>FBgn0000004_17.6|coord=5855|strand +|size=23|nreads=1\n+TTGACGAAAATGATCGAGTGGAT\n+>FBgn0000005_297|coord=1334|strand -|size=23|nreads=1\n+TTTTGCGCAATGGTAATTAAGGA\n+>FBgn0000005_297|coord=1347|strand +|size=21|nreads=1\n+TTGCACAAAATGAGGGAATTT\n+>FBgn0000007_1731|coord=4545|strand -|size=20|nreads=1\n+TTGAGAGCAAAGGCCGAATG\n+>FBgn0000007_1731|coord=4555|strand +|size=24|nreads=1\n+TTGCTCTCAATGCGCTGAGTTTGG\n+>FBgn0000155_roo|coord=2682|strand -|size=20|nreads=2\n+TTTTTACCAAACGGATGCCT\n+>FBgn0000155_roo|coord=2692|strand +|size=24|nreads=1\n+TTGGTAAAAAATGTATAAGTGAGC\n+>FBgn0000155_roo|coord=2692|strand +|size=27|nreads=1\n+TTGGTAAAAAATGTATAAGTGAGCAGC\n+>FBgn0000155_roo|coord=3225|strand -|size=26|nreads=1\n+TTTTGCCCAAGGAGACCGTCTATTTT\n+>FBgn0000155_roo|coord=3226|strand -|size=25|nreads=1\n+TTTTGCCCAAGGAGACCGTCTATTT\n+>FBgn0000155_roo|coord=3227|strand -|size=24|nreads=2\n+TTTTGCCCAAGGAGACCGTCTATT\n+>FBgn0000155_roo|coord=3228|strand -|size=23|nreads=1\n+TTTTGCCCAAGGAGACCGTCTAT\n+>FBgn0000155_roo|coord=3229|strand -|size=22|nreads=2\n+TTTTGCCCAAGGAGACCGTCTA\n+>FBgn0000155_roo|coord=3230|strand -|size=21|nreads=1\n+TTTTGCCCAAGGAGACCGTCT\n+>FBgn0000155_roo|coord=3231|strand -|size=20|nreads=2\n+TTTTGCCCAAGGAGACCGTC\n+>FBgn0000155_roo|coord=3241|strand +|size=20|nreads=1\n+TTGGGCAAAAAACTGATTTC\n+>FBgn0000155_roo|coord=3241|strand +|size=24|nreads=1\n+TTGGGCAAAAAACTGATTTCGGGT\n+>FBgn0000155_roo|coord=3241|strand +|size=27|nreads=1\n+TTGGGCAAAAAACTGATTTCGGGTGGA\n+>FBgn0000155_roo|coord=3241|strand +|size=28|nreads=1\n+TTGGGCAAAAAACTGATTTCGGGTGGAT\n+>FBgn0000155_roo|coord=8644|strand -|size=21|nreads=1\n+TTCTGCCAAAGGGCCAGCAAG\n+>FBgn0000155_roo|coord=8645|strand -|size=20|nreads=1\n+TTCTGCCAAAGGGCCAGCAA\n+>FBgn0000155_roo|coord=8655|strand +|size=25|nreads=1\n+TTTGGCAGAATGTTCACACATGAAA\n+>FBgn0000349_copia|coord=658|strand 
-|size=22|nreads=1\n+TTCTCAAGAATCTGACGCGCCG\n+>FBgn0000349_copia|coord=670|strand +|size=25|nreads=1\n+TTCTTGAGAATTTGGACGCCGTTTA\n+>FBgn0000349_copia|coord=4614|strand -|size=24|nreads=1\n+TTGCAGCAAACCCAATTTGTCTCG\n+>FBgn0000349_copia|coord=4628|strand +|size=20|nreads=1\n+TTTGCTGCAAGACGACCAAT\n+>FBgn0000652_F-element|coord=1546|strand -|size=28|nreads=1\n+TTCGAGAAAATTACTTCAGGATTTGTCT\n+>FBgn0000652_F-element|coord=1546|strand -|size=28|nreads=1\n+TTCGGGAAAATTACTTCAGGATTTGTCT\n+>FBgn0000652_F-element|coord=1547|strand -|size=27|nreads=1\n+TTCGGGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|coord=1547|strand -|size=27|nreads=1\n+TTTGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|coord=1547|strand -|size=27|nreads=74\n+TTCGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|coord=1548|strand -|size=26|nreads=3\n+TTCGAGAAAATTACTTCAGGATTTGT\n+>FBgn0000652_F-element|coord=1549|strand -|size=25|nreads=1\n+TTCGGGAAAATTACTTCAGGATTTG\n+>FBgn0000652_F-element|coord=1549|strand -|size=25|nreads=22\n+TTCGAGAAAATTACTTCAGGATTTG\n+>FBgn0000652_F-element|coord=1550|strand -|size=24|nreads=2\n+TTCGAGAAAATTACTTCAGGATTT\n+>FBgn0000652_F-element|coord=1551|strand -|size=23|nreads=7\n+TTCGAGAAAATTACTTCAGGATT\n+>FBgn0000652_F-element|coord=1554|strand -|size=20|nreads=1\n+TTCGAGAAAATTACTTCAGG\n+>FBgn0000652_F-element|coord=1564|strand +|size=20|nreads=1\n+TTTTCTCGAAAGCAGCTAGT\n+>FBgn0000652_F-element|coord=1564|strand +|size=20|nreads=2\n+TTTTCTCGAAAGCAGCAAGT\n+>FBgn0000652_F-element|coord=1564|strand +|size=23|nreads=2\n+TTTTCTCGAAAGCAGCAAGTTTC\n+>FBgn0000652_F-element|coord=1564|strand +|size=23|nreads=2\n+TTTTCTCGAAAGCAGCTAGTTTC\n+>FBgn0000652_F-element|coord=1564|strand +|size=24|nreads=2\n+TTTTCTCGAAAGCAGCAAGTTTCG\n+>FBgn0000652_F-element|coord=1564|strand +|size=24|nreads=5\n+TTTTCTCGAAAGCAGCTAGTTTCG\n+>FBgn0000652_F-element|coord=1564|strand +|size=25|nreads=1\n+TTTTCTCGAAAGCAGCTAGTTTCGC\n+>FBgn0000652_F-element|coord=2231|strand 
-|size=27|nreads=1\n+TTGGAAGAAATCCAGGAATTGAGCTTC\n+>FBgn0000652_F-element|coord=2233|strand -|size=25|nreads=5\n+TTGGAAGAAATCCAGGAATTGAGCT\n+>FBgn0000652_F-element|coord=2248|strand +|size=20|nreads='..b'coord=2907|strand -|size=23|nreads=1\n+TTGGTAACAAGCTTGGAGGCGAG\n+>FBgn0042682_Rt1b|coord=2907|strand -|size=23|nreads=15\n+TTGGTAACAAGCTTGTAGGCGAG\n+>FBgn0042682_Rt1b|coord=2908|strand -|size=22|nreads=1\n+TTGGTAACAAGCTTGTAGGCGA\n+>FBgn0042682_Rt1b|coord=2909|strand -|size=21|nreads=6\n+TTGGTAACAAGCTTGTAGGCG\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=22|nreads=1\n+TTGTTACCAAAAAGCTAAGGAG\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=23|nreads=1\n+TTGTTACCAAAAAGCTAAGGAGG\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=23|nreads=1\n+TTGTTACCAACAAGCTAAGGAGG\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=24|nreads=2\n+TTGTTACCAACAAGCTAAGGAGGA\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=25|nreads=1\n+TTGTTACCAAAAAGCTAAGGAGGAG\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=26|nreads=1\n+TTGTTACCAAAAAGCTAAGGAGGAGA\n+>FBgn0042682_Rt1b|coord=2920|strand +|size=27|nreads=1\n+TTGTTACCAAAAAGCTAAGGAGGAGAG\n+>FBgn0042682_Rt1b|coord=4364|strand -|size=22|nreads=1\n+TTGCCTGGAAGCGCCACTCCGC\n+>FBgn0042682_Rt1b|coord=4365|strand -|size=21|nreads=1\n+TTGCCTGGAAGCGCCACTCCG\n+>FBgn0042682_Rt1b|coord=4366|strand -|size=20|nreads=4\n+TTGCCTGGAAGCGCCACTCC\n+>FBgn0042682_Rt1b|coord=4376|strand +|size=24|nreads=1\n+TTCCAGGCAAGAGGCACACGAGTG\n+>FBgn0042682_Rt1b|coord=4376|strand +|size=26|nreads=1\n+TTCCAGGCAAGAGGCACACGAGTGGC\n+>FBgn0062343_Dm88|coord=4210|strand -|size=21|nreads=1\n+TTGGTTAGAACATCTGCCATC\n+>FBgn0062343_Dm88|coord=4221|strand +|size=24|nreads=1\n+TTCTAACCAAGAATTTGAATAGAT\n+>FBgn0063427_invader4|coord=691|strand -|size=26|nreads=15\n+TTCCTACGAATCGCTGTATGAACAGT\n+>FBgn0063427_invader4|coord=707|strand +|size=22|nreads=2\n+TTCGTAGGAATGGAGACGTCGG\n+>FBgn0063428_invader3|coord=425|strand -|size=25|nreads=1\n+TTCTGGGGAAATGGCCTGCAGACGC\n+>FBgn0063428_invader3|coord=427|strand 
-|size=23|nreads=1\n+TTCTGGGGAAATGGCCTGCAGAC\n+>FBgn0063428_invader3|coord=440|strand +|size=22|nreads=2\n+TTCCCCAGAAACGCGTGGCGAT\n+>FBgn0063430_invader1|coord=2082|strand -|size=20|nreads=1\n+TTTTATCGAACCAATAGAAC\n+>FBgn0063430_invader1|coord=2092|strand +|size=24|nreads=1\n+TTCGATAAAATGTCTAAGTATGTT\n+>FBgn0063430_invader1|coord=2092|strand +|size=26|nreads=1\n+TTCGATAAAATGTCTAAGTATGTTCG\n+>FBgn0063433_gypsy4|coord=3040|strand -|size=28|nreads=1\n+TTTTGAAGAACTTGGATTTTTCTTGAGA\n+>FBgn0063433_gypsy4|coord=3044|strand -|size=24|nreads=1\n+TTTTGAAGAACTTGGATTTTTCTT\n+>FBgn0063433_gypsy4|coord=3058|strand +|size=22|nreads=1\n+TTCTTCAAAAAGAGCGTGGAAT\n+>FBgn0063440_baggins|coord=5154|strand -|size=26|nreads=1\n+TTTTGTGCAAATGGCTGTGAAGTCGG\n+>FBgn0063440_baggins|coord=5170|strand +|size=21|nreads=1\n+TTGCACAAAATTGGCATTGCA\n+>FBgn0063919_Max-element|coord=3887|strand -|size=21|nreads=2\n+TTGCTGAGAAGCGTGTTGAGC\n+>FBgn0063919_Max-element|coord=3898|strand +|size=23|nreads=1\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|coord=3898|strand +|size=24|nreads=1\n+TTCTCAGCAAGTTCTGGGAGGTGT\n+>FBgn0063919_Max-element|coord=3898|strand +|size=24|nreads=2\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|coord=3898|strand +|size=25|nreads=1\n+TTCTCAGCAAGTTCTGGGAGGTGGA\n+>FBgn0067385_invader6|coord=3007|strand -|size=27|nreads=4\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|coord=3008|strand -|size=26|nreads=3\n+TTCTAGTCAAAGTCGAAGGACTGCAT\n+>FBgn0067385_invader6|coord=3009|strand -|size=25|nreads=5\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|coord=3010|strand -|size=24|nreads=4\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|coord=3011|strand -|size=23|nreads=1\n+TTCTAGTCAAAGTCGAAGGACTG\n+>FBgn0067385_invader6|coord=3014|strand -|size=20|nreads=1\n+TTCTAGTCAAAGTCGAAGGA\n+>FBgn0067385_invader6|coord=3024|strand +|size=21|nreads=1\n+TTGACTAGAATGACTTAGACT\n+>FBgn0067385_invader6|coord=3024|strand 
+|size=26|nreads=1\n+TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067624_BS3|coord=1016|strand -|size=20|nreads=1\n+TTGATGCCAATGTTCCAACG\n+>FBgn0067624_BS3|coord=1026|strand +|size=24|nreads=1\n+TTGGCATCAATGGTGACAAATCTG\n+>FBgn0067624_BS3|coord=1026|strand +|size=25|nreads=1\n+TTGGCATCAATGGTGACAAATCTGC\n+>FBgn0067624_BS3|coord=1026|strand +|size=26|nreads=1\n+TTGGCATCAATGGTGACAAATCAGCG\n'
b
diff -r 20d28cfdeefe -r a7fd04208764 test-data/paired_4.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/paired_4.fa Sat Sep 09 11:57:39 2017 -0400
b
@@ -0,0 +1,70 @@
+>FBgn0000155_roo|coord=3229|strand -|size=22|nreads=2
+TTTTGCCCAAGGAGACCGTCTA
+>FBgn0000155_roo|coord=3230|strand -|size=21|nreads=1
+TTTTGCCCAAGGAGACCGTCT
+>FBgn0000155_roo|coord=3231|strand -|size=20|nreads=2
+TTTTGCCCAAGGAGACCGTC
+>FBgn0000155_roo|coord=3241|strand +|size=20|nreads=1
+TTGGGCAAAAAACTGATTTC
+>FBgn0000652_F-element|coord=1554|strand -|size=20|nreads=1
+TTCGAGAAAATTACTTCAGG
+>FBgn0000652_F-element|coord=1564|strand +|size=20|nreads=1
+TTTTCTCGAAAGCAGCTAGT
+>FBgn0000652_F-element|coord=1564|strand +|size=20|nreads=2
+TTTTCTCGAAAGCAGCAAGT
+>FBgn0000652_F-element|coord=2471|strand -|size=21|nreads=2
+TTTAACCAAACTGCGGGAAAT
+>FBgn0000652_F-element|coord=2472|strand -|size=20|nreads=1
+TTTAACCAAACTGCGGGAAA
+>FBgn0000652_F-element|coord=2482|strand +|size=21|nreads=1
+TTTGGTTAAAGCTGAATGTCT
+>FBgn0000652_F-element|coord=2482|strand +|size=22|nreads=2
+TTTGGTTAAAGCTGAATGTCTG
+>FBgn0000652_F-element|coord=3513|strand -|size=21|nreads=3
+TTTCGCGAAATCCAAATTGGT
+>FBgn0000652_F-element|coord=3524|strand +|size=20|nreads=2
+TTTCGCGAAAGCCATGGAAC
+>FBgn0000652_F-element|coord=3524|strand +|size=21|nreads=2
+TTTCGCGAAAGCCACGGAACC
+>FBgn0000652_F-element|coord=3524|strand +|size=22|nreads=1
+TTTCGCGAAAGCCACGGAACCA
+>FBgn0002697_mdg1|coord=4646|strand -|size=21|nreads=1
+TTTCTCAGAAAAGTTCTTAAT
+>FBgn0002697_mdg1|coord=4657|strand +|size=22|nreads=1
+TTCTGAGAAATCACGCCACTTA
+>FBgn0002697_mdg1|coord=5354|strand -|size=21|nreads=2
+TTTCAACAAACACTTATGCTT
+>FBgn0002697_mdg1|coord=5365|strand +|size=21|nreads=2
+TTTGTTGAAAAGAGGAAAACA
+>FBgn0003519_Stalker|coord=6708|strand -|size=21|nreads=1
+TTTGTCTCAAAGCATTTTTGT
+>FBgn0003519_Stalker|coord=6719|strand +|size=21|nreads=1
+TTGAGACAAAACATTGCTAAT
+>FBgn0003908_R1A1-element|coord=4678|strand -|size=22|nreads=1
+TTTTCGCCAAGGCGCTGCACTT
+>FBgn0003908_R1A1-element|coord=4678|strand -|size=22|nreads=18
+TTTTCGCCAAGGCGCTGCACTC
+>FBgn0003908_R1A1-element|coord=4679|strand -|size=21|nreads=1
+TTTTCGCCAAGGCGCTGCACC
+>FBgn0003908_R1A1-element|coord=4679|strand -|size=21|nreads=6
+TTTTCGCCAAGGCGCTGCACT
+>FBgn0003908_R1A1-element|coord=4680|strand -|size=20|nreads=4
+TTTTCGCCAAGGCGCTGCAC
+>FBgn0003908_R1A1-element|coord=4690|strand +|size=20|nreads=2
+TTGGCGAAAACTGGATCTTC
+>FBgn0003908_R1A1-element|coord=4690|strand +|size=21|nreads=6
+TTGGCGAAAACTGGATCTTCG
+>FBgn0003908_R1A1-element|coord=4690|strand +|size=22|nreads=1
+TTGGCGAAAACTAGATCTTCGA
+>FBgn0003908_R1A1-element|coord=4690|strand +|size=22|nreads=8
+TTGGCGAAAACTGGATCTTCGA
+>FBgn0042682_Rt1b|coord=2908|strand -|size=22|nreads=1
+TTGGTAACAAGCTTGTAGGCGA
+>FBgn0042682_Rt1b|coord=2909|strand -|size=21|nreads=6
+TTGGTAACAAGCTTGTAGGCG
+>FBgn0042682_Rt1b|coord=2920|strand +|size=22|nreads=1
+TTGTTACCAAAAAGCTAAGGAG
+>FBgn0067385_invader6|coord=3014|strand -|size=20|nreads=1
+TTCTAGTCAAAGTCGAAGGA
+>FBgn0067385_invader6|coord=3024|strand +|size=21|nreads=1
+TTGACTAGAATGACTTAGACT