Repository 'small_rna_signatures'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/small_rna_signatures

Changeset 3:4d9682bd3a6b (2017-09-02)
Previous changeset 2:320e06bf99b9 (2017-08-30) Next changeset 4:20d28cfdeefe (2017-09-08)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
modified:
overlapping_reads.py
overlapping_reads.xml
test-data/paired.fa
added:
test-data/paired_2.fa
b
diff -r 320e06bf99b9 -r 4d9682bd3a6b overlapping_reads.py
--- a/overlapping_reads.py Wed Aug 30 05:40:18 2017 -0400
+++ b/overlapping_reads.py Sat Sep 02 06:35:15 2017 -0400
[
b"@@ -36,90 +36,106 @@\n         self.bam_object = pysam.AlignmentFile(bam_file, 'rb')\n         self.chromosomes = dict(zip(self.bam_object.references,\n                                 self.bam_object.lengths))\n-        self.map_dict = self.create_map(self.bam_object)\n+        self.all_query_positions = self.query_positions(self.bam_object)\n+        self.readdic = self.make_readdic(self.bam_object)\n \n-    def create_map(self, bam_object):\n-        '''\n-        Returns a map_dictionary {(chromosome,read_position,polarity):\n-                                                    [read_length, ...]}\n-        '''\n-        map_dictionary = defaultdict(list)\n-        # get empty value for start and end of each chromosome\n-        for chrom in self.chromosomes:\n-            map_dictionary[(chrom, 1, 'F')] = []\n-            map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []\n+    def make_readdic(self, bam_object):\n+        readdic = defaultdict(int)\n+        for read in bam_object.fetch():\n+            readdic[read.query_sequence] += 1\n+        return readdic\n+\n+    def query_positions(self, bam_object):\n+        all_query_positions = defaultdict(list)\n         for chrom in self.chromosomes:\n             for read in bam_object.fetch(chrom):\n-                positions = read.positions  # a list of covered positions\n-                if read.is_reverse:\n-                    map_dictionary[(chrom, positions[-1]+1,\n-                                    'R')].append(read.query_alignment_length)\n+                if not read.is_reverse:\n+                    all_query_positions[chrom].append(\n+                        read.get_reference_positions(full_length=True)[0])\n                 else:\n-                    map_dictionary[(chrom, positions[0]+1,\n-                                    'F')].append(read.query_alignment_length)\n-        return map_dictionary\n+                    all_query_positions[chrom].append(\n+                        
read.get_reference_positions(full_length=True)[-1])\n+            all_query_positions[chrom] = sorted(\n+                list(set(all_query_positions[chrom])))\n+        return all_query_positions\n \n-    def signature_tables(self, minquery, maxquery, mintarget, maxtarget):\n+    def direct_pairing(self, minquery, maxquery, mintarget, maxtarget,\n+                       file, overlap=10):\n+        F = open(file, 'w')\n         query_range = range(minquery, maxquery + 1)\n         target_range = range(mintarget, maxtarget + 1)\n-        Query_table = defaultdict(dict)\n-        Target_table = defaultdict(dict)\n-        for key in self.map_dict:\n-            for size in self.map_dict[key]:\n-                if size in query_range or size in target_range:\n-                    if key[2] == 'F':\n-                        coordinate = key[1]\n-                    else:\n-                        coordinate = -key[1]\n-                if size in query_range:\n-                    Query_table[key[0]][coordinate] = Query_table[key[0]].get(\n-                        coordinate, 0) + 1\n-                if size in target_range:\n-                    Target_table[key[0]][coordinate] = \\\n-                        Target_table[key[0]].get(coordinate, 0) + 1\n-        return Query_table, Target_table\n-\n-    def search_overlaps(self, minquery, maxquery, mintarget, maxtarget,\n-                        overlap=10):\n-        Query_table, Target_table = self.signature_tables(minquery, maxquery,\n-                                                          mintarget, maxtarget)\n-        overlap_groups = defaultdict(list)\n-        for chrom in Query_table:\n-            for coord in Query_table[chrom]:\n-                if Target_table[chrom].get(-coord - overlap + 1, 0):\n-                    overlap_groups[chrom].append(coord)\n-        return overlap_groups\n-\n-    def feed_overlaps(self, overlap_groups, minquery, output, overlap=10):\n-        F = open(output, 'w')\n-       
 for chrom in sorted(overlap_groups):\n-            for pos in sorted(overlap_groups[chr"..b'                 targetread.query_alignment_length in\n+                                    target_range and targetread.is_reverse):\n+                                targetreadseq = self.revcomp(\n+                                    targetread.query_sequence)\n+                                stringresult.append(\n+                                    \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+                                    (chrom, queryread.get_reference_positions(\n+                                     full_length=True)[0]+1,\n+                                     \'F\', queryread.query_alignment_length,\n+                                     self.readdic[queryread.query_sequence],\n+                                     queryread.query_sequence))\n+                                stringresult.append(\n+                                    \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+                                    (chrom, targetread.get_reference_positions(\n+                                     full_length=True)[0]+1,\n+                                     \'R\', targetread.query_alignment_length,\n+                                     self.readdic[targetread.query_sequence],\n+                                     targetreadseq))\n+                #  2\n+                for queryread in iterreads_3:\n+                    if queryread.get_reference_positions(\n+                        full_length=True)[-1] == pos+overlap-1 and \\\n+                        queryread.query_alignment_length in query_range \\\n+                            and queryread.is_reverse:\n+                        for targetread in iterreads_4:\n+                            if (targetread.\n+                                get_reference_positions(full_length=True)[0]\n+                                == pos and targetread.query_alignment_length\n+                                    in target_range and not\n+    
                                targetread.is_reverse):\n+                                queryreadseq = self.revcomp(\n+                                    queryread.query_sequence)\n+                                targetreadseq = targetread.query_sequence\n+                                stringresult.append(\n+                                    \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+                                    (chrom, queryread.get_reference_positions(\n+                                     full_length=True)[0]+1, \'R\',\n+                                     queryread.query_alignment_length,\n+                                     self.readdic[queryread.query_sequence],\n+                                     queryreadseq))\n+                                stringresult.append(\n+                                    \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+                                    (chrom, targetread.get_reference_positions(\n+                                     full_length=True)[0]+1,\n+                                     \'F\', targetread.query_alignment_length,\n+                                     self.readdic[targetread.query_sequence],\n+                                     targetreadseq))\n+        stringresult = sorted(set(stringresult),\n+                              key=lambda x: stringresult.index(x))\n+        F.write(\'\'.join(stringresult))\n \n     def revcomp(self, sequence):\n         antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"}\n@@ -129,12 +145,11 @@\n \n def main(input, minquery, maxquery, mintarget, maxtarget, output, overlap=10):\n     mapobj = Map(input)\n-    mapobj.feed_overlaps(mapobj.search_overlaps(minquery, maxquery,\n-                                                mintarget, maxtarget,\n-                                                overlap), minquery, output)\n+    mapobj.direct_pairing(minquery, maxquery, mintarget, maxtarget,\n+                          output, overlap)\n \n \n if __name__ == 
"__main__":\n     args = Parser()\n     main(args.input, args.minquery, args.maxquery, args.mintarget,\n-         args.maxtarget, args.output)\n+         args.maxtarget, args.output, args.overlap)\n'
b
diff -r 320e06bf99b9 -r 4d9682bd3a6b overlapping_reads.xml
--- a/overlapping_reads.xml Wed Aug 30 05:40:18 2017 -0400
+++ b/overlapping_reads.xml Sat Sep 02 06:35:15 2017 -0400
b
@@ -1,4 +1,4 @@
-<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.1">
+<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.2">
     <description />
     <requirements>
         <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
@@ -38,6 +38,15 @@
             <param name="overlap" value="10" />
             <output file="paired.fa" ftype="fasta" name="output" />
         </test>
+        <test>
+            <param ftype="bam" name="input" value="sr_bowtie.bam" />
+            <param name="minquery" value="20" />
+            <param name="maxquery" value="22" />
+            <param name="mintarget" value="23" />
+            <param name="maxtarget" value="29" />
+            <param name="overlap" value="10" />
+            <output file="paired_2.fa" ftype="fasta" name="output" />
+        </test>
     </tests>
     <help>
 
@@ -52,24 +61,43 @@
 
 **Input**
 
-A **sorted** BAM alignment file.
+*A **sorted** BAM alignment file.*
+
+*Query and target sizes:*
+
+The algorithm searches, for each *query* read (of specified size) in the bam alignment,
+for *target* reads (of specified size) that align on the opposite strand with a 10 nt
+overlap.
+
+Searching query reads of 20-22 nt that overlap by 10 nt with target
+reads of 23-29 nt is different from searching query reads of 23-29 nt that overlap by 10 nt
+with target reads of 20-22 nt, i.e., searching for siRNAs that pair with piRNAs is distinct
+from searching for piRNAs that pair with siRNAs, although of course the number of possibly
+formed piRNA/siRNA pairs is the same as the number of possibly formed siRNA/piRNA pairs.
+
+*Overlap*
+The number of nucleotides by which the pairs of sequences will overlap
+
+
 
 **Outputs**
 
 a fasta file of pairable reads such as :
 
->FBgn0000004_17.6|5839|R|26
+>FBgn0000004_17.6|5855|F|23|n=1
+
+TTGACGAAAATGATCGAGTGGAT
+
+>FBgn0000004_17.6|5839|R|26|n=1
 
 TTTTCGTCAATTGTGCCAAATAGGTA
 
->FBgn0000004_17.6|5855|F|23
-
-TTGACGAAAATGATCGAGTGGAT
+where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position, 
+R stands for reverse strand (F for forward strand), 26 stands for the size of the sequence and
+n=1 stands for the number of reads of the sequence in the dataset.
 
-where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position, 
-R stand for reverse strand (F forward strand) and 26 stands for the size of the read.
-
-the second sequence in this example is a read that overlap by 10 nt with the first read.
+the second sequence in this example corresponds to 1 read that overlaps by 10 nt with
+1 read of the first sequence.
 
         </help>
     <citations>
b
diff -r 320e06bf99b9 -r 4d9682bd3a6b test-data/paired.fa
--- a/test-data/paired.fa Wed Aug 30 05:40:18 2017 -0400
+++ b/test-data/paired.fa Sat Sep 02 06:35:15 2017 -0400
b
b'@@ -1,2182 +1,668 @@\n->FBgn0000004_17.6|5839|R|26\n-TTTTCGTCAATTGTGCCAAATAGGTA\n->FBgn0000004_17.6|5855|F|23\n+>FBgn0000004_17.6|5855|F|23|n=1\n TTGACGAAAATGATCGAGTGGAT\n->FBgn0000006_412|744|R|26\n-TTTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|744|R|26\n-TTTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|745|R|25\n-TTTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|742|R|27\n+>FBgn0000004_17.6|5839|R|26|n=1\n+TTTTCGTCAATTGTGCCAAATAGGTA\n+>FBgn0000006_412|759|F|24|n=3\n+TTTGGTCGAAAGCTCTAAAGCTAC\n+>FBgn0000006_412|742|R|27|n=1\n TTCGACCAAACCGGTGGCGTTTGCTGA\n->FBgn0000006_412|744|R|25\n-TTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|744|R|25\n+>FBgn0000006_412|744|R|25|n=2\n TTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|745|R|24\n-TTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|745|R|24\n+>FBgn0000006_412|745|R|24|n=2\n TTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|759|F|24\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|759|F|24\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|759|F|24\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|759|F|25\n+>FBgn0000006_412|759|F|25|n=1\n TTTGGTCGAAAGCTCTAAAGCTACA\n->FBgn0000006_412|759|F|27\n-TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|759|F|27\n-TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|759|F|27\n+>FBgn0000006_412|759|F|27|n=3\n TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n+>FBgn0000006_412|760|F|26|n=6\n TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000007_1731|4539|R|26\n-TTGAGAGCAAAGGCCGAATGAGTAAA\n->FBgn0000007_1731|4555|F|24\n+>FBgn0000006_412|744|R|26|n=2\n+TTTCGACCAAACCGGTGGCGTTTGCT\n+>FBgn0000006_412|745|R|25|n=1\n+TTTCGACCAAACCGGTGGCGTTTGC\n+>FBgn0000007_1731|4555|F|24|n=1\n 
TTGCTCTCAATGCGCTGAGTTTGG\n->FBgn0000155_roo|8639|R|26\n-TTCTGCCAAAGGGCCAGCAAAGCTGA\n->FBgn0000155_roo|8639|R|26\n-TTCTGCCAAAGTGCCAGCAAAGCTGA\n->FBgn0000155_roo|3225|R|26\n-TTTTGCCCAAGGAGACCGTCTATTTT\n->FBgn0000155_roo|3226|R|25\n-TTTTGCCCAAGGAGACCGTCTATTT\n->FBgn0000155_roo|3227|R|24\n-TTTTGCCCAAGGAGACCGTCTATT\n->FBgn0000155_roo|3227|R|24\n-TTTTGCCCAAGGAGACCGTCTATT\n->FBgn0000155_roo|3228|R|23\n-TTTTGCCCAAGGAGACCGTCTAT\n->FBgn0000155_roo|3132|R|27\n-TTTATTAAAATCGGGGTCGGCTAATTT\n->FBgn0000155_roo|2675|R|27\n+>FBgn0000007_1731|4539|R|26|n=1\n+TTGAGAGCAAAGGCCGAATGAGTAAA\n+>FBgn0000155_roo|2176|F|24|n=1\n+TTTTGACCAAGCGGTATGAGAATA\n+>FBgn0000155_roo|2159|R|27|n=1\n+TTGGTCAAAAACTCCCAAGTGGCTTCA\n+>FBgn0000155_roo|2161|R|25|n=1\n+TTGGTCAAAAACTCCCAAGTGGCTT\n+>FBgn0000155_roo|2692|F|24|n=1\n+TTGGTAAAAAATGTATAAGTGAGC\n+>FBgn0000155_roo|2675|R|27|n=1\n TTTTTACCAAACGGATGCCTCAGACAT\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n+>FBgn0000155_roo|2676|R|26|n=5\n TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n+>FBgn0000155_roo|2677|R|25|n=14\n 
TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2678|R|24\n-TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2678|R|24\n-TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2678|R|24\n-TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2'..b'063534_Doc2-element|3610|R|23\n-TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|3610|R|23\n-TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|3610|R|23\n+>FBgn0063534_Doc2-element|3610|R|23|n=3\n TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|327|R|25\n-TTCGTTGCAATGAGAGCCGGCGATC\n->FBgn0063534_Doc2-element|342|F|25\n-TTGCAACGAAACAACGCGTACTTCT\n->FBgn0063534_Doc2-element|3623|F|23\n-TTCGCAAAAATCACGGAACGATC\n->FBgn0063534_Doc2-element|3623|F|26\n+>FBgn0063534_Doc2-element|3623|F|26|n=1\n TTCGCAAAAATCACGGAACGATCGAA\n->FBgn0063594_Cr1a|2037|R|27\n+>FBgn0063594_Cr1a|2054|F|24|n=2\n+TTTGTAACAAGTCCTGAAAGTGTG\n+>FBgn0063594_Cr1a|2037|R|27|n=1\n TTGTTACAAGACATAGATCCAACAGTC\n->FBgn0063594_Cr1a|2039|R|25\n+>FBgn0063594_Cr1a|2039|R|25|n=1\n TTGTTACAAAACATAGATCCAACAG\n->FBgn0063594_Cr1a|2054|F|24\n-TTTGTAACAAGTCCTGAAAGTGTG\n->FBgn0063594_Cr1a|2054|F|24\n-TTTGTAACAAGTCCTGAAAGTGTG\n->FBgn0063919_Max-element|3879|R|29\n+>FBgn0063919_Max-element|3898|F|23|n=1\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|3879|R|29|n=1\n TTGCTGAGAAGCGTGTTGAGCGAATCAGG\n->FBgn0063919_Max-element|3880|R|28\n+>FBgn0063919_Max-element|3880|R|28|n=1\n TTGCTGAGAAGCGTGTCGAGCGAATCAG\n->FBgn0063919_Max-element|3880|R|28\n+>FBgn0063919_Max-element|3880|R|28|n=1\n TTGCTGAGAAGCGTGTTGAGCGAATCAG\n->FBgn0063919_Max-element|3882|R|26\n+>FBgn0063919_Max-element|3882|R|26|n=1\n TTGCTGAGAAGCGTGTCGAGCGAATC\n->FBgn0063919_Max-element|3883|R|25\n+>FBgn0063919_Max-element|3883|R|25|n=1\n TTGCTGAGAAGCGTGTCGAGCGAAT\n->FBgn0063919_Max-element|3884|R|24\n+>FBgn0063919_Max-element|3884|R|24|n=1\n 
TTGCTGAGAAGCGTGTTGAGCGAA\n->FBgn0063919_Max-element|3898|F|23\n-TTCTCAGCAAGTTCTGGGAGGTG\n->FBgn0063919_Max-element|3898|F|24\n+>FBgn0063919_Max-element|3898|F|24|n=2\n TTCTCAGCAAGTTCTGGGAGGTGG\n->FBgn0063919_Max-element|3898|F|24\n-TTCTCAGCAAGTTCTGGGAGGTGG\n->FBgn0063919_Max-element|3898|F|25\n+>FBgn0063919_Max-element|3898|F|25|n=1\n TTCTCAGCAAGTTCTGGGAGGTGGA\n->FBgn0063919_Max-element|3898|F|24\n+>FBgn0063919_Max-element|3898|F|24|n=1\n TTCTCAGCAAGTTCTGGGAGGTGT\n->FBgn0067385_invader6|3007|R|27\n-TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3007|R|27\n-TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3007|R|27\n-TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3007|R|27\n+>FBgn0067385_invader6|3024|F|26|n=1\n+TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067385_invader6|3007|R|27|n=4\n TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3008|R|26\n-TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3008|R|26\n+>FBgn0067385_invader6|3008|R|26|n=3\n TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3008|R|26\n-TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n+>FBgn0067385_invader6|3009|R|25|n=5\n TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3010|R|24\n+>FBgn0067385_invader6|3010|R|24|n=4\n TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3010|R|24\n-TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3010|R|24\n-TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3010|R|24\n-TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3011|R|23\n+>FBgn0067385_invader6|3011|R|23|n=1\n TTCTAGTCAAAGTCGAAGGACTG\n->FBgn0067385_invader6|3024|F|26\n-TTGACTAGAATGACTTAGACTTAGAA\n->FBgn0067624_BS3|1011|R|25\n+>FBgn0067624_BS3|1026|F|26|n=1\n+TTGGCATCAATGGTGACAAATCAGCG\n+>FBgn0067624_BS3|1011|R|25|n=3\n 
TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1011|R|25\n-TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1011|R|25\n-TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1013|R|23\n+>FBgn0067624_BS3|1013|R|23|n=3\n TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1013|R|23\n-TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1013|R|23\n-TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1026|F|26\n-TTGGCATCAATGGTGACAAATCAGCG\n->FBgn0067624_BS3|1026|F|24\n+>FBgn0067624_BS3|1026|F|24|n=1\n TTGGCATCAATGGTGACAAATCTG\n->FBgn0067624_BS3|1026|F|25\n+>FBgn0067624_BS3|1026|F|25|n=1\n TTGGCATCAATGGTGACAAATCTGC\n'
b
diff -r 320e06bf99b9 -r 4d9682bd3a6b test-data/paired_2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/paired_2.fa Sat Sep 02 06:35:15 2017 -0400
b
b'@@ -0,0 +1,370 @@\n+>FBgn0000004_17.6|5844|R|21|n=1\n+TTTTCGTCAAGTGTGCTAAAT\n+>FBgn0000004_17.6|5855|F|23|n=1\n+TTGACGAAAATGATCGAGTGGAT\n+>FBgn0000005_297|1347|F|21|n=1\n+TTGCACAAAATGAGGGAATTT\n+>FBgn0000005_297|1334|R|23|n=1\n+TTTTGCGCAATGGTAATTAAGGA\n+>FBgn0000007_1731|4545|R|20|n=1\n+TTGAGAGCAAAGGCCGAATG\n+>FBgn0000007_1731|4555|F|24|n=1\n+TTGCTCTCAATGCGCTGAGTTTGG\n+>FBgn0000155_roo|2682|R|20|n=2\n+TTTTTACCAAACGGATGCCT\n+>FBgn0000155_roo|2692|F|24|n=1\n+TTGGTAAAAAATGTATAAGTGAGC\n+>FBgn0000155_roo|2692|F|27|n=1\n+TTGGTAAAAAATGTATAAGTGAGCAGC\n+>FBgn0000155_roo|3241|F|20|n=1\n+TTGGGCAAAAAACTGATTTC\n+>FBgn0000155_roo|3225|R|26|n=1\n+TTTTGCCCAAGGAGACCGTCTATTTT\n+>FBgn0000155_roo|3226|R|25|n=1\n+TTTTGCCCAAGGAGACCGTCTATTT\n+>FBgn0000155_roo|3227|R|24|n=2\n+TTTTGCCCAAGGAGACCGTCTATT\n+>FBgn0000155_roo|3228|R|23|n=1\n+TTTTGCCCAAGGAGACCGTCTAT\n+>FBgn0000155_roo|3229|R|22|n=2\n+TTTTGCCCAAGGAGACCGTCTA\n+>FBgn0000155_roo|3241|F|24|n=1\n+TTGGGCAAAAAACTGATTTCGGGT\n+>FBgn0000155_roo|3241|F|27|n=1\n+TTGGGCAAAAAACTGATTTCGGGTGGA\n+>FBgn0000155_roo|3241|F|28|n=1\n+TTGGGCAAAAAACTGATTTCGGGTGGAT\n+>FBgn0000155_roo|8644|R|21|n=1\n+TTCTGCCAAAGGGCCAGCAAG\n+>FBgn0000155_roo|8655|F|25|n=1\n+TTTGGCAGAATGTTCACACATGAAA\n+>FBgn0000349_copia|658|R|22|n=1\n+TTCTCAAGAATCTGACGCGCCG\n+>FBgn0000349_copia|670|F|25|n=1\n+TTCTTGAGAATTTGGACGCCGTTTA\n+>FBgn0000349_copia|4628|F|20|n=1\n+TTTGCTGCAAGACGACCAAT\n+>FBgn0000349_copia|4614|R|24|n=1\n+TTGCAGCAAACCCAATTTGTCTCG\n+>FBgn0000652_F-element|1564|F|20|n=2\n+TTTTCTCGAAAGCAGCAAGT\n+>FBgn0000652_F-element|1546|R|28|n=1\n+TTCGAGAAAATTACTTCAGGATTTGTCT\n+>FBgn0000652_F-element|1546|R|28|n=1\n+TTCGGGAAAATTACTTCAGGATTTGTCT\n+>FBgn0000652_F-element|1547|R|27|n=74\n+TTCGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|1547|R|27|n=1\n+TTCGGGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|1547|R|27|n=1\n+TTTGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|1548|R|26|n=3\n+TTCGAGAAAATTACTTCAGGATTTGT\n+>FBgn0000652_F-element|1549|R|25|n=22\n+TTCGAGAAAATTACTTCAGGATTTG\n
+>FBgn0000652_F-element|1549|R|25|n=1\n+TTCGGGAAAATTACTTCAGGATTTG\n+>FBgn0000652_F-element|1550|R|24|n=2\n+TTCGAGAAAATTACTTCAGGATTT\n+>FBgn0000652_F-element|1551|R|23|n=7\n+TTCGAGAAAATTACTTCAGGATT\n+>FBgn0000652_F-element|1554|R|20|n=1\n+TTCGAGAAAATTACTTCAGG\n+>FBgn0000652_F-element|1564|F|23|n=2\n+TTTTCTCGAAAGCAGCAAGTTTC\n+>FBgn0000652_F-element|1564|F|24|n=2\n+TTTTCTCGAAAGCAGCAAGTTTCG\n+>FBgn0000652_F-element|1564|F|23|n=2\n+TTTTCTCGAAAGCAGCTAGTTTC\n+>FBgn0000652_F-element|1564|F|24|n=5\n+TTTTCTCGAAAGCAGCTAGTTTCG\n+>FBgn0000652_F-element|1564|F|25|n=1\n+TTTTCTCGAAAGCAGCTAGTTTCGC\n+>FBgn0000652_F-element|2248|F|20|n=1\n+TTTCTTCCAAGCACTAGGGC\n+>FBgn0000652_F-element|2231|R|27|n=1\n+TTGGAAGAAATCCAGGAATTGAGCTTC\n+>FBgn0000652_F-element|2233|R|25|n=5\n+TTGGAAGAAATCCAGGAATTGAGCT\n+>FBgn0000652_F-element|2471|R|21|n=2\n+TTTAACCAAACTGCGGGAAAT\n+>FBgn0000652_F-element|2482|F|23|n=2\n+TTTGGTTAAAGCTGAATGTCTGC\n+>FBgn0000652_F-element|2482|F|24|n=2\n+TTTGGTTAAAGCTGAATGTCTGCC\n+>FBgn0000652_F-element|2482|F|26|n=3\n+TTTGGTTAAAGCTGAATGTCTGCCGG\n+>FBgn0000652_F-element|2482|F|27|n=1\n+TTTGGTTAAAGCTGAATGTCTGCCGGA\n+>FBgn0000652_F-element|3524|F|21|n=2\n+TTTCGCGAAAGCCACGGAACC\n+>FBgn0000652_F-element|3507|R|27|n=1\n+TTTCGCGAAATCCAAATTGGTGGGCTG\n+>FBgn0000652_F-element|3509|R|25|n=6\n+TTTCGCGAAATCCAAATTGGTGGGC\n+>FBgn0000652_F-element|3510|R|24|n=1\n+TTTCGCGAAATCCAAATTGGTGGG\n+>FBgn0000652_F-element|3513|R|21|n=3\n+TTTCGCGAAATCCAAATTGGT\n+>FBgn0000652_F-element|3524|F|23|n=1\n+TTTCGCGAAAGCCACGGAACCAT\n+>FBgn0000652_F-element|3524|F|27|n=1\n+TTTCGCGAAAGCCACGGAACCATTGAA\n+>FBgn0000652_F-element|3524|F|24|n=1\n+TTTCGCGAAAGCCATGGAACCATT\n+>FBgn0000652_F-element|3524|F|26|n=1\n+TTTCGCGAAAGCCATGGAACCATTGA\n+>FBgn0002697_mdg1|4301|R|22|n=1\n+TTCTTTGGAAAGAATTTGGGGC\n+>FBgn0002697_mdg1|4313|F|25|n=1\n+TTCCAAAGAATGATGACCCTTGCAT\n+>FBgn0003007_opus|5558|F|21|n=1\n+TTCCTAGAAATTTATCGTTGC\n+>FBgn0003007_opus|5540|R|28|n=1\n+TTTCTAGGAACGTAGAATGGAATCTCTC\n+>FBgn0003007_opus|5540|R|28|n=1\n+TTTCTAG
GAACGTAGAGTGGAATCTCTC\n+>FBgn0003007_opus|5542|R|26|n=1\n+TTTCTAGGAACGTAGAATGGAATCTC\n+>FBgn0003007_opus|5542|R|26|n=2\n+TTTCTAGGAA'..b'flea|2765|F|25|n=1\n+TTGGTCTAAAAATAAAATGGAAGAA\n+>FBgn0014947_flea|2765|F|26|n=1\n+TTGGTCTAAAAATAAAATGGAAGAAG\n+>FBgn0014947_flea|2765|F|28|n=1\n+TTGGTCTAAAAATAAAATGGAAGAAGTG\n+>FBgn0015945_GATE|3158|F|20|n=3\n+TTCGTTCCAAATGAGCAAGC\n+>FBgn0015945_GATE|3140|R|28|n=9\n+TTGGAACGAAATTGGCCTGATTAGCGGA\n+>FBgn0015945_GATE|3141|R|27|n=4\n+TTGGAACGAAATTGGCCTGATTAGCGG\n+>FBgn0015945_GATE|3142|R|26|n=17\n+TTGGAACGAAATTGGCCTGATTAGCG\n+>FBgn0015945_GATE|3143|R|25|n=86\n+TTGGAACGAAATTGGCCTGATTAGC\n+>FBgn0015945_GATE|3144|R|24|n=17\n+TTGGAACGAAATTGGCCTGATTAG\n+>FBgn0015945_GATE|6234|R|22|n=1\n+TTGAAGGAAATCGCGGGAAAGC\n+>FBgn0015945_GATE|6246|F|25|n=1\n+TTTCCTTCAAGCCGTAAAAGAGTCG\n+>FBgn0015945_GATE|6246|F|26|n=1\n+TTTCCTTCAAGCCGTAAAAGAGTCGG\n+>FBgn0042682_Rt1b|1075|R|21|n=1\n+TTCTTGGCGACAGATGCGTAG\n+>FBgn0042682_Rt1b|1086|F|23|n=1\n+TTGCCAAGAATGCTAGCACGGGT\n+>FBgn0042682_Rt1b|2920|F|22|n=1\n+TTGTTACCAAAAAGCTAAGGAG\n+>FBgn0042682_Rt1b|2902|R|28|n=1\n+TTGGTAACAAGCTTGTAGGCGAGGCCCC\n+>FBgn0042682_Rt1b|2903|R|27|n=1\n+TTGGTAACAAGCTTGTAGGCGAGGCCC\n+>FBgn0042682_Rt1b|2904|R|26|n=2\n+TTGGTAACAAGCTTGTAGGCGAGGCC\n+>FBgn0042682_Rt1b|2904|R|26|n=1\n+TTGGTAACAAGCTTGTAGGCGAGTCC\n+>FBgn0042682_Rt1b|2905|R|25|n=2\n+TTGGTAACAAGCTTGTAGGCGAGGC\n+>FBgn0042682_Rt1b|2905|R|25|n=1\n+TTGGTAACAAGCTTGTAGGCGAGGT\n+>FBgn0042682_Rt1b|2906|R|24|n=25\n+TTGGTAACAAGCTTGTAGGCGAGG\n+>FBgn0042682_Rt1b|2907|R|23|n=1\n+TTGGTAACAAGCTTGGAGGCGAG\n+>FBgn0042682_Rt1b|2907|R|23|n=15\n+TTGGTAACAAGCTTGTAGGCGAG\n+>FBgn0042682_Rt1b|2908|R|22|n=1\n+TTGGTAACAAGCTTGTAGGCGA\n+>FBgn0042682_Rt1b|2920|F|23|n=1\n+TTGTTACCAAAAAGCTAAGGAGG\n+>FBgn0042682_Rt1b|2920|F|25|n=1\n+TTGTTACCAAAAAGCTAAGGAGGAG\n+>FBgn0042682_Rt1b|2920|F|26|n=1\n+TTGTTACCAAAAAGCTAAGGAGGAGA\n+>FBgn0042682_Rt1b|2920|F|27|n=1\n+TTGTTACCAAAAAGCTAAGGAGGAGAG\n+>FBgn0042682_Rt1b|2920|F|23|n=1\n+TTGTTACCAACAAGCTAAGGAGG\n+>FBgn0042682_Rt1b|292
0|F|24|n=2\n+TTGTTACCAACAAGCTAAGGAGGA\n+>FBgn0042682_Rt1b|4364|R|22|n=1\n+TTGCCTGGAAGCGCCACTCCGC\n+>FBgn0042682_Rt1b|4376|F|24|n=1\n+TTCCAGGCAAGAGGCACACGAGTG\n+>FBgn0042682_Rt1b|4376|F|26|n=1\n+TTCCAGGCAAGAGGCACACGAGTGGC\n+>FBgn0062343_Dm88|4210|R|21|n=1\n+TTGGTTAGAACATCTGCCATC\n+>FBgn0062343_Dm88|4221|F|24|n=1\n+TTCTAACCAAGAATTTGAATAGAT\n+>FBgn0063427_invader4|707|F|22|n=2\n+TTCGTAGGAATGGAGACGTCGG\n+>FBgn0063427_invader4|691|R|26|n=15\n+TTCCTACGAATCGCTGTATGAACAGT\n+>FBgn0063428_invader3|440|F|22|n=2\n+TTCCCCAGAAACGCGTGGCGAT\n+>FBgn0063428_invader3|425|R|25|n=1\n+TTCTGGGGAAATGGCCTGCAGACGC\n+>FBgn0063428_invader3|427|R|23|n=1\n+TTCTGGGGAAATGGCCTGCAGAC\n+>FBgn0063430_invader1|2082|R|20|n=1\n+TTTTATCGAACCAATAGAAC\n+>FBgn0063430_invader1|2092|F|24|n=1\n+TTCGATAAAATGTCTAAGTATGTT\n+>FBgn0063430_invader1|2092|F|26|n=1\n+TTCGATAAAATGTCTAAGTATGTTCG\n+>FBgn0063433_gypsy4|3058|F|22|n=1\n+TTCTTCAAAAAGAGCGTGGAAT\n+>FBgn0063433_gypsy4|3040|R|28|n=1\n+TTTTGAAGAACTTGGATTTTTCTTGAGA\n+>FBgn0063433_gypsy4|3044|R|24|n=1\n+TTTTGAAGAACTTGGATTTTTCTT\n+>FBgn0063440_baggins|5170|F|21|n=1\n+TTGCACAAAATTGGCATTGCA\n+>FBgn0063440_baggins|5154|R|26|n=1\n+TTTTGTGCAAATGGCTGTGAAGTCGG\n+>FBgn0063919_Max-element|3887|R|21|n=2\n+TTGCTGAGAAGCGTGTTGAGC\n+>FBgn0063919_Max-element|3898|F|23|n=1\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|3898|F|24|n=2\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|3898|F|25|n=1\n+TTCTCAGCAAGTTCTGGGAGGTGGA\n+>FBgn0063919_Max-element|3898|F|24|n=1\n+TTCTCAGCAAGTTCTGGGAGGTGT\n+>FBgn0067385_invader6|3024|F|21|n=1\n+TTGACTAGAATGACTTAGACT\n+>FBgn0067385_invader6|3007|R|27|n=4\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|3008|R|26|n=3\n+TTCTAGTCAAAGTCGAAGGACTGCAT\n+>FBgn0067385_invader6|3009|R|25|n=5\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3010|R|24|n=4\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|3011|R|23|n=1\n+TTCTAGTCAAAGTCGAAGGACTG\n+>FBgn0067385_invader6|3014|R|20|n=1\n+TTCTAGTCAAAGTCGAAGGA\n+>FBgn0067385_invader6|3024|F|26|n=1\n+
TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067624_BS3|1016|R|20|n=1\n+TTGATGCCAATGTTCCAACG\n+>FBgn0067624_BS3|1026|F|26|n=1\n+TTGGCATCAATGGTGACAAATCAGCG\n+>FBgn0067624_BS3|1026|F|24|n=1\n+TTGGCATCAATGGTGACAAATCTG\n+>FBgn0067624_BS3|1026|F|25|n=1\n+TTGGCATCAATGGTGACAAATCTGC\n'