Previous changeset 2:320e06bf99b9 (2017-08-30) Next changeset 4:20d28cfdeefe (2017-09-08) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3 |
modified:
overlapping_reads.py overlapping_reads.xml test-data/paired.fa |
added:
test-data/paired_2.fa |
b |
diff -r 320e06bf99b9 -r 4d9682bd3a6b overlapping_reads.py --- a/overlapping_reads.py Wed Aug 30 05:40:18 2017 -0400 +++ b/overlapping_reads.py Sat Sep 02 06:35:15 2017 -0400 |
[ |
b"@@ -36,90 +36,106 @@\n self.bam_object = pysam.AlignmentFile(bam_file, 'rb')\n self.chromosomes = dict(zip(self.bam_object.references,\n self.bam_object.lengths))\n- self.map_dict = self.create_map(self.bam_object)\n+ self.all_query_positions = self.query_positions(self.bam_object)\n+ self.readdic = self.make_readdic(self.bam_object)\n \n- def create_map(self, bam_object):\n- '''\n- Returns a map_dictionary {(chromosome,read_position,polarity):\n- [read_length, ...]}\n- '''\n- map_dictionary = defaultdict(list)\n- # get empty value for start and end of each chromosome\n- for chrom in self.chromosomes:\n- map_dictionary[(chrom, 1, 'F')] = []\n- map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []\n+ def make_readdic(self, bam_object):\n+ readdic = defaultdict(int)\n+ for read in bam_object.fetch():\n+ readdic[read.query_sequence] += 1\n+ return readdic\n+\n+ def query_positions(self, bam_object):\n+ all_query_positions = defaultdict(list)\n for chrom in self.chromosomes:\n for read in bam_object.fetch(chrom):\n- positions = read.positions # a list of covered positions\n- if read.is_reverse:\n- map_dictionary[(chrom, positions[-1]+1,\n- 'R')].append(read.query_alignment_length)\n+ if not read.is_reverse:\n+ all_query_positions[chrom].append(\n+ read.get_reference_positions(full_length=True)[0])\n else:\n- map_dictionary[(chrom, positions[0]+1,\n- 'F')].append(read.query_alignment_length)\n- return map_dictionary\n+ all_query_positions[chrom].append(\n+ read.get_reference_positions(full_length=True)[-1])\n+ all_query_positions[chrom] = sorted(\n+ list(set(all_query_positions[chrom])))\n+ return all_query_positions\n \n- def signature_tables(self, minquery, maxquery, mintarget, maxtarget):\n+ def direct_pairing(self, minquery, maxquery, mintarget, maxtarget,\n+ file, overlap=10):\n+ F = open(file, 'w')\n query_range = range(minquery, maxquery + 1)\n target_range = range(mintarget, maxtarget + 1)\n- Query_table = defaultdict(dict)\n- Target_table = defaultdict(dict)\n- for key in self.map_dict:\n- for size in self.map_dict[key]:\n- if size in query_range or size in target_range:\n- if key[2] == 'F':\n- coordinate = key[1]\n- else:\n- coordinate = -key[1]\n- if size in query_range:\n- Query_table[key[0]][coordinate] = Query_table[key[0]].get(\n- coordinate, 0) + 1\n- if size in target_range:\n- Target_table[key[0]][coordinate] = \\\n- Target_table[key[0]].get(coordinate, 0) + 1\n- return Query_table, Target_table\n-\n- def search_overlaps(self, minquery, maxquery, mintarget, maxtarget,\n- overlap=10):\n- Query_table, Target_table = self.signature_tables(minquery, maxquery,\n- mintarget, maxtarget)\n- overlap_groups = defaultdict(list)\n- for chrom in Query_table:\n- for coord in Query_table[chrom]:\n- if Target_table[chrom].get(-coord - overlap + 1, 0):\n- overlap_groups[chrom].append(coord)\n- return overlap_groups\n-\n- def feed_overlaps(self, overlap_groups, minquery, output, overlap=10):\n- F = open(output, 'w')\n- for chrom in sorted(overlap_groups):\n- for pos in sorted(overlap_groups[chr"..b' targetread.query_alignment_length in\n+ target_range and targetread.is_reverse):\n+ targetreadseq = self.revcomp(\n+ targetread.query_sequence)\n+ stringresult.append(\n+ \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+ (chrom, queryread.get_reference_positions(\n+ full_length=True)[0]+1,\n+ \'F\', queryread.query_alignment_length,\n+ self.readdic[queryread.query_sequence],\n+ queryread.query_sequence))\n+ stringresult.append(\n+ \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+ (chrom, targetread.get_reference_positions(\n+ full_length=True)[0]+1,\n+ \'R\', targetread.query_alignment_length,\n+ self.readdic[targetread.query_sequence],\n+ targetreadseq))\n+ # 2\n+ for queryread in iterreads_3:\n+ if queryread.get_reference_positions(\n+ full_length=True)[-1] == pos+overlap-1 and \\\n+ queryread.query_alignment_length in query_range \\\n+ and queryread.is_reverse:\n+ for targetread in iterreads_4:\n+ if (targetread.\n+ get_reference_positions(full_length=True)[0]\n+ == pos and targetread.query_alignment_length\n+ in target_range and not\n+ targetread.is_reverse):\n+ queryreadseq = self.revcomp(\n+ queryread.query_sequence)\n+ targetreadseq = targetread.query_sequence\n+ stringresult.append(\n+ \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+ (chrom, queryread.get_reference_positions(\n+ full_length=True)[0]+1, \'R\',\n+ queryread.query_alignment_length,\n+ self.readdic[queryread.query_sequence],\n+ queryreadseq))\n+ stringresult.append(\n+ \'>%s|%s|%s|%s|n=%s\\n%s\\n\' %\n+ (chrom, targetread.get_reference_positions(\n+ full_length=True)[0]+1,\n+ \'F\', targetread.query_alignment_length,\n+ self.readdic[targetread.query_sequence],\n+ targetreadseq))\n+ stringresult = sorted(set(stringresult),\n+ key=lambda x: stringresult.index(x))\n+ F.write(\'\'.join(stringresult))\n \n def revcomp(self, sequence):\n antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"}\n@@ -129,12 +145,11 @@\n \n def main(input, minquery, maxquery, mintarget, maxtarget, output, overlap=10):\n mapobj = Map(input)\n- mapobj.feed_overlaps(mapobj.search_overlaps(minquery, maxquery,\n- mintarget, maxtarget,\n- overlap), minquery, output)\n+ mapobj.direct_pairing(minquery, maxquery, mintarget, maxtarget,\n+ output, overlap)\n \n \n if __name__ == "__main__":\n args = Parser()\n main(args.input, args.minquery, args.maxquery, args.mintarget,\n- args.maxtarget, args.output)\n+ args.maxtarget, args.output, args.overlap)\n' |
b |
diff -r 320e06bf99b9 -r 4d9682bd3a6b overlapping_reads.xml --- a/overlapping_reads.xml Wed Aug 30 05:40:18 2017 -0400 +++ b/overlapping_reads.xml Sat Sep 02 06:35:15 2017 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.1"> +<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.2"> <description /> <requirements> <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement> @@ -38,6 +38,15 @@ <param name="overlap" value="10" /> <output file="paired.fa" ftype="fasta" name="output" /> </test> + <test> + <param ftype="bam" name="input" value="sr_bowtie.bam" /> + <param name="minquery" value="20" /> + <param name="maxquery" value="22" /> + <param name="mintarget" value="23" /> + <param name="maxtarget" value="29" /> + <param name="overlap" value="10" /> + <output file="paired_2.fa" ftype="fasta" name="output" /> + </test> </tests> <help> @@ -52,24 +61,43 @@ **Input** -A **sorted** BAM alignment file. +*A **sorted** BAM alignment file.* + +*Query and target sizes:* + +The algorithm search for each *query* reads (of specified size) in the bam alignment if +there are *target* reads (of specified size) that align on the opposite strand with a 10 nt +overlap. + +Searching query reads of 20-22 nt that overlap by 10 nt with target +reads of 23-29 nt is different from searching query reads of 23-29 nt that overlap by 10 nt +with target reads of 20-22 nt. i.e, searching for siRNAs that pair with piRNAs is distinct +from searching for siRNAs that pairs with piRNAs, although of course the number of possibly +formed piRNA/siRNA pairs is the same as the number of possibly formed siRNA/piRNA pairs. + +*Overlap* +The number of nucleotides by which the pairs of sequences will overlap + + **Outputs** a fasta file of pairable reads such as : ->FBgn0000004_17.6|5839|R|26 +>FBgn0000004_17.6|5855|F|23|n=1 + +TTGACGAAAATGATCGAGTGGAT + +>FBgn0000004_17.6|5839|R|26|n=1 TTTTCGTCAATTGTGCCAAATAGGTA ->FBgn0000004_17.6|5855|F|23 - -TTGACGAAAATGATCGAGTGGAT +where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position, +R stand for reverse strand (F forward strand), 26 stands for the size of the sequence and +n=1 stands for the number of reads of the sequence in the dataset. -where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position, -R stand for reverse strand (F forward strand) and 26 stands for the size of the read. - -the second sequence in this example is a read that overlap by 10 nt with the first read. +the second sequence in this example corresponds to 1 read that overlap by 10 nt with +1 read of the first sequence. </help> <citations> |
b |
diff -r 320e06bf99b9 -r 4d9682bd3a6b test-data/paired.fa --- a/test-data/paired.fa Wed Aug 30 05:40:18 2017 -0400 +++ b/test-data/paired.fa Sat Sep 02 06:35:15 2017 -0400 |
b |
b'@@ -1,2182 +1,668 @@\n->FBgn0000004_17.6|5839|R|26\n-TTTTCGTCAATTGTGCCAAATAGGTA\n->FBgn0000004_17.6|5855|F|23\n+>FBgn0000004_17.6|5855|F|23|n=1\n TTGACGAAAATGATCGAGTGGAT\n->FBgn0000006_412|744|R|26\n-TTTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|744|R|26\n-TTTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|745|R|25\n-TTTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|742|R|27\n+>FBgn0000004_17.6|5839|R|26|n=1\n+TTTTCGTCAATTGTGCCAAATAGGTA\n+>FBgn0000006_412|759|F|24|n=3\n+TTTGGTCGAAAGCTCTAAAGCTAC\n+>FBgn0000006_412|742|R|27|n=1\n TTCGACCAAACCGGTGGCGTTTGCTGA\n->FBgn0000006_412|744|R|25\n-TTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|744|R|25\n+>FBgn0000006_412|744|R|25|n=2\n TTCGACCAAACCGGTGGCGTTTGCT\n->FBgn0000006_412|745|R|24\n-TTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|745|R|24\n+>FBgn0000006_412|745|R|24|n=2\n TTCGACCAAACCGGTGGCGTTTGC\n->FBgn0000006_412|759|F|24\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|759|F|24\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|759|F|24\n-TTTGGTCGAAAGCTCTAAAGCTAC\n->FBgn0000006_412|759|F|25\n+>FBgn0000006_412|759|F|25|n=1\n TTTGGTCGAAAGCTCTAAAGCTACA\n->FBgn0000006_412|759|F|27\n-TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|759|F|27\n-TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|759|F|27\n+>FBgn0000006_412|759|F|27|n=3\n TTTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n+>FBgn0000006_412|760|F|26|n=6\n TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000006_412|760|F|26\n-TTGGTCGAAAGCTCTAAAGCTACATG\n->FBgn0000007_1731|4539|R|26\n-TTGAGAGCAAAGGCCGAATGAGTAAA\n->FBgn0000007_1731|4555|F|24\n+>FBgn0000006_412|744|R|26|n=2\n+TTTCGACCAAACCGGTGGCGTTTGCT\n+>FBgn0000006_412|745|R|25|n=1\n+TTTCGACCAAACCGGTGGCGTTTGC\n+>FBgn0000007_1731|4555|F|24|n=1\n TTGCTCTCAATGCGCTGAGTTTGG\n->FBgn0000155_roo|8639|R|26\n-TTCTGCCAAAGGGCCAGCAAAGCTGA\n->FBgn0000155_roo|8639|R|26\n-TTCTGCCAAAGTGCCAGCAAAGCTGA\n->FBgn0000155_roo|3225|R|26\n-TTTTGCCCAAGGAGACCGTCTATTTT\n->FBgn0000155_roo|3226|R|25\n-TTTTGCCCAAGGAGACCGTCTATTT\n->FBgn0000155_roo|3227|R|24\n-TTTTGCCCAAGGAGACCGTCTATT\n->FBgn0000155_roo|3227|R|24\n-TTTTGCCCAAGGAGACCGTCTATT\n->FBgn0000155_roo|3228|R|23\n-TTTTGCCCAAGGAGACCGTCTAT\n->FBgn0000155_roo|3132|R|27\n-TTTATTAAAATCGGGGTCGGCTAATTT\n->FBgn0000155_roo|2675|R|27\n+>FBgn0000007_1731|4539|R|26|n=1\n+TTGAGAGCAAAGGCCGAATGAGTAAA\n+>FBgn0000155_roo|2176|F|24|n=1\n+TTTTGACCAAGCGGTATGAGAATA\n+>FBgn0000155_roo|2159|R|27|n=1\n+TTGGTCAAAAACTCCCAAGTGGCTTCA\n+>FBgn0000155_roo|2161|R|25|n=1\n+TTGGTCAAAAACTCCCAAGTGGCTT\n+>FBgn0000155_roo|2692|F|24|n=1\n+TTGGTAAAAAATGTATAAGTGAGC\n+>FBgn0000155_roo|2675|R|27|n=1\n TTTTTACCAAACGGATGCCTCAGACAT\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n-TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2676|R|26\n+>FBgn0000155_roo|2676|R|26|n=5\n TTTTTACCAAACGGATGCCTCAGACA\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n+>FBgn0000155_roo|2677|R|25|n=14\n TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2677|R|25\n-TTTTTACCAAACGGATGCCTCAGAC\n->FBgn0000155_roo|2678|R|24\n-TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2678|R|24\n-TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2678|R|24\n-TTTTTACCAAACGGATGCCTCAGA\n->FBgn0000155_roo|2'..b'063534_Doc2-element|3610|R|23\n-TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|3610|R|23\n-TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|3610|R|23\n+>FBgn0063534_Doc2-element|3610|R|23|n=3\n TTTTTGCGAAAGCCAAACTGATG\n->FBgn0063534_Doc2-element|327|R|25\n-TTCGTTGCAATGAGAGCCGGCGATC\n->FBgn0063534_Doc2-element|342|F|25\n-TTGCAACGAAACAACGCGTACTTCT\n->FBgn0063534_Doc2-element|3623|F|23\n-TTCGCAAAAATCACGGAACGATC\n->FBgn0063534_Doc2-element|3623|F|26\n+>FBgn0063534_Doc2-element|3623|F|26|n=1\n TTCGCAAAAATCACGGAACGATCGAA\n->FBgn0063594_Cr1a|2037|R|27\n+>FBgn0063594_Cr1a|2054|F|24|n=2\n+TTTGTAACAAGTCCTGAAAGTGTG\n+>FBgn0063594_Cr1a|2037|R|27|n=1\n TTGTTACAAGACATAGATCCAACAGTC\n->FBgn0063594_Cr1a|2039|R|25\n+>FBgn0063594_Cr1a|2039|R|25|n=1\n TTGTTACAAAACATAGATCCAACAG\n->FBgn0063594_Cr1a|2054|F|24\n-TTTGTAACAAGTCCTGAAAGTGTG\n->FBgn0063594_Cr1a|2054|F|24\n-TTTGTAACAAGTCCTGAAAGTGTG\n->FBgn0063919_Max-element|3879|R|29\n+>FBgn0063919_Max-element|3898|F|23|n=1\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|3879|R|29|n=1\n TTGCTGAGAAGCGTGTTGAGCGAATCAGG\n->FBgn0063919_Max-element|3880|R|28\n+>FBgn0063919_Max-element|3880|R|28|n=1\n TTGCTGAGAAGCGTGTCGAGCGAATCAG\n->FBgn0063919_Max-element|3880|R|28\n+>FBgn0063919_Max-element|3880|R|28|n=1\n TTGCTGAGAAGCGTGTTGAGCGAATCAG\n->FBgn0063919_Max-element|3882|R|26\n+>FBgn0063919_Max-element|3882|R|26|n=1\n TTGCTGAGAAGCGTGTCGAGCGAATC\n->FBgn0063919_Max-element|3883|R|25\n+>FBgn0063919_Max-element|3883|R|25|n=1\n TTGCTGAGAAGCGTGTCGAGCGAAT\n->FBgn0063919_Max-element|3884|R|24\n+>FBgn0063919_Max-element|3884|R|24|n=1\n TTGCTGAGAAGCGTGTTGAGCGAA\n->FBgn0063919_Max-element|3898|F|23\n-TTCTCAGCAAGTTCTGGGAGGTG\n->FBgn0063919_Max-element|3898|F|24\n+>FBgn0063919_Max-element|3898|F|24|n=2\n TTCTCAGCAAGTTCTGGGAGGTGG\n->FBgn0063919_Max-element|3898|F|24\n-TTCTCAGCAAGTTCTGGGAGGTGG\n->FBgn0063919_Max-element|3898|F|25\n+>FBgn0063919_Max-element|3898|F|25|n=1\n TTCTCAGCAAGTTCTGGGAGGTGGA\n->FBgn0063919_Max-element|3898|F|24\n+>FBgn0063919_Max-element|3898|F|24|n=1\n TTCTCAGCAAGTTCTGGGAGGTGT\n->FBgn0067385_invader6|3007|R|27\n-TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3007|R|27\n-TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3007|R|27\n-TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3007|R|27\n+>FBgn0067385_invader6|3024|F|26|n=1\n+TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067385_invader6|3007|R|27|n=4\n TTCTAGTCAAAGTCGAAGGACTGCATA\n->FBgn0067385_invader6|3008|R|26\n-TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3008|R|26\n+>FBgn0067385_invader6|3008|R|26|n=3\n TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3008|R|26\n-TTCTAGTCAAAGTCGAAGGACTGCAT\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n+>FBgn0067385_invader6|3009|R|25|n=5\n TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3009|R|25\n-TTCTAGTCAAAGTCGAAGGACTGCA\n->FBgn0067385_invader6|3010|R|24\n+>FBgn0067385_invader6|3010|R|24|n=4\n TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3010|R|24\n-TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3010|R|24\n-TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3010|R|24\n-TTCTAGTCAAAGTCGAAGGACTGC\n->FBgn0067385_invader6|3011|R|23\n+>FBgn0067385_invader6|3011|R|23|n=1\n TTCTAGTCAAAGTCGAAGGACTG\n->FBgn0067385_invader6|3024|F|26\n-TTGACTAGAATGACTTAGACTTAGAA\n->FBgn0067624_BS3|1011|R|25\n+>FBgn0067624_BS3|1026|F|26|n=1\n+TTGGCATCAATGGTGACAAATCAGCG\n+>FBgn0067624_BS3|1011|R|25|n=3\n TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1011|R|25\n-TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1011|R|25\n-TTGATGCCAATGTTCCAGCGTTTTG\n->FBgn0067624_BS3|1013|R|23\n+>FBgn0067624_BS3|1013|R|23|n=3\n TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1013|R|23\n-TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1013|R|23\n-TTGATGCCAATGTTCCAACGTCT\n->FBgn0067624_BS3|1026|F|26\n-TTGGCATCAATGGTGACAAATCAGCG\n->FBgn0067624_BS3|1026|F|24\n+>FBgn0067624_BS3|1026|F|24|n=1\n TTGGCATCAATGGTGACAAATCTG\n->FBgn0067624_BS3|1026|F|25\n+>FBgn0067624_BS3|1026|F|25|n=1\n TTGGCATCAATGGTGACAAATCTGC\n' |
b |
diff -r 320e06bf99b9 -r 4d9682bd3a6b test-data/paired_2.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/paired_2.fa Sat Sep 02 06:35:15 2017 -0400 |
b |
b'@@ -0,0 +1,370 @@\n+>FBgn0000004_17.6|5844|R|21|n=1\n+TTTTCGTCAAGTGTGCTAAAT\n+>FBgn0000004_17.6|5855|F|23|n=1\n+TTGACGAAAATGATCGAGTGGAT\n+>FBgn0000005_297|1347|F|21|n=1\n+TTGCACAAAATGAGGGAATTT\n+>FBgn0000005_297|1334|R|23|n=1\n+TTTTGCGCAATGGTAATTAAGGA\n+>FBgn0000007_1731|4545|R|20|n=1\n+TTGAGAGCAAAGGCCGAATG\n+>FBgn0000007_1731|4555|F|24|n=1\n+TTGCTCTCAATGCGCTGAGTTTGG\n+>FBgn0000155_roo|2682|R|20|n=2\n+TTTTTACCAAACGGATGCCT\n+>FBgn0000155_roo|2692|F|24|n=1\n+TTGGTAAAAAATGTATAAGTGAGC\n+>FBgn0000155_roo|2692|F|27|n=1\n+TTGGTAAAAAATGTATAAGTGAGCAGC\n+>FBgn0000155_roo|3241|F|20|n=1\n+TTGGGCAAAAAACTGATTTC\n+>FBgn0000155_roo|3225|R|26|n=1\n+TTTTGCCCAAGGAGACCGTCTATTTT\n+>FBgn0000155_roo|3226|R|25|n=1\n+TTTTGCCCAAGGAGACCGTCTATTT\n+>FBgn0000155_roo|3227|R|24|n=2\n+TTTTGCCCAAGGAGACCGTCTATT\n+>FBgn0000155_roo|3228|R|23|n=1\n+TTTTGCCCAAGGAGACCGTCTAT\n+>FBgn0000155_roo|3229|R|22|n=2\n+TTTTGCCCAAGGAGACCGTCTA\n+>FBgn0000155_roo|3241|F|24|n=1\n+TTGGGCAAAAAACTGATTTCGGGT\n+>FBgn0000155_roo|3241|F|27|n=1\n+TTGGGCAAAAAACTGATTTCGGGTGGA\n+>FBgn0000155_roo|3241|F|28|n=1\n+TTGGGCAAAAAACTGATTTCGGGTGGAT\n+>FBgn0000155_roo|8644|R|21|n=1\n+TTCTGCCAAAGGGCCAGCAAG\n+>FBgn0000155_roo|8655|F|25|n=1\n+TTTGGCAGAATGTTCACACATGAAA\n+>FBgn0000349_copia|658|R|22|n=1\n+TTCTCAAGAATCTGACGCGCCG\n+>FBgn0000349_copia|670|F|25|n=1\n+TTCTTGAGAATTTGGACGCCGTTTA\n+>FBgn0000349_copia|4628|F|20|n=1\n+TTTGCTGCAAGACGACCAAT\n+>FBgn0000349_copia|4614|R|24|n=1\n+TTGCAGCAAACCCAATTTGTCTCG\n+>FBgn0000652_F-element|1564|F|20|n=2\n+TTTTCTCGAAAGCAGCAAGT\n+>FBgn0000652_F-element|1546|R|28|n=1\n+TTCGAGAAAATTACTTCAGGATTTGTCT\n+>FBgn0000652_F-element|1546|R|28|n=1\n+TTCGGGAAAATTACTTCAGGATTTGTCT\n+>FBgn0000652_F-element|1547|R|27|n=74\n+TTCGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|1547|R|27|n=1\n+TTCGGGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|1547|R|27|n=1\n+TTTGAGAAAATTACTTCAGGATTTGTC\n+>FBgn0000652_F-element|1548|R|26|n=3\n+TTCGAGAAAATTACTTCAGGATTTGT\n+>FBgn0000652_F-element|1549|R|25|n=22\n+TTCGAGAAAATTACTTCAGGATTTG\n+>FBgn0000652_F-element|1549|R|25|n=1\n+TTCGGGAAAATTACTTCAGGATTTG\n+>FBgn0000652_F-element|1550|R|24|n=2\n+TTCGAGAAAATTACTTCAGGATTT\n+>FBgn0000652_F-element|1551|R|23|n=7\n+TTCGAGAAAATTACTTCAGGATT\n+>FBgn0000652_F-element|1554|R|20|n=1\n+TTCGAGAAAATTACTTCAGG\n+>FBgn0000652_F-element|1564|F|23|n=2\n+TTTTCTCGAAAGCAGCAAGTTTC\n+>FBgn0000652_F-element|1564|F|24|n=2\n+TTTTCTCGAAAGCAGCAAGTTTCG\n+>FBgn0000652_F-element|1564|F|23|n=2\n+TTTTCTCGAAAGCAGCTAGTTTC\n+>FBgn0000652_F-element|1564|F|24|n=5\n+TTTTCTCGAAAGCAGCTAGTTTCG\n+>FBgn0000652_F-element|1564|F|25|n=1\n+TTTTCTCGAAAGCAGCTAGTTTCGC\n+>FBgn0000652_F-element|2248|F|20|n=1\n+TTTCTTCCAAGCACTAGGGC\n+>FBgn0000652_F-element|2231|R|27|n=1\n+TTGGAAGAAATCCAGGAATTGAGCTTC\n+>FBgn0000652_F-element|2233|R|25|n=5\n+TTGGAAGAAATCCAGGAATTGAGCT\n+>FBgn0000652_F-element|2471|R|21|n=2\n+TTTAACCAAACTGCGGGAAAT\n+>FBgn0000652_F-element|2482|F|23|n=2\n+TTTGGTTAAAGCTGAATGTCTGC\n+>FBgn0000652_F-element|2482|F|24|n=2\n+TTTGGTTAAAGCTGAATGTCTGCC\n+>FBgn0000652_F-element|2482|F|26|n=3\n+TTTGGTTAAAGCTGAATGTCTGCCGG\n+>FBgn0000652_F-element|2482|F|27|n=1\n+TTTGGTTAAAGCTGAATGTCTGCCGGA\n+>FBgn0000652_F-element|3524|F|21|n=2\n+TTTCGCGAAAGCCACGGAACC\n+>FBgn0000652_F-element|3507|R|27|n=1\n+TTTCGCGAAATCCAAATTGGTGGGCTG\n+>FBgn0000652_F-element|3509|R|25|n=6\n+TTTCGCGAAATCCAAATTGGTGGGC\n+>FBgn0000652_F-element|3510|R|24|n=1\n+TTTCGCGAAATCCAAATTGGTGGG\n+>FBgn0000652_F-element|3513|R|21|n=3\n+TTTCGCGAAATCCAAATTGGT\n+>FBgn0000652_F-element|3524|F|23|n=1\n+TTTCGCGAAAGCCACGGAACCAT\n+>FBgn0000652_F-element|3524|F|27|n=1\n+TTTCGCGAAAGCCACGGAACCATTGAA\n+>FBgn0000652_F-element|3524|F|24|n=1\n+TTTCGCGAAAGCCATGGAACCATT\n+>FBgn0000652_F-element|3524|F|26|n=1\n+TTTCGCGAAAGCCATGGAACCATTGA\n+>FBgn0002697_mdg1|4301|R|22|n=1\n+TTCTTTGGAAAGAATTTGGGGC\n+>FBgn0002697_mdg1|4313|F|25|n=1\n+TTCCAAAGAATGATGACCCTTGCAT\n+>FBgn0003007_opus|5558|F|21|n=1\n+TTCCTAGAAATTTATCGTTGC\n+>FBgn0003007_opus|5540|R|28|n=1\n+TTTCTAGGAACGTAGAATGGAATCTCTC\n+>FBgn0003007_opus|5540|R|28|n=1\n+TTTCTAGGAACGTAGAGTGGAATCTCTC\n+>FBgn0003007_opus|5542|R|26|n=1\n+TTTCTAGGAACGTAGAATGGAATCTC\n+>FBgn0003007_opus|5542|R|26|n=2\n+TTTCTAGGAA'..b'flea|2765|F|25|n=1\n+TTGGTCTAAAAATAAAATGGAAGAA\n+>FBgn0014947_flea|2765|F|26|n=1\n+TTGGTCTAAAAATAAAATGGAAGAAG\n+>FBgn0014947_flea|2765|F|28|n=1\n+TTGGTCTAAAAATAAAATGGAAGAAGTG\n+>FBgn0015945_GATE|3158|F|20|n=3\n+TTCGTTCCAAATGAGCAAGC\n+>FBgn0015945_GATE|3140|R|28|n=9\n+TTGGAACGAAATTGGCCTGATTAGCGGA\n+>FBgn0015945_GATE|3141|R|27|n=4\n+TTGGAACGAAATTGGCCTGATTAGCGG\n+>FBgn0015945_GATE|3142|R|26|n=17\n+TTGGAACGAAATTGGCCTGATTAGCG\n+>FBgn0015945_GATE|3143|R|25|n=86\n+TTGGAACGAAATTGGCCTGATTAGC\n+>FBgn0015945_GATE|3144|R|24|n=17\n+TTGGAACGAAATTGGCCTGATTAG\n+>FBgn0015945_GATE|6234|R|22|n=1\n+TTGAAGGAAATCGCGGGAAAGC\n+>FBgn0015945_GATE|6246|F|25|n=1\n+TTTCCTTCAAGCCGTAAAAGAGTCG\n+>FBgn0015945_GATE|6246|F|26|n=1\n+TTTCCTTCAAGCCGTAAAAGAGTCGG\n+>FBgn0042682_Rt1b|1075|R|21|n=1\n+TTCTTGGCGACAGATGCGTAG\n+>FBgn0042682_Rt1b|1086|F|23|n=1\n+TTGCCAAGAATGCTAGCACGGGT\n+>FBgn0042682_Rt1b|2920|F|22|n=1\n+TTGTTACCAAAAAGCTAAGGAG\n+>FBgn0042682_Rt1b|2902|R|28|n=1\n+TTGGTAACAAGCTTGTAGGCGAGGCCCC\n+>FBgn0042682_Rt1b|2903|R|27|n=1\n+TTGGTAACAAGCTTGTAGGCGAGGCCC\n+>FBgn0042682_Rt1b|2904|R|26|n=2\n+TTGGTAACAAGCTTGTAGGCGAGGCC\n+>FBgn0042682_Rt1b|2904|R|26|n=1\n+TTGGTAACAAGCTTGTAGGCGAGTCC\n+>FBgn0042682_Rt1b|2905|R|25|n=2\n+TTGGTAACAAGCTTGTAGGCGAGGC\n+>FBgn0042682_Rt1b|2905|R|25|n=1\n+TTGGTAACAAGCTTGTAGGCGAGGT\n+>FBgn0042682_Rt1b|2906|R|24|n=25\n+TTGGTAACAAGCTTGTAGGCGAGG\n+>FBgn0042682_Rt1b|2907|R|23|n=1\n+TTGGTAACAAGCTTGGAGGCGAG\n+>FBgn0042682_Rt1b|2907|R|23|n=15\n+TTGGTAACAAGCTTGTAGGCGAG\n+>FBgn0042682_Rt1b|2908|R|22|n=1\n+TTGGTAACAAGCTTGTAGGCGA\n+>FBgn0042682_Rt1b|2920|F|23|n=1\n+TTGTTACCAAAAAGCTAAGGAGG\n+>FBgn0042682_Rt1b|2920|F|25|n=1\n+TTGTTACCAAAAAGCTAAGGAGGAG\n+>FBgn0042682_Rt1b|2920|F|26|n=1\n+TTGTTACCAAAAAGCTAAGGAGGAGA\n+>FBgn0042682_Rt1b|2920|F|27|n=1\n+TTGTTACCAAAAAGCTAAGGAGGAGAG\n+>FBgn0042682_Rt1b|2920|F|23|n=1\n+TTGTTACCAACAAGCTAAGGAGG\n+>FBgn0042682_Rt1b|2920|F|24|n=2\n+TTGTTACCAACAAGCTAAGGAGGA\n+>FBgn0042682_Rt1b|4364|R|22|n=1\n+TTGCCTGGAAGCGCCACTCCGC\n+>FBgn0042682_Rt1b|4376|F|24|n=1\n+TTCCAGGCAAGAGGCACACGAGTG\n+>FBgn0042682_Rt1b|4376|F|26|n=1\n+TTCCAGGCAAGAGGCACACGAGTGGC\n+>FBgn0062343_Dm88|4210|R|21|n=1\n+TTGGTTAGAACATCTGCCATC\n+>FBgn0062343_Dm88|4221|F|24|n=1\n+TTCTAACCAAGAATTTGAATAGAT\n+>FBgn0063427_invader4|707|F|22|n=2\n+TTCGTAGGAATGGAGACGTCGG\n+>FBgn0063427_invader4|691|R|26|n=15\n+TTCCTACGAATCGCTGTATGAACAGT\n+>FBgn0063428_invader3|440|F|22|n=2\n+TTCCCCAGAAACGCGTGGCGAT\n+>FBgn0063428_invader3|425|R|25|n=1\n+TTCTGGGGAAATGGCCTGCAGACGC\n+>FBgn0063428_invader3|427|R|23|n=1\n+TTCTGGGGAAATGGCCTGCAGAC\n+>FBgn0063430_invader1|2082|R|20|n=1\n+TTTTATCGAACCAATAGAAC\n+>FBgn0063430_invader1|2092|F|24|n=1\n+TTCGATAAAATGTCTAAGTATGTT\n+>FBgn0063430_invader1|2092|F|26|n=1\n+TTCGATAAAATGTCTAAGTATGTTCG\n+>FBgn0063433_gypsy4|3058|F|22|n=1\n+TTCTTCAAAAAGAGCGTGGAAT\n+>FBgn0063433_gypsy4|3040|R|28|n=1\n+TTTTGAAGAACTTGGATTTTTCTTGAGA\n+>FBgn0063433_gypsy4|3044|R|24|n=1\n+TTTTGAAGAACTTGGATTTTTCTT\n+>FBgn0063440_baggins|5170|F|21|n=1\n+TTGCACAAAATTGGCATTGCA\n+>FBgn0063440_baggins|5154|R|26|n=1\n+TTTTGTGCAAATGGCTGTGAAGTCGG\n+>FBgn0063919_Max-element|3887|R|21|n=2\n+TTGCTGAGAAGCGTGTTGAGC\n+>FBgn0063919_Max-element|3898|F|23|n=1\n+TTCTCAGCAAGTTCTGGGAGGTG\n+>FBgn0063919_Max-element|3898|F|24|n=2\n+TTCTCAGCAAGTTCTGGGAGGTGG\n+>FBgn0063919_Max-element|3898|F|25|n=1\n+TTCTCAGCAAGTTCTGGGAGGTGGA\n+>FBgn0063919_Max-element|3898|F|24|n=1\n+TTCTCAGCAAGTTCTGGGAGGTGT\n+>FBgn0067385_invader6|3024|F|21|n=1\n+TTGACTAGAATGACTTAGACT\n+>FBgn0067385_invader6|3007|R|27|n=4\n+TTCTAGTCAAAGTCGAAGGACTGCATA\n+>FBgn0067385_invader6|3008|R|26|n=3\n+TTCTAGTCAAAGTCGAAGGACTGCAT\n+>FBgn0067385_invader6|3009|R|25|n=5\n+TTCTAGTCAAAGTCGAAGGACTGCA\n+>FBgn0067385_invader6|3010|R|24|n=4\n+TTCTAGTCAAAGTCGAAGGACTGC\n+>FBgn0067385_invader6|3011|R|23|n=1\n+TTCTAGTCAAAGTCGAAGGACTG\n+>FBgn0067385_invader6|3014|R|20|n=1\n+TTCTAGTCAAAGTCGAAGGA\n+>FBgn0067385_invader6|3024|F|26|n=1\n+TTGACTAGAATGACTTAGACTTAGAA\n+>FBgn0067624_BS3|1016|R|20|n=1\n+TTGATGCCAATGTTCCAACG\n+>FBgn0067624_BS3|1026|F|26|n=1\n+TTGGCATCAATGGTGACAAATCAGCG\n+>FBgn0067624_BS3|1026|F|24|n=1\n+TTGGCATCAATGGTGACAAATCTG\n+>FBgn0067624_BS3|1026|F|25|n=1\n+TTGGCATCAATGGTGACAAATCTGC\n' |