# HG changeset patch
# User brenninc
# Date 1462785301 14400
# Node ID 79682a423af732b87d2bef89bbfa994793b1a8f8
Uploaded
diff -r 000000000000 -r 79682a423af7 syncpairs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/syncpairs.py Mon May 09 05:15:01 2016 -0400
@@ -0,0 +1,111 @@
+"""
+Source: https://raw.githubusercontent.com/mmendez12/sync_paired_end_reads/master/sync_paired_end_reads/syncpairs.py
+"""
+__author__ = 'mickael'
+__author__ = 'mickael'
+
+from Bio import SeqIO
+from itertools import izip
+import argparse
+
+
+def adjust_name(reads1, reads2):
+ for r1, r2 in izip(reads1, reads2):
+ r2.name = r1.description
+ r2.description = r1.description
+ r2.id = r1.description
+ yield r2
+
+
+def remove_space_from_sequence_header(read):
+ """ replaces spaces in a read's name by three underscores.
+ Args:
+ read: A SeqRecord object (see Biopython)
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.Alphabet import SingleLetterAlphabet
+
+ >>> read = SeqRecord(Seq("AAAAA",SingleLetterAlphabet),\
+ id="read A", name="read A", description="read A")
+
+ >>> print remove_space_from_sequence_header(read).name
+ read___A
+ """
+
+ read.description = read.description.replace(' ', '___')
+ read.name = read.description
+ read.id = read.description
+ return read
+
+
+def next_matching_read(reads1, reads2):
+ """ return next read2 that matches read1
+ Args:
+ reads1: A generator that contains a SeqRecord (see Biopython)
+
+ reads2: A generator that contains a SeqRecord (see Biopython)
+
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.Alphabet import SingleLetterAlphabet
+
+ >>> reads1 = []
+ >>> reads2 = []
+
+ >>> reads1.append(SeqRecord(Seq("AAAAA",SingleLetterAlphabet),\
+ id="read A", name="read A", description="read A"))
+ >>> reads2.append(SeqRecord(Seq("TTTTT",SingleLetterAlphabet),\
+ id="read A", name="read A", description="read A"))
+
+ >>> reads1.append(SeqRecord(Seq("AAAAA",SingleLetterAlphabet),\
+ id="read B", name="read B", description="read B"))
+
+ >>> reads1.append(SeqRecord(Seq("AAAAA",SingleLetterAlphabet),\
+ id="read C", name="read C", description="read C"))
+ >>> reads2.append(SeqRecord(Seq("TTTTT",SingleLetterAlphabet),\
+ id="read C", name="read C", description="read C"))
+
+ >>> match = [read2 for read2 in next_matching_read(reads1, reads2)]
+ >>> print match[0].name
+ read A
+ >>> print match[1].name
+ read C
+ """
+
+ for read1 in reads1:
+ for read2 in reads2:
+ if read1.name == read2.name:
+ yield read2
+ break
+
+
+def main():
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("reads1",help='modified reads')
+ parser.add_argument("reads2", help='reads to adjust')
+
+ parser.add_argument('reads1_output', help='output folder and filename. Note that the folder should already exist')
+ parser.add_argument('reads2_output', help='output folder and filename. Note that the folder should already exist')
+
+ args = parser.parse_args()
+
+ #we'll need to go through the reads1 multiple times and it can be a large file
+ #so it's better to use an inline func that returns a generator
+ _reads1 = lambda: (rec for rec in SeqIO.parse(args.reads1, 'fastq'))
+ _reads2 = (rec for rec in SeqIO.parse(args.reads2, 'fastq'))
+
+ matching_reads2 = (read2 for read2 in next_matching_read(_reads1(), _reads2))
+ synced_reads2_names = (read2 for read2 in adjust_name(_reads1(), matching_reads2))
+
+ final_reads1 = (remove_space_from_sequence_header(r1) for r1 in _reads1())
+ final_reads2 = (remove_space_from_sequence_header(r2) for r2 in synced_reads2_names)
+
+ SeqIO.write(final_reads1, args.reads1_output, "fastq")
+ SeqIO.write(final_reads2, args.reads2_output, "fastq")
+
+if __name__ == '__main__':
+ main()
diff -r 000000000000 -r 79682a423af7 syncpairs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/syncpairs.xml Mon May 09 05:15:01 2016 -0400
@@ -0,0 +1,65 @@
+
+ Runs sync pairs on two fastq files.
+
+ biopython
+
+
+
+
+
+
+
+ syncpairs.py $reads1 $reads2 $reads1_output $reads2_output
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{
+ sync_paired_end_reads,
+ author = {Mickaël Mendez},
+ title = {sync_paired_end_reads on Github},
+ publisher = {GitHub},
+ journal = {GitHub repository},
+ url = {https://github.com/mmendez12/sync_paired_end_reads},
+ commit = {afd9bf092ed55a65f90f28f40d6230c5fe849223}
+ }
+
+
+
diff -r 000000000000 -r 79682a423af7 test-data/read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read1.fq Mon May 09 05:15:01 2016 -0400
@@ -0,0 +1,328 @@
+@NB500968:8:H5CFGAFXX:1:11101:15939:1071 1:N:0:TCCTGAGC+TACTCCTT;FP:23586;RQ:34.63
+GGCACGTCAGATGAGTATAAGAGACAGCAAATAGGTATAGGGGGCACGTCAGATGAGTAA
++
+EAEEEEEEA///A/EAEEE/EAEEEEEEEEEEAEEE//E/AA/E6AE/EE/
+
+
+
+
+