Repository 'umicount'
hg clone https://toolshed.g2.bx.psu.edu/repos/brenninc/umicount

Changeset 0:d1d0ee366702 (2016-05-11)
Commit message:
Uploaded first version
added:
bed12.py
cage_scan_clustering.py
dedup_barcode_fingerprint.py
dedup_fingerprint.py
test-data/cagescan_fragments.bed
test-data/paired.bed
umicount.xml
umicount_license
b
diff -r 000000000000 -r d1d0ee366702 bed12.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bed12.py Wed May 11 04:53:30 2016 -0400
[
@@ -0,0 +1,170 @@
+"""
+.. module:: bed12
+   :platform: Unix
+   :synopsis: Defines a set of generic functions to parse and process bed12 files.
+
+.. moduleauthor:: Mickael Mendez <mendez.mickael@gmail.com>
+
+.. source: https://github.com/mmendez12/umicount
+
+"""
+
+__author__ = 'mickael'
+
+
+import operator
+import itertools
+
+def get_chrom(read):
+    """Get chromosome from a bed12 line.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        The chromosome name
+
+    >>> read = ['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_chrom(read)
+    'chrX'
+    """
+    return read[0]
+
+
+def get_start(read):
+    """Get start position from a bed12 line.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        An integer representing the start position of the read.
+
+    >>> read = ['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_start(read)
+    100
+    """
+    return int(read[1])
+
+
+def get_end(read):
+    """Get end position from a bed12 line.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        An integer representing the end position of the read.
+
+    >>> read = ['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_end(read)
+    200
+    """
+    return int(read[2])
+
+
+def get_strand(read):
+    """Get strand from a bed12 line.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        A single char representing the strand of a read
+
+    >>> read = ['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_strand(read)
+    '+'
+    """
+    return read[5]
+
+def get_tss(read):
+    """Get Transcription Start Site (TSS) from a bed12 line.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        The start position as an integer if the read is on the plus strand.
+        The end position as an integer if the read is on the minus strand.
+
+    >>> read = ['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_tss(read)
+    100
+    >>> read = ['chrX', '100', '200', 'toto', '12', '-', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_tss(read)
+    200
+    """
+    strand = get_strand(read)
+
+    if strand == '+':
+        return get_start(read)
+    else:
+        return get_end(read)
+
+
+def blocks_to_absolute_start_end(read):
+    """Calculate the absolute start and end of the blocks from a bed12 line.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        A list of tuples where each tuple contains the absolute start and end coordinates of a block.
+
+    >>> read = ['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> blocks_to_absolute_start_end(read)
+    [(100, 121), (175, 200)]
+    """
+    read_start = get_start(read)
+
+    block_starts = [read_start + int(start) for start in read[11].split(',') if start]
+    block_sizes = [int(size) for size in read[10].split(',') if size]
+
+    block_starts_sizes = zip(block_starts, block_sizes)
+
+    return [(bstart, bstart + bsize) for bstart, bsize in block_starts_sizes]
+
+
+def merge_overlapping_blocks(reads):
+    """Merge blocks if they overlap.
+
+    Args:
+        reads: A list of reads in the BED12 format.
+
+    Returns:
+        Two lists where the first list contains the block sizes and the second the block starts,
+        relative to the start of the first merged block. Values in the lists are integers.
+
+    >>> reads = []
+    >>> reads.append(['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '2', '20,25', '0,75'])
+    >>> reads.append(['chrX', '100', '200', 'toto', '12', '+', '100', '110', '255,0,0', '3', '10,10,25', '0,15,75'])
+    >>> merge_overlapping_blocks(reads)
+    ([25, 25], [0, 75])
+    """
+
+    blocks_list = [blocks_to_absolute_start_end(read) for read in reads]
+
+    #flatten
+    blocks = itertools.chain.from_iterable(blocks_list)
+
+    final_blocks = []
+
+    blocks = sorted(blocks, key = operator.itemgetter(0, 1))
+    known_block_start, known_block_end = blocks[0]
+
+    for block_start, block_end in blocks[1:]:
+        if block_start <= known_block_end:
+            known_block_end = max(block_end, known_block_end)
+        else:
+            final_blocks.append((known_block_start, known_block_end))
+            known_block_start, known_block_end = (block_start, block_end)
+
+    final_blocks.append((known_block_start, known_block_end))
+
+    absolute_block_start = final_blocks[0][0]
+
+    bsizes = [end - start for start, end in final_blocks]
+    bstarts = [start - absolute_block_start for start, end in final_blocks]
+
+    return bsizes, bstarts
b
diff -r 000000000000 -r d1d0ee366702 cage_scan_clustering.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cage_scan_clustering.py Wed May 11 04:53:30 2016 -0400
[
@@ -0,0 +1,115 @@
+"""
+.. module:: cage_scan_clustering
+   :platform: Unix
+   :synopsis: cluster cage scan reads based on arbitrary distance. It assumes that the reads are sorted by chrom, TSS and strand.
+   :version: 1.0
+
+.. moduleauthor:: Mickael Mendez <mendez.mickael@gmail.com>
+
+.. source: https://github.com/mmendez12/umicount
+"""
+
+import csv
+import argparse
+
+
+import bed12
+
+#TODO: rewrite tests
+def print_read_to_bed12(reads):
+    """ Merge the reads by blocks and print a single read in the BED12 format on stdout.
+    It assumes that the reads belong to the same cluster. The printed read is named
+    chrom:start:end:strand and its score is the number of merged reads.
+
+    Args:
+        reads: A list of reads
+
+    """
+    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)
+        
+    #bed12
+    first_read = sorted(reads, key=bed12.get_start)[0]
+    chrom = bed12.get_chrom(first_read)
+    start = bed12.get_start(first_read)
+    end = start + block_starts[-1] + block_sizes[-1]
+
+    score = len(reads)
+    
+    strand = bed12.get_strand(first_read)
+    
+    if strand == '+':
+        thick_start = start
+        thick_end = start + block_sizes[0]
+    else:
+        thick_start = end - block_sizes[-1]
+        thick_end = end
+        
+    color = "255,0,0"
+    block_count = len(block_sizes)
+    block_sizes = ','.join(map(str, block_sizes))
+    block_starts = ','.join(map(str, block_starts))
+
+    name = map(str, [chrom, start, end, strand])
+    name = ":".join(name)
+    
+    output = [chrom, start, end, name, score, strand, thick_start, thick_end,
+              color, block_count, block_sizes, block_starts]
+    
+    output_str = map(str, output)
+    print '\t'.join(output_str)
+
+
+def overlapping_reads(reads, distance):
+    """Yield lists of consecutive reads whose TSS is within `distance` of the previous read's TSS on the same chromosome."""
+
+    reads_list = []
+    cur_tss = 0
+    cur_chrom = ''
+
+    for read in reads:
+
+        if not cur_tss:
+            cur_tss = bed12.get_tss(read)
+            reads_list.append(read)
+            cur_chrom = bed12.get_chrom(read)
+            continue
+
+
+        tss = bed12.get_tss(read)
+        chrom = bed12.get_chrom(read)
+
+        #if not overlap
+        if (tss - cur_tss > distance) or (chrom != cur_chrom):
+            yield reads_list
+            reads_list = [read]
+            cur_tss = tss
+            cur_chrom = chrom
+        else:
+            reads_list.append(read)
+            cur_tss = tss
+
+    yield reads_list
+
+
+def main():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('bed_file', help='input file')
+    parser.add_argument('-t', '--tag_distance', default=20, type=int, help='cluster all the cage tags at distance d')
+
+    args = parser.parse_args()
+
+    with open(args.bed_file) as bedfile:
+
+        reader = csv.reader(bedfile, delimiter='\t')
+        reads = (line for line in reader)
+
+        #for each cluster of reads whose TSSs are within tag_distance
+        for read_list in overlapping_reads(reads, args.tag_distance):
+            print_read_to_bed12(read_list)
+
+
+if __name__ == '__main__':
+    main()
+
+#TODO: combine this script with fingerprint.py
b
diff -r 000000000000 -r d1d0ee366702 dedup_barcode_fingerprint.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dedup_barcode_fingerprint.py Wed May 11 04:53:30 2016 -0400
[
@@ -0,0 +1,185 @@
+"""
+.. module:: dedup_barcode_fingerprint
+   :platform: Unix
+   :synopsis: Use UMI to count transcripts
+   :version: 1.0
+
+.. moduleauthor:: Mickael Mendez <mendez.mickael@gmail.com>
+
+.. source: https://github.com/mmendez12/umicount
+
+"""
+
+import csv
+import itertools
+import subprocess
+import argparse
+import tempfile
+import os
+import shutil
+from collections import defaultdict
+
+import bed12
+
+
+def get_fingerprint(read):
+    """Get fingerprint id from the read's name. It assumes that the read's name
+    contains the following pattern *FP:XXX;* where *XXX* is the fingerprint id.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        A string containing the fingerprint id
+
+    >>> read = ['chrX', '100', '200', 'BC:ATGC;FP:0012', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_fingerprint(read)
+    '0012'
+    """
+    return read[3].split('FP:')[1].split(';')[0]
+
+
+def get_barcode(read):
+    """Get barcode from the read's name. It assumes that the read's name
+    contains the following pattern *BC:XXX;* where *XXX* is the actual barcode.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        A string containing the barcode
+
+    >>> read = ['chrX', '100', '200', 'BC:ATGC;FP:0012', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_barcode(read)
+    'ATGC'
+    """
+    return read[3].split('BC:')[1].split(';')[0]
+
+
+def print_read_to_bed12(key, reads):
+    """ Merge the reads by blocks and print a single read in the BED12 format on stdout.
+    It assumes that the reads are on the same TSS and contains
+    barcode and fingerprint information in the read's name.
+
+    Args:
+        key: A tuple that contains the chromosome, barcode and fingerprint information.
+
+        reads: A list of reads (in a list) from the same TSS, that have the same barcode and fingerprint.
+
+    >>> reads = []
+    >>> reads.append(['chrX', '100', '200', 'BC:AAA;FP:0012', '12', '+', '100', '110', '255,0,0', '2', '20,25', '0,75'])
+    >>> reads.append(['chrX', '100', '300', 'BC:AAA;FP:0012', '12', '+', '100', '110', '255,0,0', '3', '20,25', '0,175'])
+    >>> print_read_to_bed12(('chrX', 'AAA', '0012'), reads) #doctest: +NORMALIZE_WHITESPACE
+    chrX    100 300 BC:AAA;FP:0012 2 + 100 120 255,0,0 3 20,25,25 0,75,175
+    """
+    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)
+        
+    #bed12
+    first_read = sorted(reads, key = bed12.get_start)[0]
+    chrom, barcode, fingerprint = key
+    start = bed12.get_start(first_read)
+    end = start + block_starts[-1] + block_sizes[-1]
+    name = "BC:{0};FP:{1}".format(barcode, fingerprint)
+    score = len(reads)
+    
+    strand = bed12.get_strand(first_read)
+    
+    if strand == '+':
+        thick_start = start
+        thick_end = start + block_sizes[0]
+    else:
+        thick_start = end - block_sizes[-1]
+        thick_end = end
+        
+    color = "255,0,0"
+    block_count = len(block_sizes)
+    block_sizes = ','.join(map(str, block_sizes))
+    block_starts = ','.join(map(str, block_starts))
+    
+    output = [chrom, start, end, name, score, strand, thick_start, thick_end,
+              color, block_count, block_sizes, block_starts]
+    
+    output_str = map(str, output)
+    print '\t'.join(output_str)
+
+
+def main():
+
+    #PARSER TODO: move this code somewhere else
+    parser = argparse.ArgumentParser()
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("-d", "--directory", help="absolute path of the folder containing the bed files")
+    group.add_argument("-f", "--file", help="a bed file")
+    parser.add_argument("-o", help='name of the output file. Only works if the script is called with the -f option, \
+                                    ignored otherwise.')
+
+    args = parser.parse_args()
+
+    if args.directory:
+        path, folder, files = os.walk(args.directory).next()
+    elif args.file:
+        path = ''
+        files = [args.file]
+    #ENDPARSER
+
+    #create a temporary directory
+    tmp_dir = tempfile.mkdtemp()
+
+    plus_strand_tmp_file = open(os.path.join(tmp_dir, '+'), 'w')
+    minus_strand_tmp_file = open(os.path.join(tmp_dir, '-'), 'w')
+    plus_and_minus_sorted_path = os.path.join(tmp_dir, '+-s')
+
+    #creates two temporary bed files containing either reads on the plus or minus strand
+    for bed_file in files:
+
+        with open(os.path.join(path, bed_file)) as bed_file:
+            reader = csv.reader(bed_file, delimiter='\t')
+
+            for read in reader:
+                strand = bed12.get_strand(read)
+                if strand == '+':
+                    plus_strand_tmp_file.write('\t'.join(read) + '\n')
+                elif strand == '-':
+                    minus_strand_tmp_file.write('\t'.join(read) + '\n')
+
+
+    #close the files
+    plus_strand_tmp_file.close()
+    minus_strand_tmp_file.close()
+
+    #call unix sort on the file containing reads on the plus strand by tss
+    with open(os.path.join(tmp_dir, '+sorted'), "w") as outfile:
+        subprocess.call(["sort", '-k2,2n', os.path.join(tmp_dir, '+')], stdout=outfile)
+
+    #call unix sort on the file containing reads on the minus strand by tss
+    with open(os.path.join(tmp_dir, '-sorted'), "w") as outfile:
+        subprocess.call(["sort", '-k3,3n', os.path.join(tmp_dir, '-')], stdout=outfile)
+
+    #concatenate the files sorted by tss
+    with open(plus_and_minus_sorted_path, "w") as outfile:
+        subprocess.call(['cat', os.path.join(tmp_dir, '+sorted'), os.path.join(tmp_dir, '-sorted')], stdout=outfile)
+
+    with open(plus_and_minus_sorted_path) as bedfile:
+        reader = csv.reader(bedfile, delimiter='\t')
+        reads = (line for line in reader)
+
+        #for each group of reads on the same tss
+        for tss, reads in itertools.groupby(reads, bed12.get_tss):
+            d = defaultdict(list)
+
+            #group the reads by chr, barcode and fingerprint
+            for read in reads:
+                key = (bed12.get_chrom(read), get_barcode(read), get_fingerprint(read))
+                d[key].append(read)
+
+            #merge and print the reads that have the same tss, chromosome, barcode and fingerprint
+            for key, reads in d.iteritems():
+                print_read_to_bed12(key, reads)
+
+    shutil.rmtree(tmp_dir)
+
+
+if __name__ == '__main__':
+    main()
+
+#TODO: combine this script with dedup_fingerprint
b
diff -r 000000000000 -r d1d0ee366702 dedup_fingerprint.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dedup_fingerprint.py Wed May 11 04:53:30 2016 -0400
[
@@ -0,0 +1,167 @@
+"""
+.. module:: dedup_fingerprint
+   :platform: Unix
+   :synopsis: Use UMI to count transcripts, assumes there is no barcode
+
+.. moduleauthor:: Mickael Mendez <mendez.mickael@gmail.com>
+
+.. source: https://github.com/mmendez12/umicount
+
+"""
+
+import csv
+import itertools
+import subprocess
+import argparse
+import tempfile
+import os
+import shutil
+from collections import defaultdict
+
+import bed12
+
+
+def get_fingerprint(read):
+    """Get fingerprint id from the read's name. It assumes that the read's name
+    contains the following pattern *FP:XXX;* where *XXX* is the fingerprint id.
+
+    Args:
+        read: A list of twelve elements where each element refers to a field in the BED format.
+
+    Returns:
+        A string containing the fingerprint id
+
+    >>> read = ['chrX', '100', '200', 'FP:0012', '12', '+', '100', '110', '255,0,0', '2', '21,25', '0,75']
+    >>> get_fingerprint(read)
+    '0012'
+    """
+    return read[3].split('FP:')[1].split(';')[0]
+
+
+def print_read_to_bed12(key, reads):
+    """ Merge the reads by blocks and print a single read in the BED12 format on stdout.
+    It assumes that the reads are on the same TSS and contains
+    fingerprint information in the read's name.
+
+    Args:
+        key: A tuple that contains the chromosome and fingerprint information.
+
+        reads: A list of reads (in a list) from the same TSS, that have the same fingerprint.
+
+    >>> reads = []
+    >>> reads.append(['chrX', '100', '200', 'FP:0012', '12', '+', '100', '110', '255,0,0', '2', '20,25', '0,75'])
+    >>> reads.append(['chrX', '100', '300', 'FP:0012', '12', '+', '100', '110', '255,0,0', '3', '20,25', '0,175'])
+    >>> print_read_to_bed12(('chrX', '0012'), reads) #doctest: +NORMALIZE_WHITESPACE
+    chrX    100 300 FP:0012 2 + 100 120 255,0,0 3 20,25,25 0,75,175
+    """
+    block_sizes, block_starts = bed12.merge_overlapping_blocks(reads)
+        
+    #bed12
+    first_read = sorted(reads, key = bed12.get_start)[0]
+    chrom, fingerprint = key
+    start = bed12.get_start(first_read)
+    end = start + block_starts[-1] + block_sizes[-1]
+    name = "FP:{0}".format(fingerprint)
+    score = len(reads)
+    
+    strand = bed12.get_strand(first_read)
+    
+    if strand == '+':
+        thick_start = start
+        thick_end = start + block_sizes[0]
+    else:
+        thick_start = end - block_sizes[-1]
+        thick_end = end
+        
+    color = "255,0,0"
+    block_count = len(block_sizes)
+    block_sizes = ','.join(map(str, block_sizes))
+    block_starts = ','.join(map(str, block_starts))
+    
+    output = [chrom, start, end, name, score, strand, thick_start, thick_end,
+              color, block_count, block_sizes, block_starts]
+    
+    output_str = map(str, output)
+    print '\t'.join(output_str)
+
+
+def main():
+
+    #PARSER TODO: move this code somewhere else
+    parser = argparse.ArgumentParser()
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("-d", "--directory", help="absolute path of the folder containing the bed files")
+    group.add_argument("-f", "--file", help="a bed file")
+    parser.add_argument("-o", help='name of the output file. Only works if the script is called with the -f option, \
+                                    ignored otherwise.')
+
+    args = parser.parse_args()
+
+    if args.directory:
+        path, folder, files = os.walk(args.directory).next()
+    elif args.file:
+        path = ''
+        files = [args.file]
+    #ENDPARSER
+
+    #create a temporary directory
+    tmp_dir = tempfile.mkdtemp()
+
+    plus_strand_tmp_file = open(os.path.join(tmp_dir, '+'), 'w')
+    minus_strand_tmp_file = open(os.path.join(tmp_dir, '-'), 'w')
+    plus_and_minus_sorted_path = os.path.join(tmp_dir, '+-s')
+
+    #creates two temporary bed files containing either reads on the plus or minus strand
+    for bed_file in files:
+
+        with open(os.path.join(path, bed_file)) as bed_file:
+            reader = csv.reader(bed_file, delimiter='\t')
+
+            for read in reader:
+                strand = bed12.get_strand(read)
+                if strand == '+':
+                    plus_strand_tmp_file.write('\t'.join(read) + '\n')
+                elif strand == '-':
+                    minus_strand_tmp_file.write('\t'.join(read) + '\n')
+
+
+    #close the files
+    plus_strand_tmp_file.close()
+    minus_strand_tmp_file.close()
+
+    #call unix sort on the file containing reads on the plus strand by tss
+    with open(os.path.join(tmp_dir, '+sorted'), "w") as outfile:
+        subprocess.call(["sort", '-k1,1', '-k2,2n', os.path.join(tmp_dir, '+')], stdout=outfile)
+
+    #call unix sort on the file containing reads on the minus strand by tss
+    with open(os.path.join(tmp_dir, '-sorted'), "w") as outfile:
+        subprocess.call(["sort", '-k1,1', '-k3,3n', os.path.join(tmp_dir, '-')], stdout=outfile)
+
+    #concatenate the files sorted by tss
+    with open(plus_and_minus_sorted_path, "w") as outfile:
+        subprocess.call(['cat', os.path.join(tmp_dir, '+sorted'), os.path.join(tmp_dir, '-sorted')], stdout=outfile)
+
+    with open(plus_and_minus_sorted_path) as bedfile:
+        reader = csv.reader(bedfile, delimiter='\t')
+        reads = (line for line in reader)
+
+        #for each group of reads on the same tss
+        for tss, same_tss_reads in itertools.groupby(reads, bed12.get_tss):
+            d = defaultdict(list)
+
+            #group the reads by chr and fingerprint
+            for read in same_tss_reads:
+                key = (bed12.get_chrom(read), get_fingerprint(read))
+                d[key].append(read)
+
+            #merge and print the reads that have same tss, and fingerprint
+            for key, same_fingerprint_reads in d.iteritems():
+                print_read_to_bed12(key, same_fingerprint_reads)
+
+    shutil.rmtree(tmp_dir)
+
+
+if __name__ == '__main__':
+    main()
+
+#TODO: combine this with dedup_barcode_fingerprint
b
diff -r 000000000000 -r d1d0ee366702 test-data/cagescan_fragments.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cagescan_fragments.bed Wed May 11 04:53:30 2016 -0400
b
b'@@ -0,0 +1,11907 @@\n+chr1\t6492854\t6492977\tFP:28446\t1\t+\t6492854\t6492977\t255,0,0\t1\t123\t0\n+chr1\t6492854\t6492980\tFP:27362\t1\t+\t6492854\t6492980\t255,0,0\t1\t126\t0\n+chr1\t7171356\t7171473\tFP:17539\t1\t+\t7171356\t7171473\t255,0,0\t1\t117\t0\n+chr1\t24615921\t24616108\tFP:57353\t1\t+\t24615921\t24615981\t255,0,0\t2\t60,75\t0,112\n+chr1\t24616040\t24616138\tFP:20952\t1\t+\t24616040\t24616138\t255,0,0\t1\t98\t0\n+chr1\t24616040\t24616141\tFP:43597\t1\t+\t24616040\t24616141\t255,0,0\t1\t101\t0\n+chr1\t24616040\t24616148\tFP:47949\t1\t+\t24616040\t24616148\t255,0,0\t1\t108\t0\n+chr1\t33988808\t33988918\tFP:41394\t1\t+\t33988808\t33988918\t255,0,0\t1\t110\t0\n+chr1\t34474242\t34474441\tFP:22200\t1\t+\t34474242\t34474302\t255,0,0\t2\t60,74\t0,125\n+chr1\t37369355\t37369483\tFP:19766\t1\t+\t37369355\t37369414\t255,0,0\t2\t59,39\t0,89\n+chr1\t40540343\t40540496\tFP:26976\t1\t+\t40540343\t40540403\t255,0,0\t2\t60,75\t0,78\n+chr1\t40540343\t40540486\tFP:22268\t1\t+\t40540343\t40540403\t255,0,0\t2\t60,75\t0,68\n+chr1\t40540343\t40540486\tFP:58700\t1\t+\t40540343\t40540403\t255,0,0\t2\t60,75\t0,68\n+chr1\t40540343\t40540499\tFP:17167\t1\t+\t40540343\t40540403\t255,0,0\t2\t60,74\t0,82\n+chr1\t40540343\t40540454\tFP:28154\t1\t+\t40540343\t40540454\t255,0,0\t1\t111\t0\n+chr1\t42868037\t42868123\tFP:12597\t1\t+\t42868037\t42868123\t255,0,0\t1\t86\t0\n+chr1\t46050418\t46050503\tFP:25257\t1\t+\t46050418\t46050503\t255,0,0\t1\t85\t0\n+chr1\t54331143\t54331244\tFP:43586\t1\t+\t54331143\t54331244\t255,0,0\t1\t101\t0\n+chr1\t54331143\t54331297\tFP:29880\t1\t+\t54331143\t54331203\t255,0,0\t2\t60,75\t0,79\n+chr1\t54331143\t54331317\tFP:33187\t1\t+\t54331143\t54331202\t255,0,0\t2\t59,75\t0,99\n+chr1\t56855791\t56855890\tFP:24297\t1\t+\t56855791\t56855890\t255,0,0\t1\t99\t0\n+chr1\t58802800\t58802903\tFP:42410\t1\t+\t58802800\t58802903\t255,0,0\t1\t103\t0\n+chr1\t58802800\t58802944\tFP:35382\t1\t+\t58802800\t58802860\t255,0,0\t2\t60,75\t0,69\n+chr1\t58802800\t58802946\tFP:36
806\t1\t+\t58802800\t58802860\t255,0,0\t2\t60,75\t0,71\n+chr1\t59484287\t59484527\tFP:53855\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,75\t0,165\n+chr1\t59484287\t59484533\tFP:7250\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,76\t0,170\n+chr1\t59484287\t59484480\tFP:65210\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,74\t0,119\n+chr1\t59484287\t59484413\tFP:8815\t1\t+\t59484287\t59484413\t255,0,0\t1\t126\t0\n+chr1\t59484287\t59484413\tFP:62121\t1\t+\t59484287\t59484413\t255,0,0\t1\t126\t0\n+chr1\t59484287\t59484528\tFP:60974\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,64\t0,177\n+chr1\t59484287\t59484430\tFP:1342\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,75\t0,68\n+chr1\t59484287\t59484414\tFP:48330\t1\t+\t59484287\t59484414\t255,0,0\t1\t127\t0\n+chr1\t59484287\t59484488\tFP:49636\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,75\t0,126\n+chr1\t59484287\t59484389\tFP:10423\t1\t+\t59484287\t59484389\t255,0,0\t1\t102\t0\n+chr1\t59484287\t59484442\tFP:40414\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,74\t0,81\n+chr1\t59484287\t59484521\tFP:57950\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,75\t0,159\n+chr1\t59484287\t59484473\tFP:49775\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,75\t0,111\n+chr1\t59484287\t59484414\tFP:11904\t1\t+\t59484287\t59484414\t255,0,0\t1\t127\t0\n+chr1\t59484287\t59484409\tFP:48132\t1\t+\t59484287\t59484409\t255,0,0\t1\t122\t0\n+chr1\t59484287\t59484525\tFP:53656\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,74\t0,164\n+chr1\t59484287\t59484528\tFP:59813\t1\t+\t59484287\t59484347\t255,0,0\t2\t60,69\t0,172\n+chr1\t59855465\t59855546\tFP:42735\t1\t+\t59855465\t59855546\t255,0,0\t1\t81\t0\n+chr1\t59855465\t59855601\tFP:30092\t1\t+\t59855465\t59855522\t255,0,0\t2\t57,75\t0,61\n+chr1\t59855465\t59855587\tFP:60999\t1\t+\t59855465\t59855522\t255,0,0\t2\t57,62\t0,60\n+chr1\t59855465\t59855573\tFP:41616\t1\t+\t59855465\t59855573\t255,0,0\t1\t108\t0\n+chr1\t59855465\t59855596\tFP:38055\t1\t+\t59855465\t59855596\t255,0,0\t1\t131\t0\n+chr1\t59855465\t59855542\tFP:59386\t1\t+\t59855
465\t59855542\t255,0,0\t1\t77\t0\n+chr1\t59855465\t59855579\tFP:23719\t1\t+\t59855465\t59855579\t255,0,0\t1\t114\t0\n+chr1\t59855465\t59855579\tFP:41451\t1\t+\t59855465\t59855579\t255,0,0\t1\t114\t0\n+chr1\t62746688\t62746781\tFP:59273\t1\t+\t62746688\t62746781\t255,0,0\t1\t93\t0\n+chr1\t62787911\t62787997\tFP:25040\t1\t+\t62787911\t62787997\t255,0,0\t1\t86\t0\n+chr1\t62787911\t62788056\tFP:48531\t1\t+\t62787911\t62787971\t255,0,0\t2\t60,75\t0,70\n+chr1\t62787911\t62788042\tFP:44445\t1\t+\t62787911\t62788042\t255,0,0\t1\t131\t0\n+chr1\t62787911\t62788103\tFP:53439\t1\t+\t62787911\t62787971\t255,0,0\t2\t60,74\t0,118\n+chr1\t62787911\t62787997\tFP'..b'76272\t53776390\tFP:58091\t1\t-\t53776272\t53776390\t255,0,0\t1\t118\t0\n+chrX\t53776262\t53776390\tFP:1066\t1\t-\t53776262\t53776390\t255,0,0\t1\t128\t0\n+chrX\t53776221\t53776390\tFP:17159\t1\t-\t53776330\t53776390\t255,0,0\t2\t59,60\t0,109\n+chrX\t57545977\t57546068\tFP:35219\t1\t-\t57545977\t57546068\t255,0,0\t1\t91\t0\n+chrX\t59566535\t59566629\tFP:27169\t1\t-\t59566535\t59566629\t255,0,0\t1\t94\t0\n+chrX\t60953569\t60953669\tFP:53322\t1\t-\t60953569\t60953669\t255,0,0\t1\t100\t0\n+chrX\t73738007\t73738151\tFP:22555\t1\t-\t73738091\t73738151\t255,0,0\t2\t75,60\t0,84\n+chrX\t80089231\t80089361\tFP:49862\t1\t-\t80089231\t80089361\t255,0,0\t1\t130\t0\n+chrX\t98846880\t98846964\tFP:4040\t1\t-\t98846880\t98846964\t255,0,0\t1\t84\t0\n+chrX\t98846880\t98846964\tFP:26311\t1\t-\t98846880\t98846964\t255,0,0\t1\t84\t0\n+chrX\t98846845\t98846964\tFP:40087\t1\t-\t98846845\t98846964\t255,0,0\t1\t119\t0\n+chrX\t98846880\t98846964\tFP:59707\t1\t-\t98846880\t98846964\t255,0,0\t1\t84\t0\n+chrX\t102188065\t102188148\tFP:16390\t1\t-\t102188065\t102188148\t255,0,0\t1\t83\t0\n+chrX\t105875470\t105875601\tFP:15995\t1\t-\t105875470\t105875601\t255,0,0\t1\t131\t0\n+chrX\t105875418\t105875601\tFP:49671\t1\t-\t105875543\t105875601\t255,0,0\t2\t74,58\t0,125\n+chrX\t105875433\t105875601\tFP:16772\t1\t-\t105875543\t105875601\t255,0,0\t2\t75,58\t
0,110\n+chrX\t105875500\t105875601\tFP:22856\t1\t-\t105875500\t105875601\t255,0,0\t1\t101\t0\n+chrX\t105875470\t105875601\tFP:39200\t1\t-\t105875470\t105875601\t255,0,0\t1\t131\t0\n+chrX\t105875401\t105875601\tFP:29544\t1\t-\t105875543\t105875601\t255,0,0\t2\t75,58\t0,142\n+chrX\t105875494\t105875601\tFP:34312\t1\t-\t105875494\t105875601\t255,0,0\t1\t107\t0\n+chrX\t140475174\t140475296\tFP:16538\t1\t-\t140475236\t140475296\t255,0,0\t2\t45,60\t0,62\n+chrX\t140475174\t140475300\tFP:35747\t1\t-\t140475240\t140475300\t255,0,0\t2\t52,60\t0,66\n+chrX\t157556895\t157557007\tFP:4282\t1\t-\t157556949\t157557007\t255,0,0\t2\t50,58\t0,54\n+chrX\t157566818\t157566901\tFP:47443\t1\t-\t157566818\t157566901\t255,0,0\t1\t83\t0\n+chrX\t157566794\t157566901\tFP:14936\t1\t-\t157566794\t157566901\t255,0,0\t1\t107\t0\n+chrX\t157566768\t157566901\tFP:39336\t1\t-\t157566768\t157566901\t255,0,0\t1\t133\t0\n+chrX\t157566780\t157566901\tFP:53688\t1\t-\t157566780\t157566901\t255,0,0\t1\t121\t0\n+chrX\t157566693\t157566901\tFP:37796\t1\t-\t157566841\t157566901\t255,0,0\t2\t62,60\t0,148\n+chrX\t157566693\t157566901\tFP:42762\t1\t-\t157566841\t157566901\t255,0,0\t2\t68,60\t0,148\n+chrX\t157566809\t157566901\tFP:39594\t1\t-\t157566809\t157566901\t255,0,0\t1\t92\t0\n+chrX\t157566799\t157566901\tFP:27879\t1\t-\t157566799\t157566901\t255,0,0\t1\t102\t0\n+chrX\t157566798\t157566901\tFP:5053\t1\t-\t157566798\t157566901\t255,0,0\t1\t103\t0\n+chrX\t157566812\t157566901\tFP:54130\t1\t-\t157566812\t157566901\t255,0,0\t1\t89\t0\n+chrX\t157566779\t157566901\tFP:46277\t1\t-\t157566779\t157566901\t255,0,0\t1\t122\t0\n+chrX\t157566791\t157566901\tFP:46360\t1\t-\t157566791\t157566901\t255,0,0\t1\t110\t0\n+chrX\t157566698\t157566901\tFP:46201\t4\t-\t157566841\t157566901\t255,0,0\t2\t128,60\t0,143\n+chrX\t157566765\t157566901\tFP:5112\t1\t-\t157566841\t157566901\t255,0,0\t2\t75,60\t0,76\n+chrX\t157566714\t157566901\tFP:34275\t1\t-\t157566841\t157566901\t255,0,0\t2\t75,60\t0,127\n+chrX\t157566752\t157566901\tFP:18
565\t1\t-\t157566841\t157566901\t255,0,0\t2\t74,60\t0,89\n+chrX\t157566812\t157566901\tFP:9126\t1\t-\t157566812\t157566901\t255,0,0\t1\t89\t0\n+chrX\t157566687\t157566901\tFP:35809\t1\t-\t157566841\t157566901\t255,0,0\t2\t74,60\t0,154\n+chrX\t157566791\t157566901\tFP:42193\t2\t-\t157566791\t157566901\t255,0,0\t1\t110\t0\n+chrX\t157566809\t157566901\tFP:15160\t1\t-\t157566809\t157566901\t255,0,0\t1\t92\t0\n+chrX\t157566688\t157566901\tFP:42608\t2\t-\t157566841\t157566901\t255,0,0\t2\t88,60\t0,153\n+chrX\t157572576\t157572669\tFP:62086\t1\t-\t157572576\t157572669\t255,0,0\t1\t93\t0\n+chrX\t157572545\t157572669\tFP:40370\t1\t-\t157572545\t157572669\t255,0,0\t1\t124\t0\n+chrX\t157572389\t157572669\tFP:61730\t1\t-\t157572611\t157572669\t255,0,0\t2\t68,58\t0,222\n+chrX\t157572450\t157572669\tFP:16902\t1\t-\t157572611\t157572669\t255,0,0\t2\t73,58\t0,161\n+chrX\t157572568\t157572669\tFP:35420\t1\t-\t157572568\t157572669\t255,0,0\t1\t101\t0\n+chrX\t157572509\t157572669\tFP:58009\t1\t-\t157572611\t157572669\t255,0,0\t2\t74,58\t0,102\n+chrX\t157572392\t157572669\tFP:6326\t1\t-\t157572611\t157572669\t255,0,0\t2\t75,58\t0,219\n+chrY\t6800805\t6800889\tFP:48076\t1\t-\t6800805\t6800889\t255,0,0\t1\t84\t0\n+chrY\t58848204\t58848288\tFP:8320\t1\t-\t58848204\t58848288\t255,0,0\t1\t84\t0\n'
b
diff -r 000000000000 -r d1d0ee366702 test-data/paired.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/paired.bed Wed May 11 04:53:30 2016 -0400
b
b'@@ -0,0 +1,12701 @@\n+chr16\t44378881\t44379100\tNB500968:8:H5CFGAFXX:1:11101:2311:17627___1:N:0:TCCTGAGC+TACTCCTT;FP:35105;RQ:34.13;RQ:-1.00\t120\t+\t44378881\t44378941\t255,0,0\t2\t60,74\t0,145\n+chr12\t69159294\t69159383\tNB500968:8:H5CFGAFXX:1:11101:2678:9895___1:N:0:TCCTGAGC+TACTCCTT;FP:19791;RQ:34.15;RQ:-1.00\t0\t+\t69159294\t69159354\t255,0,0\t2\t60,75\t0,14\n+chr11\t16386619\t16386759\tNB500968:8:H5CFGAFXX:1:11101:2891:16900___1:N:0:TCCTGAGC+TACTCCTT;FP:12917;RQ:34.13;RQ:-1.00\t0\t+\t16386619\t16386679\t255,0,0\t2\t60,75\t0,65\n+chr16\t92794124\t92794272\tNB500968:8:H5CFGAFXX:1:11101:3903:9158___1:N:0:TCCTGAGC+TACTCCTT;FP:23729;RQ:34.24;RQ:-1.00\t120\t-\t92794215\t92794272\t255,0,0\t2\t75,57\t0,91\n+chr6\t116640596\t116640786\tNB500968:8:H5CFGAFXX:1:11101:5052:1198___1:N:0:TCCTGAGC+TACTCCTT;FP:38799;RQ:34.15;RQ:-1.00\t120\t-\t116640726\t116640786\t255,0,0\t2\t75,60\t0,130\n+chrM\t1282\t1441\tNB500968:8:H5CFGAFXX:1:11101:5538:1286___1:N:0:TCCTGAGC+TACTCCTT;FP:5079;RQ:34.14;RQ:-1.00\t120\t+\t1282\t1342\t255,0,0\t2\t60,74\t0,85\n+chr15\t89169026\t89169261\tNB500968:8:H5CFGAFXX:1:11101:5814:2162___1:N:0:TCCTGAGC+TACTCCTT;FP:5751;RQ:34.24;RQ:-1.00\t120\t-\t89169203\t89169261\t255,0,0\t2\t75,58\t0,177\n+chr16\t91032029\t91032195\tNB500968:8:H5CFGAFXX:1:11101:6142:6415___1:N:0:TCCTGAGC+TACTCCTT;FP:5138;RQ:34.15;RQ:-1.00\t120\t-\t91032135\t91032195\t255,0,0\t2\t75,60\t0,106\n+chr2\t180449140\t180449276\tNB500968:8:H5CFGAFXX:1:11101:6306:10980___1:N:0:TCCTGAGC+TACTCCTT;FP:39555;RQ:34.24;RQ:-1.00\t120\t-\t180449220\t180449276\t255,0,0\t2\t74,56\t0,80\n+chrM\t12256\t12429\tNB500968:8:H5CFGAFXX:1:11101:6670:2262___1:N:0:TCCTGAGC+TACTCCTT;FP:14539;RQ:3.54;RQ:-1.00\t120\t-\t12369\t12429\t255,0,0\t2\t75,60\t0,113\n+chr12\t33366865\t33366973\tNB500968:8:H5CFGAFXX:1:11101:7863:19997___1:N:0:TCCTGAGC+TACACCTT;FP:11663;RQ:34.13;RQ:-1.00\t120\t+\t33366865\t33366924\t255,0,0\t2\t59,73\t0,35\n+chr7\t145058760\t145058926\tNB500968:8:H5CFGAFXX:1:11101:8024:1581___1:N:0:TCCTGAGC+T
ACTCCTT;FP:38311;RQ:34.15;RQ:-1.00\t120\t+\t145058760\t145058820\t255,0,0\t2\t60,75\t0,91\n+chr4\t63394118\t63394274\tNB500968:8:H5CFGAFXX:1:11101:8893:9691___1:N:0:TCCTGAGC+TACTCCTT;FP:38507;RQ:34.14;RQ:-1.00\t120\t-\t63394214\t63394274\t255,0,0\t2\t74,60\t0,96\n+chr16\t91032114\t91032195\tNB500968:8:H5CFGAFXX:1:11101:10356:6207___1:N:0:TCCTGAGC+TACTCCTT;FP:50578;RQ:34.15;RQ:-1.00\t120\t-\t91032135\t91032195\t255,0,0\t2\t75,60\t0,21\n+chr1\t189882936\t189883073\tNB500968:8:H5CFGAFXX:1:11101:11245:6846___1:N:0:TCCTGAGC+TACTCCTT;FP:41020;RQ:34.15;RQ:-1.00\t120\t-\t189883013\t189883073\t255,0,0\t2\t74,60\t0,77\n+chr3\t103296014\t103296114\tNB500968:8:H5CFGAFXX:1:11101:12189:1919___1:N:0:TCCTGAGC+TACTCCTT;FP:53204;RQ:34.14;RQ:-1.00\t58\t+\t103296014\t103296039\t255,0,0\t2\t25,75\t0,25\n+chr2\t75693129\t75693258\tNB500968:8:H5CFGAFXX:1:11101:12877:5151___1:N:0:TCCTGAGC+TACTCCTT;FP:44537;RQ:34.24;RQ:-1.00\t120\t-\t75693200\t75693258\t255,0,0\t2\t73,58\t0,71\n+chr2\t113441108\t113441403\tNB500968:8:H5CFGAFXX:1:11101:12949:9025___1:N:0:TCCTGAGC+TACTCCTT;FP:25502;RQ:34.24;RQ:-1.00\t58\t+\t113441108\t113441166\t255,0,0\t2\t58,70\t0,225\n+chr3\t60617370\t60617492\tNB500968:8:H5CFGAFXX:1:11101:13102:13780___1:N:0:TCCTGAGC+TACTCCTT;FP:19768;RQ:34.14;RQ:-1.00\t120\t+\t60617370\t60617430\t255,0,0\t2\t60,75\t0,47\n+chr2\t35413902\t35414074\tNB500968:8:H5CFGAFXX:1:11101:13540:9920___1:N:0:TCCTGAGC+TACTCCTT;FP:46767;RQ:34.13;RQ:-1.00\t120\t-\t35414014\t35414074\t255,0,0\t2\t72,60\t0,112\n+chr1\t191940652\t191940751\tNB500968:8:H5CFGAFXX:1:11101:14161:19523___1:N:0:TCCTGAGC+TACTCCTT;FP:12836;RQ:34.15;RQ:-1.00\t0\t+\t191940652\t191940712\t255,0,0\t2\t60,75\t0,24\n+chr14\t8161158\t8161369\tNB500968:8:H5CFGAFXX:1:11101:14654:3876___1:N:0:TCCTGAGC+TACTCCTT;FP:12198;RQ:34.13;RQ:-1.00\t120\t+\t8161158\t8161217\t255,0,0\t2\t59,75\t0,136\n+chr1\t79663344\t79663574\tNB500968:8:H5CFGAFXX:1:11101:14725:9666___1:N:0:TCCTGAGC+TACTCCTT;FP:61191;RQ:34.15;RQ:-1.00\t120\t-\t79663514\t79663574\t255,0,
0\t2\t75,60\t0,170\n+chr18\t82700701\t82700958\tNB500968:8:H5CFGAFXX:1:11101:14731:13936___1:N:0:TCCTGAGC+TACTCCTT;FP:53778;RQ:34.24;RQ:-1.00\t12\t+\t82700701\t82700760\t255,0,0\t2\t59,75\t0,182\n+chr3\t95667830\t95668064\tNB500968:8:H5CFGAFXX:1:11101:14828:10575___1:N:0:TCCTGAGC+TACTCCTT;FP:'..b'1612:13616:5442___1:N:0:TCCTGAGC+TACTCCTT;FP:39686;RQ:34.24;RQ:-1.00\t120\t-\t92762707\t92762766\t255,0,0\t2\t74,59\t0,120\n+chr6\t108308176\t108308266\tNB500968:8:H5CFGAFXX:4:21612:13927:20331___1:N:0:TCCTGAGC+TACTCCTT;FP:26860;RQ:34.15;RQ:-1.00\t120\t+\t108308176\t108308236\t255,0,0\t2\t60,75\t0,15\n+chr15\t98864025\t98864135\tNB500968:8:H5CFGAFXX:4:21612:14367:16404___1:N:0:TCCTGAGC+TACTCCTT;FP:26906;RQ:34.61;RQ:-1.00\t58\t-\t98864094\t98864135\t255,0,0\t2\t29,41\t0,69\n+chr5\t96921109\t96921218\tNB500968:8:H5CFGAFXX:4:21612:14944:4586___1:N:0:TCCTGAGC+TACTCCTT;FP:60577;RQ:34.15;RQ:-1.00\t58\t-\t96921158\t96921218\t255,0,0\t2\t34,60\t0,49\n+chr5\t67755180\t67755278\tNB500968:8:H5CFGAFXX:4:21612:15757:19226___1:N:0:TCCTGAGC+TACTCCTT;FP:12088;RQ:34.15;RQ:-1.00\t58\t-\t67755255\t67755278\t255,0,0\t2\t77,23\t0,75\n+chrM\t718\t812\tNB500968:8:H5CFGAFXX:4:21612:15940:16347___1:N:0:TCCTGAGC+TACTCCTT;FP:42592;RQ:34.15;RQ:-1.00\t120\t+\t718\t778\t255,0,0\t2\t60,75\t0,19\n+chr1\t155921158\t155921241\tNB500968:8:H5CFGAFXX:4:21612:17106:8488___1:N:0:TCCTGAGC+TACTCCTT;FP:23179;RQ:34.24;RQ:-1.00\t120\t-\t155921182\t155921241\t255,0,0\t2\t75,59\t0,24\n+chr17\t83395645\t83395765\tNB500968:8:H5CFGAFXX:4:21612:17516:3183___1:N:0:TCCTGAGC+TACTCCTT;FP:13755;RQ:34.15;RQ:-1.00\t120\t+\t83395645\t83395705\t255,0,0\t2\t60,75\t0,45\n+chrX\t36769063\t36769191\tNB500968:8:H5CFGAFXX:4:21612:17631:17717___1:N:0:TACTGAGC+TACTCCTT;FP:50712;RQ:34.24;RQ:-1.00\t120\t+\t36769063\t36769121\t255,0,0\t2\t58,75\t0,53\n+chr7\t3653336\t3653534\tNB500968:8:H5CFGAFXX:4:21612:18940:19575___1:N:0:TCCTGAGC+TACTCCTT;FP:37131;RQ:34.15;RQ:-1.00\t58\t-\t3653510\t3653534\t255,0,0\t2\t75,24\t0,174\n+chr10\t127551672\t1275
51748\tNB500968:8:H5CFGAFXX:4:21612:19163:3945___1:N:0:TCCTGAGC+TACTCCTT;FP:42256;RQ:34.14;RQ:-1.00\t120\t-\t127551687\t127551748\t255,0,0\t2\t76,61\t0,15\n+chr7\t29621915\t29622063\tNB500968:8:H5CFGAFXX:4:21612:19524:12034___1:N:0:TCCTGAGC+TACTCCTT;FP:54532;RQ:34.24;RQ:-1.00\t0\t+\t29621915\t29621974\t255,0,0\t2\t59,75\t0,73\n+chr10\t78190083\t78190220\tNB500968:8:H5CFGAFXX:4:21612:21286:14549___1:N:0:TCCTGAGC+TACTCCTT;FP:39926;RQ:34.15;RQ:-1.00\t120\t-\t78190160\t78190220\t255,0,0\t2\t75,60\t0,77\n+chr17\t26595978\t26596070\tNB500968:8:H5CFGAFXX:4:21612:21430:8369___1:N:0:TCCTGAGC+TACTCCTT;FP:64717;RQ:34.13;RQ:-1.00\t120\t+\t26595978\t26596038\t255,0,0\t2\t60,75\t0,17\n+chr12\t111069658\t111069841\tNB500968:8:H5CFGAFXX:4:21612:21827:11124___1:N:0:TCCTGAGC+TACTCCTT;FP:42726;RQ:34.15;RQ:-1.00\t30\t+\t111069658\t111069718\t255,0,0\t2\t60,69\t0,114\n+chr3\t157948843\t157948940\tNB500968:8:H5CFGAFXX:4:21612:22117:16491___1:N:0:TCCTGAGC+TACTCCTT;FP:1309;RQ:34.15;RQ:-1.00\t58\t+\t157948843\t157948903\t255,0,0\t2\t60,37\t0,60\n+chr11\t11690189\t11690360\tNB500968:8:H5CFGAFXX:4:21612:22172:12058___1:N:0:TCCTGAGC+TACTCCTT;FP:25252;RQ:34.15;RQ:-1.00\t120\t+\t11690189\t11690249\t255,0,0\t2\t60,74\t0,97\n+chr15\t6633789\t6633884\tNB500968:8:H5CFGAFXX:4:21612:22282:17810___1:N:0:TCCTGAGC+TACTCCTT;FP:64172;RQ:34.15;RQ:-1.00\t58\t+\t6633789\t6633849\t255,0,0\t2\t60,34\t0,61\n+chr5\t114947726\t114947828\tNB500968:8:H5CFGAFXX:4:21612:23669:16425___1:N:0:TACTGAGC+TACTCCTT;FP:39944;RQ:34.15;RQ:-1.00\t120\t+\t114947726\t114947786\t255,0,0\t2\t60,73\t0,29\n+chr5\t81632864\t81632998\tNB500968:8:H5CFGAFXX:4:21612:24064:10488___1:N:0:TCCTGAGC+TACTCCTT;FP:27442;RQ:34.15;RQ:-1.00\t0\t-\t81632938\t81632998\t255,0,0\t2\t74,60\t0,74\n+chrM\t572\t817\tNB500968:8:H5CFGAFXX:4:21612:24619:2557___1:N:0:TCCTGAGC+TACTCCTT;FP:20376;RQ:34.14;RQ:-1.00\t120\t+\t572\t632\t255,0,0\t2\t60,75\t0,170\n+chr8\t46846122\t46846243\tNB500968:8:H5CFGAFXX:4:21612:24649:5476___1:N:0:TCCTGAGC+TACTCCTT;FP:46267;RQ:34.1
5;RQ:-1.00\t120\t+\t46846122\t46846182\t255,0,0\t2\t60,73\t0,48\n+chr4\t125120460\t125120670\tNB500968:8:H5CFGAFXX:4:21612:25299:2118___1:N:0:TCCTGAGC+TACTCCTT;FP:25753;RQ:34.14;RQ:-1.00\t120\t-\t125120611\t125120670\t255,0,0\t2\t74,59\t0,151\n+chr18\t36287972\t36288128\tNB500968:8:H5CFGAFXX:4:21612:25380:17392___1:N:0:TCCTGAGC+TACTCCTT;FP:12834;RQ:34.15;RQ:-1.00\t120\t+\t36287972\t36288032\t255,0,0\t2\t60,75\t0,81\n+chr3\t50460559\t50460646\tNB500968:8:H5CFGAFXX:4:21612:26587:6771___1:N:0:TCCTGAGC+TACTCCTT;FP:30019;RQ:34.15;RQ:-1.00\t120\t-\t50460586\t50460646\t255,0,0\t2\t75,60\t0,27\n'
b
diff -r 000000000000 -r d1d0ee366702 umicount.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/umicount.xml Wed May 11 04:53:30 2016 -0400
[
@@ -0,0 +1,46 @@
+<tool id="umicount" name="umicount" version="0.1">
+    <description>Runs umicount</description>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Error" />
+    </stdio>
+    <command interpreter="python">
+        dedup_fingerprint.py -f "${input}" > "${output}"
+    </command>
+    <inputs>
+        <param name="input" format="bed" label="Select Paired BED file" type="data" />
+    </inputs>
+    <outputs>
+        <data format="bed" name="output" label="Umicount output for ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param ftype="bed" name="input" value="paired.bed" />
+            <output file="cagescan_fragments.bed" ftype="bed" name="output" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+Runs umicount (https://github.com/mmendez12/umicount)
+
+See http://umicount.readthedocs.org/en/latest/introduction.html for more details.
+]]>
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{
+                umicount_bibtex,
+                author = {Mickaël Mendez},
+                title = {umicount on GitHub},
+                url = {https://github.com/mmendez12/umicount}
+            }
+        </citation>
+        <citation type="bibtex">
+            @misc{
+                umicount_docs,
+                author = {Mickaël Mendez},
+                title = {umicount documentation on readthedocs},
+                url = {http://umicount.readthedocs.org}
+            }
+        </citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r d1d0ee366702 umicount_license
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/umicount_license Wed May 11 04:53:30 2016 -0400
b
@@ -0,0 +1,19 @@
+Taken from: https://github.com/mmendez12/umicount/blob/master/LICENSE
+
+Copyright: 2015   RIKEN Center for Life Science Technologies, Division of Genomics Technologies
+Author: Mickaël Mendez <mendez.mickael@gmail.com>
+License: GNU public license version 2 (GPL v2)
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.