Repository 'gffcompare_to_bed'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/gffcompare_to_bed

Changeset 0:7e572e148175 (2018-01-11)
Next changeset 1:0f62097d7c1a (2019-04-26)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/gffcompare_to_bed commit 321b217382f6be33bd77c7dbb51c8caf5fa50afe
added:
gffcompare_to_bed.py
gffcompare_to_bed.xml
test-data/gffcompare_annotated.gtf
test-data/gffcompare_output.bed
test-data/gffcompare_output_j.bed
test-data/gffcompare_output_jeipux.bed
b
diff -r 000000000000 -r 7e572e148175 gffcompare_to_bed.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gffcompare_to_bed.py Thu Jan 11 11:16:51 2018 -0500
[
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+"""
+#
+#------------------------------------------------------------------------------
+#                         University of Minnesota
+#         Copyright 2017, Regents of the University of Minnesota
+#------------------------------------------------------------------------------
+# Author:
+#
+#  James E Johnson
+#
+#------------------------------------------------------------------------------
+"""
+
+import argparse
+import sys
+
+
+class BedEntry(object):
+    """A single BED12 record: one feature plus its list of blocks.
+
+    Values are normalised on construction (see ``__init__``) and ``str()``
+    renders the twelve tab-separated BED columns.
+    """
+
+    # Text types accepted for comma-separated block lists.  ``unicode`` only
+    # exists on Python 2, so referencing it unguarded raises NameError on
+    # Python 3 as soon as a non-``str`` value (e.g. a list) is tested.
+    try:
+        _string_types = (str, unicode)  # noqa: F821
+    except NameError:
+        _string_types = (str,)
+
+    def __init__(self, chrom=None, chromStart=None, chromEnd=None,
+                 name=None, score=None, strand=None,
+                 thickStart=None, thickEnd=None, itemRgb=None,
+                 blockCount=None, blockSizes=None, blockStarts=None):
+        """Build a record, coercing the numeric columns to ``int``.
+
+        chromStart, chromEnd and blockCount are required in practice: they
+        are passed straight to ``int()``, which raises TypeError for None.
+
+        Defaults applied to the optional columns:
+          score       0 when None
+          strand      '-' when ``str(strand)`` starts with '-', else '+'
+          thickStart  chromStart when falsy (None, '' or 0)
+          thickEnd    chromEnd when falsy (None, '' or 0)
+          itemRgb     '100,100,100' when None
+
+        blockSizes and blockStarts accept either a comma-separated string
+        or a list; both become a list of ints.  Any other value (including
+        None) is stored unchanged.
+        """
+        self.chrom = chrom
+        self.chromStart = int(chromStart)
+        self.chromEnd = int(chromEnd)
+        self.name = name
+        self.score = int(score) if score is not None else 0
+        self.strand = '-' if str(strand).startswith('-') else '+'
+        self.thickStart = int(thickStart) if thickStart else self.chromStart
+        self.thickEnd = int(thickEnd) if thickEnd else self.chromEnd
+        self.itemRgb = str(itemRgb) if itemRgb is not None else r'100,100,100'
+        self.blockCount = int(blockCount)
+        # Both block columns go through the same helper so the type checks
+        # cannot drift apart (the original tested blockSizes when deciding
+        # how to convert blockStarts).
+        self.blockSizes = self._int_list(blockSizes)
+        self.blockStarts = self._int_list(blockStarts)
+
+    @classmethod
+    def _int_list(cls, value):
+        """Return ``value`` as a list of ints when it is a string or list."""
+        if isinstance(value, cls._string_types):
+            # Skip empty pieces: BED block lists may end with a trailing
+            # comma, which would otherwise reach int('') and raise.
+            return [int(x) for x in value.split(',') if x.strip()]
+        if isinstance(value, list):
+            return [int(x) for x in value]
+        return value
+
+    def __str__(self):
+        """Return the record as one tab-separated BED12 line, no newline."""
+        return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s' % (
+            self.chrom, self.chromStart, self.chromEnd,
+            self.name, self.score, self.strand,
+            self.thickStart, self.thickEnd, str(self.itemRgb), self.blockCount,
+            ','.join([str(x) for x in self.blockSizes]),
+            ','.join([str(x) for x in self.blockStarts]))
+
+
+def _parse_attributes(attributes):
+    """Parse a GTF attribute column into a dict of unquoted values.
+
+    The column is a ';'-separated list of ``key "value"`` pairs.  Empty
+    segments are skipped and a key without a value maps to ''.  Values keep
+    embedded spaces.  A ';' inside a quoted value is not supported.
+    """
+    attribute = {}
+    for item in attributes.split(';'):
+        item = item.strip()
+        if not item:
+            continue
+        key, _, value = item.partition(' ')
+        attribute[key] = value.strip().strip('"')
+    return attribute
+
+
+def _convert(input_rdr, output_wtr, class_codes, debug=False):
+    """Read GTF lines from input_rdr and write BED12 lines to output_wtr.
+
+    Each 'transcript' line starts a new BED entry and the 'exon' lines that
+    follow it become its blocks, in file order.  Comment lines, lines that
+    do not have exactly nine tab-separated fields and other feature types
+    are ignored.  When class_codes is non-empty, only transcripts whose
+    class_code attribute is in it are written.
+
+    Raises KeyError if a selected transcript has no transcript_id.
+    """
+
+    def write_bed_entry(bed):
+        # A transcript with no exon lines keeps the placeholder single block
+        # that spans the whole transcript; only its count needs correcting.
+        if bed.blockCount == 0:
+            bed.blockCount = 1
+        output_wtr.write("%s\n" % str(bed))
+
+    bed = None
+    for line in input_rdr:
+        if line.startswith('#'):
+            continue
+        fields = line.rstrip('\r\n').split('\t')
+        if len(fields) != 9:
+            continue
+        (seqname, source, feature, start, end,
+         score, strand, frame, attributes) = fields
+        if feature == 'transcript':
+            # Only transcript lines carry the attributes that are used.
+            attribute = _parse_attributes(attributes)
+            if debug:
+                sys.stderr.write("%s\t%s\n" % (
+                    '\t'.join([seqname, source, feature,
+                               start, end, score, strand, frame]),
+                    attribute))
+            # Flush the previous transcript before deciding on this one.
+            if bed is not None:
+                write_bed_entry(bed)
+                bed = None
+            class_code = attribute.get('class_code')
+            if class_codes and class_code not in class_codes:
+                # bed stays None, so this transcript's exons are skipped.
+                continue
+            chromStart = int(start) - 1  # GTF is 1-based, BED is 0-based
+            chromEnd = int(end)
+            # Exact matches ('=') keep the bare transcript id as the name.
+            cat = '_' + class_code if class_code and class_code != '=' else ''
+            bed = BedEntry(chrom=seqname,
+                           chromStart=chromStart, chromEnd=chromEnd,
+                           name=attribute['transcript_id'] + cat,
+                           strand=strand,
+                           blockCount=0,
+                           blockSizes=[chromEnd - chromStart],
+                           blockStarts=[0])
+        elif feature == 'exon' and bed is not None:
+            chromStart = int(start) - 1
+            chromEnd = int(end)
+            if bed.blockCount == 0:
+                # First exon: drop the whole-transcript placeholder block.
+                bed.blockSizes = []
+                bed.blockStarts = []
+            bed.blockSizes.append(chromEnd - chromStart)
+            bed.blockStarts.append(chromStart - bed.chromStart)
+            bed.blockCount += 1
+    if bed is not None:
+        write_bed_entry(bed)
+
+
+def __main__():
+    """Command-line entry point: convert a gffcompare GTF to BED12.
+
+    Reads the positional ``input`` and ``output`` paths ('-' selects stdin
+    or stdout) and the repeatable ``-C/--class_code`` option, whose values
+    may themselves be comma-separated lists of class codes.
+    """
+    parser = argparse.ArgumentParser(
+        description='Convert a gffcompare annotated GTF file to BED format')
+    parser.add_argument(
+        'input',
+        help='GFFCompare annotated GTF file,  (-) for stdin')
+    parser.add_argument(
+        'output',
+        help='BED file,  (-) for stdout')
+    parser.add_argument(
+        '-C', '--class_code', action='append', default=[],
+        help='Restrict output to gffcompare class codes')
+    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
+    args = parser.parse_args()
+
+    # An empty set means "no filtering"; blank codes are dropped so that an
+    # empty --class_code= value does not filter out every transcript.
+    class_codes = set(c.strip() for codes in args.class_code
+                      for c in codes.split(',') if c.strip())
+
+    input_rdr = open(args.input, 'r') if args.input != '-' else sys.stdin
+    try:
+        output_wtr = (open(args.output, 'w') if args.output != '-'
+                      else sys.stdout)
+        try:
+            _convert(input_rdr, output_wtr, class_codes, debug=args.debug)
+        finally:
+            # Close only what was opened here; leave the std streams alone.
+            if output_wtr is not sys.stdout:
+                output_wtr.close()
+    finally:
+        if input_rdr is not sys.stdin:
+            input_rdr.close()
+
+
+if __name__ == "__main__":
+    __main__()
b
diff -r 000000000000 -r 7e572e148175 gffcompare_to_bed.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gffcompare_to_bed.xml Thu Jan 11 11:16:51 2018 -0500
[
@@ -0,0 +1,66 @@
+<tool id="gffcompare_to_bed" name="Convert gffCompare annotated GTF to BED" version="0.1.0">
+    <description>for StringTie results</description>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Every substituted value is single-quoted so that the shell passes
+        ## each one to the script as exactly one argument.
+        python '$__tool_directory__/gffcompare_to_bed.py'
+        #if $class_codes:
+            --class_code='$class_codes'
+        #end if
+        '$input' '$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="gtf" label="GTF annotated by gffCompare"/>
+        <param name="class_codes" type="select" display="checkboxes" multiple="true" optional="true" 
+            label="filter gffCompare class_codes to convert" help="No selection implies no filtering">
+            <option value="=">= : Complete match of intron chain</option>
+            <option value="c">c : Contained</option>
+            <option value="j">j : Potentially novel isoform (fragment): at least one splice junction is shared with a reference transcript</option>
+            <option value="e">e : Single exon transfrag overlapping a reference exon and at least 10 bp of a reference intron, indicating a possible pre-mRNA fragment.</option>
+            <option value="i">i : A transfrag falling entirely within a reference intron</option>
+            <option value="o">o : Generic exonic overlap with a reference transcript</option>
+            <option value="p">p : Possible polymerase run-on fragment (within 2Kbases of a reference transcript)</option>
+            <option value="r">r : Repeat. Currently determined by looking at the soft-masked reference sequence and applied to transcripts where at least 50% of the bases are lower case</option>
+            <option value="u">u : Unknown, intergenic transcript</option>
+            <option value="x">x : Exonic overlap with reference on the opposite strand</option>
+            <option value="s">s : An intron of the transfrag overlaps a reference intron on the opposite strand (likely due to read mapping errors)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="bed" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="gffcompare_annotated.gtf" ftype="gtf"/>
+            <output name="output" file="gffcompare_output.bed"/>
+        </test>
+        <test>
+            <param name="input" value="gffcompare_annotated.gtf" ftype="gtf"/>
+            <param name="class_codes" value="j"/>
+            <output name="output" file="gffcompare_output_j.bed"/>
+        </test>
+        <test>
+            <param name="input" value="gffcompare_annotated.gtf" ftype="gtf"/>
+            <param name="class_codes" value="j,e,i,p,u,x"/>
+            <output name="output" file="gffcompare_output_jeipux.bed"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+Convert a GFFCompare annotated GTF file to BED format.
+
+usage: gffcompare_to_bed.py [-h] [-C CLASS_CODE] [-d] input output
+
+positional arguments:
+  input                 GFFCompare annotated GTF file, (-) for stdin
+  output                BED file, (-) for stdout
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -C CLASS_CODE, --class_code CLASS_CODE
+                        Restrict output to gffcompare class codes
+  -d, --debug           Debug
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/nbt.1621</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 7e572e148175 test-data/gffcompare_annotated.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gffcompare_annotated.gtf Thu Jan 11 11:16:51 2018 -0500
b
b'@@ -0,0 +1,1024 @@\n+1\tStringTie\texon\t43778515\t43778980\t.\t-\t.\ttranscript_id "STRG.1163.3"; gene_id "STRG.1163"; exon_number "2";\n+1\tStringTie\ttranscript\t43776567\t43778980\t.\t-\t.\ttranscript_id "STRG.1163.2"; gene_id "STRG.1163"; gene_name "Uxs1"; xloc "XLOC_000439"; cmp_ref "ENSMUST00000126008"; class_code "i"; tss_id "TSS1162";\n+1\tStringTie\texon\t43776567\t43777660\t.\t-\t.\ttranscript_id "STRG.1163.2"; gene_id "STRG.1163"; exon_number "1";\n+1\tStringTie\texon\t43778515\t43778758\t.\t-\t.\ttranscript_id "STRG.1163.2"; gene_id "STRG.1163"; exon_number "2";\n+1\tStringTie\texon\t43778881\t43778980\t.\t-\t.\ttranscript_id "STRG.1163.2"; gene_id "STRG.1163"; exon_number "3";\n+1\tStringTie\ttranscript\t43776567\t43778980\t.\t-\t.\ttranscript_id "STRG.1163.1"; gene_id "STRG.1163"; gene_name "Uxs1"; xloc "XLOC_000439"; cmp_ref "ENSMUST00000126008"; class_code "i"; tss_id "TSS1162";\n+1\tStringTie\texon\t43776567\t43777721\t.\t-\t.\ttranscript_id "STRG.1163.1"; gene_id "STRG.1163"; exon_number "1";\n+1\tStringTie\texon\t43778454\t43778575\t.\t-\t.\ttranscript_id "STRG.1163.1"; gene_id "STRG.1163"; exon_number "2";\n+1\tStringTie\texon\t43778881\t43778980\t.\t-\t.\ttranscript_id "STRG.1163.1"; gene_id "STRG.1163"; exon_number "3";\n+1\tStringTie\ttranscript\t45331687\t45332998\t.\t+\t.\ttranscript_id "STRG.1209.1"; gene_id "STRG.1209"; gene_name "Col3a1"; xloc "XLOC_000069"; cmp_ref "ENSMUST00000087883"; class_code "c"; tss_id "TSS174";\n+1\tStringTie\texon\t45331687\t45331776\t.\t+\t.\ttranscript_id "STRG.1209.1"; gene_id "STRG.1209"; exon_number "1";\n+1\tStringTie\texon\t45331973\t45332026\t.\t+\t.\ttranscript_id "STRG.1209.1"; gene_id "STRG.1209"; exon_number "2";\n+1\tStringTie\texon\t45332135\t45332242\t.\t+\t.\ttranscript_id "STRG.1209.1"; gene_id "STRG.1209"; exon_number "3";\n+1\tStringTie\texon\t45332965\t45332998\t.\t+\t.\ttranscript_id "STRG.1209.1"; gene_id "STRG.1209"; exon_number 
"4";\n+1\tStringTie\ttranscript\t6453837\t6460700\t.\t+\t.\ttranscript_id "STRG.88.1"; gene_id "STRG.88"; xloc "XLOC_000004"; class_code "u"; tss_id "TSS16";\n+1\tStringTie\texon\t6453837\t6453886\t.\t+\t.\ttranscript_id "STRG.88.1"; gene_id "STRG.88"; exon_number "1";\n+1\tStringTie\texon\t6460545\t6460700\t.\t+\t.\ttranscript_id "STRG.88.1"; gene_id "STRG.88"; exon_number "2";\n+1\tStringTie\ttranscript\t86703819\t87050095\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; gene_name "Dis3l2"; xloc "XLOC_000165"; ref_gene_id "ENSMUSG00000053333"; cmp_ref "ENSMUST00000168237"; class_code "="; tss_id "TSS424"; p_id "P253";\n+1\tStringTie\texon\t86703819\t86703871\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "1";\n+1\tStringTie\texon\t86744681\t86744828\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "2";\n+1\tStringTie\texon\t86745341\t86745498\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "3";\n+1\tStringTie\texon\t86754227\t86754280\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "4";\n+1\tStringTie\texon\t86760288\t86760389\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "5";\n+1\tStringTie\texon\t86791416\t86791457\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "6";\n+1\tStringTie\texon\t86821222\t86821450\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "7";\n+1\tStringTie\texon\t86854360\t86854460\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "8";\n+1\tStringTie\texon\t86857127\t86857374\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "9";\n+1\tStringTie\texon\t86878403\t86878576\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "10";\n+1\tStringTie\texon\t86930754\t86930833\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number 
"11";\n+1\tStringTie\texon\t86959743\t86959855\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "12";\n+1\tStringTie\texon\t86973400\t86973507\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "13";\n+1\tStringTie\texon\t86990092\t86990325\t.\t+\t.\ttranscript_id "STRG.2299.1"; gene_id "STRG.2299"; exon_number "14";\n+1\tStringTie\texon\t87021038\t87021117\t.\t+\t.\ttranscrip'..b' "STRG.52021"; exon_number "4";\n+12\tStringTie\texon\t69207859\t69207936\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "5";\n+12\tStringTie\texon\t69208021\t69208080\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "6";\n+12\tStringTie\texon\t69208917\t69208971\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "7";\n+12\tStringTie\texon\t69209312\t69209994\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "8";\n+12\tStringTie\texon\t69211373\t69211445\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "9";\n+12\tStringTie\texon\t69212223\t69212271\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "10";\n+12\tStringTie\texon\t69212987\t69213043\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "11";\n+12\tStringTie\texon\t69213829\t69213912\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "12";\n+12\tStringTie\texon\t69215311\t69215385\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "13";\n+12\tStringTie\texon\t69221977\t69222070\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "14";\n+12\tStringTie\texon\t69222399\t69222476\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "15";\n+12\tStringTie\texon\t69223104\t69223179\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "16";\n+12\tStringTie\texon\t69226338\t69226438\t.\t-\t.\ttranscript_id 
"STRG.52021.2"; gene_id "STRG.52021"; exon_number "17";\n+12\tStringTie\texon\t69228097\t69228169\t.\t-\t.\ttranscript_id "STRG.52021.2"; gene_id "STRG.52021"; exon_number "18";\n+12\tStringTie\ttranscript\t69201779\t69228190\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; gene_name "Pole2"; xloc "XLOC_002837"; ref_gene_id "ENSMUSG00000020974"; cmp_ref "ENSMUST00000021359"; class_code "="; tss_id "TSS6896"; p_id "P3723";\n+12\tStringTie\texon\t69201779\t69201882\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "1";\n+12\tStringTie\texon\t69202907\t69202974\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "2";\n+12\tStringTie\texon\t69204091\t69204267\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "3";\n+12\tStringTie\texon\t69206410\t69206518\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "4";\n+12\tStringTie\texon\t69207859\t69207936\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "5";\n+12\tStringTie\texon\t69208021\t69208080\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "6";\n+12\tStringTie\texon\t69208917\t69208971\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "7";\n+12\tStringTie\texon\t69209312\t69209401\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "8";\n+12\tStringTie\texon\t69209822\t69209994\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "9";\n+12\tStringTie\texon\t69211373\t69211445\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "10";\n+12\tStringTie\texon\t69212223\t69212271\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "11";\n+12\tStringTie\texon\t69212987\t69213043\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "12";\n+12\tStringTie\texon\t69213829\t69213912\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; 
exon_number "13";\n+12\tStringTie\texon\t69215311\t69215385\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "14";\n+12\tStringTie\texon\t69221977\t69222070\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "15";\n+12\tStringTie\texon\t69222399\t69222476\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "16";\n+12\tStringTie\texon\t69223104\t69223179\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "17";\n+12\tStringTie\texon\t69226338\t69226438\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "18";\n+12\tStringTie\texon\t69228097\t69228190\t.\t-\t.\ttranscript_id "STRG.52021.1"; gene_id "STRG.52021"; exon_number "19";\n'
b
diff -r 000000000000 -r 7e572e148175 test-data/gffcompare_output.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gffcompare_output.bed Thu Jan 11 11:16:51 2018 -0500
b
b'@@ -0,0 +1,124 @@\n+1\t43776566\t43778980\tSTRG.1163.2_i\t0\t-\t43776566\t43778980\t100,100,100\t3\t1094,244,100\t0,1948,2314\n+1\t43776566\t43778980\tSTRG.1163.1_i\t0\t-\t43776566\t43778980\t100,100,100\t3\t1155,122,100\t0,1887,2314\n+1\t45331686\t45332998\tSTRG.1209.1_c\t0\t+\t45331686\t45332998\t100,100,100\t4\t90,54,108,34\t0,286,448,1278\n+1\t6453836\t6460700\tSTRG.88.1_u\t0\t+\t6453836\t6460700\t100,100,100\t2\t50,156\t0,6708\n+1\t86703818\t87050095\tSTRG.2299.1\t0\t+\t86703818\t87050095\t100,100,100\t22\t53,148,158,54,102,42,229,101,248,174,80,113,108,234,80,184,87,148,131,105,102,473\t0,40862,41522,50408,56469,87597,117403,150541,153308,174584,226935,255924,269581,286273,317219,340198,341011,343218,343629,343869,345014,345804\n+1\t86703827\t86882439\tSTRG.2299.5\t0\t+\t86703827\t86882439\t100,100,100\t11\t44,148,158,54,102,42,78,229,101,248,4037\t0,40853,41513,50399,56460,87588,108684,117394,150532,153299,174575\n+1\t86703827\t87049195\tSTRG.2299.3\t0\t+\t86703827\t87049195\t100,100,100\t21\t44,148,158,54,102,78,229,101,248,174,80,113,108,234,80,184,87,148,131,105,363\t0,40853,41513,50399,56460,108684,117394,150532,153299,174575,226926,255915,269572,286264,317210,340189,341002,343209,343620,343860,345005\n+1\t86703842\t87050092\tSTRG.2299.4_j\t0\t+\t86703842\t87050092\t100,100,100\t21\t29,148,158,54,102,78,229,101,248,174,80,113,234,80,184,87,148,131,105,102,470\t0,40838,41498,50384,56445,108669,117379,150517,153284,174560,226911,255900,286249,317195,340174,340987,343194,343605,343845,344990,345780\n+1\t86703842\t87050092\tSTRG.2299.2\t0\t+\t86703842\t87050092\t100,100,100\t21\t29,148,158,54,102,229,101,248,174,80,113,108,234,80,184,87,148,131,105,102,470\t0,40838,41498,50384,56445,117379,150517,153284,174560,226911,255900,269557,286249,317195,340174,340987,343194,343605,343845,344990,345780\n+1\t86754158\t86767638\tSTRG.2299.6_j\t0\t+\t86754158\t86767638\t100,100,100\t3\t122,102,509\t0,6129,12971\n+1\t9941958\t9944118\tSTRG.229.1\t0\t-\t9941958\t9944118\t
100,100,100\t4\t197,69,108,99\t0,364,651,2061\n+1\t9942022\t9943223\tSTRG.229.5\t0\t-\t9942022\t9943223\t100,100,100\t3\t133,69,614\t0,300,587\n+1\t9942024\t9942897\tSTRG.229.7\t0\t-\t9942024\t9942897\t100,100,100\t2\t367,288\t0,585\n+1\t9942024\t9944103\tSTRG.229.6_j\t0\t-\t9942024\t9944103\t100,100,100\t3\t131,395,84\t0,298,1995\n+1\t9942024\t9944103\tSTRG.229.4\t0\t-\t9942024\t9944103\t100,100,100\t3\t367,108,84\t0,585,1995\n+1\t9942024\t9944103\tSTRG.229.3_j\t0\t-\t9942024\t9944103\t100,100,100\t4\t131,62,108,84\t0,298,585,1995\n+1\t9942024\t9944117\tSTRG.229.2\t0\t-\t9942024\t9944117\t100,100,100\t4\t125,69,108,98\t0,298,585,1995\n+1\t86908792\t86909683\tSTRG.2297.1_i\t0\t-\t86908792\t86909683\t100,100,100\t2\t319,285\t0,606\n+1\t193145390\t193173087\tSTRG.5202.1_j\t0\t+\t193145390\t193173087\t100,100,100\t8\t42,177,205,129,159,393,119,3806\t0,11825,16222,17211,20375,22048,23126,23891\n+1\t193156668\t193164181\tSTRG.5202.2\t0\t+\t193156668\t193164181\t100,100,100\t3\t724,205,1580\t0,4944,5933\n+1\t193167974\t193173087\tSTRG.5202.3_j\t0\t+\t193167974\t193173087\t100,100,100\t3\t157,119,3806\t0,542,1307\n+1\t193168502\t193169489\tSTRG.5202.4\t0\t+\t193168502\t193169489\t100,100,100\t2\t133,208\t0,779\n+2\t119749138\t119749844\tSTRG.8811.1_j\t0\t+\t119749138\t119749844\t100,100,100\t3\t148,217,172\t0,223,534\n+2\t119972463\t120018974\tSTRG.8821.1_j\t0\t+\t119972463\t120018974\t100,100,100\t19\t17,223,92,63,58,171,138,183,161,190,147,176,92,126,71,122,76,112,53\t0,482,26018,38279,38538,39469,40129,40448,41155,42159,42879,43229,44316,44676,45117,45292,45835,46024,46458\n+2\t120011686\t120013094\tSTRG.8821.3\t0\t+\t120011686\t120013094\t100,100,100\t3\t417,138,183\t0,906,1225\n+2\t120013042\t120015605\tSTRG.8821.2\t0\t+\t120013042\t120015605\t100,100,100\t4\t52,161,190,263\t0,576,1580,2300\n+2\t121008402\t121016904\tSTRG.8871.1\t0\t+\t121008402\t121016904\t100,100,100\t11\t207,60,80,82,97,74,77,278,61,47,506\t0,315,640,1424,3031,3280,3950,4296,5555,7728,7996\n+2\t121
008431\t121015120\tSTRG.8871.3\t0\t+\t121008431\t121015120\t100,100,100\t10\t178,60,80,82,97,74,77,278,61,608\t0,286,611,1395,3002,3251,3921,4267,5526,6081\n+2\t121008447\t121016904\tSTRG.8871.2\t0\t+\t121008447\t121016904\t100,100,100\t10\t330,80,82,97,74,77,278,61,47,506\t0,595,1379,2986,3235,3905,4251,5510,7683,7951\n+2\t121008466\t121011756\tSTRG.'..b'100,100,100\t5\t132,48,145,95,14\t0,1856,5575,6895,7227\n+5\t110829079\t110839575\tSTRG.22905.2\t0\t-\t110829079\t110839575\t100,100,100\t5\t132,48,95,97,80\t0,1856,6895,7227,10416\n+5\t110829080\t110834895\tSTRG.22905.4\t0\t-\t110829080\t110834895\t100,100,100\t3\t131,48,241\t0,1855,5574\n+5\t112337391\t112340022\tSTRG.22974.4\t0\t-\t112337391\t112340022\t100,100,100\t5\t350,46,250,99,126\t0,610,926,2327,2505\n+5\t112337391\t112343020\tSTRG.22974.2\t0\t-\t112337391\t112343020\t100,100,100\t7\t350,46,155,99,227,41,146\t0,610,926,2327,2505,3704,5483\n+5\t112337391\t112343040\tSTRG.22974.1\t0\t-\t112337391\t112343040\t100,100,100\t7\t350,72,155,99,227,41,166\t0,584,926,2327,2505,3704,5483\n+5\t112337908\t112339932\tSTRG.22974.5\t0\t-\t112337908\t112339932\t100,100,100\t3\t564,99,36\t0,1810,1988\n+5\t112337971\t112343018\tSTRG.22974.3\t0\t-\t112337971\t112343018\t100,100,100\t6\t76,250,99,227,41,144\t0,346,1747,1925,3124,4903\n+5\t112339718\t112340219\tSTRG.22974.6\t0\t-\t112339718\t112340219\t100,100,100\t2\t99,323\t0,178\n+5\t112419351\t112425650\tSTRG.22982.1_c\t0\t-\t112419351\t112425650\t100,100,100\t3\t2687,103,37\t0,5271,6262\n+5\t112688877\t112765931\tSTRG.22984.3\t0\t-\t112688877\t112765931\t100,100,100\t11\t220,1240,138,45,131,186,138,84,117,114,97\t0,3218,26503,28109,34929,68577,71438,72446,73779,75570,76957\n+5\t112688917\t112871586\tSTRG.22984.1_c\t0\t-\t112688917\t112871586\t100,100,100\t38\t180,1240,138,45,131,186,138,84,117,114,153,105,111,201,124,155,125,109,120,90,144,129,66,110,220,184,160,151,81,193,91,174,145,64,101,143,199,166\t0,3178,26463,28069,34889,68537,71398,72406,73739,75530,86335,93667,96
639,101072,102159,106477,109899,113448,114271,120744,122668,123068,128712,139059,141242,142153,145457,148833,150699,151702,154286,157353,169484,176322,176739,177426,179276,182503\n+5\t112772885\t112871586\tSTRG.22984.2\t0\t-\t112772885\t112871586\t100,100,100\t29\t1641,388,105,111,201,124,155,125,109,120,90,144,129,66,110,220,184,160,151,81,193,91,174,145,64,101,143,199,166\t0,2132,9699,12671,17104,18191,22509,25931,29480,30303,36776,38700,39100,44744,55091,57274,58185,61489,64865,66731,67734,70318,73385,85516,92354,92771,93458,95308,98535\n+5\t112871741\t112874253\tSTRG.22984.5_j\t0\t-\t112871741\t112874253\t100,100,100\t3\t755,67,361\t0,1753,2151\n+5\t112875222\t112896379\tSTRG.22984.4_j\t0\t-\t112875222\t112896379\t100,100,100\t3\t104,159,124\t0,2762,21033\n+5\t112879419\t112896379\tSTRG.22984.6_c\t0\t-\t112879419\t112896379\t100,100,100\t2\t159,124\t0,16836\n+5\t112910481\t113015514\tSTRG.22985.1\t0\t-\t112910481\t113015514\t100,100,100\t21\t4593,114,137,163,96,67,101,67,108,95,131,79,100,92,52,62,75,102,74,77,148\t0,5568,8344,9526,13467,14914,18174,19191,27227,31151,34436,36105,43344,44682,46833,51105,56430,58686,63025,75283,104885\n+5\t112912598\t113015514\tSTRG.22985.2\t0\t-\t112912598\t113015514\t100,100,100\t21\t2476,114,137,163,96,67,101,67,108,95,131,79,100,92,52,62,75,141,74,77,148\t0,3451,6227,7409,11350,12797,16057,17074,25110,29034,32319,33988,41227,42565,44716,48988,54313,56569,60908,73166,102768\n+5\t112916109\t112924375\tSTRG.22985.4\t0\t-\t112916109\t112924375\t100,100,100\t4\t54,137,163,427\t0,2716,3898,7839\n+5\t112920061\t112925798\tSTRG.22985.5\t0\t-\t112920061\t112925798\t100,100,100\t3\t109,96,554\t0,3887,5183\n+5\t112960543\t113015503\tSTRG.22985.3\t0\t-\t112960543\t113015503\t100,100,100\t6\t1105,75,102,74,77,137\t0,6368,8624,12963,25221,54823\n+5\t113086322\t113163351\tSTRG.22991.1\t0\t-\t113086322\t113163351\t100,100,100\t8\t4702,83,143,119,175,241,119,294\t0,6092,6779,7643,9267,11275,15136,76735\n+5\t113184127\t113192079\tSTRG.22997.1_u
\t0\t-\t113184127\t113192079\t100,100,100\t3\t590,88,1476\t0,4158,6476\n+12\t69229664\t69231901\tSTRG.52020.2_u\t0\t+\t69229664\t69231901\t100,100,100\t2\t62,203\t0,2034\n+12\t69199878\t69228169\tSTRG.52021.2_j\t0\t-\t69199878\t69228169\t100,100,100\t18\t2004,68,177,109,78,60,55,683,73,49,57,84,75,94,78,76,101,73\t0,3028,4212,6531,7980,8142,9038,9433,11494,12344,13108,13950,15432,22098,22520,23225,26459,28218\n+12\t69201778\t69228190\tSTRG.52021.1\t0\t-\t69201778\t69228190\t100,100,100\t19\t104,68,177,109,78,60,55,90,173,73,49,57,84,75,94,78,76,101,94\t0,1128,2312,4631,6080,6242,7138,7533,8043,9594,10444,11208,12050,13532,20198,20620,21325,24559,26318\n'
b
diff -r 000000000000 -r 7e572e148175 test-data/gffcompare_output_j.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gffcompare_output_j.bed Thu Jan 11 11:16:51 2018 -0500
b
@@ -0,0 +1,20 @@
+1 86703842 87050092 STRG.2299.4_j 0 + 86703842 87050092 100,100,100 21 29,148,158,54,102,78,229,101,248,174,80,113,234,80,184,87,148,131,105,102,470 0,40838,41498,50384,56445,108669,117379,150517,153284,174560,226911,255900,286249,317195,340174,340987,343194,343605,343845,344990,345780
+1 86754158 86767638 STRG.2299.6_j 0 + 86754158 86767638 100,100,100 3 122,102,509 0,6129,12971
+1 9942024 9944103 STRG.229.6_j 0 - 9942024 9944103 100,100,100 3 131,395,84 0,298,1995
+1 9942024 9944103 STRG.229.3_j 0 - 9942024 9944103 100,100,100 4 131,62,108,84 0,298,585,1995
+1 193145390 193173087 STRG.5202.1_j 0 + 193145390 193173087 100,100,100 8 42,177,205,129,159,393,119,3806 0,11825,16222,17211,20375,22048,23126,23891
+1 193167974 193173087 STRG.5202.3_j 0 + 193167974 193173087 100,100,100 3 157,119,3806 0,542,1307
+2 119749138 119749844 STRG.8811.1_j 0 + 119749138 119749844 100,100,100 3 148,217,172 0,223,534
+2 119972463 120018974 STRG.8821.1_j 0 + 119972463 120018974 100,100,100 19 17,223,92,63,58,171,138,183,161,190,147,176,92,126,71,122,76,112,53 0,482,26018,38279,38538,39469,40129,40448,41155,42159,42879,43229,44316,44676,45117,45292,45835,46024,46458
+2 121008466 121011756 STRG.8871.5_j 0 + 121008466 121011756 100,100,100 4 311,80,1704,74 0,576,1360,3216
+2 121194837 121221232 STRG.8881.3_j 0 - 121194837 121221232 100,100,100 12 3770,146,200,95,2006,192,431,150,272,153,505,75 0,4138,4769,5465,7705,10082,12861,13517,14243,16432,20829,26320
+2 121216142 121247714 STRG.8881.5_j 0 - 121216142 121247714 100,100,100 7 29,93,58,1063,1297,203,214 0,5015,7562,11502,19430,27751,31358
+3 57736119 57820993 STRG.12098.5_j 0 + 57736119 57820993 100,100,100 9 81,144,126,81,126,88,91,106,487 0,11771,28137,32920,43263,60078,66327,70902,84387
+5 110840253 110874013 STRG.22906.3_j 0 + 110840253 110874013 100,100,100 13 1290,125,148,91,109,54,62,100,87,164,116,86,1987 0,8135,8376,9342,15471,18120,20609,23004,25268,26009,26684,27754,31773
+5 110855689 110868215 STRG.22906.4_j 0 + 110855689 110868215 100,100,100 6 144,54,62,100,87,208 0,2684,5173,7568,9832,12318
+5 111417441 111454864 STRG.22929.1_j 0 + 111417441 111454864 100,100,100 3 320,2274,249 0,2168,37174
+5 110786093 110796066 STRG.22901.3_j 0 - 110786093 110796066 100,100,100 20 134,136,158,119,173,191,144,588,269,87,146,126,90,61,148,89,51,83,59,335 0,206,990,1480,1679,2016,2635,2854,4118,4756,4958,5507,6008,6303,6872,7721,8099,8491,8653,9638
+5 110792544 110796066 STRG.22901.5_j 0 - 110792544 110796066 100,100,100 5 569,89,51,221,335 0,1270,1648,2040,3187
+5 112871741 112874253 STRG.22984.5_j 0 - 112871741 112874253 100,100,100 3 755,67,361 0,1753,2151
+5 112875222 112896379 STRG.22984.4_j 0 - 112875222 112896379 100,100,100 3 104,159,124 0,2762,21033
+12 69199878 69228169 STRG.52021.2_j 0 - 69199878 69228169 100,100,100 18 2004,68,177,109,78,60,55,683,73,49,57,84,75,94,78,76,101,73 0,3028,4212,6531,7980,8142,9038,9433,11494,12344,13108,13950,15432,22098,22520,23225,26459,28218
b
diff -r 000000000000 -r 7e572e148175 test-data/gffcompare_output_jeipux.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gffcompare_output_jeipux.bed Thu Jan 11 11:16:51 2018 -0500
b
@@ -0,0 +1,29 @@
+1 43776566 43778980 STRG.1163.2_i 0 - 43776566 43778980 100,100,100 3 1094,244,100 0,1948,2314
+1 43776566 43778980 STRG.1163.1_i 0 - 43776566 43778980 100,100,100 3 1155,122,100 0,1887,2314
+1 6453836 6460700 STRG.88.1_u 0 + 6453836 6460700 100,100,100 2 50,156 0,6708
+1 86703842 87050092 STRG.2299.4_j 0 + 86703842 87050092 100,100,100 21 29,148,158,54,102,78,229,101,248,174,80,113,234,80,184,87,148,131,105,102,470 0,40838,41498,50384,56445,108669,117379,150517,153284,174560,226911,255900,286249,317195,340174,340987,343194,343605,343845,344990,345780
+1 86754158 86767638 STRG.2299.6_j 0 + 86754158 86767638 100,100,100 3 122,102,509 0,6129,12971
+1 9942024 9944103 STRG.229.6_j 0 - 9942024 9944103 100,100,100 3 131,395,84 0,298,1995
+1 9942024 9944103 STRG.229.3_j 0 - 9942024 9944103 100,100,100 4 131,62,108,84 0,298,585,1995
+1 86908792 86909683 STRG.2297.1_i 0 - 86908792 86909683 100,100,100 2 319,285 0,606
+1 193145390 193173087 STRG.5202.1_j 0 + 193145390 193173087 100,100,100 8 42,177,205,129,159,393,119,3806 0,11825,16222,17211,20375,22048,23126,23891
+1 193167974 193173087 STRG.5202.3_j 0 + 193167974 193173087 100,100,100 3 157,119,3806 0,542,1307
+2 119749138 119749844 STRG.8811.1_j 0 + 119749138 119749844 100,100,100 3 148,217,172 0,223,534
+2 119972463 120018974 STRG.8821.1_j 0 + 119972463 120018974 100,100,100 19 17,223,92,63,58,171,138,183,161,190,147,176,92,126,71,122,76,112,53 0,482,26018,38279,38538,39469,40129,40448,41155,42159,42879,43229,44316,44676,45117,45292,45835,46024,46458
+2 121008466 121011756 STRG.8871.5_j 0 + 121008466 121011756 100,100,100 4 311,80,1704,74 0,576,1360,3216
+2 121194837 121221232 STRG.8881.3_j 0 - 121194837 121221232 100,100,100 12 3770,146,200,95,2006,192,431,150,272,153,505,75 0,4138,4769,5465,7705,10082,12861,13517,14243,16432,20829,26320
+2 121216142 121247714 STRG.8881.5_j 0 - 121216142 121247714 100,100,100 7 29,93,58,1063,1297,203,214 0,5015,7562,11502,19430,27751,31358
+3 57736119 57820993 STRG.12098.5_j 0 + 57736119 57820993 100,100,100 9 81,144,126,81,126,88,91,106,487 0,11771,28137,32920,43263,60078,66327,70902,84387
+5 110840253 110874013 STRG.22906.3_j 0 + 110840253 110874013 100,100,100 13 1290,125,148,91,109,54,62,100,87,164,116,86,1987 0,8135,8376,9342,15471,18120,20609,23004,25268,26009,26684,27754,31773
+5 110855689 110868215 STRG.22906.4_j 0 + 110855689 110868215 100,100,100 6 144,54,62,100,87,208 0,2684,5173,7568,9832,12318
+5 111417441 111454864 STRG.22929.1_j 0 + 111417441 111454864 100,100,100 3 320,2274,249 0,2168,37174
+5 111546620 111566751 STRG.22958.1_u 0 + 111546620 111566751 100,100,100 4 167,77,2000,786 0,7231,15766,19345
+5 111553369 111564411 STRG.22958.2_u 0 + 111553369 111564411 100,100,100 3 559,1223,249 0,9017,10793
+5 111553369 111567041 STRG.22958.3_u 0 + 111553369 111567041 100,100,100 3 559,2000,2005 0,9017,11667
+5 110786093 110796066 STRG.22901.3_j 0 - 110786093 110796066 100,100,100 20 134,136,158,119,173,191,144,588,269,87,146,126,90,61,148,89,51,83,59,335 0,206,990,1480,1679,2016,2635,2854,4118,4756,4958,5507,6008,6303,6872,7721,8099,8491,8653,9638
+5 110792544 110796066 STRG.22901.5_j 0 - 110792544 110796066 100,100,100 5 569,89,51,221,335 0,1270,1648,2040,3187
+5 112871741 112874253 STRG.22984.5_j 0 - 112871741 112874253 100,100,100 3 755,67,361 0,1753,2151
+5 112875222 112896379 STRG.22984.4_j 0 - 112875222 112896379 100,100,100 3 104,159,124 0,2762,21033
+5 113184127 113192079 STRG.22997.1_u 0 - 113184127 113192079 100,100,100 3 590,88,1476 0,4158,6476
+12 69229664 69231901 STRG.52020.2_u 0 + 69229664 69231901 100,100,100 2 62,203 0,2034
+12 69199878 69228169 STRG.52021.2_j 0 - 69199878 69228169 100,100,100 18 2004,68,177,109,78,60,55,683,73,49,57,84,75,94,78,76,101,73 0,3028,4212,6531,7980,8142,9038,9433,11494,12344,13108,13950,15432,22098,22520,23225,26459,28218