Repository 'dr_disco'
hg clone https://toolshed.g2.bx.psu.edu/repos/erasmus-medical-center/dr_disco

Changeset 3:5348cfd3ba5c (2017-08-10)
Previous changeset 2:173ca9768e22 (2017-04-28) Next changeset 4:3de621c44cf9 (2017-09-14)
Commit message:
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/dr-disco commit b6ccc6d8a9a9061207040561b494b7cf2ee3f885
modified:
dr-disco_bam-extract.xml
dr-disco_detect.xml
dr-disco_fix.xml
macros.xml
test-data/detect.txt
test-data/fix_test-01.fixed.bam
added:
dr-disco_classify.xml
dr-disco_integrate.xml
test-data/blacklist-junctions.hg38.txt
test-data/blacklist-regions.hg38.bed
test-data/classify_test_16.in.dbed
test-data/classify_test_16.test-01.out.dbed
test-data/classify_test_16.test-02.out.dbed
test-data/classify_test_16.test-03.out.dbed
test-data/classify_test_16.test-04.out.dbed
test-data/classify_test_16.test-05.out.dbed
test-data/classify_test_16.test-06.out.dbed
test-data/classify_test_16.test-07.out.dbed
test-data/classify_test_16.test-08.out.dbed
test-data/integrate_test_terg_s041.in.dbed
test-data/integrate_test_terg_s041.out.no-gtf.txt
test-data/integrate_test_terg_s041.out.txt
test-data/integrate_tmprss-erg.hg38.gtf
b
diff -r 173ca9768e22 -r 5348cfd3ba5c dr-disco_bam-extract.xml
--- a/dr-disco_bam-extract.xml Fri Apr 28 03:56:08 2017 -0400
+++ b/dr-disco_bam-extract.xml Thu Aug 10 05:38:27 2017 -0400
b
@@ -1,5 +1,6 @@
 <tool id="dr_disco_bam_extract" name="Dr. Disco (bam-extract)" version="@TOOL_VERSION@-g0">
     <description>Extracts reads from two targeted regions</description>
+
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -11,14 +12,16 @@
         dr-disco bam-extract
             '$region1'
             '$region2'
+            '$input_alignment'
             '$output'
-            '$input_alignment'
     ]]></command>
+
     <inputs>
         <param name="input_alignment" type="data" format="bam" label="Discordant alignment file of STAR)" />
         <param name="region1" type="text" value="chr21:39737183-40035618" label="Genomic region 1" />
         <param name="region2" type="text" value="chr21:42834478-42882085" label="Genomic region 2" />
     </inputs>
+
     <outputs>
         <data name="output" format="bam" label="${tool.name} on ${input_alignment.name}" />
     </outputs>
b
diff -r 173ca9768e22 -r 5348cfd3ba5c dr-disco_classify.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dr-disco_classify.xml Thu Aug 10 05:38:27 2017 -0400
[
@@ -0,0 +1,119 @@
+<tool id="dr_disco_classify" name="Dr. Disco (classify)" version="@TOOL_VERSION@-g0">
+    <description>Classifies detected break-points in RNA-seq based on corresponding statistics and blacklists</description>
+
+    <!-- Galaxy wrapper for `dr-disco classify`: reads the table produced by the detect tool and writes a classified table; both blacklists are optional. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+        dr-disco
+            classify
+
+                ## Renders as --only-valid when the checkbox is set and as nothing otherwise.
+                ${only_valid}
+
+                #if $blacklist_regions:
+                    --blacklist-regions '${blacklist_regions}'
+                #end if
+
+                #if $blacklist_junctions:
+                    --blacklist-junctions '${blacklist_junctions}'
+                #end if
+
+                '${dr_disco_detect_output}'
+                '${dr_disco_classify_output}'
+    ]]></command>
+
+    <inputs>
+        <param name="dr_disco_detect_output" type="data" format="tabular" label="Output of Dr. Disco Detect" />
+        <param name="only_valid" argument="--only-valid" truevalue="--only-valid" falsevalue="" type="boolean" />
+
+        <param name="blacklist_regions"   argument="--blacklist-regions"   type="data" format="bed"     optional="True" label="Blacklist Regions"   help="List of regions known to be false positives (https://github.com/yhoogstrate/dr-disco/tree/master/share/)" />
+        <param name="blacklist_junctions" argument="--blacklist-junctions" type="data" format="tabular" optional="True" label="Blacklist Junctions" help="List of junctions known to be false positives (https://github.com/yhoogstrate/dr-disco/tree/master/share/)" />
+    </inputs>
+
+    <outputs>
+        <data name="dr_disco_classify_output" format="tabular" label="${tool.name} on ${dr_disco_detect_output.name}" />
+    </outputs>
+
+    <tests>
+        <!-- Tests come in pairs over the same inputs: the first leaves only_valid off, the second switches it on. -->
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="false" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-01.out.dbed" ftype="tabular" />
+        </test>
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="true" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-02.out.dbed" ftype="tabular" />
+        </test>
+
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="false" />
+            <param name="blacklist_regions" value="blacklist-regions.hg38.bed" ftype="bed" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-03.out.dbed" ftype="tabular" />
+        </test>
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="true" />
+            <param name="blacklist_regions" value="blacklist-regions.hg38.bed" ftype="bed" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-04.out.dbed" ftype="tabular" />
+        </test>
+
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="false" />
+            <param name="blacklist_junctions" value="blacklist-junctions.hg38.txt" ftype="tabular" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-05.out.dbed" ftype="tabular" />
+        </test>
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="true" />
+            <param name="blacklist_junctions" value="blacklist-junctions.hg38.txt" ftype="tabular" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-06.out.dbed" ftype="tabular" />
+        </test>
+
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="false" />
+            <param name="blacklist_regions" value="blacklist-regions.hg38.bed" ftype="bed" />
+            <param name="blacklist_junctions" value="blacklist-junctions.hg38.txt" ftype="tabular" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-07.out.dbed" ftype="tabular" />
+        </test>
+        <test>
+            <param name="dr_disco_detect_output" value="classify_test_16.in.dbed" ftype="tabular" />
+            <param name="only_valid" value="true" />
+            <param name="blacklist_regions" value="blacklist-regions.hg38.bed" ftype="bed" />
+            <param name="blacklist_junctions" value="blacklist-junctions.hg38.txt" ftype="tabular" />
+
+            <output name="dr_disco_classify_output" file="classify_test_16.test-08.out.dbed" ftype="tabular" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+Classifies the break-points reported by Dr. Disco detect based on corresponding statistics and, optionally, blacklists of regions and junctions known to be false positives.
+
+**Attributions**
+
+This work is part of Youri Hoogstrate's PhD thesis.
+
+    ]]></help>
+
+     <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 173ca9768e22 -r 5348cfd3ba5c dr-disco_detect.xml
--- a/dr-disco_detect.xml Fri Apr 28 03:56:08 2017 -0400
+++ b/dr-disco_detect.xml Thu Aug 10 05:38:27 2017 -0400
b
@@ -1,4 +1,4 @@
-<tool id="dr_disco_intronic" name="Dr. Disco (intronic)" version="@TOOL_VERSION@-g0">
+<tool id="dr_disco_detect" name="Dr. Disco (detect)" version="@TOOL_VERSION@-g0">
     <description>Detects break-points in RNA-seq</description>
     <macros>
         <import>macros.xml</import>
@@ -11,8 +11,8 @@
         dr-disco
             detect
                 --min-e-score '${min_e_score}'
-                '${table_fusion_events}'
                 '${star_discordant_alignment_fixed}'
+                '${output_fusion_events}'
     ]]></command>
     <inputs>
         <param name="star_discordant_alignment_fixed" type="data" format="bam" label="Discordant alignment file of STAR (processed with dr-disco fix)" />
@@ -21,7 +21,7 @@
     </inputs>
     
     <outputs>
-        <data name="table_fusion_events" format="tabular" label="${tool.name} on ${$star_discordant_alignment_fixed.name}" />
+        <data name="output_fusion_events" format="tabular" label="${tool.name} on ${star_discordant_alignment_fixed.name}" />
     </outputs>
     
     <tests>
@@ -29,7 +29,7 @@
             <param name="star_discordant_alignment_fixed" value="detect.bam" />
             <param name="min_e_score" value="8" />
 
-            <output name="table_fusion_events" file="detect.txt" />
+            <output name="output_fusion_events" file="detect.txt" />
         </test>
     </tests>
     
b
diff -r 173ca9768e22 -r 5348cfd3ba5c dr-disco_fix.xml
--- a/dr-disco_fix.xml Fri Apr 28 03:56:08 2017 -0400
+++ b/dr-disco_fix.xml Thu Aug 10 05:38:27 2017 -0400
[
@@ -8,11 +8,15 @@
     <expand macro="version_command" />
 
     <command detect_errors="exit_code"><![CDATA[
-        dr-disco fix '${alignment_fixed}' '${star_discordant_alignment}'
+        dr-disco fix
+            '${star_discordant_alignment}'
+            '${alignment_fixed}'
     ]]></command>
+
     <inputs>
         <param name="star_discordant_alignment" type="data" format="bam" label="Discordant alignment file of STAR" />
     </inputs>
+
     <outputs>
         <data name="alignment_fixed" format="bam" label="${tool.name} on ${star_discordant_alignment}" />
     </outputs>
b
diff -r 173ca9768e22 -r 5348cfd3ba5c dr-disco_integrate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dr-disco_integrate.xml Thu Aug 10 05:38:27 2017 -0400
[
@@ -0,0 +1,60 @@
+<tool id="dr_disco_integrate" name="Dr. Disco (integrate)" version="@TOOL_VERSION@-g0">
+    <description>Merges corresponding genomic breaks and exon-to-exon junctions</description>
+    <!-- Galaxy wrapper for `dr-disco integrate`: takes the table written by the classify tool, optionally a GTF annotation, and writes a tabular result. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+        dr-disco
+            integrate
+                ## The GTF is optional; its path is quoted like the other arguments.
+                #if $gtf:
+                    --gtf '${gtf}'
+                #end if
+
+                '${dr_disco_classify_output}'
+                '${dr_disco_integrate_output}'
+    ]]></command>
+
+    <inputs>
+        <param name="dr_disco_classify_output" type="data" format="tabular" label="Output of Dr. Disco Classify" />
+        <param argument="--gtf" type="data" format="gtf" optional="True" label="GTF file (Optional: for predicting frame shifts)" help="This GTF file requires the following attributes annotated: gene_name, transcript_id and transcript_version"/>
+    </inputs>
+
+    <outputs>
+        <data name="dr_disco_integrate_output" format="tabular" label="${tool.name} on ${dr_disco_classify_output.name}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="dr_disco_classify_output" value="integrate_test_terg_s041.in.dbed" ftype="tabular" />
+            <param name="gtf" value="integrate_tmprss-erg.hg38.gtf" />
+
+            <output name="dr_disco_integrate_output" file="integrate_test_terg_s041.out.txt" />
+        </test>
+        <!-- No GTF supplied here: this case is compared against the no-gtf fixture. -->
+        <test>
+            <param name="dr_disco_classify_output" value="integrate_test_terg_s041.in.dbed" ftype="tabular" />
+
+            <output name="dr_disco_integrate_output" file="integrate_test_terg_s041.out.no-gtf.txt" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+Merges corresponding genomic breaks and exon-to-exon junctions from the output of Dr. Disco classify. A GTF file may optionally be supplied (for predicting frame shifts).
+
+**Attributions**
+
+This work is part of Youri Hoogstrate's PhD thesis.
+
+    ]]></help>
+
+     <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 173ca9768e22 -r 5348cfd3ba5c macros.xml
--- a/macros.xml Fri Apr 28 03:56:08 2017 -0400
+++ b/macros.xml Thu Aug 10 05:38:27 2017 -0400
b
@@ -1,12 +1,12 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="0.6.0">dr-disco</requirement>
+            <requirement type="package" version="0.9.0">dr-disco</requirement>
             <yield/>
         </requirements>
     </xml>
     
-    <token name="@TOOL_VERSION@">0.6.0</token>
+    <token name="@TOOL_VERSION@">0.9.0</token>
 
     <xml name="version_command">
         <version_command>dr-disco --version | head -n 1</version_command>
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/blacklist-junctions.hg38.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blacklist-junctions.hg38.txt Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,992 @@\n+chr-A\tpos-A\tdirection-A\tchr-B\tpos-B\tdirection-B\tid\tcomment\n+chr10\t10919999\t10920000\t-\tchr10\t11165485\t11165486\t+\t\ts11324,s11386,s13304,s13768,s14437,s14455,s15679,s18020,s18730,s18751,s18758,s5960,s5961,s6408,s7310,s7431,s8612,s8615,s8828,s8971,s9067,s9578\n+chr10\t126970701\t126970702\t+\tchr10\t127127764\t127127765\t-\t\ts4845,s6411,s6729,s6730,s6731,s7247,s10010,s11325,s11326,s11338,s11354,s11355,s11364,s11368,s11370,s11374,s11375,s11377,s11378,s11385,s11391,s11393,s11394,s11402,s11743,s11745,s11750,s11754,s11818,s13297,s13298r1,s13298r2,s13299r1,s13308,s13311,s13358,s13418,s13419,s13420,s13422,s13424,s13623,s13749,s13752,s13754,s13761,s13769,s14432,s14441,s14447,s14455,s14460,s14461,s15679,s15680,s15681,s18017,s18024,s18031,s18037,s18255,s18734,s18754,s20343,s20345,s20401,s20403,s20404,s4841,s4847,s4953,s4955,s4970,s5933,s5934,s5938,s5946,s5948,s5954,s5960,s5961,s6733,s7220,s7239,s7316,s7321,s7431,s8611,s8614,s8615,s8620,s8652,s8660,s8832,s8984,s9067,s9544,s9568,s9578,s9579,s9592,s9596,s9599,s9606,s9754,s9755,s9757,s9758,s9761,s9772,s9844\n+chr10\t126970701\t126970702\t+\tchr10\t127257429\t127257430\t-\t\tprtcl/014,024,032\n+chr10\t17548348\t17548349\t+\tchr12\t85726659\t85726660\t-\t\ts11744,s6729,s6730,s10011,s11324,s11355,s11383,s11386,s13297,s13298r1,s13298r2,s13306,s13428,s13626,s13749,s14460,s15681,s18017,s18020,s18031,s18037,s18151,s18758,s18759,s4970,s5942,s5947,s6727r1,s8652,s8965,s8984,s9004,s9544,s9568,s9585,s9761,s9775\n+chr10\t19790186\t19790187\t-\tchr10\t20001774\t20001775\t+\t\ts6046,s11361,s11378,s11402,s11817,s13312,s13629,s13771,s14441,s18749,s5946,s5954,s5964,s9541,s9755,s9844\n+chr10\t27486892\t27486967\t-\tchr12\t11038658\t11038787\t-\t\ts9759,s13307,s6408,s18022,s9064,s8982,s11377,s18037,s13629,s7428,s13424,s6732,s10014,s11326,s7217,s6729,s5954,s7250,s9757,s11367,s11402,s11375,s9597,s14461,s13428,s9754,s7240,s13421,s5947,s9589,s11355,s13299r2,s11384,s8830,s13299r1,s11750,s11745,s9568\n+chr10\t34516993\
t34517160\t-\tchr10\t34814875\t34814896\t+\t\ts13749,s13770,s9757,s8828,s4845,s6411,s6731,s7247,s7248,s10011,s10014,s11364,s11377,s11384,s13296,s13428,s14437,s18020,s18022,s18031,s18037,s5942,s5945,s5948,s6413,s6727r1,s7426,s8652,s8965,s8977,s8984,s9064,s9575,s9585,s9592,s9772\n+chr10\t38021018\t38021427\t-\tchr10\t42628376\t42628399\t-\t\ts11391,s8978,s13312,s11359,s13426,s9582,s11326,s8981,s11760,s5961,s10010,s9757,s8652,s8971,s13307,s11743,s9761,s18017,s11368,s14461,s9575,s11383,s11386,s6046,s6411,s10011,s11324,s11325,s11338,s11354,s11355,s11360,s11364,s11365,s11367,s11375,s11378,s11380,s11384,s11385,s11397,s11402,s11745,s11750,s11751,s11762,s13296,s13297,s13298r1,s13298r2,s13299r1,s13310,s13311,s13422,s13622,s14447,s14455,s14460,s15680,s15681,s18024,s18031,s18037,s18748,s18749,s18759,s4953,s5938,s6727r1,s7314,s7321,s8660,s8977,s8980,s9004,s9541,s9568,s9578,s9596\n+chr10\t38021484\t38021496\t+\tchr10\t42628291\t42628315\t+\t\ts9597,s8830,s11402,s13312,s9758,s11760,s11342,s11380,s13306,s13311,s18024,s18037,s6720,s7318,s8612,s9757,s6418,s20405,s11324\n+chr10\t50433475\t50433476\t+\tchr10\t50590247\t50590248\t-\tSGMS1 large circular (1)\ts6046,s6729,s6730,s7247,s7250,s11338,s11341,s11355,s11393,s11762,s13304,s13308,s13416,s13420,s13633,s13749,s13768,s13769,s13771,s14436,s14439,s18151,s18751,s20400,s4841,s4954,s5948,s6408,s6409,s6413,s7240,s8611,s8614,s8615,s8616,s8622,s8828,s8965,s9064,s9067,s9575,s9584,s9596,s9597,s9606,s9608,s9755,s9757,s9758,s9761\n+chr10\t50460672\t50460673\t+\tchr10\t50590247\t50590248\t-\tSGMS1 large circular (2)\ts6731,s11342,s11383,s13298r1,s13426,s14432,s18730,s5954,s5960,s7431,s9578\n+chr10\t52193235\t52193236\t+\tchr10\t52350007\t52350008\t-\tSGMS1 large circular 
(3)\tprtcl/043\n+chr10\t60279639\t60279640\t-\tchr10\t60733262\t60733263\t+\t\tprtcl/007,011,015,019,025,026,039,125,147\n+chr10\t75438413\t75438495\t-\tchr10\t76036007\t76036008\t+\t\ts18730,s11750,s13358,s8616,s14441,s18170,s6411,s6729,s6730,s6731,s7247,s7250,s11324,s11326,s11337,s11359,s11364,s11366,s11377,s11386,s11393,s11402,s11750,s11751,s11754,s11764,s13298r1,s13298r2,s13302,s13311,s13358,s13416,s13418,s13426,s13427,s1'..b'chr19\t52384767\t52384868\t-\tprtcl/011,003,024\n+chr2\t73898346\t73898368\t+\tchr16\t2183080\t2183101\t-\t\tprtcl/039,025,001\n+chr2\t73898346\t73898368\t+\tchr21\t29564441\t29564462\t-\t\tprtcl/149,025,001\n+chr2\t74740960\t74740971\t+\tchr17\t38919028\t38919039\t+\t\tprtcl/007,008,039,141,143\n+chr8\t47085260\t47085281\t+\tchr14\t19002353\t19002374\t-\t\tprtcl/125,151,141,143\n+chr8\t47085260\t47085281\t+\tchr14\t19399402\t19399423\t+\t\tprtcl/125,151,141,143\n+chr8\t47085260\t47085281\t+\tchr22\t15724969\t15724990\t-\t\tprtcl/125,151,141,143\n+chr6\t113401866\t113401887\t-\tchr14\t24965330\t24965351\t+\t\tprtcl/005,140,005,007\n+chr3\t131968997\t131969018\t-\tchr3\t131978033\t131978054\t-\t\tprtcl/001,015,032,148\n+chr5\t117925121\t117925133\t-\tchr5\t118265352\t118265364\t+\tLINC02147-LINC02147\n+chr5\t10571014\t10571035\t+\tchr7\t27150037\t27150238\t+\t\tprtcl/013,005,015,032,148\n+chr5\t168883285\t168883339\t-\tchr5\t169193476\t169193525\t+\tSLIT3-SLIT3\n+chr3\t188883471\t188883492\t-\tchr20\t56678190\t56678211\t+\t\tprtcl/033,001,004,032\n+chr6\t38345002\t38345095\t-\tchr6\t38577598\t38577650\t+\tBTBD9-BTBD9\n+chr7\t82159837\t82159858\t-\tchr10\t59143070\t59143271\t-\t\tprtcl/147,005,0148\n+chr5\t55444119\t55444320\t+\tchr12\t49199677\t49199698\t+\t\tprtcl/149,148,005,015,032\n+chr6\t73518630\t73518831\t+\tchr15\t52937807\t52937828\t-\t\tprtcl/025,001,007,015,031\n+chr9\t131414392\t131414433\t+\tchr14\t35038495\t35038516\t+\t\tprtcl/031,032,151,026\n+chr2\t83877611\t83877652\t-\tchr7\t64838341\t64838953\t+\t\tprtcl/031,003,038,013,0
15\n+chr6\t113861005\t113861026\t+\tchr21\t26235606\t26235627\t+\t\tprtcl/038,013,015\n+chr4\t167000375\t167000466\t-\tchr4\t167233982\t167234150\t+\tSPOCK3-SPOCK3\n+chr3\t62779341\t62779362\t-\tchr12\t62965791\t62965812\t-\t\tprtcl/024,038,013,015\n+chr5\t53886450\t53886716\t-\tchr5\t54113200\t54113350\t+\tARL15-ARL15\n+chr8\t117837125\t117837204\t-\tchr8\t118110082\t118110224\t+\tEXT1-EXT1\n+chr8\t101493484\t101493505\t+\tchr12\t111055638\t111055659\t+\t\tprtcl/003,038,013,015,001\n+chr8\t119862583\t119862984\t+\tchr14\t49586679\t49587080\t-\t\tprtcl/007,015,016,019\n+chrX\t124963817\t124963818\t-\tchrX\t125185642\t125185643\t+\tTENM1 new exon\n+chr7\t111413356\t111413845\t-\tchr12\t107809426\t107809757\t+\tlooks like real fusion\tprtcl/149,022,035\n+chr6\t144200376\t144200397\t-\tchr13\t45337182\t45337383\t+\t\tprtcl/019,035,013,014\n+chr6\t28740259\t32621911\t+\tchr6_GL000255v2_alt\t0\t3881652\t-\tvariation in human population\n+chr6\t28740259\t32621911\t-\tchr6_GL000255v2_alt\t0\t3881652\t+\tvariation in human 
population\n+chr6\t37819968\t37820019\t-\tchr6\t38061590\t38061750\t+\tZFAND3-ZFAND3\n+chr6\t54775241\t54775262\t+\tchr12\t25205680\t25205781\t+\t\tprtcl/149,013,014\n+chr4\t45724000\t45724021\t+\tchr12\t53040262\t53040463\t-\t\tprtcl/001,035,014\n+chr4\t2031624\t2031645\t+\tchr18\t58336577\t58336618\t+\t\tprtcl/148,014,026,027,031,032\n+chr3\t116086345\t116086559\t-\tchr3\t116444875\t116445010\t+\tLSAMP-LSAMP\n+chr8\t35235475\t35235890\t-\tchr8\t35549289\t35549500\t+\tUNC5D-UNC5D\n+chr5\t116562731\t116562752\t-\tchr5\t150446919\t150446940\t-\t\tprtcl/025,003,006,007\n+chr4\t77421365\t77421386\t+\tchr15\t36992057\t36992078\t+\t\tprtcl/007,015,020,026,027\n+chr3\t61562150\t61562375\t-\tchr3\t61989622\t61989785\t+\tPTPRG-PTPRG\n+chr3\t140175972\t140176076\t-\tchr3\t140403626\t140403804\t+\tCLSTN2-CLSTN2\n+chr8\t59970452\t59970473\t+\tchr19\t56494324\t56494365\t-\t\tprtcl/145,020,001,005,006\n+chr3\t184117340\t184117441\t+\tchr14\t102082992\t102083093\t+\t\tprtcl/125,006,013,038,039\n+chr3\t170524077\t170524098\t+\tchr4\t185504464\t185504485\t+\t\tprtcl/014,005,013,038,039,040\n+chr8\t89757560\t89757581\t-\tchr17\t7715548\t7715568\t+\t\tprtcl/014,013,038,039\n+chr9\t33289240\t33289261\t+\tchr19\t13140383\t13140404\t-\t\tprtcl/035,005,006,013,038,039\n+chrX\t132628100\t132629095\t-\tchrX\t132956805\t132957225\t+\tHS6ST2-HS6ST2\n+chr3\t58115791\t58115792\t-\tchr3\t58116742\t58116743\t-\t\tprtcl/015,005,006,013\n+chrX\t138857528\t138857682\t-\tchrX\t139204056\t139204250\t+\tFGF13-FGF13\n+chrX\t68192918\t68192919\t+\tchrX\t68299096\t68299097\t-\tPHN1 large circular\n+chr3\t114693550\t114693578\t-\tchr3\t114900301\t114900324\t+\tZBTB20-ZBTB20\n+chr3\t132027601\t132027802\t-\tchr10\t11434665\t11434686\t+\t\tprtcl/147,022,038,013\n+chr3\t132390413\t132390532\t-\tchr14\t30617947\t30618278\t+\t\tprtcl/147005038\n+chr3\t132357281\t132357302\t-\tchr16\t81605362\t81605403\t-\t\tprtcl/141,147,038,005\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/blacklist-regions.hg38.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blacklist-regions.hg38.bed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,2090 @@\n+chr\tpos\tdirection\tid\tcomment\n+chrM\t0\t16570\t+\tchrM\n+chrM\t0\t16570\t-\tchrM\n+chr22\t22687199\t22687280\t-\t\ts13304,s8828,s13419,s48452,s8612,s11338,s15680,s4836,s6408,s6411,s9064,s9608,s11341,s5930,s5950,s9592,s9777,s8984,s9584\n+chr16\t10532572\t10532680\t-\t\ts6046,s11756,s11326,s13308,s6046,s11375,s13304,s11385,s7249,s11354,s11354,s11374,s11378,s13302,s13310,s7250,s11380,s11359,s10010,s11355,s11365,s11368,s11369,s13307,s11359,s11326\n+chr4\t81594218\t81594232\t+\t\ts3297,s8979,s6721,s13298r2,s13418,s13761,s7239,s13302,s6408,s11385,s7316,s11743,s5946,s9770,s11366,s13307,s13625,s14455,s15681,s11386,s13626,s7310,s8977,s8980,s14436,s6727r1,s9066,s20343,s9775,s11745,s11750,s13754,s10011,s6719,s13761,s7318,s8616,s7318,s7454,s7239,s8616,s11391,s11402,s7220,s18749,s6408,s11397,s5946,s11385,s4954,s8978,s13755,s11754,s8980,s9775,s11764,s13302,s14442,s20401,s8981,s9544,s9585,s11368,s7239,s20405,s8615,s11324,s7454,s7454,s11751,s7318,s7454,s15679,s9584,s11326,s13428,s11368,s13418,s18037,s10011,s11359,s13622,s6016,s11367,s13633,s15680,s13299r2,s13766,s14447,s15679,s20345,s4970,s13298r2,s14442,s7310,s8620,s11743,s13749,s8828,s14455,s20344,s13770,s8620,s9464,s9596,s10014,s11340,s13622,s13749,s15680,s15681,s18037,s5942,s8830,s13299r2,s6727r1,s18031,s13754,s9464,s13761,s7239,s7454,s9464,s7454,s11326,s5934,s8620,s11745,s20344,s11359,s13632,s15681,s7427,s7427r2,s8615,s13296,s18017,s4951,s5938,s8832,s11756,s13766,s9772,s6733,s8979,s13632,s7250,s9067,s20400,s11762,s9544,s13418,s14441,s14432,s5945,s7240,s6730,s13625,s14455,s20400,s6722,s20405,s9067,s6731,s18759,s13629,s6409,s11393,s11743,s13304,s4951,s5938,s5950,s15680,s9758,s20343,s7454,s15679,s14441,s9755,s9065,s11393,s11750,s13770,s10014,s13298r1,s20342,s74092,s11355,s9464,s13761,s18259,s20344,s9575,s7250,s5934,s6046,s11354,s13311,s11756,s9544,s9596,s9772,s11325,s8828,s9064,s11361,s13298r1,s13298r2,s13428,s14441,s13299r1,s20342,s4970,s7427,s7427r2,s8984,s13311,s13358,s9596,s9065,s18017,s9065,s7318,s
13296,s7454,s7239,s11340,s11756,s14441,s7314,s11354,s11325,s8615,s5950,s7454,s13755,s6409,s8616,s9473,s14436,s11366,s9578,s8616,s18259,s14441,s7454,s13633,s8982,s13761,s20345,s11754,s13754,s5950,s11361,s14441,s13302,s5938,s13632,s15681,s14441,s13761,s9473,s7318,s7314,s15681,s11326,s7318,s7428,s11326,s9004,s6411,s11324,s7239,s9596,s8977,s14441,s14441,s8652,s8616,s4841,s13761\n+chr4\t81594284\t81594304\t-\t\ts11750,s11326,s11324,s9578,s6727r1,s13761,s18758,s9066,s7239,s7239,s11326,s11366,s11326,s13622,s13761,s11326,s11354,s11367,s13298r1,s13766,s13770,s20404,s4958,s5946,s9066,s11324,s13633,s13311,s4954,s10011,s11324,s10014,s11326,s11745,s11751,s11326,s20342,s13296,s15679,s18259,s6046,s4970,s13622,s18020,s10014,s20345,s11326,s6721,s20344,s13761,s18748,s9473,s20345,s7239,s9473,s6046,s4970,s11326,s11324,s9473,s11326,s11326,s13632,s20401,s14447,s20345,s8616,s13422,s13623,s11326,s11326,s11326,s11326,s7314,s11391,s7239,s15680,s9473,s11324,s20345,s20345,s13297,s11325,s13358,s11325,s4954,s6720,s8652,s13428,s20345,s18017,s11324,s11326,s20345,s13296,s15680,s13766,s9065,s7314,s8615,s20345,s20345,s7239,s10014,s7239,s7318,s7318,s13761,s11324,s20342,s8652,s13418,s11326,s14441,s11324,s7427,s7427r2,s7239,s7239,s11368,s18037,s6719,s11326,s7454,s20345,s8828,s9596,s20345,s13299r2,s18759,s11326,s18259,s20343,s7239,s18017,s13299r1,s13633,s13749,s14441,s11751,s20345,s4970,s9464,s7318,s20342,s7314,s8615,s11367,s13749,s13632,s8982,s8984,s11324,s13622,s20345,s13626,s14442,s6046,s11326,s13296,s13358,s9063,s13302,s8620,s13311,s14441,s9578,s10014,s13761,s8620,s11324,s11324,s11326\n+chr1\t1883668\t1883765\t+\trRNA_region_0\tLSU-rRNA_Hsa\n+chr1\t7816373\t7816413\t+\trRNA_region_1\t5S\n+chr1\t9437707\t9437778\t-\trRNA_region_2\t5S\n+chr1\t13596638\t13596677\t-\trRNA_region_3\t5S\n+chr1\t13623210\t13623284\t-\trRNA_region_4\t5S\n+chr1\t15650369\t15650411\t-\trRNA_region_5\t5S\n+chr1\t25157017\t25157130\t-\trRNA_region_6\tLSU-rRNA_Hsa\n+chr1\t27916098\t27916143\t-\trRNA_region_7\tLSU-rRNA_Hsa\n+chr1\
t30329251\t30329330\t-\trRNA_region_8\tLSU-rRNA_Hsa\n+chr1\t31110694\t31110745\t+\trRNA_region_9\t5S'..b'\t141544281\t141544486\t+\tlow entropy\n+chr2\t141691414\t141691495\t-\tlow entropy\n+chr2\t141691414\t141691495\t+\tlow entropy\n+chr2\t141910126\t141910216\t-\tpoly A\n+chr2\t141910126\t141910216\t+\tpoly A\n+chr2\t169299023\t169299225\t-\tlow entropy\n+chr2\t169299023\t169299225\t+\tlow entropy\n+chr2\t201165216\t201165304\t-\tlow entropy\n+chr2\t201165216\t201165304\t+\tlow entropy\n+chr2\t206071238\t206071339\t-\tpoly-T\n+chr2\t206071238\t206071339\t+\tpoly-T\n+chr22\t32814101\t32814398\t-\tlow entropy\n+chr22\t32814101\t32814398\t+\tlow entropy\n+chr2\t234263709\t234263852\t-\tlow entropy\n+chr2\t234263709\t234263852\t+\tlow entropy\n+chr22\t37255495\t37256005\t-\tlow entropy\n+chr22\t37255495\t37256005\t+\tlow entropy\n+chr22_KI270733v1_random\t127986\t128025\t-\tpoly-GA\t\n+chr22_KI270733v1_random\t127986\t128025\t+\tpoly-GA\t\n+chr2\t32866665\t32867138\t-\tlow entropy\n+chr2\t32866665\t32867138\t+\tlow entropy\n+chr2\t32916159\t32916658\t-\tpoly-G\n+chr2\t32916159\t32916658\t+\tpoly-G\n+chr2\t88856757\t89219622\t-\tIGK\n+chr2\t88856757\t89219622\t+\tIGK\n+chr3\t130634202\t130634266\t-\tpoly-A/T\tprtcl\n+chr3\t130634202\t130634266\t+\tpoly-A/T\tprtcl\n+chr3\t132325578\t132325631\t-\tlow entropy\n+chr3\t132325578\t132325631\t+\tlow entropy\n+chr3\t132358019\t132358110\t-\tlow entropy\n+chr3\t132358019\t132358110\t+\tlow entropy\n+chr3\t171562436\t171562858\t-\tlow entropy\n+chr3\t171562436\t171562858\t+\tlow entropy\n+chr3\t175232179\t175232290\t-\tlow entropy\n+chr3\t175232179\t175232290\t+\tlow entropy\n+chr3\t175667052\t175667279\t-\tlow entropy\n+chr3\t175667052\t175667279\t+\tlow entropy\n+chr3\t181231822\t181231850\t-\tSOX-OT/SNOR./RNA duplicates\n+chr3\t191699475\t191699857\t-\tlow entropy\n+chr3\t191699475\t191699857\t+\tlow entropy\n+chr3\t39197811\t39198015\t-\tlow entropy\n+chr3\t39197811\t39198015\t+\tlow 
entropy\n+chr3\t69714445\t69714481\t-\tpoly-AC\t\n+chr3\t69714445\t69714481\t+\tpoly-AC\t\n+chr4\t140069317\t140069737\t-\tlow entropy\n+chr4\t140069317\t140069737\t+\tlow entropy\n+chr4\t146133497\t146133594\t-\tlow entropy\n+chr4\t146133497\t146133594\t+\tlow entropy\n+chr4\t21528664\t21528881\t-\tlow entropy\n+chr4\t21528664\t21528881\t+\tlow entropy\n+chr4\t37605722\t37606255\t-\tlow entropy\n+chr4\t37605722\t37606255\t+\tlow entropy\n+chr5\t161839495\t161840043\t-\tlow entropy\n+chr5\t161839495\t161840043\t+\tlow entropy\n+chr5\t50621634\t50622070\t-\tlow entropy\n+chr5\t50621634\t50622070\t+\tlow entropy\n+chr5\t67532980\t67533076\t-\tlow entropy\n+chr5\t67532980\t67533076\t+\tlow entropy\n+chr6\t149042949\t149043118\t-\tlow entropy\n+chr6\t149042949\t149043118\t+\tlow entropy\n+chr6\t46807500\t46807869\t-\tlow entropy\n+chr6\t46807500\t46807869\t+\tlow entropy\n+chr6\t57833141\t57833279\t-\tlow entropy\n+chr6\t57833141\t57833279\t+\tlow entropy\n+chr6\t84233090\t84233140\t-\tpoly-CT\t\n+chr6\t84233090\t84233140\t+\tpoly-CT\t\n+chr6\t90037964\t90037990\t-\tlow entropy\n+chr6\t90037964\t90037990\t+\tlow entropy\n+chr6\t90377599\t90377875\t-\tlow entropy\n+chr6\t90377599\t90377875\t+\tlow entropy\n+chr6\t9795135\t9795402\t-\tlow entropy\n+chr6\t9795135\t9795402\t+\tlow entropy\n+chr7\t111539104\t111539279\t-\tlow entropy\n+chr7\t111539104\t111539279\t+\tlow entropy\n+chr7\t126477532\t126477715\t-\tlow entropy\n+chr7\t126477532\t126477715\t+\tlow entropy\n+chr7\t131828658\t131829106\t-\tlow entropy\n+chr7\t131828658\t131829106\t+\tlow entropy\n+chr7\t31599553\t31599730\t-\tlow entropy\n+chr7\t31599553\t31599730\t+\tlow entropy\n+chr7\t88419744\t88420045\t-\tlow entropy\n+chr7\t88419744\t88420045\t+\tlow entropy\n+chr7\t905292\t906668\t-\tlow entropy\n+chr7\t905292\t906668\t+\tlow entropy\n+chr8\t115498354\t115498461\t-\tlow entropy\n+chr8\t115498354\t115498461\t+\tlow entropy\n+chr8\t30913946\t30914055\t-\tlow entropy\n+chr8\t30913946\t30914055\t+\tlow 
entropy\n+chrX\t119711699\t119711899\t-\tlow entropy\n+chrX\t119711699\t119711899\t+\tlow entropy\n+chrX\t119801608\t119802269\t-\tlow entropy\n+chrX\t119801608\t119802269\t+\tlow entropy\n+chrX\t17050103\t17050214\t-\tlow entropy\n+chrX\t17050103\t17050214\t+\tlow entropy\n+chrX\t40110630\t40110744\t-\tpoly-T\t\n+chrX\t40110630\t40110744\t+\tpoly-T\t\n+chrX\t64287970\t64289700\t-\tpoly-A\t\n+chrX\t64287970\t64289700\t+\tpoly-A\t\n+chrX\t71094169\t71094398\t-\tlow entropy\n+chrX\t71094169\t71094398\t+\tlow entropy\n+chrX\t79200325\t79200485\t-\tpoly-A\t\n+chrX\t79200325\t79200485\t+\tpoly-A\t\n+chrX\t82680111\t82680434\t-\tlow entropy\n+chrX\t82680111\t82680434\t+\tlow entropy\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.in.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.in.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tunclassified\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tunclassified\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_
paired_1:12,spanning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tunclassified\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tunclassified\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tunclassified\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.0000\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/3987
5881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t39851814\t+\t0\t6\tchr21\t39852667\t-\t6\t0\t853\tunclassified\tcircular'..b'ing_paired_1:1,spanning_paired_2:1)\n+chr21\t39871034\t+\t0\t2\tchr21\t39877568\t-\t2\t0\t6534\tunclassified\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t66.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39871034/39871035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tunclassified\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tunclassified\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tunclassified\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tunclassified\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:4
2839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tunclassified\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tunclassified\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tunclassified\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tunclassified\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tunclassified\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tunclassified\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0
.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/42842587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tunclassified\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tunclassified\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-01.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-01.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-02.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-02.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
@@ -0,0 +1,3 @@
+chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge data-structure
+chr21 39817544 - 140 0 chr21 42880007 + 0 140 3062463 valid linear exonic 253 170 85 6 5 2 4 1 0 0.8111 0.8497 0.0000 0.9535 0.7247 63.7843 0.8440 0.0000 0.0559 0.3651 17.3030 0.8378 0.0000 0.0289 0.0706 0.3360 1.2000 chr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)
+chr21 39877811 - 98 0 chr21 42873374 + 0 98 2995563 valid linear intronic 91 42 21 28 1 1 1 0 0 0.9528 0.9528 0.0000 0.8200 2.9207 17.3314 0.9125 0.0000 0.1910 1.5880 68.1747 0.8769 0.0000 0.1269 1.3333 0.2308 2.0000 chr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,spanning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-03.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-03.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-04.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-04.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-05.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-05.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-06.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-06.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-07.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-07.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/classify_test_16.test-08.out.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/classify_test_16.test-08.out.dbed Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,37 @@\n+chr-A\tpos-A\tdirection-A\tpos-A-acceptor\tpos-A-donor\tchr-B\tpos-B\tdirection-B\tpos-A-acceptor\tpos-A-donor\tgenomic-distance\tfilter-status\tcircRNA\tintronic/exonic\tscore\tsoft+hardclips\tn-split-reads\tn-discordant-reads\tn-edges\tn-nodes-A\tn-nodes-B\tn-splice-junc-A\tn-splice-junc-B\tentropy-bp-edge\tentropy-all-edges\tbp-pos-stddev\tentropy-disco-bps\tlr-A-slope\tlr-A-intercept\tlr-A-rvalue\tlr-A-pvalue\tlr-A-stderr\tlr-B-slope\tlr-B-intercept\tlr-B-rvalue\tlr-B-pvalue\tlr-B-stderr\tdisco/split\tclips/score\tnodes/edge\tdata-structure\n+chr21\t39817544\t-\t140\t0\tchr21\t42880007\t+\t0\t140\t3062463\tvalid\tlinear\texonic\t253\t170\t85\t6\t5\t2\t4\t1\t0\t0.8111\t0.8497\t0.0000\t0.9535\t0.7247\t63.7843\t0.8440\t0.0000\t0.0559\t0.3651\t17.3030\t0.8378\t0.0000\t0.0289\t0.0706\t0.3360\t1.2000\tchr21:39817544/39817545(-)->chr21:42880007/42880008(+):(spanning_paired_1:65,spanning_paired_2:65,spanning_singleton_1:2,spanning_singleton_1_r:3,spanning_singleton_2:2,spanning_singleton_2_r:3)&chr21:39817544/39817545(-)->chr21:42879876/42879877(+):(discordant_mates:10,spanning_paired_1:6,spanning_paired_1_t:1,spanning_paired_2:6,spanning_paired_2_t:1,spanning_singleton_1_r:2,spanning_singleton_2_r:2)&chr21:39817544/39817545(-)->chr21:42878371/42878372(+):(discordant_mates:2,spanning_paired_1:1,spanning_paired_1_t:1,spanning_paired_2:1,spanning_paired_2_t:1,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:39846044/39846045(-)->chr21:42879876/42879877(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:39817544/39817545(-)->chr21:42876293/42876294(+):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t39877811\t-\t98\t0\tchr21\t42873374\t+\t0\t98\t2995563\tvalid\tlinear\tintronic\t91\t42\t21\t28\t1\t1\t1\t0\t0\t0.9528\t0.9528\t0.0000\t0.8200\t2.9207\t17.3314\t0.9125\t0.0000\t0.1910\t1.5880\t68.1747\t0.8769\t0.0000\t0.1269\t1.3333\t0.2308\t2.0000\tchr21:39877811/39877812(-)->chr21:42873374/42873375(+):(discordant_mates:56,spanning_paired_1:12,sp
anning_paired_1_t:9,spanning_paired_2:12,spanning_paired_2_t:9)\n+chr21\t42861433\t+\t6\t18\tchr21\t42866505\t-\t18\t6\t5072\tn_discordant_reads=2<3\tcircular\texonic\t41\t30\t14\t2\t7\t5\t6\t1\t2\t1.0000\t0.9375\t18.7474\t0.7500\t26.9000\t3.0000\t0.9425\t0.0164\t5.5097\t20.1000\t42.8000\t0.8833\t0.0470\t6.1587\t0.1429\t0.3659\t1.5714\tchr21:42861433/42861434(+)->chr21:42866505/42866506(-):(discordant_mates:2,spanning_paired_1:2,spanning_paired_1_t:1,spanning_paired_2:2,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42861433/42861434(+)->chr21:42870116/42870117(-):(spanning_paired_1:3,spanning_paired_2:3,spanning_singleton_1_r:1,spanning_singleton_2_r:1)&chr21:42860568/42860569(+)->chr21:42867210/42867211(-):(spanning_paired_1_t:2,spanning_paired_2_t:2)&chr21:42860320/42860321(+)->chr21:42861520/42861521(-):(spanning_paired_1:1,spanning_paired_2:1,spanning_singleton_1:1,spanning_singleton_2:1)&chr21:42859986/42859987(+)->chr21:42863878/42863879(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:42860320/42860321(+)->chr21:42866505/42866506(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:42866446/42866447(+)->chr21:42870110/42870111(-):(discordant_mates:2)\n+chr21\t39836011\t+\t0\t8\tchr21\t39836545\t-\t8\t0\t534\tn_support=6<8,bp_pos_stddev=25.3065>15.0,log_ratio_slope=9.62>1.8,log_ratio_rvalue=8.96>0.4\tcircular\tintronic\t17\t12\t6\t0\t2\t2\t2\t0\t0\t0.4056\t0.6934\t25.3065\t0.0000\t1.5000\t29.5000\t0.7746\t0.2254\t0.8660\t0.0000\t93.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3529\t2.0000\tchr21:39836011/39836012(+)->chr21:39836545/39836546(-):(spanning_paired_1_t:4,spanning_paired_2_t:4)&chr21:39835707/39835708(+)->chr21:39839850/39839851(-):(spanning_paired_1_t:1,spanning_paired_2_t:1,spanning_singleton_1:1,spanning_singleton_2:1)\n+chr21\t39875880\t+\t0\t6\tchr21\t39876476\t-\t6\t0\t596\tentropy=0.0<0.6839,n_support=4<8\tcircular\tintronic\t12\t8\t4\t0\t2\t2\t2\t0\t0\t0.0000\t0.4056\t0.0000\t0.0000\t0.0000\t68.0000\t0.0000\t1.000
0\t0.0000\t0.0000\t58.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:39875880/39875881(+)->chr21:39876476/39876477(-):(spanning_paired_1:3,spanning_paired_2:3)&chr21:39874280/39874281(+)->chr21:39876277/39876278(-):(spanning_paired_1_'..b'1035(+)->chr21:39877568/39877569(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42836581\t+\t0\t2\tchr21\t42839672\t-\t2\t0\t3091\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t19.0000\t0.0000\t1.0000\t0.0000\t0.0000\t107.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42836581/42836582(+)->chr21:42839672/42839673(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)\n+chr21\t42853701\t+\t0\t2\tchr21\t42860029\t-\t2\t0\t6328\tentropy=0.0<0.6828,n_support=1<7\tcircular\tintronic\t3\t2\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t22.0000\t0.0000\t1.0000\t0.0000\t0.0000\t31.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.3333\t2.0000\tchr21:42853701/42853702(+)->chr21:42860029/42860030(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42866483\t-\t0\t2\tchr21\t42870116\t-\t2\t0\t3633\tentropy=0.0<0.6828,n_support=1<7\tlinear\tintronic\t3\t9\t1\t0\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t38.0000\t0.0000\t1.0000\t0.0000\t0.0000\t88.0000\t0.0000\t1.0000\t0.0000\t0.0000\t1.5000\t2.0000\tchr21:42866483/42866484(-)->chr21:42870116/42870117(-):(spanning_paired_1:1,spanning_paired_2:1)\n+chr21\t42839824\t+\t0\t4\tchr21\t42840323\t-\t4\t0\t499\tn_support=2<7,log_ratio_slope=12.99>1.8,log_ratio_rvalue=9.21>0.4\tcircular\tintronic\t2\t2\t0\t2\t1\t1\t1\t0\t0\t1.0000\t1.0000\t0.0000\t0.4056\t44.0000\t82.0000\t1.0000\t0.0000\t0.0000\t0.0000\t120.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.5000\t2.0000\tchr21:42839824/42839825(+)->chr21:42840323/42840324(-):(discordant_mates:4)\n+chr21\t39770910\t+\t2\t0\tchr21\t39771416\t-\t0\t2\t506\tentropy=0.0<0.6827,n_support=1<7\tcircular\tintronic\t2\t2\t1\t0\t1\t1\t1\t0\t0\t0.000
0\t0.0000\t0.0000\t0.0000\t0.0000\t75.0000\t0.0000\t1.0000\t0.0000\t0.0000\t48.0000\t0.0000\t1.0000\t0.0000\t0.0000\t0.5000\t2.0000\tchr21:39770910/39770911(+)->chr21:39771416/39771417(-):(spanning_singleton_1_r:1,spanning_singleton_2_r:1)\n+chr21\t39860702\t-\t4\t0\tchr21\t42843890\t+\t0\t4\t2983188\tentropy=0.0<0.6827,n_support=2<7\tlinear\tintronic\t2\t0\t0\t2\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t40.0000\t0.0000\t2.0000\tchr21:39860702/39860703(-)->chr21:42843890/42843891(+):(discordant_mates:4)\n+chr21\t39781087\t+\t0\t2\tchr21\t39782101\t-\t2\t0\t1014\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39781087/39781088(+)->chr21:39782101/39782102(-):(discordant_mates:2)\n+chr21\t39846131\t+\t0\t2\tchr21\t39846834\t-\t2\t0\t703\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t39.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39846131/39846132(+)->chr21:39846834/39846835(-):(discordant_mates:2)\n+chr21\t39853321\t-\t2\t0\tchr21\t42851610\t+\t0\t2\t2998289\tentropy=0.0<0.6826,n_support=1<7\tlinear\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:39853321/39853322(-)->chr21:42851610/42851611(+):(discordant_mates:2)\n+chr21\t42840341\t+\t0\t2\tchr21\t42842586\t-\t2\t0\t2245\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t0.0000\t124.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42840341/42840342(+)->chr21:42842586/428
42587(-):(discordant_mates:2)\n+chr21\t42843861\t+\t2\t0\tchr21\t42845272\t-\t0\t2\t1411\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t65.0000\t0.0000\t1.0000\t0.0000\t0.0000\t60.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42843861/42843862(+)->chr21:42845272/42845273(-):(discordant_mates:2)\n+chr21\t42863757\t+\t0\t2\tchr21\t42864220\t-\t2\t0\t463\tentropy=0.0<0.6826,n_support=1<7\tcircular\tintronic\t1\t0\t0\t1\t1\t1\t1\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t0.0000\t126.0000\t0.0000\t1.0000\t0.0000\t0.0000\t123.0000\t0.0000\t1.0000\t0.0000\t20.0000\t0.0000\t2.0000\tchr21:42863757/42863758(+)->chr21:42864220/42864221(-):(discordant_mates:2)\n'
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/detect.txt
--- a/test-data/detect.txt Fri Apr 28 03:56:08 2017 -0400
+++ b/test-data/detect.txt Thu Aug 10 05:38:27 2017 -0400
b
@@ -1,2 +1,2 @@
-chr-A pos-A direction-A chr-B pos-B direction-B genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps data-structure
-chr21 39877811 - chr21 42873374 + 2995563 n_discordant_reads=0<1,n_support=3<8 linear intronic 9 6 3 0 1 1 1 0 0 1.0000 1.0000 0.0000 0.0000 chr21:39877811/39877812(-)->chr21:42873374/42873375(+):(spanning_paired_1_t:3,spanning_paired_2_t:3)
+chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-B-acceptor pos-B-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge data-structure
+chr21 39877811 - 6 0 chr21 42873374 + 0 6 2995563 unclassified linear intronic 9 6 3 0 1 1 1 0 0 1.0000 1.0000 0.0000 0.0000 17.0000 37.3333 0.9994 0.0216 0.5774 24.0000 42.3333 0.9954 0.0611 2.3094 0.0000 0.3333 2.0000 chr21:39877811/39877812(-)->chr21:42873374/42873375(+):(spanning_paired_1_t:3,spanning_paired_2_t:3)
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/fix_test-01.fixed.bam
b
Binary file test-data/fix_test-01.fixed.bam has changed
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/integrate_test_terg_s041.in.dbed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/integrate_test_terg_s041.in.dbed Thu Aug 10 05:38:27 2017 -0400
b
@@ -0,0 +1,6 @@
+chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge data-structure
+chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2)
+chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7)
+chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1)
+chr21 29321220 + 10 0 chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4)
+chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4)
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/integrate_test_terg_s041.out.no-gtf.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/integrate_test_terg_s041.out.no-gtf.txt Thu Aug 10 05:38:27 2017 -0400
b
@@ -0,0 +1,6 @@
+shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge frameshift=0 frameshift=+1 frameshift=+2 data-structure
+1 TMPRSS2->ERG chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7)
+1 TMPRSS2->ERG chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000288319.11)-ensembl_ha
vana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000442448.5)-ensembl 
chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2)
+2 chr21:36338771->chr21:36344707 chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1)
+3 chr21:29329693->chr21:29321220 chr21 29321220 + 10 0 chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4)
+4 chr21:33432871<->chr21:33414887 chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4)
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/integrate_test_terg_s041.out.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/integrate_test_terg_s041.out.txt Thu Aug 10 05:38:27 2017 -0400
b
@@ -0,0 +1,6 @@
+shared-id fusion chr-A pos-A direction-A pos-A-acceptor pos-A-donor chr-B pos-B direction-B pos-A-acceptor pos-A-donor genomic-distance filter-status circRNA intronic/exonic score soft+hardclips n-split-reads n-discordant-reads n-edges n-nodes-A n-nodes-B n-splice-junc-A n-splice-junc-B entropy-bp-edge entropy-all-edges bp-pos-stddev entropy-disco-bps lr-A-slope lr-A-intercept lr-A-rvalue lr-A-pvalue lr-A-stderr lr-B-slope lr-B-intercept lr-B-rvalue lr-B-pvalue lr-B-stderr disco/split clips/score nodes/edge frameshift=0 frameshift=+1 frameshift=+2 data-structure
+1 TMPRSS2->ERG chr21 38487350 - 200 0 chr21 41479719 + 0 200 2992369 valid linear intronic 151 96 48 7 1 1 1 0 0 0.869 0.869 0.2887 0.8124 2.1465 5.6435 0.9817 0 0.0571 1.9162 26.2818 0.99 0 0.0376 0.1458 0.3179 2 chr21:38487350/38487351(-)->chr21:41479719/41479720(+):(discordant_mates:14,spanning_paired_1:41,spanning_paired_1_t:7,spanning_paired_2:41,spanning_paired_2_t:7)
+1 TMPRSS2->ERG chr21 38445621 - 200 0 chr21 41498118 + 0 200 3052497 valid linear exonic 588 378 189 21 14 7 9 2 4 0.7421 0.8197 0 0.7162 0.7344 19.9882 0.9657 0 0.0178 0.7194 21.976 0.9648 0 0.0177 0.1111 0.3214 1.1429 TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000332149.9)-ensembl_havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000398585.7)-ensembl->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000424093.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000288319.11)-ensembl_ha
vana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000454499.5)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000455813.1)-havana->ERG(ENST00000442448.5)-ensembl,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000288319.11)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398905.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398907.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398910.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398911.5)-havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000398919.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000417133.6)-ensembl_havana,TMPRSS2(ENST00000458356.5)-havana->ERG(ENST00000442448.5)-ensembl 
chr21:38445621/38445622(-)->chr21:41498118/41498119(+):(discordant_mates:14,spanning_paired_1:114,spanning_paired_1_t:4,spanning_paired_2:114,spanning_paired_2_t:4)&chr21:38445621/38445622(-)->chr21:41508080/41508081(+):(spanning_paired_1:33,spanning_paired_2:33)&chr21:38445621/38445622(-)->chr21:41507949/41507950(+):(discordant_mates:8,spanning_paired_1:15,spanning_paired_2:15)&chr21:38474121/38474122(-)->chr21:41498118/41498119(+):(spanning_paired_1:14,spanning_paired_2:14)&chr21:38445621/38445622(-)->chr21:41480475/41480476(+):(discordant_mates:6,spanning_paired_1:2,spanning_paired_2:2)&chr21:38423561/38423562(-)->chr21:41498118/41498119(+):(spanning_paired_1:2,spanning_paired_2:2)&chr21:38445621/38445622(-)->chr21:41506444/41506445(+):(discordant_mates:6,spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:38445621/38445622(-)->chr21:41504366/41504367(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38446208/38446209(-)->chr21:41498118/41498119(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41480475/41480476(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38474121/38474122(-)->chr21:41508080/41508081(+):(spanning_paired_1:1,spanning_paired_2:1)&chr21:38423567/38423568(-)->chr21:41485824/41485825(+):(discordant_mates:4)&chr21:38445482/38445483(-)->chr21:41500346/41500347(+):(discordant_mates:2)&chr21:38474093/38474094(-)->chr21:41507966/41507967(+):(discordant_mates:2)
+2 chr21:36338771->chr21:36344707 chr21 36338771 + 0 10 chr21 36344707 - 10 0 5936 valid circular exonic 117 58 38 3 2 2 2 0 0 0.8536 0.8591 2.436 0.871 2.1714 19.1829 0.9605 0 0.1021 2.0941 32.6902 0.9725 0 0.0814 0.0789 0.2479 2 chr21:36338771/36338772(+)->chr21:36344707/36344708(-):(discordant_mates:6,spanning_paired_1:14,spanning_paired_1_t:23,spanning_paired_2:14,spanning_paired_2_t:23)&chr21:36338841/36338842(+)->chr21:36341534/36341535(-):(spanning_paired_1:1,spanning_paired_2:1)
+3 chr21:29329693->chr21:29321220 chr21 29321220 + 10 0 chr21 29329693 - 0 10 8473 valid circular exonic 49 35 15 4 4 4 3 0 0 0.8605 0.8018 0 0.6667 8.3531 34.3077 0.9863 0 0.4412 9.4685 14.5897 0.9701 0 0.749 0.2667 0.3571 1.75 chr21:29321220/29321221(+)->chr21:29329693/29329694(-):(discordant_mates:4,spanning_paired_1:9,spanning_paired_1_t:1,spanning_paired_2:9,spanning_paired_2_t:1)&chr21:29326058/29326059(+)->chr21:29329693/29329694(-):(spanning_paired_1:4,spanning_paired_2:4)&chr21:29327324/29327325(+)->chr21:29329704/29329705(-):(spanning_paired_1_t:1,spanning_paired_2_t:1)&chr21:29321216/29321217(+)->chr21:29329647/29329648(-):(discordant_mates:4)
+4 chr21:33432871<->chr21:33414887 chr21 33414887 + 5 5 chr21 33432871 - 5 5 17984 valid circular exonic 20 12 6 2 3 3 2 1 0 0.8277 0.9167 0 0.75 12.6 50.8 0.7995 0.1045 5.4663 12.8 24.2 0.8072 0.0987 5.4049 0.3333 0.3 1.6667 chr21:33414887/33414888(+)->chr21:33432871/33432872(-):(spanning_paired_1:2,spanning_paired_1_t:3,spanning_paired_2:2,spanning_paired_2_t:3)&chr21:33426883/33426884(+)->chr21:33432871/33432872(-):(spanning_paired_1:1,spanning_paired_2:1)&chr21:33414915/33414916(+)->chr21:33421509/33421510(-):(discordant_mates:4)
b
diff -r 173ca9768e22 -r 5348cfd3ba5c test-data/integrate_tmprss-erg.hg38.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/integrate_tmprss-erg.hg38.gtf Thu Aug 10 05:38:27 2017 -0400
b
b'@@ -0,0 +1,159 @@\n+chr21\tensembl_havana\tgene\t38380027\t38661780\t.\t-\t.\tgene_id "ENSG00000157554"; gene_version "18"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding";\n+chr21\thavana\tCDS\t38498363\t38498380\t.\t-\t0\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "1"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38445404\t38445621\t.\t-\t0\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "2"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38423410\t38423561\t.\t-\t1\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "3"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38403506\t38403709\t.\t-\t2\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "4"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id 
"ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38402557\t38402637\t.\t-\t2\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "5"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38392376\t38392444\t.\t-\t2\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "6"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38391659\t38391715\t.\t-\t2\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "7"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level "1";\n+chr21\thavana\tCDS\t38390995\t38391042\t.\t-\t2\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_version "5"; exon_number "8"; gene_name "ERG"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ERG-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS82674"; protein_id "ENSP00000381877"; protein_version "1"; tag "basic"; transcript_support_level 
"1";\n+chr21\thavana\tCDS\t38383406\t38383923\t.\t-\t2\tgene_id "ENSG00000157554"; gene_version "18"; transcript_id "ENST00000398905"; transcript_ve'..b'anscript_version "5"; exon_number "2"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41494356\t41494578\t.\t-\t0\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000424093"; transcript_version "5"; exon_number "3"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41489507\t41489593\t.\t-\t2\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000424093"; transcript_version "5"; exon_number "4"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41480476\t41480602\t.\t-\t2\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000424093"; transcript_version "5"; exon_number "5"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41479172\t41479282\t.\t-\t1\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id 
"ENST00000424093"; transcript_version "5"; exon_number "6"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41476577\t41476620\t.\t-\t1\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000424093"; transcript_version "5"; exon_number "7"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41473446\t41473496\t.\t-\t2\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000424093"; transcript_version "5"; exon_number "8"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000397846"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "5";\n+chr21\thavana\tCDS\t41498119\t41498133\t.\t-\t0\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000455813"; transcript_version "1"; exon_number "2"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "TMPRSS2-205"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000391784"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "2";\n+chr21\thavana\tCDS\t41494356\t41494578\t.\t-\t0\tgene_id "ENSG00000184012"; gene_version "11"; transcript_id "ENST00000455813"; transcript_version "1"; exon_number "3"; gene_name "TMPRSS2"; gene_source "ensembl_havana"; gene_biotype 
"protein_coding"; transcript_name "TMPRSS2-205"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000391784"; protein_version "1"; tag "cds_end_NF"; tag "mRNA_end_NF"; transcript_support_level "2";\n'