Repository 'feelnc'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/feelnc

Changeset 0:b36afbb04e1c (2018-03-28)
Next changeset 1:17a77824c8e4 (2018-04-25)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/feelnc commit e56ef992726eb29b7a24842fde2c052a92214016
added:
feelnc_wrapper.xml
test-data/annotation_chr38.gtf
test-data/candidate_lncRNA.gtf.lncRNA-sort.gtf
test-data/candidate_lncRNA.gtf.mRNA-sort.gtf
test-data/genome_chr38.fa
test-data/transcript_chr38.gtf
b
diff -r 000000000000 -r b36afbb04e1c feelnc_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feelnc_wrapper.xml Wed Mar 28 13:14:02 2018 -0400
[
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Galaxy tool wrapper that runs FEELnc_pipeline.sh from the feelnc 0.1.1 package. -->
+<tool
+    id="feelnc"
+    name="FEELnc"
+    profile="17.01"
+    version="0.1.1">
+    <description>FlExible Extraction of LncRNA</description>
+    <requirements>
+        <requirement type="package" version="0.1.1">feelnc</requirement>
+    </requirements>
+    <!--
+        Runs the pipeline once on the three input datasets. The outname and outdir
+        values are fixed because the outputs section collects its files from
+        out_feelnc/codpot/candidate_lncRNA.*; change both places together.
+    -->
+    <command detect_errors="aggressive"><![CDATA[
+FEELnc_pipeline.sh 
+--candidate='${transcripts}'
+--reference='${annotation}'
+--genome='${genome}' 
+--outname='candidate_lncRNA'
+--outdir='out_feelnc'
+    ]]></command>
+    <!-- Each param records the pipeline option it feeds in its argument attribute. -->
+    <inputs>
+        <param
+            name="transcripts"
+            argument="--candidate"
+            type="data"
+            format="gtf"
+            label="Transcripts assembly"
+            help="Stringtie or Cufflinks output" />
+        <param
+            name="annotation"
+            argument="--reference"
+            type="data"
+            format="gtf"
+            label="Reference annotation" />
+        <param
+            name="genome"
+            argument="--genome"
+            type="data"
+            format="fasta"
+            label="Genome sequence" />
+    </inputs>
+    <!-- Both datasets are picked up from the codpot subdirectory of the fixed outdir. -->
+    <outputs>
+        <data
+            name="candidate_lncRNA"
+            format="gtf"
+            from_work_dir="out_feelnc/codpot/candidate_lncRNA.codpot.lncRNA.gtf"
+            label="lncRNA annotation with ${tool.name} on ${on_string}" />
+        <data
+            name="candidate_mRNA"
+            format="gtf"
+            from_work_dir="out_feelnc/codpot/candidate_lncRNA.codpot.mRNA.gtf"
+            label="mRNA annotation with ${tool.name} on ${on_string}" />
+    </outputs>
+    <!-- sort="True" makes the comparison independent of output line order. -->
+    <tests>
+        <test>
+            <param name="transcripts" value="transcript_chr38.gtf" />
+            <param name="annotation" value="annotation_chr38.gtf" />
+            <param name="genome" value="genome_chr38.fa" />
+            <output
+                name="candidate_lncRNA"
+                file="candidate_lncRNA.gtf.lncRNA-sort.gtf"
+                sort="True" />
+            <output
+                name="candidate_mRNA"
+                file="candidate_lncRNA.gtf.mRNA-sort.gtf"
+                sort="True" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+FEELnc pipeline is used to annotate long non-coding RNAs (lncRNAs) based on reconstructed transcripts from RNA-seq data (either with or without a reference genome). 
+
+--------
+
+**Project links:**
+
+https://github.com/tderrien/FEELnc
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkw1306</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r b36afbb04e1c test-data/annotation_chr38.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation_chr38.gtf Wed Mar 28 13:14:02 2018 -0400
b
b'@@ -0,0 +1,5621 @@\n+38\tprotein_coding\tgene\t71179\t72976\t.\t+\t.\tgene_id "ENSCAFG00000009450"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding";\n+38\tprotein_coding\ttranscript\t71179\t72976\t.\t+\t.\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl";\n+38\tprotein_coding\texon\t71179\t71332\t.\t+\t.\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; exon_number "1"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl"; exon_id "ENSCAFE00000102483";\n+38\tprotein_coding\tCDS\t71179\t71332\t.\t+\t0\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; exon_number "1"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl"; protein_id "ENSCAFP00000013904";\n+38\tprotein_coding\tstart_codon\t71179\t71181\t.\t+\t0\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; exon_number "1"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl";\n+38\tprotein_coding\texon\t72642\t72976\t.\t+\t.\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; exon_number "2"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl"; exon_id "ENSCAFE00000102489";\n+38\tprotein_coding\tCDS\t72642\t72973\t.\t+\t2\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; exon_number "2"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl"; protein_id "ENSCAFP00000013904";\n+38\tprotein_coding\tstop_codon\t72974\t72976\t.\t+\t0\tgene_id "ENSCAFG00000009450"; transcript_id "ENSCAFT00000015026"; 
exon_number "2"; gene_name "BTG2"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "BTG2-201"; transcript_source "ensembl";\n+38\tprotein_coding\tgene\t103799\t112448\t.\t-\t.\tgene_id "ENSCAFG00000009459"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding";\n+38\tprotein_coding\ttranscript\t103799\t112448\t.\t-\t.\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "FMOD-201"; transcript_source "ensembl";\n+38\tprotein_coding\texon\t112107\t112448\t.\t-\t.\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; exon_number "1"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "FMOD-201"; transcript_source "ensembl"; exon_id "ENSCAFE00000315885";\n+38\tprotein_coding\texon\t108443\t109430\t.\t-\t.\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; exon_number "2"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "FMOD-201"; transcript_source "ensembl"; exon_id "ENSCAFE00000102538";\n+38\tprotein_coding\tCDS\t108443\t109421\t.\t-\t0\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; exon_number "2"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "FMOD-201"; transcript_source "ensembl"; protein_id "ENSCAFP00000013913";\n+38\tprotein_coding\tstart_codon\t109419\t109421\t.\t-\t0\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; exon_number "2"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "FMOD-201"; transcript_source "ensembl";\n+38\tprotein_coding\texon\t103799\t103950\t.\t-\t.\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; exon_number "3"; gene_name "FMOD"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_name "FMOD-201"; transcript_source "ensembl"; exon_id 
"ENSCAFE00000102540";\n+38\tprotein_coding\tCDS\t103802\t103950\t.\t-\t2\tgene_id "ENSCAFG00000009459"; transcript_id "ENSCAFT00000015038"; exon_number "3"; gene_n'..b'"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23905633\t23905677\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "9"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000316591";\n+38\tprotein_coding\tCDS\t23905633\t23905677\t.\t-\t0\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "9"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23903200\t23903317\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "10"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000300987";\n+38\tprotein_coding\tCDS\t23903200\t23903317\t.\t-\t0\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "10"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23902431\t23902522\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "11"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000297399";\n+38\tprotein_coding\tCDS\t23902431\t23902522\t.\t-\t2\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "11"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23902149\t23902214\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "12"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id 
"ENSCAFE00000284347";\n+38\tprotein_coding\tCDS\t23902149\t23902214\t.\t-\t0\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "12"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23901628\t23901660\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "13"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000289311";\n+38\tprotein_coding\tCDS\t23901628\t23901660\t.\t-\t0\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "13"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23901568\t23901625\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "14"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000322242";\n+38\tprotein_coding\tCDS\t23901568\t23901625\t.\t-\t0\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "14"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23901148\t23901301\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "15"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000320694";\n+38\tprotein_coding\tCDS\t23901148\t23901301\t.\t-\t2\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "15"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n+38\tprotein_coding\texon\t23900814\t23900856\t.\t-\t.\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "16"; gene_source "ensembl"; gene_biotype 
"protein_coding"; transcript_source "ensembl"; exon_id "ENSCAFE00000253912";\n+38\tprotein_coding\tCDS\t23900814\t23900856\t.\t-\t1\tgene_id "ENSCAFG00000024444"; transcript_id "ENSCAFT00000037740"; exon_number "16"; gene_source "ensembl"; gene_biotype "protein_coding"; transcript_source "ensembl"; protein_id "ENSCAFP00000033319";\n'
b
diff -r 000000000000 -r b36afbb04e1c test-data/candidate_lncRNA.gtf.lncRNA-sort.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/candidate_lncRNA.gtf.lncRNA-sort.gtf Wed Mar 28 13:14:02 2018 -0400
b
b'@@ -0,0 +1,1114 @@\n+38\tCufflinks\texon\t10413424\t10413839\t.\t+\t.\tgene_id "XLOC_090671"; transcript_id "TCONS_00231772"; class_code "u"; exon_number "1"; oId "CUFF.138718.1"; tss_id "TSS139647";\n+38\tCufflinks\texon\t10413938\t10414557\t.\t+\t.\tgene_id "XLOC_090671"; transcript_id "TCONS_00231773"; class_code "u"; exon_number "1"; oId "CUFF.138718.2"; tss_id "TSS139648";\n+38\tCufflinks\texon\t10414466\t10414557\t.\t+\t.\tgene_id "XLOC_090671"; transcript_id "TCONS_00231772"; class_code "u"; exon_number "2"; oId "CUFF.138718.1"; tss_id "TSS139647";\n+38\tCufflinks\texon\t10443102\t10444725\t.\t+\t.\tgene_id "XLOC_090671"; transcript_id "TCONS_00231772"; class_code "u"; exon_number "3"; oId "CUFF.138718.1"; tss_id "TSS139647";\n+38\tCufflinks\texon\t10443102\t10444725\t.\t+\t.\tgene_id "XLOC_090671"; transcript_id "TCONS_00231773"; class_code "u"; exon_number "2"; oId "CUFF.138718.2"; tss_id "TSS139648";\n+38\tCufflinks\texon\t10646531\t10646561\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232893"; class_code "u"; exon_number "1"; oId "CUFF.138746.1"; tss_id "TSS140242";\n+38\tCufflinks\texon\t10661011\t10663355\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232894"; class_code "u"; exon_number "1"; oId "CUFF.138746.2"; tss_id "TSS140242";\n+38\tCufflinks\texon\t10663297\t10663355\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232893"; class_code "u"; exon_number "2"; oId "CUFF.138746.1"; tss_id "TSS140242";\n+38\tCufflinks\texon\t10664410\t10664532\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232893"; class_code "u"; exon_number "3"; oId "CUFF.138746.1"; tss_id "TSS140242";\n+38\tCufflinks\texon\t10664410\t10664532\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232894"; class_code "u"; exon_number "2"; oId "CUFF.138746.2"; tss_id "TSS140242";\n+38\tCufflinks\texon\t10665521\t10665896\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232893"; class_code "u"; exon_number "4"; oId "CUFF.138746.1"; tss_id 
"TSS140242";\n+38\tCufflinks\texon\t10665521\t10665896\t.\t-\t.\tgene_id "XLOC_090951"; transcript_id "TCONS_00232894"; class_code "u"; exon_number "3"; oId "CUFF.138746.2"; tss_id "TSS140242";\n+38\tCufflinks\texon\t10884994\t10885190\t.\t+\t.\tgene_id "XLOC_090672"; transcript_id "TCONS_00231774"; class_code "u"; exon_number "1"; oId "CUFF.138756.1"; tss_id "TSS139649";\n+38\tCufflinks\texon\t10886343\t10886878\t.\t+\t.\tgene_id "XLOC_090672"; transcript_id "TCONS_00231774"; class_code "u"; exon_number "2"; oId "CUFF.138756.1"; tss_id "TSS139649";\n+38\tCufflinks\texon\t11546597\t11546763\t.\t+\t.\tgene_id "XLOC_090674"; transcript_id "TCONS_00231787"; class_code "x"; exon_number "1"; gene_name "ENSCAFG00000010731"; nearest_ref "ENSCAFT00000017072"; oId "CUFF.139324.1"; tss_id "TSS139653";\n+38\tCufflinks\texon\t11560208\t11560407\t.\t+\t.\tgene_id "XLOC_090674"; transcript_id "TCONS_00231787"; class_code "x"; exon_number "2"; gene_name "ENSCAFG00000010731"; nearest_ref "ENSCAFT00000017072"; oId "CUFF.139324.1"; tss_id "TSS139653";\n+38\tCufflinks\texon\t11561016\t11561260\t.\t+\t.\tgene_id "XLOC_090674"; transcript_id "TCONS_00231787"; class_code "x"; exon_number "3"; gene_name "ENSCAFG00000010731"; nearest_ref "ENSCAFT00000017072"; oId "CUFF.139324.1"; tss_id "TSS139653";\n+38\tCufflinks\texon\t11565818\t11566499\t.\t+\t.\tgene_id "XLOC_090674"; transcript_id "TCONS_00231787"; class_code "x"; exon_number "4"; gene_name "ENSCAFG00000010731"; nearest_ref "ENSCAFT00000017072"; oId "CUFF.139324.1"; tss_id "TSS139653";\n+38\tCufflinks\texon\t11673388\t11673575\t.\t+\t.\tgene_id "XLOC_090675"; transcript_id "TCONS_00231788"; class_code "x"; exon_number "1"; gene_name "ENSCAFG00000010731"; nearest_ref "ENSCAFT00000017072"; oId "CUFF.139287.1"; tss_id "TSS139654";\n+38\tCufflinks\texon\t11673388\t11673607\t.\t+\t.\tgene_id "XLOC_090675"; transcript_id "TCONS_00231789"; class_code "x"; exon_number "1"; gene_name "ENSCAFG00000010731"; nearest_ref "ENSCAFT00000017072"; 
oId "CUFF.139287.2"; tss_id "TSS139654";\n+38\tCufflinks\texon\t11673655\t11673730\t.\t+\t.\tgene_id "XLOC_090675"; transcript_id "TCONS_00231790"; class_code "x"; exon_number "1"; gene_name "EN'..b'8180893\t.\t-\t.\tgene_id "XLOC_090943"; transcript_id "TCONS_00232880"; class_code "u"; exon_number "3"; oId "CUFF.139147.1"; tss_id "TSS140231";\n+38\tCufflinks\texon\t8183107\t8183146\t.\t-\t.\tgene_id "XLOC_090943"; transcript_id "TCONS_00232881"; class_code "u"; exon_number "4"; oId "CUFF.139147.2"; tss_id "TSS140232";\n+38\tCufflinks\texon\t916839\t916982\t.\t+\t.\tgene_id "XLOC_090608"; transcript_id "TCONS_00231457"; class_code "u"; exon_number "1"; oId "CUFF.138206.1"; tss_id "TSS139495";\n+38\tCufflinks\texon\t921030\t921143\t.\t+\t.\tgene_id "XLOC_090608"; transcript_id "TCONS_00231457"; class_code "u"; exon_number "2"; oId "CUFF.138206.1"; tss_id "TSS139495";\n+38\tCufflinks\texon\t931061\t931154\t.\t+\t.\tgene_id "XLOC_090608"; transcript_id "TCONS_00231457"; class_code "u"; exon_number "3"; oId "CUFF.138206.1"; tss_id "TSS139495";\n+38\tCufflinks\texon\t9471671\t9471911\t.\t+\t.\tgene_id "XLOC_090669"; transcript_id "TCONS_00231769"; class_code "u"; exon_number "1"; oId "CUFF.138683.1"; tss_id "TSS139645";\n+38\tCufflinks\texon\t9471671\t9471911\t.\t+\t.\tgene_id "XLOC_090669"; transcript_id "TCONS_00231770"; class_code "u"; exon_number "1"; oId "CUFF.138683.2"; tss_id "TSS139645";\n+38\tCufflinks\texon\t9478700\t9478780\t.\t+\t.\tgene_id "XLOC_090669"; transcript_id "TCONS_00231769"; class_code "u"; exon_number "2"; oId "CUFF.138683.1"; tss_id "TSS139645";\n+38\tCufflinks\texon\t9485243\t9485502\t.\t+\t.\tgene_id "XLOC_090669"; transcript_id "TCONS_00231769"; class_code "u"; exon_number "3"; oId "CUFF.138683.1"; tss_id "TSS139645";\n+38\tCufflinks\texon\t9485243\t9485502\t.\t+\t.\tgene_id "XLOC_090669"; transcript_id "TCONS_00231770"; class_code "u"; exon_number "2"; oId "CUFF.138683.2"; tss_id 
"TSS139645";\n+38\tCufflinks\texon\t9506786\t9507412\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232885"; class_code "u"; exon_number "1"; oId "CUFF.138979.1"; tss_id "TSS140236";\n+38\tCufflinks\texon\t9549709\t9549764\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232885"; class_code "u"; exon_number "2"; oId "CUFF.138979.1"; tss_id "TSS140236";\n+38\tCufflinks\texon\t9575653\t9575724\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232886"; class_code "o"; exon_number "1"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.2"; tss_id "TSS140237";\n+38\tCufflinks\texon\t9604520\t9604668\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232885"; class_code "u"; exon_number "3"; oId "CUFF.138979.1"; tss_id "TSS140236";\n+38\tCufflinks\texon\t9604520\t9604668\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232886"; class_code "o"; exon_number "2"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.2"; tss_id "TSS140237";\n+38\tCufflinks\texon\t9642250\t9642323\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232885"; class_code "u"; exon_number "4"; oId "CUFF.138979.1"; tss_id "TSS140236";\n+38\tCufflinks\texon\t9794105\t9794136\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232887"; class_code "o"; exon_number "1"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.3"; tss_id "TSS140237";\n+38\tCufflinks\texon\t9880332\t9880397\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232886"; class_code "o"; exon_number "3"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.2"; tss_id "TSS140237";\n+38\tCufflinks\texon\t9880332\t9880397\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232887"; class_code "o"; exon_number "2"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.3"; tss_id 
"TSS140237";\n+38\tCufflinks\texon\t9945834\t9946205\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232886"; class_code "o"; exon_number "4"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.2"; tss_id "TSS140237";\n+38\tCufflinks\texon\t9945834\t9946205\t.\t-\t.\tgene_id "XLOC_090947"; transcript_id "TCONS_00232887"; class_code "o"; exon_number "3"; gene_name "ENSCAFG00000029859"; nearest_ref "ENSCAFT00000048330"; oId "CUFF.138979.3"; tss_id "TSS140237";\n'
b
diff -r 000000000000 -r b36afbb04e1c test-data/candidate_lncRNA.gtf.mRNA-sort.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/candidate_lncRNA.gtf.mRNA-sort.gtf Wed Mar 28 13:14:02 2018 -0400
b
b'@@ -0,0 +1,273 @@\n+38\tCufflinks\texon\t10265190\t10265289\t.\t-\t.\tgene_id "XLOC_090949"; transcript_id "TCONS_00232891"; class_code "u"; exon_number "1"; oId "CUFF.138676.1"; tss_id "TSS140240";\n+38\tCufflinks\texon\t10321384\t10321633\t.\t-\t.\tgene_id "XLOC_090949"; transcript_id "TCONS_00232891"; class_code "u"; exon_number "2"; oId "CUFF.138676.1"; tss_id "TSS140240";\n+38\tCufflinks\texon\t22958458\t22958676\t.\t+\t.\tgene_id "XLOC_090833"; transcript_id "TCONS_00232410"; class_code "="; exon_number "1"; gene_name "ENSCAFG00000011692"; nearest_ref "ENSCAFT00000018544"; oId "ENSCAFT00000018544"; tss_id "TSS139987";\n+38\tCufflinks\texon\t22958758\t22959390\t.\t+\t.\tgene_id "XLOC_090833"; transcript_id "TCONS_00232410"; class_code "="; exon_number "2"; gene_name "ENSCAFG00000011692"; nearest_ref "ENSCAFT00000018544"; oId "ENSCAFT00000018544"; tss_id "TSS139987";\n+38\tCufflinks\texon\t23052109\t23052291\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "1"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23052117\t23052291\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "1"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23053697\t23053936\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "2"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23053697\t23053936\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "2"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23054964\t23055089\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "3"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23054964\t23055089\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "3"; oId "CUFF.139754.2"; tss_id 
"TSS139994";\n+38\tCufflinks\texon\t23057007\t23057147\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "4"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23057007\t23057147\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "4"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23057822\t23057968\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "5"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23057822\t23057968\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "5"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23060147\t23060280\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "6"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23060147\t23060280\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "6"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23060810\t23060954\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "7"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23060810\t23060954\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "7"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23062593\t23062747\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "8"; oId "CUFF.139754.1"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23062593\t23062747\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "8"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23063695\t23063830\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232417"; class_code "u"; exon_number "9"; oId "CUFF.139754.1"; tss_id 
"TSS139994";\n+38\tCufflinks\texon\t23063695\t23063830\t.\t+\t.\tgene_id "XLOC_090839"; transcript_id "TCONS_00232418"; class_code "u"; exon_number "9"; oId "CUFF.139754.2"; tss_id "TSS139994";\n+38\tCufflinks\texon\t23063947\t23064048\t.\t+\t.\tgene_id "XLO'..b'\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231692"; class_code "u"; exon_number "8"; oId "CUFF.138100.11"; tss_id "TSS139598";\n+38\tCufflinks\texon\t3288769\t3289040\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231684"; class_code "x"; exon_number "20"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.3"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3288769\t3289040\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231686"; class_code "x"; exon_number "20"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.7"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3288769\t3289040\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231692"; class_code "u"; exon_number "9"; oId "CUFF.138100.11"; tss_id "TSS139598";\n+38\tCufflinks\texon\t3293962\t3294089\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231690"; class_code "x"; exon_number "19"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.9"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3294018\t3294089\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231683"; class_code "x"; exon_number "20"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.2"; tss_id "TSS139596";\n+38\tCufflinks\texon\t3294018\t3294089\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231687"; class_code "x"; exon_number "19"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.10"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3294018\t3294089\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231688"; class_code "x"; exon_number "21"; gene_name "ENSCAFG00000027924"; nearest_ref 
"ENSCAFT00000042207"; oId "CUFF.138100.5"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3297502\t3297545\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231682"; class_code "x"; exon_number "20"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.1"; tss_id "TSS139595";\n+38\tCufflinks\texon\t3297502\t3297545\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231683"; class_code "x"; exon_number "21"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.2"; tss_id "TSS139596";\n+38\tCufflinks\texon\t3297502\t3297545\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231687"; class_code "x"; exon_number "20"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.10"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3297502\t3297545\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231688"; class_code "x"; exon_number "22"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.5"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3297502\t3297545\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231689"; class_code "x"; exon_number "19"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.8"; tss_id "TSS139597";\n+38\tCufflinks\texon\t3297502\t3297545\t.\t+\t.\tgene_id "XLOC_090646"; transcript_id "TCONS_00231690"; class_code "x"; exon_number "20"; gene_name "ENSCAFG00000027924"; nearest_ref "ENSCAFT00000042207"; oId "CUFF.138100.9"; tss_id "TSS139597";\n+38\tCufflinks\texon\t515156\t515948\t.\t+\t.\tgene_id "XLOC_090599"; transcript_id "TCONS_00231414"; class_code "u"; exon_number "1"; oId "CUFF.138034.12"; tss_id "TSS139475";\n+38\tCufflinks\texon\t518126\t518188\t.\t+\t.\tgene_id "XLOC_090599"; transcript_id "TCONS_00231414"; class_code "u"; exon_number "2"; oId "CUFF.138034.12"; tss_id "TSS139475";\n+38\tCufflinks\texon\t519376\t519454\t.\t+\t.\tgene_id "XLOC_090599"; transcript_id "TCONS_00231414"; 
class_code "u"; exon_number "3"; oId "CUFF.138034.12"; tss_id "TSS139475";\n+38\tCufflinks\texon\t521313\t523494\t.\t+\t.\tgene_id "XLOC_090599"; transcript_id "TCONS_00231414"; class_code "u"; exon_number "4"; oId "CUFF.138034.12"; tss_id "TSS139475";\n+38\tCufflinks\texon\t613419\t613475\t.\t+\t.\tgene_id "XLOC_090599"; transcript_id "TCONS_00231414"; class_code "u"; exon_number "5"; oId "CUFF.138034.12"; tss_id "TSS139475";\n'
b
diff -r 000000000000 -r b36afbb04e1c test-data/genome_chr38.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_chr38.fa Wed Mar 28 13:14:02 2018 -0400
b
b'@@ -0,0 +1,398577 @@\n+>38 dna:chromosome chromosome:CanFam3.1:38:1:23914537:1 REF\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATTCTCTTCGATTGGACTCT\n+CCGATTCGATTCTTTTCGGGTTCGATTTGATTCTTCTCCATTAGATTCGATTCGATTCTT\n+TTCGATCTGATTCAATAAGATTCTTTTCGATCGATTTAATTCTGTTCGATTCTATTCTAT\n+TCGATTTGATTCGATTCGATTCTTTTCAATTCGATTCTATTCGATTCTTTTCATTTCGAT\n+TCGATTATTTCGATTCGATTCTTTTCAATTTGATTCGATTTGATCATTTGGATTCGATTT\n+GTTACATTTCGATTCGATTCTTTTCGATTCTGTTCGATTCGATTCTTTTCGATTCTGTTC\n+GATTCGATTCTTTTCGATTCTATTCAATTCTTTTCCATTCGATATGATTTGATTCATTTC\n+GAATTTTTTTCGATTCGTTTCGATTCAAATCTATTCGATTCTTTTCGATTCAATTCGATT\n+CGATTGATTCTTTTCAATTCAATTCGATTCCATTCGATTCGATTCTTTTCGATTCAATTC\n+GATTCTTTTTGATTTGATACAATTCGATTCGATTCTTTTCCATTCGATTCAATCCGATTC\n+TTTTCTATTCGATTCAACTCAACTCGATTCTTTTCGATTCGATTTTTTCGATTCGATTCT\n+TTTCAATTCGATTCAATTCTTTTCGATTTGATTCTTTTCGTTTCAATTCGATTCGATTCT\n+TTTCAATTCGATTCGATTCGATTCTGTTCGTTTCAATTAGATTCTTTTCAATTTTATTTG\n+ATTCGATTCTTTTCAATTCTATTCGGTACTGTTCGATTCGATTCTTTTCGATTCAATTCG\n+ATTGGAATCGATTCTTTTTGATTCAGTTTGATTCGATTTGATACGATTTTCTTTTTTTGT\n+TTTTTTGATACGATTTTCGATTGTATTCAATTCGATTCTTTTCGGTTCTATTAGATTCGA\n+TTCTTTTGGATTCAAATTGATATGATTCGATTCTTTTCGATTCCATTCGATACGATTCAA\n+TTCTTTTCGATTCAATTCGTTTCGATGCTATTCAATTCGATTCTTTTTGATTCAATTCGA\n+TTATTTTCGATTCAATTCGATTCTTTTCGATTCAATTCCATTGGAATCGATACGATTCTT\n+TTCGATTCGATTCTTTTCGGTTCTGTTCGATTCAATTCTTTTCGATTCAGTTCAATTCGA\n+TTCTTTTCTTTTCGATTCGATTCTTTTCGATTTGATATGATTCGATTCATTTCGATTTTT\n+TTCGATTCGTTTCGATTCGAATCTATTTGATTCTTTTCGATTCAATTCGATTCGATTGAT\n+TCTTTTTCAATTCATTTCGATTCGATTCGATTCCGTTCGATATGACTCTTTTCTATTCGA\n+TTCGATTCTTTTTAACTCGATATGATTCGATTCGATTCTTTTTGAATTGACTCAATTCAA\n+TTCAATTTTTTTCGATTCGATTTTTTCGATTTCTTTTCAATTTGATTCGATTCTTTTCCA\n+TTTGATTTTTTTCGATTCGATTCGATTTGACTCTAGTCATTTCGATTCGATTCTTTTTGA\n+TTCGATTCGATTCGATTCTTTTCGTTTCAATTAGATTCTCTTCGATTCGATACTTTTCGA\n+TTCGATTCTTGTCAATTCGATTCAATTCTATTCTTTTCGATTCGATATGATTCGATTTGA\n+TTCGTTGCGAATCAATTCGATTCCATTCTTTTTGATTCGATTCGATTCGATTTTTTTAGA\n+TTTGATTTGATAAGATTCTTTTC
AATTGGATTCGATTCGATTCTCTTTGATGTTTTTTGA\n+TTCGATCTTTTCATCGATTTGATTCGATTCAATTCAATTCATTTCGATTCGATTCCATTC\n+GAATTTTTTGATTCTATTATTTTCAATTCGATTCTTTTCGATTCTATTCGATTCGATTCG\n+ATTTTTTTCTATTCGATTTATTTCGATTCGATTCGATTTGATTCTTTTCATTTCGATTAG\n+ATTCTCTTCGATTCGGTTCTTTTTGATTCAATTCTTTTCGATTCGATTTGATTTGAATCT\n+TTTCGAGTTGATTCGGTTCGATTCTTTTCAATTCGATTGATAAGATTCTTTTCGTTCGAT\n+TCAATTTTGTTCTATTCTATTCGATTCGATTCTATTCGATTTGATTCTTTTCAATTCAAT\n+TCTATTCGATTCTTTTCATTTTGATTCAATTCTTTTCGATTCGATTCTTTTCAATTTGAT\n+TTGATTCGATCATTTGGATTCGATTTGTTACATTTTGATTCGATTCTTTTCGATTCGATT\n+CTTTAATATTTGATTTGATACGATTCTTTTCAATTCGATTCGATCTGATTCTTTTCAATT\n+CGATTCGATTCTTTTCGATTTGATCGCATTCGATTCAATGGTTTTCGATTCGATTTGATT\n+CTTCCGATTCGATTCGATTCTTTTCATTACGATTCAATTCAATTCGATTCTTTTCGATTC\n+GATTTGATTCTCTTCGGTTCGATTCGATTCTTTTCGACCCGCTTCGATTCGGCCCTTTTC\n+GTTTCGCTTCGGTTCTTTTCGATTCGATCTGATTCTTTTCATTTCGATTAGATCCTCTTC\n+GATTCGATTCTATTCGTTTCGATTAGATTCTTTTCGATTCGATTCGATTCAATTCTTTTC\n+ACTTCGAGTCGATTCTACTCGGTTCATCTCGAATCGATTCGATTCTTTTTGATTTGATCT\n+ATTCGATTCTTTTCGATTCAGTTCGATTCAATTCGATTCTTTTCTATTTGATTCGATTCT\n+TTTCCACTCGATGTGATTTGATTCGTTTTGATTTTTTTCGATTTGTTCCAATTCGAATCT\n+ATTCGATTCTTTTCGTTTCAATTCGATTCGATTCTTTTTGATTCAATTCCATTCGATTCG\n+AATAGATTCGATTCTTTTCGATTCGATACGATTCGATTCTTTTCGATGCGAATCAATTCG\n+ATTCGATTCTTTTTTATTCGATTCGATTCTTTTAGATTTGATTCGATACGATTCTTTTCT\n+ATTCGATTCTTTTCGATTCGATTCGATTCTTTTCAATTTGATTCGTTTCGATTTGAATCT\n+TTTTGTTTTGATTTGATTTTCTACAATTCGATTCTTTGGATTCGATTCTTTTTGATTTGA\n+TTAGATTCGATTCATTTCAATTCAAATCAATTCAATATGATTCGATTCGTTGCAATCTGA\n+TTCGATTTTTTCGAATCGATTTGATTCGATTCGATTAGTTTCGATTCGACTCGATGTGAT\n+TCGATTCGATTATCGATTCAATTCAATTCCAATTATTCTCAATTCGATTCGATTATTTTT\n+ATTCAATTCTATTATTAGCTTTTCGATGCGATTCGATTCGATTCGATTCTTTTCGATTCA\n+ATCAATTCGATTTGATTCTTTTCAATTCGATTCGATTCTTTTCGATTTGATTGCATTCGA\n+TTCAATTGTTTTCGATTCGATTTGATTCTTTCGATTCGATTCGATTCTTTTCATTACGAT\n+TCAATTCAATTCGATTCTTTTCGATTCGATTTGATTCTTTTCGGTTCGATTCGATTCTTT\n+TCGATTCGATTCGATTCGACACTTTTCGTTTCGATTCGATTCTTTTCGATTCGATTGATT\n+CTTTTCGTTTCGATTAGATTCTCTTTGATTCGATTCTTTTCGATTCCATTTGATTTGATT\n+CTTCTCC
ATTCGATTCGATTCAATTCTTTTCGAATCGATTCGATAAGATTCTTTTCATTC\n+GATTCAATTC'..b'TGGGCCTCACCTGGACCTGGTGACTCTGGTCTCCACCTGGCTCTCACGTGGAC\n+CTGGTGACTGTGGTCCCCACCTGGGCCTCACCTGGACCTGGTGACTGTGGTTCCCACCTT\n+GGCCTCACCTGGACCTGGTGACTCTGGTCTCCACCTGGCTCTCACGTGGACCTGGTGACT\n+GTGGTCCCCACCTGGGCCTCACCTGGACCTGGTGACTGTGGTTCCCACCTTGGCCTCACC\n+TGGACCTGGTGACTCTGGTCTCCACCTGGCTCTCACTGGACCTGGTGACTGTGGTGCCCT\n+GCCTGGGCCTCACCTGCACCTGGTGACCATGGGCCCTACCTGGGCTCCCATGACCTGGTG\n+ACTGTGGTCCCCACCTGGGCCTCACTTGGACCTGGTGACCCTGGTCCCCTCCTGGGCCTC\n+ATCTGTGCCCAGTGATTGTGGTCCCAACCTGGGCCTCACCTGGACCCAGTACACTGTGGT\n+TCACAGATGGACCTCACCTGTAGCTGGTGACTCTGGTCCCCACCTGGACCTCACCTGGAA\n+CTGGTGACTGTGATTCCCACCGTGGCCTCACCTGGACCCAGTGACTGTGGTCCCCCACTG\n+GGCTTCACCCGGACCTGGTGACTGTGGTTCCTACCTGGGTCTCACCTGACCTGGTGACTG\n+TGGTCCCCACTTGGGCCTCACTTGACCTGGTGACTCTGATCCTTACCTGGCCCTCACCCG\n+GACCTGGTGACTCTGGTCCCAAATTGGGCCTCACCCAGACCTGGTGACTCTAGTCCCCAA\n+CTGGGCTTCACCTAGGTGACTCTGGTCTCCACATGAGCCTCATGTGGATCTGCGACTGTG\n+GTCCCCACCTTGGCCTCACCTGAACCTGGTGACTCTGGTCCCCACCTGGGCTCACCTGGA\n+CCTGGTGACTGGGCTCCTCAGCTACCAATGACTAGGATCCAGGATGGACCCTTAGTTGCT\n+ACTGACACCGGGACCATCCCCAGGGTCTCACTTGTTCCTGGTGACTATGGTCCTTGTCTG\n+GGCTGACCTGGCTCATCCTTGTCTCTGATCCCCTCTTGAGCCTCACCTGCTCTTGCAGGT\n+ATGGTCCCCCCTGTCGTGGGTGTGTAGTGTCTATGGTCCCTTCTGGGCCTCACCTGATGG\n+TACTGGGCCACGATATTGGCCCTTTGCAAGACCAGCAGGGTCTCCGGGTCCTCCTGCATC\n+GTGCCGCAATCCAAACACATCCAAGAATGACGGCACGTGAGAACCTTCCGAGGACACATG\n+CGTCCGGGTGCTCCCCGTGGGCAGGTGCCCCTCTGTCTGACTGGAGCCGGAGACAACCCC\n+GTAGCCCCGAGGGGCACAGGCTGGGCTGGAGCATCACACACAGGCCCTCGCTGCCTGTCT\n+GTCCCCTGCCCACCTACAGTCCACCTTCCCAGCCACTGTCCCAGGACACACCGTCACGCT\n+GGGGTCACAGAGCGCTCCCACCCCGGTATCAGGAGCACACGGGCGGGGGACCCAGGGACT\n+CCGGGGAACTGCCCACCTGATGCTCGGGGGTGACGTCTCCCAGCCTGGGGCCGGCGCAGA\n+GATGGTGGTGGGTGGGCCGGCCAGCGAGTGTCCCTGTGGGGTGGGGGGGGGGCAGGGATC\n+GAAGGTTTCTCTTAGATTCCACAAACACACGACCAAGGGGAGAAAGCGACGGACTGGACT\n+TCATCAAAATTAAAACCTGATGCCAGCAAGAGTGCGACCAAACTCCCCACCCTGAGGGCA\n+GCGATGTGTGAATAGCCTGAGGGGGGCCTGGGGGAGCACGAGGGACTCCACGTTCCAGTG\n+CGTGAAGAGCAAGCCTGCTCACGGGCCAGGGGGACCCAGCGGGCAC
AGGGCGGGATGGGG\n+GGAGTCAGTCACCCCTGTCGGGGTCCATGTGTCTAGTATAGGCTGGAAAGGTCATGGCTT\n+GGGTGCCTGCACAGCACAGGGCTTATAGGGGAGCGGGGGTCCCGGGGGAGCTGCCTGAGT\n+TCCGACTGCAGCCTCCTTCCCTGGACGGGAGCAGCAGGGTGGCCACTCTCACCCCGTGGG\n+GCTGTGGAAGGGCTGTAGGAGCTGCTCACGGTCATCGCATGGGGGGTCTGGGTGTTGTGG\n+AGCCCAGCACGGGAGGCCTGTGCCCCCACCTGGACCCACAGGCCCCACTGTGGCGCGCGG\n+GGAGCAGGGATCAGCTGCCACTCACCTGTGACCTGTCGCTGTGTGAGTGCCGCCTCTTTA\n+TGCACTGGGGTACTGGGGAGAGTGGGAGGGTTGGGGTACTGGGGAGAGTGGGAGAGTGGG\n+GGTACTGGGGAGAGTGGGAGAGTTGGGGTACTGAGGAGAGATGAGTACTGAGCTACTGGG\n+GAGAGCAAGAGAGTCAGGTCGCTGTGAGGCAGATCTGGAGCATGAGTGCCTCCGAAGGTC\n+TGCGGACGCAGAGTGGGCACCAGGCAGCAGTGGAACCCGGGGGCACCACAGTTCCACCCA\n+GAGTGGGGTCCCACATCATCACTGTGGGCAGGCGGGGTCCAATCAGCACTTAGCCCTTCA\n+CCCTGAAGGCCGAGGGTCCCGGGGGAGGGTCCTGTCCCCACGTCGGCAGGAAGGGGATGC\n+AGGCACCCTCCCATATCCTCAGTTCCCATCCATGGGCCCTGCAGGCCTCCCGCCCTTCAA\n+GGCAGATTGGGGGTCACAGATGCCCCCCCGCCCCTCAGGGTTCACAGCATGGGGGTGCAG\n+TTAAGAACATTTGAGGGGCTCTTGGGTGGTTCCGTCGGTTCCACGTCGACTCTTGAGTTT\n+GGCTCAAGTCAGGATCTCGAGGTTGTGGCGTGGAGCCTCACGTGGGTCTCGGGCTCCTGT\n+GGGTGAGGAGTCCGCTCCAGTTTCCCCCTCTCCCTCCACCCACCCCTCACTGCACCCTCT\n+CTCAAGTAAATCTTCTAAAAATTAACATGAAGACACGTGAGCACCTTGATCCCCTCTGCA\n+GAGGTGCCTTTGTGTGTGTAACGACCGTGTAAATGCCTTTCACGGACCCACCGGGAGTCC\n+TACCGGAAACCCCCTTTCCTTGGTATTTATTGATGTACAGTCGACACACAACATGACAGT\n+AGTTTCAGGGCTACAGCTAGTGACTTGACAATTCTGTGCCTCACTCAGCTCTCCCCACGA\n+GAAGTGGCATCACCGTCTGTCCCCGTACAAAGTTGTCCCAACAGTACTGACTCTGTTGCC\n+AGGCTGTACTTTCTAAATTTCTGGGATTTCTTTATTTAGAACCAGACGCTTGTAGCTTTT\n+CCTATTGTTCGTCCATGTTGCCCACCTGCCCCCCCGCGGCCCCTCTGGCCATCAGTCAGC\n+CCGATCCATCCCATTACCCAGAGAAAGGACATGGGGCAGCCCAGGGGGCTCAGTGCTTCA\n+GCGCCTGCCTTCGGCCCAGAGTGTAATCCTGGAGTCCCGGGAACAAGTCCCACATCGGGT\n+TCCCTACATGGAGCCTGCTTCTCCCTCTGCCTGTGTCTCTGCCTCTCTCTCTCTCTCTGT\n+CTCTCAAGGATATATAAATAACATTTTTAAAAACAAAAAGCAATAGAAAGGACAAAACGC\n+ATACGATCCATTCAAGAGTTGCAGAAAAAGCATTTGACAGAATACAACATAAAAAGCATA\n+GTGGGTTTTGAGGAAATGTCTCTCAACAGAGAAAAGGCCATGTAGGAAGGCCCGCAGCTG\n+TCATCCTCAAGGGTGACAACCTAAAATCGTCTCCCCTAAGAGCAGGAACAAACCAAGGGG\n+GTCCCCTCTCAGCGCTGGAAGTCCTACCGT
AGTCAACACACAAGACACAGCAATGTGGGG\n+GTCGCAACCACGCTGAGAGGGGCCTGGCCCCTCAGGACACGCGGGGAGGCTTCGGGGCCA\n+TTGGGGCCACATACACGTTGACAGAGCCCAGCTTCCG\n'
b
diff -r 000000000000 -r b36afbb04e1c test-data/transcript_chr38.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcript_chr38.gtf Wed Mar 28 13:14:02 2018 -0400
b
b'@@ -0,0 +1,23075 @@\n+38\tCufflinks\texon\t69885\t70101\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231318"; exon_number "1"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.1"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139446";\n+38\tCufflinks\texon\t70452\t71332\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231318"; exon_number "2"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.1"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139446";\n+38\tCufflinks\texon\t72642\t80377\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231318"; exon_number "3"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.1"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139446";\n+38\tCufflinks\texon\t70225\t70705\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231321"; exon_number "1"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.4"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139447";\n+38\tCufflinks\texon\t72642\t80377\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231321"; exon_number "2"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.4"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139447";\n+38\tCufflinks\texon\t70225\t70705\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231320"; exon_number "1"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.3"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139447";\n+38\tCufflinks\texon\t71050\t71332\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231320"; exon_number "2"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.3"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139447";\n+38\tCufflinks\texon\t72642\t80377\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231320"; exon_number "3"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.3"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id 
"TSS139447";\n+38\tCufflinks\texon\t70225\t71332\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231319"; exon_number "1"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.2"; nearest_ref "ENSCAFT00000015026"; class_code "="; tss_id "TSS139447"; p_id "P17339";\n+38\tCufflinks\texon\t72642\t80377\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231319"; exon_number "2"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.2"; nearest_ref "ENSCAFT00000015026"; class_code "="; tss_id "TSS139447"; p_id "P17339";\n+38\tCufflinks\texon\t71179\t71332\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231322"; exon_number "1"; gene_name "ENSCAFG00000009450"; oId "ENSCAFT00000015026"; contained_in "TCONS_00231318"; nearest_ref "ENSCAFT00000015026"; class_code "="; tss_id "TSS139448"; p_id "P17339";\n+38\tCufflinks\texon\t72642\t72976\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231322"; exon_number "2"; gene_name "ENSCAFG00000009450"; oId "ENSCAFT00000015026"; contained_in "TCONS_00231318"; nearest_ref "ENSCAFT00000015026"; class_code "="; tss_id "TSS139448"; p_id "P17339";\n+38\tCufflinks\texon\t71849\t71921\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231323"; exon_number "1"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.6"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139449";\n+38\tCufflinks\texon\t72642\t80377\t.\t+\t.\tgene_id "XLOC_090592"; transcript_id "TCONS_00231323"; exon_number "2"; gene_name "ENSCAFG00000009450"; oId "CUFF.137603.6"; nearest_ref "ENSCAFT00000015026"; class_code "j"; tss_id "TSS139449";\n+38\tCufflinks\texon\t81332\t91296\t.\t+\t.\tgene_id "XLOC_090593"; transcript_id "TCONS_00231324"; exon_number "1"; oId "CUFF.137598.1"; class_code "u"; tss_id "TSS139450";\n+38\tCufflinks\texon\t198591\t198879\t.\t+\t.\tgene_id "XLOC_090594"; transcript_id "TCONS_00231325"; exon_number "1"; gene_name "ENSCAFG00000009462"; oId "ENSCAFT00000015041"; nearest_ref "ENSCAFT00000015041"; class_code "="; 
tss_id "TSS139451"; p_id "P17340";\n+38\tCufflinks\texon\t206406\t207385\t.\t+\t.\tgene_id "XLOC_090594"; transcript_id "TCONS_00231325"; exon_number "2"; gene_name "ENSCAFG00000009462"; oId "ENSCAFT00000015041"; nearest_re'..b'34.1"; class_code "u"; tss_id "TSS141665";\n+38\tCufflinks\texon\t22849630\t22851719\t.\t.\t.\tgene_id "XLOC_092133"; transcript_id "TCONS_00234757"; exon_number "1"; oId "CUFF.139733.1"; class_code "u"; tss_id "TSS141666";\n+38\tCufflinks\texon\t22866990\t22870409\t.\t.\t.\tgene_id "XLOC_092134"; transcript_id "TCONS_00234758"; exon_number "1"; oId "CUFF.139987.1"; class_code "u"; tss_id "TSS141667";\n+38\tCufflinks\texon\t22935500\t22936872\t.\t.\t.\tgene_id "XLOC_092135"; transcript_id "TCONS_00234759"; exon_number "1"; oId "CUFF.139737.1"; class_code "u"; tss_id "TSS141668";\n+38\tCufflinks\texon\t22961460\t22962534\t.\t.\t.\tgene_id "XLOC_092136"; transcript_id "TCONS_00234760"; exon_number "1"; oId "CUFF.139739.1"; class_code "u"; tss_id "TSS141669";\n+38\tCufflinks\texon\t22962604\t22963596\t.\t.\t.\tgene_id "XLOC_092137"; transcript_id "TCONS_00234761"; exon_number "1"; oId "CUFF.139747.1"; class_code "u"; tss_id "TSS141670";\n+38\tCufflinks\texon\t22968648\t22969821\t.\t.\t.\tgene_id "XLOC_092138"; transcript_id "TCONS_00234762"; exon_number "1"; oId "CUFF.139743.1"; class_code "u"; tss_id "TSS141671";\n+38\tCufflinks\texon\t22971758\t22973782\t.\t.\t.\tgene_id "XLOC_092139"; transcript_id "TCONS_00234763"; exon_number "1"; oId "CUFF.139745.1"; class_code "u"; tss_id "TSS141672";\n+38\tCufflinks\texon\t22977740\t22978736\t.\t.\t.\tgene_id "XLOC_092140"; transcript_id "TCONS_00234764"; exon_number "1"; oId "CUFF.139746.1"; class_code "u"; tss_id "TSS141673";\n+38\tCufflinks\texon\t22979216\t22980386\t.\t.\t.\tgene_id "XLOC_092141"; transcript_id "TCONS_00234765"; exon_number "1"; oId "CUFF.139742.1"; class_code "u"; tss_id "TSS141674";\n+38\tCufflinks\texon\t23010336\t23010700\t.\t.\t.\tgene_id "XLOC_092142"; transcript_id 
"TCONS_00234766"; exon_number "1"; oId "CUFF.139748.1"; class_code "u"; tss_id "TSS141675";\n+38\tCufflinks\texon\t23041588\t23043051\t.\t.\t.\tgene_id "XLOC_092143"; transcript_id "TCONS_00234767"; exon_number "1"; oId "CUFF.139750.1"; class_code "u"; tss_id "TSS141676";\n+38\tCufflinks\texon\t23135896\t23137064\t.\t.\t.\tgene_id "XLOC_092144"; transcript_id "TCONS_00234768"; exon_number "1"; oId "CUFF.139752.1"; class_code "u"; tss_id "TSS141677";\n+38\tCufflinks\texon\t23173434\t23174233\t.\t.\t.\tgene_id "XLOC_092145"; transcript_id "TCONS_00234769"; exon_number "1"; oId "CUFF.139753.1"; class_code "u"; tss_id "TSS141678";\n+38\tCufflinks\texon\t23245346\t23246102\t.\t.\t.\tgene_id "XLOC_092146"; transcript_id "TCONS_00234770"; exon_number "1"; oId "CUFF.139757.1"; class_code "u"; tss_id "TSS141679";\n+38\tCufflinks\texon\t23289127\t23289489\t.\t.\t.\tgene_id "XLOC_092147"; transcript_id "TCONS_00234771"; exon_number "1"; oId "CUFF.139759.1"; class_code "u"; tss_id "TSS141680";\n+38\tCufflinks\texon\t23301636\t23302168\t.\t.\t.\tgene_id "XLOC_092148"; transcript_id "TCONS_00234772"; exon_number "1"; oId "CUFF.139760.1"; class_code "u"; tss_id "TSS141681";\n+38\tCufflinks\texon\t23324087\t23324570\t.\t.\t.\tgene_id "XLOC_092149"; transcript_id "TCONS_00234773"; exon_number "1"; oId "CUFF.139761.1"; class_code "u"; tss_id "TSS141682";\n+38\tCufflinks\texon\t23341478\t23342032\t.\t.\t.\tgene_id "XLOC_092150"; transcript_id "TCONS_00234774"; exon_number "1"; oId "CUFF.139764.1"; class_code "u"; tss_id "TSS141683";\n+38\tCufflinks\texon\t23513561\t23516239\t.\t.\t.\tgene_id "XLOC_092151"; transcript_id "TCONS_00234775"; exon_number "1"; oId "CUFF.139772.1"; class_code "u"; tss_id "TSS141684";\n+38\tCufflinks\texon\t23522830\t23523949\t.\t.\t.\tgene_id "XLOC_092152"; transcript_id "TCONS_00234776"; exon_number "1"; oId "CUFF.139771.1"; class_code "u"; tss_id "TSS141685";\n+38\tCufflinks\texon\t23524427\t23525047\t.\t.\t.\tgene_id "XLOC_092153"; transcript_id 
"TCONS_00234777"; exon_number "1"; oId "CUFF.139767.1"; class_code "u"; tss_id "TSS141686";\n+38\tCufflinks\texon\t23629060\t23630480\t.\t.\t.\tgene_id "XLOC_092154"; transcript_id "TCONS_00234778"; exon_number "1"; oId "CUFF.139776.1"; class_code "u"; tss_id "TSS141687";\n+38\tCufflinks\texon\t23636377\t23637429\t.\t.\t.\tgene_id "XLOC_092155"; transcript_id "TCONS_00234779"; exon_number "1"; oId "CUFF.139774.1"; class_code "u"; tss_id "TSS141688";\n'