Repository 'feelnc2asko'
hg clone https://toolshed.g2.bx.psu.edu/repos/genouest/feelnc2asko

Changeset 0:e323c49b8bcc (2018-04-12)
Next changeset 1:af75f883cab4 (2018-04-12)
Commit message:
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/feelnc2asko commit 92849224db1963d090fbb25d410cc659a5449241
added:
feelnc2asko.pl
feelnc2asko.xml
test-data/completeAnnot.gff3
test-data/feelnc_lncRNA.gtf
test-data/feelnc_mRNA.gtf
test-data/initial.gff3
b
diff -r 000000000000 -r e323c49b8bcc feelnc2asko.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feelnc2asko.pl Thu Apr 12 06:05:23 2018 -0400
[
@@ -0,0 +1,147 @@
+use strict;
+use warnings;
+use Getopt::Long;
+use Bio::Tools::GFF;
+
+# Input paths, filled in from the command line:
+#   --ann  initial annotation, parsed below as GFF version 3
+#   --lnc  lncRNA annotation, parsed below as GFF version 2 (GTF)
+#   --new  new mRNA annotation, parsed below as GFF version 2 (GTF)
+my ($anngff, $lncgff, $newgff);
+
+my $usage = "Usage: $0 --ann ANNOTATION.gff3 --lnc LNCRNA.gtf --new MRNA.gtf\n";
+
+# Stop on unknown or malformed options instead of silently carrying on.
+GetOptions("ann=s" => \$anngff, "lnc=s" => \$lncgff, "new=s" => \$newgff)
+  or die $usage;
+
+# All three inputs are required: fail early with a clear message rather than
+# handing an undefined file name to the parsers.
+foreach my $required ([ann => $anngff], [lnc => $lncgff], [new => $newgff]) {
+  my ($option, $value) = @$required;
+  die "Missing required option --$option\n$usage"
+    unless defined $value && length $value;
+}
+
+# The merged annotation is written to STDOUT as GFF3.
+my $gffout = Bio::Tools::GFF->new(-fh=> \*STDOUT, -gff_version => 3);
+
+#1. the standard annotation
+# Only gene and mRNA features of the initial annotation are copied to the
+# output; every other feature type is dropped. Genes are re-identified by
+# their Name, and mRNAs are re-parented to the value of their "gene" tag.
+my $gffin = Bio::Tools::GFF->new(-file => $anngff, -gff_version => 3);
+
+while (my $feature = $gffin->next_feature()) {
+  my $type = $feature->primary_tag();
+
+  if ($type eq 'mRNA') {
+    # Replace the original Parent by the first value of the "gene" tag.
+    my @owners = $feature->get_tag_values("gene");
+    $feature->remove_tag("Parent");
+    $feature->add_tag_value("Parent", $owners[0]);
+  }
+  elsif ($type eq 'gene') {
+    # Replace the original ID by the first value of the "Name" tag.
+    my @names = $feature->get_tag_values("Name");
+    $feature->remove_tag("ID");
+    $feature->add_tag_value("ID", $names[0]);
+  }
+  else {
+    next;
+  }
+
+  # Both kept feature types are flagged as coming from the standard annotation.
+  $feature->add_tag_value("feelnc_type", "standard");
+  $gffout->write_feature($feature);
+}
+$gffin->close();
+
+
+# Gene and transcript features rebuilt from the exon records of the two GTF
+# inputs, keyed by gene_id and transcript_id respectively.
+my %genes=();
+my %transcripts=();
+
+#2. The lncRNA gtf
+# The GTF is read exon by exon; for each gene_id / transcript_id one feature
+# is kept whose coordinates are widened to span all of its exons.
+my $fncgtf = Bio::Tools::GFF->new( -file => $lncgff, -gff_version => '2' );
+
+while (my $feat = $fncgtf->next_feature()) {
+  next if ($feat->primary_tag() ne 'exon');
+
+  my $mrna = ($feat->get_tag_values('transcript_id'))[0];
+  my $gene = ($feat->get_tag_values('gene_id'))[0];
+
+  if (exists($genes{$gene})) {
+    # Widen the known gene so that it also covers this exon.
+    if ($genes{$gene}->start() > $feat->start()) {
+      $genes{$gene}->start($feat->start());
+    }
+    if ($genes{$gene}->end() < $feat->end()) {
+      $genes{$gene}->end($feat->end());
+    }
+  }
+  else {
+    # First exon of this gene: start from the exon's own coordinates.
+    my $geneft = Bio::SeqFeature::Generic->new(
+      -start       => $feat->start(),
+      -end         => $feat->end(),
+      -strand      => $feat->strand(),
+      -primary_tag => 'gene',
+      -source_tag  => $feat->source_tag(),
+      -seq_id      => $feat->seq_id());
+    # The gene must carry an ID, otherwise the Parent attribute written on its
+    # transcripts below points to a feature that has no identifier.
+    $geneft->add_tag_value("ID", $gene);
+    # No feelnc_type on the gene itself; only the transcripts are typed.
+    $genes{$gene} = $geneft;
+  }
+
+  if (exists($transcripts{$mrna})) {
+    # Widen the known transcript so that it also covers this exon.
+    if ($transcripts{$mrna}->start() > $feat->start()) {
+      $transcripts{$mrna}->start($feat->start());
+    }
+    if ($transcripts{$mrna}->end() < $feat->end()) {
+      $transcripts{$mrna}->end($feat->end());
+    }
+  }
+  else {
+    # First exon of this transcript: create it, attached to its gene.
+    my $tr = Bio::SeqFeature::Generic->new(
+      -start       => $feat->start(),
+      -end         => $feat->end(),
+      -strand      => $feat->strand(),
+      -primary_tag => 'mRNA',
+      -source_tag  => $feat->source_tag(),
+      -seq_id      => $feat->seq_id());
+    $tr->add_tag_value("ID", $mrna);
+    $tr->add_tag_value("Parent", $gene);
+    $tr->add_tag_value("feelnc_type", "lncRNA");
+    $transcripts{$mrna} = $tr;
+  }
+}
+# Release the input handle, as is done for the standard annotation.
+$fncgtf->close();
+
+#3. The new mRNA gtf
+# Same exon-by-exon aggregation as for the lncRNA file: genes and transcripts
+# are created on first sight and then stretched to cover every later exon.
+# Transcripts created here are tagged feelnc_type=new.
+my $nmgtf = Bio::Tools::GFF->new( -file => $newgff, -gff_version => '2' );
+
+while (my $exon = $nmgtf->next_feature()) {
+  next unless $exon->primary_tag() eq 'exon';
+
+  my ($tid) = $exon->get_tag_values('transcript_id');
+  my ($gid) = $exon->get_tag_values('gene_id');
+  my ($from, $to) = ($exon->start(), $exon->end());
+
+  # Register the gene the first time one of its exons is met.
+  unless (exists $genes{$gid}) {
+    my $fresh_gene = Bio::SeqFeature::Generic->new(
+      -seq_id      => $exon->seq_id(),
+      -source_tag  => $exon->source_tag(),
+      -primary_tag => 'gene',
+      -start       => $from,
+      -end         => $to,
+      -strand      => $exon->strand());
+    $fresh_gene->add_tag_value("ID", $gid);
+    $genes{$gid} = $fresh_gene;
+  }
+
+  # Stretch the gene to include this exon (no-op for a gene just created).
+  my $locus = $genes{$gid};
+  $locus->start($from) if $locus->start() > $from;
+  $locus->end($to)     if $locus->end()   < $to;
+
+  # Register the transcript the first time one of its exons is met.
+  unless (exists $transcripts{$tid}) {
+    my $fresh_tr = Bio::SeqFeature::Generic->new(
+      -seq_id      => $exon->seq_id(),
+      -source_tag  => $exon->source_tag(),
+      -primary_tag => 'mRNA',
+      -start       => $from,
+      -end         => $to,
+      -strand      => $exon->strand());
+    $fresh_tr->add_tag_value("ID", $tid);
+    $fresh_tr->add_tag_value("Parent", $gid);
+    $fresh_tr->add_tag_value("feelnc_type", "new");
+    $transcripts{$tid} = $fresh_tr;
+  }
+
+  # Stretch the transcript to include this exon (no-op if just created).
+  my $model = $transcripts{$tid};
+  $model->start($from) if $model->start() > $from;
+  $model->end($to)     if $model->end()   < $to;
+}
+
+# Write the genes and transcripts rebuilt from the GTF inputs.
+# Transcripts are first grouped under their Parent gene so that each gene is
+# written exactly once, immediately followed by all of its transcripts
+# (writing the gene once per transcript would duplicate the gene line for
+# every gene that has several transcripts).
+my %transcripts_of = ();
+foreach my $mrna (keys %transcripts) {
+  my ($parent) = $transcripts{$mrna}->get_tag_values("Parent");
+  push @{ $transcripts_of{$parent} }, $mrna;
+}
+
+# Sort by sequence, then start position, then identifier, so that the output
+# does not depend on hash iteration order.
+my @gene_order = sort {
+     $genes{$a}->seq_id() cmp $genes{$b}->seq_id()
+  || $genes{$a}->start()  <=> $genes{$b}->start()
+  || $a cmp $b
+} keys %transcripts_of;
+
+foreach my $parent (@gene_order) {
+  $gffout->write_feature($genes{$parent});
+
+  my @children = sort {
+       $transcripts{$a}->start() <=> $transcripts{$b}->start()
+    || $a cmp $b
+  } @{ $transcripts_of{$parent} };
+
+  foreach my $mrna (@children) {
+    $gffout->write_feature($transcripts{$mrna});
+  }
+}
b
diff -r 000000000000 -r e323c49b8bcc feelnc2asko.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feelnc2asko.xml Thu Apr 12 06:05:23 2018 -0400
[
@@ -0,0 +1,36 @@
+<tool id="feelnc2asko" name="Convert FeelNC GTF" version="0.1">
+    <description>to GFF3 for AskOmics</description>
+    <requirements>
+        <requirement type="package" version="1.6.924">perl-bioperl</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        perl '$__tool_directory__/feelnc2asko.pl' --ann '${anngff}' --lnc '${lncgtf}' --new '${newgtf}' > '${outgff}'
+    ]]></command>
+
+    <inputs>
+        <param format="gff" name="anngff" type="data" label="Initial annotation file" />
+        <param format="gtf" name="lncgtf" type="data" label="FeelNC lncRNA annotation" />
+        <param format="gtf" name="newgtf" type="data" label="FeelNC new mRNA annotation" />
+    </inputs>
+
+    <outputs>
+        <data format="gff" name="outgff" label="${tool.name} on ${on_string} : FeelNC GFF" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="anngff" ftype="gff" value="initial.gff3" />
+            <param name="lncgtf" ftype="gtf" value="feelnc_lncRNA.gtf" />
+            <param name="newgtf" ftype="gtf" value="feelnc_mRNA.gtf" />
+            <output name="outgff" ftype="gff" file="completeAnnot.gff3" compare="sim_size" />
+        </test>
+    </tests>
+
+    <help>
+        Generates a GFF3 file compliant with AskOmics by merging the FeelNC output files (lncRNA and new mRNA GTF) with the initial annotation file.
+    </help>
+
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r e323c49b8bcc test-data/completeAnnot.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/completeAnnot.gff3 Thu Apr 12 06:05:23 2018 -0400
b
b'@@ -0,0 +1,295 @@\n+##gff-version 3\n+GL349622\tGnomon\tgene\t60051\t61739\t.\t-\t.\tID=LOC103310714;Dbxref=GeneID:103310714;Name=LOC103310714;feelnc_type=standard;gbkey=Gene;gene=LOC103310714;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t60051\t61739\t.\t-\t.\tID=rna172;Parent=LOC103310714;Dbxref=GeneID:103310714,Genbank:XM_008189920.1;Name=XM_008189920.1;feelnc_type=standard;gbkey=mRNA;gene=LOC103310714;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=uncharacterized LOC103310714;transcript_id=XM_008189920.1\n+GL349622\tGnomon\tgene\t199052\t202572\t.\t+\t.\tID=LOC107884578;Dbxref=GeneID:107884578;Name=LOC107884578;feelnc_type=standard;gbkey=Gene;gene=LOC107884578;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t199052\t202572\t.\t+\t.\tID=rna173;Parent=LOC107884578;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;Name=XM_016806997.1;feelnc_type=standard;gbkey=mRNA;gene=LOC107884578;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 2 samples with support for all annotated introns;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\tgene\t203530\t204174\t.\t-\t.\tID=LOC107884064;Dbxref=GeneID:107884064;Name=LOC107884064;feelnc_type=standard;gbkey=Gene;gene=LOC107884064;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t203530\t204174\t.\t-\t.\tID=rna174;Parent=LOC107884064;Dbxref=GeneID:107884064,Genbank:XM_016805453.1;Name=XM_016805453.1;feelnc_type=standard;gbkey=mRNA;gene=LOC107884064;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=RNA-directed DNA polymerase from mobile element 
jockey-like;transcript_id=XM_016805453.1\n+GL349622\tGnomon\tgene\t211117\t212787\t.\t+\t.\tID=LOC103310726;Dbxref=GeneID:103310726;Name=LOC103310726;feelnc_type=standard;gbkey=Gene;gene=LOC103310726;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t211117\t212787\t.\t+\t.\tID=rna175;Parent=LOC103310726;Dbxref=GeneID:103310726,Genbank:XM_008189963.1;Name=XM_008189963.1;feelnc_type=standard;gbkey=mRNA;gene=LOC103310726;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=uncharacterized LOC103310726;transcript_id=XM_008189963.1\n+GL349622\tGnomon\tgene\t223060\t233978\t.\t+\t.\tID=LOC103310745;Dbxref=GeneID:103310745;Name=LOC103310745;feelnc_type=standard;gbkey=Gene;gene=LOC103310745;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t223060\t233978\t.\t+\t.\tID=rna176;Parent=LOC103310745;Dbxref=GeneID:103310745,Genbank:XM_008190015.1;Name=XM_008190015.1;feelnc_type=standard;gbkey=mRNA;gene=LOC103310745;model_evidence=Supporting evidence includes similarity to: 38 Proteins;product=zinc finger protein 664-like;transcript_id=XM_008190015.1\n+GL349622\tGnomon\tgene\t415564\t416313\t.\t+\t.\tID=LOC100570479;Dbxref=GeneID:100570479;Name=LOC100570479;feelnc_type=standard;gbkey=Gene;gene=LOC100570479;gene_biotype=lncRNA\n+GL349622\tGnomon\tgene\t415576\t416202\t.\t-\t.\tID=LOC107884592;Dbxref=GeneID:107884592;Name=LOC107884592;feelnc_type=standard;gbkey=Gene;gene=LOC107884592;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t415576\t416202\t.\t-\t.\tID=rna178;Parent=LOC107884592;Dbxref=GeneID:107884592,Genbank:XM_016807037.1;Name=XM_016807037.1;feelnc_type=standard;gbkey=mRNA;gene=LOC107884592;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 3 samples with support for all annotated introns;product=uncharacterized 
LOC107884592;transcript_id=XM_016807037.1\n+GL349622\tGnomon\tgene\t416907\t418413\t.\t-\t.\tID=LOC100570170;Dbxref=GeneID:100570170;Name=LOC100570170;feelnc_type=standard;gbkey=Gene;gene=LOC100570170;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t416907\t418413\t.\t-\t.\tID=rna179;Parent=LOC100570170;Dbxref=GeneID:100570170,Genbank:XM_008185349.2;Name=XM_008185349.2;feelnc_type=standard;gbkey=mRNA;gene=LOC100570170;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq '..b'%2C transcript variant X1;transcript_id=XM_003240066.3\n+GL349622\tGnomon\tmRNA\t2264010\t2275204\t.\t-\t.\tID=rna335;Parent=LOC100159605;Dbxref=GeneID:100159605,Genbank:XM_001945754.4,APHIDBASE:ACYPI000962;Name=XM_001945754.4;feelnc_type=standard;gbkey=mRNA;gene=LOC100159605;model_evidence=Supporting evidence includes similarity to: 19 ESTs%2C 1 Protein%2C and 98%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 33 samples with support for all annotated introns;product=heparan-alpha-glucosaminide N-acetyltransferase%2C transcript variant X3;transcript_id=XM_001945754.4\n+GL349622\tGnomon\tgene\t2270755\t2281456\t.\t+\t.\tID=LOC100168508;Dbxref=GeneID:100168508;Name=LOC100168508;feelnc_type=standard;gbkey=Gene;gene=LOC100168508;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t2270755\t2281456\t.\t+\t.\tID=rna336;Parent=LOC100168508;Dbxref=GeneID:100168508,Genbank:XM_008189547.2;Name=XM_008189547.2;feelnc_type=standard;gbkey=mRNA;gene=LOC100168508;model_evidence=Supporting evidence includes similarity to: 11 ESTs%2C 6 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 7 samples with support for all annotated introns;product=ATPase family AAA domain-containing protein 
3;transcript_id=XM_008189547.2\n+GL349622\tGnomon\tgene\t2282156\t2284359\t.\t-\t.\tID=LOC100165058;Dbxref=APHIDBASE:ACYPI006028,GeneID:100165058;Name=LOC100165058;feelnc_type=standard;gbkey=Gene;gene=LOC100165058;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t2282156\t2284359\t.\t-\t.\tID=rna337;Parent=LOC100165058;Dbxref=GeneID:100165058,Genbank:XM_001950091.4,APHIDBASE:ACYPI006028;Name=XM_001950091.4;feelnc_type=standard;gbkey=mRNA;gene=LOC100165058;model_evidence=Supporting evidence includes similarity to: 1 mRNA%2C 25 ESTs%2C 5 Proteins%2C and 99%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 44 samples with support for all annotated introns;product=solute carrier family 35 member B1;transcript_id=XM_001950091.4\n+GL349622\tCufflinks\tgene\t218384\t219722\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t218384\t219722\t.\t-\t1\tID=CUFF.77.1;Parent=CUFF.77;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t604709\t609482\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t604709\t609482\t.\t-\t1\tID=CUFF.110.1;Parent=CUFF.110;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t490324\t536159\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t490324\t536159\t.\t-\t1\tID=CUFF.85.1;Parent=CUFF.85;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t2021789\t2026165\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t2021789\t2026165\t.\t-\t1\tID=CUFF.135.2;Parent=CUFF.135;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t407420\t409348\t.\t+\t1\tID=CUFF.82\n+GL349622\tCufflinks\tmRNA\t407420\t409348\t.\t+\t1\tID=CUFF.82.1;Parent=CUFF.82;feelnc_type=new\n+GL349622\tCufflinks\tgene\t490324\t536159\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t533485\t536159\t.\t-\t1\tID=CUFF.85.2;Parent=CUFF.85;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t1722435\t1724318\t.\t+\t1\tID=CUFF.130\n+GL349622\tCufflinks\tmRNA\t1722435\t1724318\t.\t+\t1\tID=CUFF.130.1;Parent=CUFF.130;feelnc_type=new\n+GL349622\tCufflinks\tgene\t1200950\t1218393\t.\t+\t1\tID=CUFF.289\n+GL349622\tCufflinks\tmRNA\t1200950\t1218393
\t.\t+\t1\tID=CUFF.289.1;Parent=CUFF.289;feelnc_type=new\n+GL349622\tCufflinks\tgene\t1758962\t1760807\t.\t+\t1\tID=CUFF.164\n+GL349622\tCufflinks\tmRNA\t1758962\t1760807\t.\t+\t1\tID=CUFF.164.1;Parent=CUFF.164;feelnc_type=new\n+GL349622\tCufflinks\tgene\t1359442\t1360152\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t1359442\t1360152\t.\t-\t1\tID=CUFF.491.1;Parent=CUFF.491;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t675392\t678724\t.\t+\t1\tID=CUFF.91\n+GL349622\tCufflinks\tmRNA\t675392\t678724\t.\t+\t1\tID=CUFF.91.1;Parent=CUFF.91;feelnc_type=new\n+GL349622\tCufflinks\tgene\t2021789\t2026165\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t2021789\t2024477\t.\t-\t1\tID=CUFF.135.1;Parent=CUFF.135;feelnc_type=new\n+GL349622\tCufflinks\tgene\t400758\t405243\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t400758\t405243\t.\t-\t1\tID=CUFF.80.1;Parent=CUFF.80;feelnc_type=lncRNA\n+GL349622\tCufflinks\tgene\t1160606\t1161032\t.\t-\t1\t\n+GL349622\tCufflinks\tmRNA\t1160606\t1161032\t.\t-\t1\tID=CUFF.350.1;Parent=CUFF.350;feelnc_type=lncRNA\n'
b
diff -r 000000000000 -r e323c49b8bcc test-data/feelnc_lncRNA.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feelnc_lncRNA.gtf Thu Apr 12 06:05:23 2018 -0400
b
@@ -0,0 +1,28 @@
+GL349622 Cufflinks exon 533485 533534 882 - . gene_id "CUFF.85"; transcript_id "CUFF.85.2"; FPKM "2.3945340028"; conf_hi "2.630871"; conf_lo "2.158197"; cov "214.763323"; exon_number "1"; frac "0.489040";
+GL349622 Cufflinks exon 533586 533675 882 - . gene_id "CUFF.85"; transcript_id "CUFF.85.2"; FPKM "2.3945340028"; conf_hi "2.630871"; conf_lo "2.158197"; cov "214.763323"; exon_number "2"; frac "0.489040";
+GL349622 Cufflinks exon 535570 536159 882 - . gene_id "CUFF.85"; transcript_id "CUFF.85.2"; FPKM "2.3945340028"; conf_hi "2.630871"; conf_lo "2.158197"; cov "214.763323"; exon_number "3"; frac "0.489040";
+GL349622 Cufflinks exon 490324 490331 1000 - . gene_id "CUFF.85"; transcript_id "CUFF.85.1"; FPKM "2.7134838239"; conf_hi "2.976733"; conf_lo "2.450235"; cov "243.369608"; exon_number "1"; frac "0.510960";
+GL349622 Cufflinks exon 531740 531830 1000 - . gene_id "CUFF.85"; transcript_id "CUFF.85.1"; FPKM "2.7134838239"; conf_hi "2.976733"; conf_lo "2.450235"; cov "243.369608"; exon_number "2"; frac "0.510960";
+GL349622 Cufflinks exon 535570 536159 1000 - . gene_id "CUFF.85"; transcript_id "CUFF.85.1"; FPKM "2.7134838239"; conf_hi "2.976733"; conf_lo "2.450235"; cov "243.369608"; exon_number "3"; frac "0.510960";
+GL349622 Cufflinks exon 604709 604997 1000 - . gene_id "CUFF.110"; transcript_id "CUFF.110.1"; FPKM "0.0961002126"; conf_hi "0.135302"; conf_lo "0.056899"; cov "8.912035"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 609022 609482 1000 - . gene_id "CUFF.110"; transcript_id "CUFF.110.1"; FPKM "0.0961002126"; conf_hi "0.135302"; conf_lo "0.056899"; cov "8.912035"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 1160606 1160686 1000 - . gene_id "CUFF.350"; transcript_id "CUFF.350.1"; FPKM "0.1406859378"; conf_hi "0.225523"; conf_lo "0.055849"; cov "12.977983"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 1160750 1161032 1000 - . gene_id "CUFF.350"; transcript_id "CUFF.350.1"; FPKM "0.1406859378"; conf_hi "0.225523"; conf_lo "0.055849"; cov "12.977983"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 218384 218748 1000 - . gene_id "CUFF.77"; transcript_id "CUFF.77.1"; FPKM "0.0743883680"; conf_hi "0.114151"; conf_lo "0.034626"; cov "6.927126"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 219422 219582 1000 - . gene_id "CUFF.77"; transcript_id "CUFF.77.1"; FPKM "0.0743883680"; conf_hi "0.114151"; conf_lo "0.034626"; cov "6.927126"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 219650 219722 1000 - . gene_id "CUFF.77"; transcript_id "CUFF.77.1"; FPKM "0.0743883680"; conf_hi "0.114151"; conf_lo "0.034626"; cov "6.927126"; exon_number "3"; frac "1.000000";
+GL349622 Cufflinks exon 2021789 2021806 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "1"; frac "0.530904";
+GL349622 Cufflinks exon 2021900 2022107 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "2"; frac "0.530904";
+GL349622 Cufflinks exon 2022183 2022321 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "3"; frac "0.530904";
+GL349622 Cufflinks exon 2022426 2022632 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "4"; frac "0.530904";
+GL349622 Cufflinks exon 2022873 2023046 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "5"; frac "0.530904";
+GL349622 Cufflinks exon 2023146 2024788 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "6"; frac "0.530904";
+GL349622 Cufflinks exon 2024861 2025119 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "7"; frac "0.530904";
+GL349622 Cufflinks exon 2025186 2025371 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "8"; frac "0.530904";
+GL349622 Cufflinks exon 2025439 2025624 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "9"; frac "0.530904";
+GL349622 Cufflinks exon 2025689 2025775 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "10"; frac "0.530904";
+GL349622 Cufflinks exon 2025850 2026165 518 - . gene_id "CUFF.135"; transcript_id "CUFF.135.2"; FPKM "0.0207553118"; conf_hi "0.028907"; conf_lo "0.012604"; cov "1.968198"; exon_number "11"; frac "0.530904";
+GL349622 Cufflinks exon 1359442 1359624 1000 - . gene_id "CUFF.491"; transcript_id "CUFF.491.1"; FPKM "0.0739312720"; conf_hi "0.114941"; conf_lo "0.032922"; cov "7.121265"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 1359763 1360152 1000 - . gene_id "CUFF.491"; transcript_id "CUFF.491.1"; FPKM "0.0739312720"; conf_hi "0.114941"; conf_lo "0.032922"; cov "7.121265"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 400758 401861 1000 - . gene_id "CUFF.80"; transcript_id "CUFF.80.1"; FPKM "0.0766645216"; conf_hi "0.102680"; conf_lo "0.050649"; cov "6.048173"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 405167 405243 1000 - . gene_id "CUFF.80"; transcript_id "CUFF.80.1"; FPKM "0.0766645216"; conf_hi "0.102680"; conf_lo "0.050649"; cov "6.048173"; exon_number "2"; frac "1.000000";
b
diff -r 000000000000 -r e323c49b8bcc test-data/feelnc_mRNA.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feelnc_mRNA.gtf Thu Apr 12 06:05:23 2018 -0400
b
@@ -0,0 +1,23 @@
+GL349622 Cufflinks exon 2021789 2021806 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "1"; frac "0.469096";
+GL349622 Cufflinks exon 2021900 2022107 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "2"; frac "0.469096";
+GL349622 Cufflinks exon 2022183 2022321 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "3"; frac "0.469096";
+GL349622 Cufflinks exon 2022426 2022632 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "4"; frac "0.469096";
+GL349622 Cufflinks exon 2022873 2023046 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "5"; frac "0.469096";
+GL349622 Cufflinks exon 2023146 2023501 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "6"; frac "0.469096";
+GL349622 Cufflinks exon 2023769 2023890 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "7"; frac "0.469096";
+GL349622 Cufflinks exon 2023956 2024335 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "8"; frac "0.469096";
+GL349622 Cufflinks exon 2024402 2024477 1000 - . gene_id "CUFF.135"; transcript_id "CUFF.135.1"; FPKM "0.0400018344"; conf_hi "0.056818"; conf_lo "0.023185"; cov "3.793320"; exon_number "9"; frac "0.469096";
+GL349622 Cufflinks exon 1758962 1759051 1000 + . gene_id "CUFF.164"; transcript_id "CUFF.164.1"; FPKM "0.0615172631"; conf_hi "0.082146"; conf_lo "0.040889"; cov "5.925513"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 1759303 1759407 1000 + . gene_id "CUFF.164"; transcript_id "CUFF.164.1"; FPKM "0.0615172631"; conf_hi "0.082146"; conf_lo "0.040889"; cov "5.925513"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 1759497 1760705 1000 + . gene_id "CUFF.164"; transcript_id "CUFF.164.1"; FPKM "0.0615172631"; conf_hi "0.082146"; conf_lo "0.040889"; cov "5.925513"; exon_number "3"; frac "1.000000";
+GL349622 Cufflinks exon 1760786 1760807 1000 + . gene_id "CUFF.164"; transcript_id "CUFF.164.1"; FPKM "0.0615172631"; conf_hi "0.082146"; conf_lo "0.040889"; cov "5.925513"; exon_number "4"; frac "1.000000";
+GL349622 Cufflinks exon 1722435 1722497 1000 + . gene_id "CUFF.130"; transcript_id "CUFF.130.1"; FPKM "0.0544636688"; conf_hi "0.084675"; conf_lo "0.024253"; cov "4.870514"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 1723677 1724318 1000 + . gene_id "CUFF.130"; transcript_id "CUFF.130.1"; FPKM "0.0544636688"; conf_hi "0.084675"; conf_lo "0.024253"; cov "4.870514"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 1200950 1209113 1000 + . gene_id "CUFF.289"; transcript_id "CUFF.289.1"; FPKM "1.5619807989"; conf_hi "1.636876"; conf_lo "1.487085"; cov "150.378205"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 1217207 1218393 1000 + . gene_id "CUFF.289"; transcript_id "CUFF.289.1"; FPKM "1.5619807989"; conf_hi "1.636876"; conf_lo "1.487085"; cov "150.378205"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 407420 407912 1000 + . gene_id "CUFF.82"; transcript_id "CUFF.82.1"; FPKM "0.0426234238"; conf_hi "0.061439"; conf_lo "0.023808"; cov "4.079788"; exon_number "1"; frac "0.545551";
+GL349622 Cufflinks exon 408138 409348 1000 + . gene_id "CUFF.82"; transcript_id "CUFF.82.1"; FPKM "0.0426234238"; conf_hi "0.061439"; conf_lo "0.023808"; cov "4.079788"; exon_number "2"; frac "0.545551";
+GL349622 Cufflinks exon 675392 675441 1000 + . gene_id "CUFF.91"; transcript_id "CUFF.91.1"; FPKM "0.0251218709"; conf_hi "0.036719"; conf_lo "0.013525"; cov "2.419808"; exon_number "1"; frac "1.000000";
+GL349622 Cufflinks exon 675736 676224 1000 + . gene_id "CUFF.91"; transcript_id "CUFF.91.1"; FPKM "0.0251218709"; conf_hi "0.036719"; conf_lo "0.013525"; cov "2.419808"; exon_number "2"; frac "1.000000";
+GL349622 Cufflinks exon 676964 677863 1000 + . gene_id "CUFF.91"; transcript_id "CUFF.91.1"; FPKM "0.0251218709"; conf_hi "0.036719"; conf_lo "0.013525"; cov "2.419808"; exon_number "3"; frac "1.000000";
+GL349622 Cufflinks exon 678290 678724 1000 + . gene_id "CUFF.91"; transcript_id "CUFF.91.1"; FPKM "0.0251218709"; conf_hi "0.036719"; conf_lo "0.013525"; cov "2.419808"; exon_number "4"; frac "1.000000";
b
diff -r 000000000000 -r e323c49b8bcc test-data/initial.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/initial.gff3 Thu Apr 12 06:05:23 2018 -0400
b
b'@@ -0,0 +1,3069 @@\n+GL349622\tGnomon\tgene\t60051\t61739\t.\t-\t.\tID=gene116;Dbxref=GeneID:103310714;Name=LOC103310714;gbkey=Gene;gene=LOC103310714;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t60051\t61739\t.\t-\t.\tID=rna172;Parent=gene116;Dbxref=GeneID:103310714,Genbank:XM_008189920.1;Name=XM_008189920.1;gbkey=mRNA;gene=LOC103310714;model_evidence=Supporting evidence includes similarity to: 1 Protein;product=uncharacterized LOC103310714;transcript_id=XM_008189920.1\n+GL349622\tGnomon\texon\t61677\t61739\t.\t-\t.\tID=id1586;Parent=rna172;Dbxref=GeneID:103310714,Genbank:XM_008189920.1;gbkey=mRNA;gene=LOC103310714;product=uncharacterized LOC103310714;transcript_id=XM_008189920.1\n+GL349622\tGnomon\texon\t60986\t61132\t.\t-\t.\tID=id1587;Parent=rna172;Dbxref=GeneID:103310714,Genbank:XM_008189920.1;gbkey=mRNA;gene=LOC103310714;product=uncharacterized LOC103310714;transcript_id=XM_008189920.1\n+GL349622\tGnomon\texon\t60051\t60518\t.\t-\t.\tID=id1588;Parent=rna172;Dbxref=GeneID:103310714,Genbank:XM_008189920.1;gbkey=mRNA;gene=LOC103310714;product=uncharacterized LOC103310714;transcript_id=XM_008189920.1\n+GL349622\tGnomon\tCDS\t61677\t61739\t.\t-\t0\tID=cds159;Parent=rna172;Dbxref=GeneID:103310714,Genbank:XP_008188142.1;Name=XP_008188142.1;gbkey=CDS;gene=LOC103310714;product=uncharacterized protein LOC103310714;protein_id=XP_008188142.1\n+GL349622\tGnomon\tCDS\t60986\t61132\t.\t-\t0\tID=cds159;Parent=rna172;Dbxref=GeneID:103310714,Genbank:XP_008188142.1;Name=XP_008188142.1;gbkey=CDS;gene=LOC103310714;product=uncharacterized protein LOC103310714;protein_id=XP_008188142.1\n+GL349622\tGnomon\tCDS\t60051\t60518\t.\t-\t0\tID=cds159;Parent=rna172;Dbxref=GeneID:103310714,Genbank:XP_008188142.1;Name=XP_008188142.1;gbkey=CDS;gene=LOC103310714;product=uncharacterized protein 
LOC103310714;protein_id=XP_008188142.1\n+GL349622\tGnomon\tgene\t199052\t202572\t.\t+\t.\tID=gene117;Dbxref=GeneID:107884578;Name=LOC107884578;gbkey=Gene;gene=LOC107884578;gene_biotype=protein_coding\n+GL349622\tGnomon\tmRNA\t199052\t202572\t.\t+\t.\tID=rna173;Parent=gene117;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;Name=XM_016806997.1;gbkey=mRNA;gene=LOC107884578;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 2 samples with support for all annotated introns;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t199052\t199675\t.\t+\t.\tID=id1589;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t199809\t199934\t.\t+\t.\tID=id1590;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t200024\t200167\t.\t+\t.\tID=id1591;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t200266\t200325\t.\t+\t.\tID=id1592;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t200605\t200864\t.\t+\t.\tID=id1593;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 
1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t200928\t201090\t.\t+\t.\tID=id1594;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622\tGnomon\texon\t202380\t202572\t.\t+\t.\tID=id1595;Parent=rna173;Dbxref=GeneID:107884578,Genbank:XM_016806997.1;gbkey=mRNA;gene=LOC107884578;product=transcription initiation factor TFIID subunit 1-like;transcript_id=XM_016806997.1\n+GL349622'..b'or_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1396235\t1396450\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 805 1020 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1395198\t1395401\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 1021 1224 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1394967\t1395141\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 1225 1399 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1394737\t1394892\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 1400 1555 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1392808\t1392927\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 1556 1675 
+;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1392655\t1392741\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 1676 1762 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1392068\t1392569\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 1763 2264 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1389062\t1389156\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 2265 2359 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t1388356\t1388994\t.\t-\t.\tID=e3be8e62-36cf-44c9-ab71-fafcb9323c9a;Target=XM_001942814.4 2360 2998 +;for_remapping=2;gap_count=0;num_ident=2849;num_mismatch=0;pct_coverage=95.03;pct_coverage_hiqual=95.03;pct_identity_gap=100;pct_identity_ungap=100;rank=1\n+GL349622\tRefSeq\tcDNA_match\t2123429\t2123504\t76\t+\t.\tID=1ddd2e4d-692f-4692-bdb2-fa470cb8e6c5;Target=NM_001246122.1 3 78 +;consensus_splices=4;exon_identity=0.995992;for_remapping=2;gap_count=2;identity=0.992016;idty=1;matches=497;num_ident=497;num_mismatch=0;pct_coverage=99.2016;pct_coverage_hiqual=99.2016;pct_identity_gap=99.5992;pct_identity_ungap=100;product_coverage=0.996008;rank=1;score=76;splices=4;weighted_identity=0.992481\n+GL349622\tRefSeq\tcDNA_match\t2123585\t2123694\t110\t+\t.\tID=1ddd2e4d-692f-4692-bdb2-fa470cb8e6c5;Target=NM_001246122.1 79 188 
+;consensus_splices=4;exon_identity=0.995992;for_remapping=2;gap_count=2;identity=0.992016;idty=1;matches=497;num_ident=497;num_mismatch=0;pct_coverage=99.2016;pct_coverage_hiqual=99.2016;pct_identity_gap=99.5992;pct_identity_ungap=100;product_coverage=0.996008;rank=1;score=110;splices=4;weighted_identity=0.992481\n+GL349622\tRefSeq\tcDNA_match\t2123766\t2124076\t304.514\t+\t.\tID=1ddd2e4d-692f-4692-bdb2-fa470cb8e6c5;Target=NM_001246122.1 189 501 +;consensus_splices=4;exon_identity=0.995992;for_remapping=2;gap_count=2;identity=0.992016;idty=0.99361;matches=497;num_ident=497;num_mismatch=0;pct_coverage=99.2016;pct_coverage_hiqual=99.2016;pct_identity_gap=99.5992;pct_identity_ungap=100;product_coverage=0.996008;rank=1;score=304.514;splices=4;weighted_identity=0.992481;Gap=M238 I1 M60 I1 M13\n'