diff feelnc2asko.pl @ 0:e323c49b8bcc draft

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/feelnc2asko commit 92849224db1963d090fbb25d410cc659a5449241
author genouest
date Thu, 12 Apr 2018 06:05:23 -0400
parents
children af75f883cab4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/feelnc2asko.pl	Thu Apr 12 06:05:23 2018 -0400
@@ -0,0 +1,149 @@
+use strict;
+use warnings;
+use Getopt::Long;
+use Bio::Tools::GFF;
+use Bio::SeqFeature::Generic;    # instantiated directly in new_span_feature()
+
+# Merge a standard annotation (GFF3) with two GTF files (lncRNA and new mRNA;
+# only their exon records are used) into one GFF3 stream of gene and mRNA
+# features on STDOUT.
+#
+# Usage:
+#   perl feelnc2asko.pl --ann ANNOTATION.gff3 --lnc LNCRNA.gtf --new MRNA.gtf
+#
+# Features copied from the annotation are tagged feelnc_type=standard; mRNAs
+# rebuilt from the GTF files are tagged feelnc_type=lncRNA or feelnc_type=new.
+
+my ($anngff, $lncgff, $newgff);
+
+# Abort on a malformed command line rather than running with partial options.
+GetOptions("ann=s" => \$anngff, "lnc=s" => \$lncgff, "new=s" => \$newgff)
+    or die "Usage: $0 --ann ANNOTATION.gff3 --lnc LNCRNA.gtf --new MRNA.gtf\n";
+
+my $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3);
+
+#1. the standard annotation
+# Only gene and mRNA features are kept; every other feature type is dropped.
+my $gffin = Bio::Tools::GFF->new(-file => $anngff, -gff_version => 3);
+
+while (my $feature = $gffin->next_feature()) {
+    my $type = $feature->primary_tag();
+
+    if ($type eq 'mRNA') {
+        # Point the transcript at its gene through the value of its "gene" tag.
+        my ($gene) = $feature->get_tag_values("gene");
+        replace_tag($feature, "Parent", $gene);
+    }
+    elsif ($type eq 'gene') {
+        # Identify the gene by its "Name" tag; this matches the Parent set on
+        # mRNAs above when "Name" and the mRNA "gene" tag hold the same value.
+        my ($name) = $feature->get_tag_values("Name");
+        replace_tag($feature, "ID", $name);
+    }
+    else {
+        next;
+    }
+
+    $feature->add_tag_value("feelnc_type", "standard");
+    $gffout->write_feature($feature);
+}
+$gffin->close();
+
+# Gene and mRNA features rebuilt from the GTF exons, keyed by gene_id and
+# transcript_id; both GTF files feed the same two tables.
+my %genes       = ();
+my %transcripts = ();
+
+#2. The lncRNA gtf
+collect_gtf_models($lncgff, "lncRNA", \%genes, \%transcripts);
+
+#3. The new mRNA gtf
+collect_gtf_models($newgff, "new", \%genes, \%transcripts);
+
+#4. Write the rebuilt models
+# Group transcripts under their parent so that each gene is written exactly
+# once, directly followed by its mRNAs; sorting keeps the output reproducible.
+my %mrnas_of = ();
+foreach my $mrna (keys %transcripts) {
+    my ($parent) = $transcripts{$mrna}->get_tag_values("Parent");
+    push @{ $mrnas_of{$parent} }, $mrna;
+}
+foreach my $gene (sort keys %mrnas_of) {
+    $gffout->write_feature($genes{$gene});
+    foreach my $mrna (sort @{ $mrnas_of{$gene} }) {
+        $gffout->write_feature($transcripts{$mrna});
+    }
+}
+
+# Set $tag to the single value $value on $feature, dropping previous values.
+# has_tag() guards the removal so a feature without the tag is handled too.
+sub replace_tag {
+    my ($feature, $tag, $value) = @_;
+    $feature->remove_tag($tag) if $feature->has_tag($tag);
+    $feature->add_tag_value($tag, $value);
+    return;
+}
+
+# Build a new feature of type $primary_tag with the coordinates, strand,
+# source and sequence id of $exon.
+sub new_span_feature {
+    my ($exon, $primary_tag) = @_;
+    return Bio::SeqFeature::Generic->new(
+        -start       => $exon->start(),
+        -end         => $exon->end(),
+        -strand      => $exon->strand(),
+        -primary_tag => $primary_tag,
+        -source_tag  => $exon->source_tag(),
+        -seq_id      => $exon->seq_id());
+}
+
+# Widen $feature, if needed, so that its start..end range also covers $exon.
+sub extend_span {
+    my ($feature, $exon) = @_;
+    $feature->start($exon->start()) if $feature->start() > $exon->start();
+    $feature->end($exon->end())     if $feature->end() < $exon->end();
+    return;
+}
+
+# Read the GTF file $path and fold every exon record into the gene and mRNA
+# tables.
+#   $path        - GTF file to read (parsed with -gff_version 2)
+#   $feelnc_type - value of the feelnc_type tag put on mRNAs first seen here
+#   $genes       - hash ref, gene_id => gene feature (updated in place)
+#   $transcripts - hash ref, transcript_id => mRNA feature (updated in place)
+# A gene or mRNA starts as the span of its first exon and grows to cover all
+# later exons sharing the same id. Only mRNAs receive the feelnc_type tag.
+sub collect_gtf_models {
+    my ($path, $feelnc_type, $genes, $transcripts) = @_;
+
+    my $gtf = Bio::Tools::GFF->new(-file => $path, -gff_version => '2');
+
+    while (my $exon = $gtf->next_feature()) {
+        next if ($exon->primary_tag() ne 'exon');
+
+        my ($mrna_id) = $exon->get_tag_values('transcript_id');
+        my ($gene_id) = $exon->get_tag_values('gene_id');
+
+        if (exists($genes->{$gene_id})) {
+            extend_span($genes->{$gene_id}, $exon);
+        }
+        else {
+            my $gene = new_span_feature($exon, 'gene');
+            # Every gene needs an ID: the mRNAs below refer to it via Parent.
+            $gene->add_tag_value("ID", $gene_id);
+            $genes->{$gene_id} = $gene;
+        }
+
+        if (exists($transcripts->{$mrna_id})) {
+            extend_span($transcripts->{$mrna_id}, $exon);
+        }
+        else {
+            my $mrna = new_span_feature($exon, 'mRNA');
+            $mrna->add_tag_value("ID", $mrna_id);
+            $mrna->add_tag_value("Parent", $gene_id);
+            $mrna->add_tag_value("feelnc_type", $feelnc_type);
+            $transcripts->{$mrna_id} = $mrna;
+        }
+    }
+    $gtf->close();
+    return;
+}