diff Perl/Naegleria/generateMauveJson.pl @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Perl/Naegleria/generateMauveJson.pl	Thu May 30 11:52:25 2024 +0000
@@ -0,0 +1,98 @@
+#!/usr/bin/perl
+
+use strict;
+
+my $chrom_focus = $ARGV[0];
+
+for (my $i=37; $i >= 1; $i--){print "NL$i;";}exit;
+
+print "[\n";
+my %gene_positions;
+open(G,"Map_annotation_ID_NFGwada.gff3.gff3");
+while(<G>){
+	my $line = $_;
+	$line =~s/\n//g;$line =~s/\r//g;
+	my @infos = split("\t",$line);
+	if ($infos[2] eq "mRNA" && /ID=([^;]+);/){
+		my $gene = $1;
+		my $start = $infos[3];
+		my $end = $infos[4];
+		my $chr = $infos[0];
+		#if ($chrom_focus && $chrom_focus ne $chr){next;}
+		$gene_positions{$gene} = "$chr-$start-$end";
+	}
+}
+close(G);
+
+open(G,"Map_annotation_ID_NLova7.gff3.gff3");
+while(<G>){
+        my $line = $_;
+        $line =~s/\n//g;$line =~s/\r//g;
+        my @infos = split("\t",$line);
+        if ($infos[2] eq "mRNA" && /ID=([^;]+);/){
+                my $gene = $1;
+                my $start = $infos[3];
+                my $end = $infos[4];
+                my $chr = $infos[0];
+		#if ($chrom_focus && $chrom_focus ne $chr){next;}
+                $gene_positions{$gene} = "$chr-$start-$end";
+        }
+}
+close(G);
+
+my $lines = "";
+open(F,"orthofinder_matrix.txt");
+<F>;
+while(<F>){
+	my $line = $_;
+	$line =~s/\n//g;$line =~s/\r//g;
+	my @infos = split("\t",$line);
+	my $nb_found = 0;
+	my $index = 0;
+	for (my $i = 1; $i <= $#infos; $i++){
+		my $val = $infos[$i];
+		if ($val =~/\w+/){
+			$nb_found++;
+			$index = $i;
+		}
+	}
+	if ($nb_found == 1){
+		#print "$index\n";
+	}
+	#next;
+	if ($nb_found == $#infos){
+		my $gene1 = $infos[1];
+		my $gene2 = $infos[7];
+		if ($gene1 !~/,/ && $gene2 !~/,/){
+			my ($chr1,$start1,$end1) = split(/-/,$gene_positions{$gene1});
+			my ($chr2,$start2,$end2) = split(/-/,$gene_positions{$gene2});
+			$chr2 = lc($chr2);
+			#if ($chr1 eq $chr2 && $chr1 eq "$chrom_focus"){
+			if ($chr1 ne $chr2 && $chr2 =~/\w+/){
+			#if ($chr1 eq $chr2){
+				if ($chr1=~/chr(\d+)/){
+					#my $nb = $1 * 1400000;
+					my $nb = 0;
+					$start1 += $nb;
+					$end1 += $nb;
+				}
+				if ($chr2=~/chr(\d+)/){
+                                        #my $nb = $1 * 1400000;
+					my $nb = 0;
+                                        $start2 += $nb;
+                                        $end2 += $nb;
+                                }	
+			#[{"name": "574556.4.fasta","start": 717247,"end": 718620,"strand": "-","lcb_idx": 1},
+				#$lines .= "[{\"name\": \"1.fasta\",\"start\": $start1,\"end\": $end1,\"strand\": \"-\",\"lcb_idx\": 1},{\"name\": \"2.fasta\",\"start\": $start2,\"end\": $end2,\"strand\": \"-\",\"lcb_idx\": 2}],\n";
+				$chr1 =~s/chr/NF/g;
+				$chr2 =~s/chr/NL/g;
+				print "$chr1 $start1 $end1 $chr2 $start2 $end2\n";
+			}
+		}
+	}
+}
+close(F);
+
+chop($lines);
+chop($lines);
+#print "$lines\n]";