Mercurial > repos > dereeper > pangenome_explorer
view PanExplorer_workflow/Perl/Naegleria/generateMauveJson.pl @ 2:97e4e3e818b6 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:48:09 +0000 |
parents | 032f6b3806a3 |
children |
line wrap: on
line source
#!/usr/bin/perl

# Print the coordinates of orthologous gene pairs shared by two annotations.
#
# Inputs (hard-coded, read from the current directory):
#   Map_annotation_ID_NFGwada.gff3.gff3  GFF file, mRNA features are indexed
#   Map_annotation_ID_NLova7.gff3.gff3   GFF file, mRNA features are indexed
#   orthofinder_matrix.txt               tab-separated matrix; the first line
#                                        is skipped as a header
#
# Output (STDOUT), one line per retained matrix row:
#   <seq1> <start1> <end1> <seq2> <start2> <end2>
# where "chr" in the sequence names is rewritten to "NF" (first gene) and
# "NL" (second gene).
#
# A matrix row is retained when every column holds a value, neither of the
# two gene cells contains a comma, both genes have a known mRNA position and
# the two sequence names differ.
#
# Command-line arguments are accepted but not used: the chromosome filter
# that once read $ARGV[0] is disabled.
#
# Earlier revisions emitted JSON records (name/start/end/strand/lcb_idx);
# that output is disabled, so the orphaned opening "[" is no longer printed.
#
# Dies if one of the input files cannot be opened.

use strict;
use warnings;

my @GFF_FILES = (
    "Map_annotation_ID_NFGwada.gff3.gff3",
    "Map_annotation_ID_NLova7.gff3.gff3",
);
my $MATRIX_FILE = "orthofinder_matrix.txt";

# Matrix columns (0-based) holding the two genes to pair.
# NOTE(review): presumably the NFGwada and NLova7 columns -- confirm against
# the matrix header.
my $GENE1_COLUMN = 1;
my $GENE2_COLUMN = 7;

# Coordinate shift applied per chromosome number. It is 0 (no shift); the
# value 1400000 was used previously and is kept here as a single knob.
my $PER_CHROMOSOME_OFFSET = 0;

# One-off snippet that dumped the labels NL37;...;NL1; and then exited. It
# used to run unconditionally, which made everything below unreachable.
# Kept for reference only.
#for (my $i=37; $i >= 1; $i--){print "NL$i;";}exit;

my %gene_positions;
foreach my $gff_file (@GFF_FILES) {
    load_mrna_positions($gff_file, \%gene_positions);
}

print_ortholog_pairs($MATRIX_FILE, \%gene_positions);

# Index the mRNA features of one GFF file.
#
#   $gff_path   path of the GFF file to read
#   $positions  hash ref filled in place: ID => [ seqid, start, end ]
#
# An ID seen again (in this file or a later one) overwrites the earlier entry.
# Dies if the file cannot be opened.
sub load_mrna_positions {
    my ($gff_path, $positions) = @_;

    open(my $gff, '<', $gff_path)
        or die "Cannot open $gff_path: $!\n";

    while (my $line = <$gff>) {
        $line =~ s/[\r\n]//g;
        my @fields = split(/\t/, $line);

        # Comment, directive and truncated lines have too few columns.
        next unless @fields >= 5 && $fields[2] eq "mRNA";

        # The trailing ";" is optional so that an ID written as the last
        # attribute is found too.
        next unless $line =~ /ID=([^;]+)/;
        my $gene = $1;

        # Stored as a list rather than a "-"-joined string, so sequence
        # names containing "-" survive intact.
        $positions->{$gene} = [ @fields[0, 3, 4] ];
    }
    close($gff);
    return;
}

# Read the ortholog matrix and print one coordinate line per retained row.
#
#   $matrix_path  path of the tab-separated matrix (first line = header)
#   $positions    hash ref built by load_mrna_positions()
#
# Rows whose genes have no known position are skipped and reported once on
# STDERR. Dies if the file cannot be opened.
sub print_ortholog_pairs {
    my ($matrix_path, $positions) = @_;

    open(my $matrix, '<', $matrix_path)
        or die "Cannot open $matrix_path: $!\n";

    # The header is only used to learn how wide a complete row is.
    my $header = <$matrix>;
    $header = '' unless defined $header;
    $header =~ s/[\r\n]//g;
    my @column_names = split(/\t/, $header);
    my $last_header_column = $#column_names;

    my $unmapped = 0;
    while (my $line = <$matrix>) {
        $line =~ s/[\r\n]//g;
        my @infos = split(/\t/, $line);

        # Every column after the first one must hold a value.
        my $nb_found = grep { /\w/ } @infos[1 .. $#infos];
        next unless $nb_found == $#infos;

        # split() drops trailing empty fields, so a row that is empty in its
        # last columns looks complete above; it is narrower than the header.
        next if $#infos < $last_header_column;

        my $gene1 = $infos[$GENE1_COLUMN];
        my $gene2 = $infos[$GENE2_COLUMN];
        next unless defined $gene1 && defined $gene2;

        # A comma means the cell lists several genes; only one-to-one
        # pairs are kept.
        next if $gene1 =~ /,/ || $gene2 =~ /,/;

        my $position1 = $positions->{$gene1};
        my $position2 = $positions->{$gene2};
        unless ($position1 && $position2) {
            $unmapped++;
            next;
        }

        my ($chr1, $start1, $end1) = @{$position1};
        my ($chr2, $start2, $end2) = @{$position2};

        # Only the second sequence name is lower-cased, as before.
        $chr2 = lc($chr2);

        next unless $chr1 ne $chr2 && $chr2 =~ /\w+/;

        if ($chr1 =~ /chr(\d+)/) {
            my $shift = $1 * $PER_CHROMOSOME_OFFSET;
            $start1 += $shift;
            $end1   += $shift;
        }
        if ($chr2 =~ /chr(\d+)/) {
            my $shift = $1 * $PER_CHROMOSOME_OFFSET;
            $start2 += $shift;
            $end2   += $shift;
        }

        $chr1 =~ s/chr/NF/g;
        $chr2 =~ s/chr/NL/g;
        print "$chr1 $start1 $end1 $chr2 $start2 $end2\n";
    }
    close($matrix);

    warn "$unmapped matrix row(s) skipped: gene ID not found in the GFF files\n"
        if $unmapped;
    return;
}