annotate Perl/Naegleria/generateMauveJson.pl @ 6:35d71348e81e draft

Uploaded
author dereeper
date Thu, 30 May 2024 12:12:30 +0000
parents e42d30da7a74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
#!/usr/bin/perl
#
# Prints the coordinates of ortholog pairs taken from an orthology matrix.
#
# Inputs (fixed file names, read from the current directory):
#   - two GFF3 annotation files, from which the position of every "mRNA"
#     feature is recorded under its ID attribute;
#   - a tab-separated matrix with one header line; column 0 is not inspected,
#     the remaining columns hold gene identifiers.
#
# Output (STDOUT): one line per retained matrix row,
#   "<chr1> <start1> <end1> <chr2> <start2> <end2>"
# where "chr" is rewritten to "NF" in the first name and to "NL" in the second.
#
# Usage: generateMauveJson.pl [chrom_focus]

use strict;
use warnings;

# Accepted for command-line compatibility; no filtering is applied on it.
my $chrom_focus = $ARGV[0];

# Input files.
my $first_gff    = 'Map_annotation_ID_NFGwada.gff3.gff3';
my $second_gff   = 'Map_annotation_ID_NLova7.gff3.gff3';
my $ortho_matrix = 'orthofinder_matrix.txt';

# Matrix columns (0-based) holding the two genes to compare.
my $first_column  = 1;
my $second_column = 7;

# mRNA ID => [ seqid, start, end ]; both annotation files share one map, so an
# ID present in both files keeps the position read from the second file.
my %position_of;
load_mrna_positions($first_gff,  \%position_of);
load_mrna_positions($second_gff, \%position_of);

print_ortholog_pairs($ortho_matrix, \%position_of, $first_column, $second_column);

# Records the position of every mRNA feature of a GFF3 file.
#   $path        - GFF3 file to read
#   $position_of - hash ref filled with  ID => [ seqid, start, end ]
# Dies if the file cannot be opened.
sub load_mrna_positions {
    my ($path, $position_of) = @_;

    open(my $gff, '<', $path) or die "Cannot open '$path': $!";
    while (my $line = <$gff>) {
        $line =~ s/[\r\n]//g;
        my @fields = split /\t/, $line;

        # Directives, comments and truncated records have no type/coordinates.
        next if @fields < 5;
        next if $fields[2] ne 'mRNA';

        # No trailing ';' is required, so an ID placed last is still found.
        if ($line =~ /ID=([^;]+)/) {
            my $mrna_id = $1;
            # Kept as a list rather than a joined string so that sequence
            # names containing '-' are stored intact.
            $position_of->{$mrna_id} = [ @fields[0, 3, 4] ];
        }
    }
    close($gff);
    return;
}

# Prints one coordinate line per matrix row that passes every filter below.
#   $path          - tab-separated matrix; its first line is skipped
#   $position_of   - hash ref built by load_mrna_positions()
#   $first_column  - 0-based column of the first gene
#   $second_column - 0-based column of the second gene
# Dies if the file cannot be opened.
sub print_ortholog_pairs {
    my ($path, $position_of, $first_column, $second_column) = @_;

    open(my $matrix, '<', $path) or die "Cannot open '$path': $!";
    <$matrix>;    # discard the header line

    ROW:
    while (my $line = <$matrix>) {
        $line =~ s/[\r\n]//g;
        my @cells = split /\t/, $line;

        # Both gene columns must exist in this row.
        next ROW if $#cells < $first_column || $#cells < $second_column;

        # Keep only rows where every column after the first is filled.
        # NOTE(review): split() drops trailing empty fields, so a row whose
        # last columns are empty is judged on the remaining ones only -
        # confirm this is acceptable for the matrix being used.
        my $nb_found = grep { /\w/ } @cells[1 .. $#cells];
        next ROW if $nb_found != $#cells;

        # A comma means several genes share the cell; keep single genes only.
        my ($gene1, $gene2) = @cells[$first_column, $second_column];
        next ROW if $gene1 =~ /,/ || $gene2 =~ /,/;

        # Without a known position for both genes there is nothing to print.
        my $position1 = $position_of->{$gene1};
        my $position2 = $position_of->{$gene2};
        next ROW unless defined $position1 && defined $position2;

        my ($chr1, $start1, $end1) = @{$position1};
        my ($chr2, $start2, $end2) = @{$position2};

        # The second name is lower-cased before the comparison and before the
        # "chr" -> "NL" rewrite below.
        $chr2 = lc $chr2;
        next ROW unless $chr1 ne $chr2 && $chr2 =~ /\w/;

        $chr1 =~ s/chr/NF/g;
        $chr2 =~ s/chr/NL/g;
        print "$chr1 $start1 $end1 $chr2 $start2 $end2\n";
    }
    close($matrix);
    return;
}