annotate Perl/ProjectPAVinCircos.pl @ 5:4157f435fa89 draft

Uploaded
author dereeper
date Thu, 30 May 2024 12:11:25 +0000
parents e42d30da7a74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
2
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
3 use strict;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
4
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
5 my $pav = $ARGV[0];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
6 my $ref = $ARGV[1];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
7 my $strain_info = $ARGV[2];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
8 my $gff = $ARGV[3];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
9 my $ordering = $ARGV[4];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
10 my $outfile = $ARGV[5];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
11
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
12 my $circos_exe = "circos/bin/circos";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
13 #my $circos_exe = "circos";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
14
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
15 my %metadata;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
16 open(M,$strain_info);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
17 while(my $line = <M>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
18 chomp($line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
19 my ($id,$name) = split(/\t/,$line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
20 $metadata{$id} = $name;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
21 $metadata{$name} = $id;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
22 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
23 close(M);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
24
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
25
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
26 my $pangenome_size = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
27 my $nb_core_genes = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
28 my $nb_dispensable_genes = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
29 my $nb_specific_genes = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
30 my %cluster_of_gene;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
31 my %genes_of_cluster;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
32 my %tagging;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
33 open(P,$pav);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
34 my $firstline = <P>;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
35 chomp($firstline);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
36 my @infos = split(/\t/,$firstline);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
37 my %samples;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
38 my %hash_presence;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
39 my $col;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
40 for (my $j = 1; $j <= $#infos; $j++){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
41 my $val = $infos[$j];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
42 if ($val eq $ref){$col = $j;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
43 $samples{$j} = $val;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
44 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
45 while(my $line = <P>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
46 chomp($line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
47 my @infos = split(/\t/,$line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
48 my $cluster_num = $infos[0];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
49 my $clnb = $infos[0];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
50 my $pos = $clnb;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
51 my $pos_before = $pos - 1;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
52 $pangenome_size++;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
53 if (length($clnb) == 1){$clnb = "000".$clnb;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
54 elsif (length($clnb) == 2){$clnb = "00".$clnb;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
55 elsif (length($clnb) == 3){$clnb = "0".$clnb;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
56 my $name = "CLUSTER".$clnb;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
57
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
58 # exclude lines where several genes for one strains
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
59 if ($line =~/,/){next;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
60
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
61 my $gene_ref = $infos[$col];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
62 for (my $j = 1; $j <= $#infos; $j++){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
63 my $gene = $infos[$j];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
64 my $sample = $samples{$j};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
65 if ($gene =~/\w+/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
66 $hash_presence{$gene_ref}{$sample} = 1;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
67 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
68 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
69 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
70 close(P);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
71
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
72 my $track_thin = 0.5 / scalar keys(%samples);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
73 my $r0 = 0.4;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
74 my $r1 = $r0+$track_thin;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
75 my $highlight_block = "";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
76 my %max_position;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
77 open(O,$ordering);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
78 <O>;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
79 while(my $sample_name = <O>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
80 #foreach my $sample(keys(%samples)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
81 chomp($sample_name);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
82 #my $sample_name = $samples{$sample};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
83 my $sample_id = $metadata{$sample_name};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
84 print "$sample_name $sample_id\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
85 my $ref_id = $metadata{$ref};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
86 open(GFF,$gff);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
87 open(CIRCOS_TRACK,">$outfile.$sample_id.circos.heatmap.txt");
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
88 my $current_start;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
89 my $current_stop;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
90 my $current_chrom;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
91 $r0+=$track_thin;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
92 $r1+=$track_thin;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
93 #my $r1_final = $r1-($track_thin/3);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
94 my $r1_final = $r1;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
95 $r0.="r";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
96 $r1_final.="r";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
97 $highlight_block .= qq~
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
98 <highlight>
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
99 file = $outfile.$sample_id.circos.heatmap.txt
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
100 r0 = $r0
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
101 r1 = $r1_final
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
102 </highlight>
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
103 ~;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
104 while(my $line = <GFF>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
105 chomp($line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
106 my @infos = split(/\t/,$line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
107 if ($infos[2] eq "gene" && $line =~/ID=([^;]+);*/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
108 my $chr = $infos[0];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
109 my $start = $infos[3];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
110 my $end = $infos[4];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
111 $current_start = $start;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
112 $current_stop = $end;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
113 $current_chrom = $chr;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
114 if ($end > $max_position{$current_chrom}){$max_position{$current_chrom} = $end;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
115 my $gene = $1;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
116 my $genelength = $end-$start;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
117 my $gene_complete = $sample_name.":".$gene;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
118 if ($hash_presence{$gene}{$sample_name}){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
119 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
120 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
121 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
122 if ($infos[2] eq "CDS" && $line =~/Parent=([^;]+);*/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
123 my $gene = $1;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
124 my $gene_complete = $sample_name.":".$gene;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
125 if ($line =~/protein_id=([^;]+);/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
126 $gene = $1;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
127 if ($hash_presence{$gene}{$sample_name}){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
128 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
129 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
130 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
131 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
132 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
133 close(GFF);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
134 close(CIRCOS_TRACK);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
135 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
136 open(KARYOTYPE,">$outfile.karyotype.txt");
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
137 foreach my $chr(keys(%max_position)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
138 my $max = $max_position{$chr};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
139 print KARYOTYPE "chr - $chr 1 0 $max black";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
140 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
141 close(KARYOTYPE);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
142
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
143 open(F,"circos_templates/circos1.conf");
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
144 open(CIRCOS_CONF,">$outfile.circos1.conf");
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
145 while(<F>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
146 if (/HIGHLIGHT_BLOCK/){print CIRCOS_CONF $highlight_block;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
147 elsif (/KARYOTYPE_PATH/){print CIRCOS_CONF "karyotype = $outfile.karyotype.txt\n";}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
148 elsif (/OUTPUT_PNG/){print CIRCOS_CONF "file = $outfile\n";}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
149 else{print CIRCOS_CONF $_;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
150 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
151 close(F);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
152 close(CIRCOS_CONF);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
153
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
154 system("$circos_exe -conf $outfile.circos1.conf");