comparison PanExplorer_workflow/Perl/ProjectPAVinCircos.pl @ 1:032f6b3806a3 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:16:08 +0000
parents
children
comparison
equal deleted inserted replaced
0:3cbb01081cde 1:032f6b3806a3
1 #!/usr/bin/perl
2
3 use strict;
4
5 my $pav = $ARGV[0];
6 my $ref = $ARGV[1];
7 my $strain_info = $ARGV[2];
8 my $gff = $ARGV[3];
9 my $ordering = $ARGV[4];
10 my $outfile = $ARGV[5];
11
12 my $circos_exe = "circos/bin/circos";
13 #my $circos_exe = "circos";
14
15 my %metadata;
16 open(M,$strain_info);
17 while(my $line = <M>){
18 chomp($line);
19 my ($id,$name) = split(/\t/,$line);
20 $metadata{$id} = $name;
21 $metadata{$name} = $id;
22 }
23 close(M);
24
25
26 my $pangenome_size = 0;
27 my $nb_core_genes = 0;
28 my $nb_dispensable_genes = 0;
29 my $nb_specific_genes = 0;
30 my %cluster_of_gene;
31 my %genes_of_cluster;
32 my %tagging;
33 open(P,$pav);
34 my $firstline = <P>;
35 chomp($firstline);
36 my @infos = split(/\t/,$firstline);
37 my %samples;
38 my %hash_presence;
39 my $col;
40 for (my $j = 1; $j <= $#infos; $j++){
41 my $val = $infos[$j];
42 if ($val eq $ref){$col = $j;}
43 $samples{$j} = $val;
44 }
45 while(my $line = <P>){
46 chomp($line);
47 my @infos = split(/\t/,$line);
48 my $cluster_num = $infos[0];
49 my $clnb = $infos[0];
50 my $pos = $clnb;
51 my $pos_before = $pos - 1;
52 $pangenome_size++;
53 if (length($clnb) == 1){$clnb = "000".$clnb;}
54 elsif (length($clnb) == 2){$clnb = "00".$clnb;}
55 elsif (length($clnb) == 3){$clnb = "0".$clnb;}
56 my $name = "CLUSTER".$clnb;
57
58 # exclude lines where several genes for one strains
59 if ($line =~/,/){next;}
60
61 my $gene_ref = $infos[$col];
62 for (my $j = 1; $j <= $#infos; $j++){
63 my $gene = $infos[$j];
64 my $sample = $samples{$j};
65 if ($gene =~/\w+/){
66 $hash_presence{$gene_ref}{$sample} = 1;
67 }
68 }
69 }
70 close(P);
71
72 my $track_thin = 0.5 / scalar keys(%samples);
73 my $r0 = 0.4;
74 my $r1 = $r0+$track_thin;
75 my $highlight_block = "";
76 my %max_position;
77 open(O,$ordering);
78 <O>;
79 while(my $sample_name = <O>){
80 #foreach my $sample(keys(%samples)){
81 chomp($sample_name);
82 #my $sample_name = $samples{$sample};
83 my $sample_id = $metadata{$sample_name};
84 print "$sample_name $sample_id\n";
85 my $ref_id = $metadata{$ref};
86 open(GFF,$gff);
87 open(CIRCOS_TRACK,">$outfile.$sample_id.circos.heatmap.txt");
88 my $current_start;
89 my $current_stop;
90 my $current_chrom;
91 $r0+=$track_thin;
92 $r1+=$track_thin;
93 #my $r1_final = $r1-($track_thin/3);
94 my $r1_final = $r1;
95 $r0.="r";
96 $r1_final.="r";
97 $highlight_block .= qq~
98 <highlight>
99 file = $outfile.$sample_id.circos.heatmap.txt
100 r0 = $r0
101 r1 = $r1_final
102 </highlight>
103 ~;
104 while(my $line = <GFF>){
105 chomp($line);
106 my @infos = split(/\t/,$line);
107 if ($infos[2] eq "gene" && $line =~/ID=([^;]+);*/){
108 my $chr = $infos[0];
109 my $start = $infos[3];
110 my $end = $infos[4];
111 $current_start = $start;
112 $current_stop = $end;
113 $current_chrom = $chr;
114 if ($end > $max_position{$current_chrom}){$max_position{$current_chrom} = $end;}
115 my $gene = $1;
116 my $genelength = $end-$start;
117 my $gene_complete = $sample_name.":".$gene;
118 if ($hash_presence{$gene}{$sample_name}){
119 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n";
120 }
121 }
122 if ($infos[2] eq "CDS" && $line =~/Parent=([^;]+);*/){
123 my $gene = $1;
124 my $gene_complete = $sample_name.":".$gene;
125 if ($line =~/protein_id=([^;]+);/){
126 $gene = $1;
127 if ($hash_presence{$gene}{$sample_name}){
128 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n";
129 }
130 }
131 }
132 }
133 close(GFF);
134 close(CIRCOS_TRACK);
135 }
136 open(KARYOTYPE,">$outfile.karyotype.txt");
137 foreach my $chr(keys(%max_position)){
138 my $max = $max_position{$chr};
139 print KARYOTYPE "chr - $chr 1 0 $max black";
140 }
141 close(KARYOTYPE);
142
143 open(F,"circos_templates/circos1.conf");
144 open(CIRCOS_CONF,">$outfile.circos1.conf");
145 while(<F>){
146 if (/HIGHLIGHT_BLOCK/){print CIRCOS_CONF $highlight_block;}
147 elsif (/KARYOTYPE_PATH/){print CIRCOS_CONF "karyotype = $outfile.karyotype.txt\n";}
148 elsif (/OUTPUT_PNG/){print CIRCOS_CONF "file = $outfile\n";}
149 else{print CIRCOS_CONF $_;}
150 }
151 close(F);
152 close(CIRCOS_CONF);
153
154 system("$circos_exe -conf $outfile.circos1.conf");