3
|
1 #!/usr/bin/perl
|
|
2
|
|
3 use strict;
|
|
4
|
|
5 my $pav = $ARGV[0];
|
|
6 my $ref = $ARGV[1];
|
|
7 my $strain_info = $ARGV[2];
|
|
8 my $gff = $ARGV[3];
|
|
9 my $ordering = $ARGV[4];
|
|
10 my $outfile = $ARGV[5];
|
|
11
|
|
12 my $circos_exe = "circos/bin/circos";
|
|
13 #my $circos_exe = "circos";
|
|
14
|
|
15 my %metadata;
|
|
16 open(M,$strain_info);
|
|
17 while(my $line = <M>){
|
|
18 chomp($line);
|
|
19 my ($id,$name) = split(/\t/,$line);
|
|
20 $metadata{$id} = $name;
|
|
21 $metadata{$name} = $id;
|
|
22 }
|
|
23 close(M);
|
|
24
|
|
25
|
|
26 my $pangenome_size = 0;
|
|
27 my $nb_core_genes = 0;
|
|
28 my $nb_dispensable_genes = 0;
|
|
29 my $nb_specific_genes = 0;
|
|
30 my %cluster_of_gene;
|
|
31 my %genes_of_cluster;
|
|
32 my %tagging;
|
|
33 open(P,$pav);
|
|
34 my $firstline = <P>;
|
|
35 chomp($firstline);
|
|
36 my @infos = split(/\t/,$firstline);
|
|
37 my %samples;
|
|
38 my %hash_presence;
|
|
39 my $col;
|
|
40 for (my $j = 1; $j <= $#infos; $j++){
|
|
41 my $val = $infos[$j];
|
|
42 if ($val eq $ref){$col = $j;}
|
|
43 $samples{$j} = $val;
|
|
44 }
|
|
45 while(my $line = <P>){
|
|
46 chomp($line);
|
|
47 my @infos = split(/\t/,$line);
|
|
48 my $cluster_num = $infos[0];
|
|
49 my $clnb = $infos[0];
|
|
50 my $pos = $clnb;
|
|
51 my $pos_before = $pos - 1;
|
|
52 $pangenome_size++;
|
|
53 if (length($clnb) == 1){$clnb = "000".$clnb;}
|
|
54 elsif (length($clnb) == 2){$clnb = "00".$clnb;}
|
|
55 elsif (length($clnb) == 3){$clnb = "0".$clnb;}
|
|
56 my $name = "CLUSTER".$clnb;
|
|
57
|
|
58 # exclude lines where several genes for one strains
|
|
59 if ($line =~/,/){next;}
|
|
60
|
|
61 my $gene_ref = $infos[$col];
|
|
62 for (my $j = 1; $j <= $#infos; $j++){
|
|
63 my $gene = $infos[$j];
|
|
64 my $sample = $samples{$j};
|
|
65 if ($gene =~/\w+/){
|
|
66 $hash_presence{$gene_ref}{$sample} = 1;
|
|
67 }
|
|
68 }
|
|
69 }
|
|
70 close(P);
|
|
71
|
|
72 my $track_thin = 0.5 / scalar keys(%samples);
|
|
73 my $r0 = 0.4;
|
|
74 my $r1 = $r0+$track_thin;
|
|
75 my $highlight_block = "";
|
|
76 my %max_position;
|
|
77 open(O,$ordering);
|
|
78 <O>;
|
|
79 while(my $sample_name = <O>){
|
|
80 #foreach my $sample(keys(%samples)){
|
|
81 chomp($sample_name);
|
|
82 #my $sample_name = $samples{$sample};
|
|
83 my $sample_id = $metadata{$sample_name};
|
|
84 print "$sample_name $sample_id\n";
|
|
85 my $ref_id = $metadata{$ref};
|
|
86 open(GFF,$gff);
|
|
87 open(CIRCOS_TRACK,">$outfile.$sample_id.circos.heatmap.txt");
|
|
88 my $current_start;
|
|
89 my $current_stop;
|
|
90 my $current_chrom;
|
|
91 $r0+=$track_thin;
|
|
92 $r1+=$track_thin;
|
|
93 #my $r1_final = $r1-($track_thin/3);
|
|
94 my $r1_final = $r1;
|
|
95 $r0.="r";
|
|
96 $r1_final.="r";
|
|
97 $highlight_block .= qq~
|
|
98 <highlight>
|
|
99 file = $outfile.$sample_id.circos.heatmap.txt
|
|
100 r0 = $r0
|
|
101 r1 = $r1_final
|
|
102 </highlight>
|
|
103 ~;
|
|
104 while(my $line = <GFF>){
|
|
105 chomp($line);
|
|
106 my @infos = split(/\t/,$line);
|
|
107 if ($infos[2] eq "gene" && $line =~/ID=([^;]+);*/){
|
|
108 my $chr = $infos[0];
|
|
109 my $start = $infos[3];
|
|
110 my $end = $infos[4];
|
|
111 $current_start = $start;
|
|
112 $current_stop = $end;
|
|
113 $current_chrom = $chr;
|
|
114 if ($end > $max_position{$current_chrom}){$max_position{$current_chrom} = $end;}
|
|
115 my $gene = $1;
|
|
116 my $genelength = $end-$start;
|
|
117 my $gene_complete = $sample_name.":".$gene;
|
|
118 if ($hash_presence{$gene}{$sample_name}){
|
|
119 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n";
|
|
120 }
|
|
121 }
|
|
122 if ($infos[2] eq "CDS" && $line =~/Parent=([^;]+);*/){
|
|
123 my $gene = $1;
|
|
124 my $gene_complete = $sample_name.":".$gene;
|
|
125 if ($line =~/protein_id=([^;]+);/){
|
|
126 $gene = $1;
|
|
127 if ($hash_presence{$gene}{$sample_name}){
|
|
128 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n";
|
|
129 }
|
|
130 }
|
|
131 }
|
|
132 }
|
|
133 close(GFF);
|
|
134 close(CIRCOS_TRACK);
|
|
135 }
|
|
136 open(KARYOTYPE,">$outfile.karyotype.txt");
|
|
137 foreach my $chr(keys(%max_position)){
|
|
138 my $max = $max_position{$chr};
|
|
139 print KARYOTYPE "chr - $chr 1 0 $max black";
|
|
140 }
|
|
141 close(KARYOTYPE);
|
|
142
|
|
143 open(F,"circos_templates/circos1.conf");
|
|
144 open(CIRCOS_CONF,">$outfile.circos1.conf");
|
|
145 while(<F>){
|
|
146 if (/HIGHLIGHT_BLOCK/){print CIRCOS_CONF $highlight_block;}
|
|
147 elsif (/KARYOTYPE_PATH/){print CIRCOS_CONF "karyotype = $outfile.karyotype.txt\n";}
|
|
148 elsif (/OUTPUT_PNG/){print CIRCOS_CONF "file = $outfile\n";}
|
|
149 else{print CIRCOS_CONF $_;}
|
|
150 }
|
|
151 close(F);
|
|
152 close(CIRCOS_CONF);
|
|
153
|
|
154 system("$circos_exe -conf $outfile.circos1.conf");
|