Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer_workflow/Perl/ProjectPAVinCircos.pl @ 1:032f6b3806a3 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:16:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3cbb01081cde | 1:032f6b3806a3 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 | |
5 my $pav = $ARGV[0]; | |
6 my $ref = $ARGV[1]; | |
7 my $strain_info = $ARGV[2]; | |
8 my $gff = $ARGV[3]; | |
9 my $ordering = $ARGV[4]; | |
10 my $outfile = $ARGV[5]; | |
11 | |
12 my $circos_exe = "circos/bin/circos"; | |
13 #my $circos_exe = "circos"; | |
14 | |
15 my %metadata; | |
16 open(M,$strain_info); | |
17 while(my $line = <M>){ | |
18 chomp($line); | |
19 my ($id,$name) = split(/\t/,$line); | |
20 $metadata{$id} = $name; | |
21 $metadata{$name} = $id; | |
22 } | |
23 close(M); | |
24 | |
25 | |
26 my $pangenome_size = 0; | |
27 my $nb_core_genes = 0; | |
28 my $nb_dispensable_genes = 0; | |
29 my $nb_specific_genes = 0; | |
30 my %cluster_of_gene; | |
31 my %genes_of_cluster; | |
32 my %tagging; | |
33 open(P,$pav); | |
34 my $firstline = <P>; | |
35 chomp($firstline); | |
36 my @infos = split(/\t/,$firstline); | |
37 my %samples; | |
38 my %hash_presence; | |
39 my $col; | |
40 for (my $j = 1; $j <= $#infos; $j++){ | |
41 my $val = $infos[$j]; | |
42 if ($val eq $ref){$col = $j;} | |
43 $samples{$j} = $val; | |
44 } | |
45 while(my $line = <P>){ | |
46 chomp($line); | |
47 my @infos = split(/\t/,$line); | |
48 my $cluster_num = $infos[0]; | |
49 my $clnb = $infos[0]; | |
50 my $pos = $clnb; | |
51 my $pos_before = $pos - 1; | |
52 $pangenome_size++; | |
53 if (length($clnb) == 1){$clnb = "000".$clnb;} | |
54 elsif (length($clnb) == 2){$clnb = "00".$clnb;} | |
55 elsif (length($clnb) == 3){$clnb = "0".$clnb;} | |
56 my $name = "CLUSTER".$clnb; | |
57 | |
58 # exclude lines where several genes for one strains | |
59 if ($line =~/,/){next;} | |
60 | |
61 my $gene_ref = $infos[$col]; | |
62 for (my $j = 1; $j <= $#infos; $j++){ | |
63 my $gene = $infos[$j]; | |
64 my $sample = $samples{$j}; | |
65 if ($gene =~/\w+/){ | |
66 $hash_presence{$gene_ref}{$sample} = 1; | |
67 } | |
68 } | |
69 } | |
70 close(P); | |
71 | |
72 my $track_thin = 0.5 / scalar keys(%samples); | |
73 my $r0 = 0.4; | |
74 my $r1 = $r0+$track_thin; | |
75 my $highlight_block = ""; | |
76 my %max_position; | |
77 open(O,$ordering); | |
78 <O>; | |
79 while(my $sample_name = <O>){ | |
80 #foreach my $sample(keys(%samples)){ | |
81 chomp($sample_name); | |
82 #my $sample_name = $samples{$sample}; | |
83 my $sample_id = $metadata{$sample_name}; | |
84 print "$sample_name $sample_id\n"; | |
85 my $ref_id = $metadata{$ref}; | |
86 open(GFF,$gff); | |
87 open(CIRCOS_TRACK,">$outfile.$sample_id.circos.heatmap.txt"); | |
88 my $current_start; | |
89 my $current_stop; | |
90 my $current_chrom; | |
91 $r0+=$track_thin; | |
92 $r1+=$track_thin; | |
93 #my $r1_final = $r1-($track_thin/3); | |
94 my $r1_final = $r1; | |
95 $r0.="r"; | |
96 $r1_final.="r"; | |
97 $highlight_block .= qq~ | |
98 <highlight> | |
99 file = $outfile.$sample_id.circos.heatmap.txt | |
100 r0 = $r0 | |
101 r1 = $r1_final | |
102 </highlight> | |
103 ~; | |
104 while(my $line = <GFF>){ | |
105 chomp($line); | |
106 my @infos = split(/\t/,$line); | |
107 if ($infos[2] eq "gene" && $line =~/ID=([^;]+);*/){ | |
108 my $chr = $infos[0]; | |
109 my $start = $infos[3]; | |
110 my $end = $infos[4]; | |
111 $current_start = $start; | |
112 $current_stop = $end; | |
113 $current_chrom = $chr; | |
114 if ($end > $max_position{$current_chrom}){$max_position{$current_chrom} = $end;} | |
115 my $gene = $1; | |
116 my $genelength = $end-$start; | |
117 my $gene_complete = $sample_name.":".$gene; | |
118 if ($hash_presence{$gene}{$sample_name}){ | |
119 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n"; | |
120 } | |
121 } | |
122 if ($infos[2] eq "CDS" && $line =~/Parent=([^;]+);*/){ | |
123 my $gene = $1; | |
124 my $gene_complete = $sample_name.":".$gene; | |
125 if ($line =~/protein_id=([^;]+);/){ | |
126 $gene = $1; | |
127 if ($hash_presence{$gene}{$sample_name}){ | |
128 print CIRCOS_TRACK "$current_chrom $current_start $current_stop fill_color=red\n"; | |
129 } | |
130 } | |
131 } | |
132 } | |
133 close(GFF); | |
134 close(CIRCOS_TRACK); | |
135 } | |
136 open(KARYOTYPE,">$outfile.karyotype.txt"); | |
137 foreach my $chr(keys(%max_position)){ | |
138 my $max = $max_position{$chr}; | |
139 print KARYOTYPE "chr - $chr 1 0 $max black"; | |
140 } | |
141 close(KARYOTYPE); | |
142 | |
143 open(F,"circos_templates/circos1.conf"); | |
144 open(CIRCOS_CONF,">$outfile.circos1.conf"); | |
145 while(<F>){ | |
146 if (/HIGHLIGHT_BLOCK/){print CIRCOS_CONF $highlight_block;} | |
147 elsif (/KARYOTYPE_PATH/){print CIRCOS_CONF "karyotype = $outfile.karyotype.txt\n";} | |
148 elsif (/OUTPUT_PNG/){print CIRCOS_CONF "file = $outfile\n";} | |
149 else{print CIRCOS_CONF $_;} | |
150 } | |
151 close(F); | |
152 close(CIRCOS_CONF); | |
153 | |
154 system("$circos_exe -conf $outfile.circos1.conf"); |