annotate pangenomeCogAnalysis_V1.pl @ 13:45cc191a3290 draft

Uploaded
author mgarnier
date Thu, 19 Aug 2021 13:39:14 +0000
parents b36506d26a43
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
1 #!/usr/bin/perl
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
2
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
3 use strict;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
4 use warnings;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
5
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# The script takes exactly nine positional arguments (five inputs followed by
# four output paths); refuse to run with any other count.
my $num_args = scalar @ARGV;
if ($num_args != 9) {
    # Report on STDERR and exit with a non-zero status so that a calling
    # pipeline sees the failure instead of treating the aborted run as a
    # success.
    print STDERR "Il n'y a pas le bon nombre d'arguments !\n";
    exit 1;
}
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
11
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# Positional command-line arguments, in order.
#
# INPUT
#   $matrix_file    tab-separated table: orthogroups and whether they are found
#                   in each strain
#   $species_file   tab-separated table associating each strain with its species
#   $annotation     comma-separated list of tab-separated files giving, for
#                   each gene, its COG category or categories
#   $order          comma-separated strain names, given in the same order as
#                   the annotation files, so that each COG file can be linked
#                   to a strain (and later to a species)
#   $annotation_GFF comma-separated list of GFF files
#
# OUTPUT
#   $output         species with their orthogroups (presence / absence)
#   $output2        file of means
#   $output3        list of values for each COG category and each species
#   $output4        COG categories for core genome / accessory genome /
#                   specific genes
my (
    $matrix_file, $species_file, $annotation, $order, $annotation_GFF,
    $output,      $output2,      $output3,    $output4,
) = @ARGV[ 0 .. 8 ];

my @list_gff = split /,/, $annotation_GFF;    # individual GFF file paths
my %hSpecies;                                 # key: strain id (e.g. NF_AR12) ; value: species name
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
32
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
######################## SPECIES FILE ###########################
# Fill %hSpecies from the species file: one tab-separated line per strain, the
# first column being the strain identifier and the second the species name.
open(my $species_fh, '<', $species_file)
    or die "Cannot open species file '$species_file': $!";

while (my $line = <$species_fh>) {
    $line =~ s/[\r\n]//g;         # strip Unix and Windows line endings
    next unless $line =~ /\S/;    # a blank line would otherwise create an empty key and inflate $nbr

    my @sp = split(/\t/, $line);
    $hSpecies{ $sp[0] } = $sp[1];    # key: strain id ; value: species name
}

close($species_fh);

# Number of strains, i.e. the number of keys in %hSpecies.
my $nbr = keys(%hSpecies);
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
50
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
51 #///////////////////////////////////////////////////////////////////////////////////////////////////
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
52
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
############################################ MATRIX ############################################

# Open the orthogroup matrix and read its header row.  The bareword handle M is
# kept on purpose: the row-by-row loop further down reads from it and it is
# closed only once the whole matrix has been processed.
open(M, '<', $matrix_file)
    or die "Cannot open matrix file '$matrix_file': $!";

my $first_line = <M>;
die "Matrix file '$matrix_file' is empty" unless defined $first_line;
$first_line =~ s/[\r\n]//g;

# Column titles of the matrix; columns 1..N hold the strain identifiers
# (column 0 is the orthogroup column and is skipped below).
my @samples = split(/\t/, $first_line);

# For each species, build its "combination": the column numbers of all its
# strains, each prefixed with "_" (e.g. "_1_2_3").
my %hCombination = ();    # key: species name ; value: combination string

for my $i (1 .. $#samples) {
    my $header  = $samples[$i];          # strain identifier of column $i
    my $species = $hSpecies{$header};    # species that strain belongs to

    # A strain missing from the species file would otherwise be filed under
    # an empty species name (with "uninitialized" warnings); report it and
    # leave the column out of the species combinations instead.
    if (!defined $species) {
        warn "Strain '$header' (matrix column $i) is not listed in '$species_file'; "
           . "column ignored when building species combinations\n";
        next;
    }

    $hCombination{$species} .= "_" . $i;
}
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
73
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
74
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
75 # foreach my $species (keys (%hCombination)){
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
76 # my $combination = $hCombination{$species};
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
77 # # print "$species $combination\n";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
78 # }
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
79
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
80
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
81 # exit;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
82
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# Orthogroups grouped by the exact set of columns in which they are present
# or absent.
my %hCombination_prs = ();    # key: combination of non-empty columns (e.g. "_1_2_3") ; value: newline-terminated orthogroup names
my %hCombination_abs = ();    # same, keyed by the combination of empty columns

# One representative gene (the first gene of a cell) per orthogroup, by class.
my %coregenes      = ();      # key: gene ; value: orthogroup (core genome: present in every column)
my %specificgenes  = ();      # key: gene ; value: orthogroup (specific: present in exactly one column)
my %accessorygenes = ();      # key: gene ; value: orthogroup (accessory: everything else)

my $coregene_line;            # first matrix row classified as core genome
my %coregenes2     = ();      # key1: column index ; key2: gene ; value: orthogroup
my %specificgenes2 = ();      # key1: column index ; key2: gene ; value: orthogroup

my %Genes_of_OG = ();         # key1: column index ; key2: orthogroup ; value: first gene of that cell

# Process the matrix one row (orthogroup) at a time, cell by cell.
while (my $line = <M>) {
    $line =~ s/[\r\n]//g;
    next unless $line =~ /\S/;    # skip blank lines (e.g. at end of file)

    # LIMIT -1 keeps trailing empty cells.  Without it, split() drops them, so
    # a row whose last strains are empty looked shorter than it is: it could
    # be mistaken for a core-genome row, and the dropped columns were missing
    # from the "absent" combination.
    my @infos      = split(/\t/, $line, -1);
    my $orthogroup = $infos[0];

    # Number of strain columns: the header width, or the row width if the row
    # happens to be longer.  Cells missing from a short row count as empty.
    my $last_col = $#samples > $#infos ? $#samples : $#infos;

    my $nb_found  = 0;     # number of columns in which the orthogroup has genes
    my $combi_prs = "";    # combination of columns where it is present
    my $combi_abs = "";    # combination of columns where it is absent
    my $gene_random;            # content of the last non-empty cell seen
    my $unique_col_detected;    # index of the last non-empty column seen

    for my $i (1 .. $last_col) {
        my $val = defined $infos[$i] ? $infos[$i] : '';

        if ($val =~ /\w/) {    # the cell contains something
            $combi_prs .= "_" . $i;
            $nb_found++;
            $gene_random         = $val;
            $unique_col_detected = $i;

            # Remember the first gene of this orthogroup in this column.
            my ($premier_gene) = split(',', $val);
            $Genes_of_OG{$i}{$orthogroup} = $premier_gene;
        }
        else {                 # empty cell
            $combi_abs .= "_" . $i;
        }
    }

    # File the orthogroup under its presence and absence combinations.
    $hCombination_prs{$combi_prs} .= $orthogroup . "\n";
    $hCombination_abs{$combi_abs} .= $orthogroup . "\n";

    # A row without any gene has nothing to classify (it used to fall into
    # the core or accessory branch with an undefined gene).
    next if $nb_found == 0;

    if ($nb_found == $last_col) {    # present in every column: core genome
        for my $i (1 .. $last_col) {
            my ($first_gene) = split(',', $infos[$i]);    # first gene of the cell only
            $coregenes{$first_gene}      = $orthogroup;
            $coregenes2{$i}{$first_gene} = $orthogroup;
        }
        # Keep the first core row; it serves later as a reference row.
        if (!$coregene_line) {
            $coregene_line = $line;
        }
    }
    elsif ($nb_found == 1) {         # present in a single column: specific gene
        my ($first_gene) = split(',', $gene_random);
        $specificgenes{$first_gene}                        = $orthogroup;
        $specificgenes2{$unique_col_detected}{$first_gene} = $orthogroup;
    }
    else {                           # accessory genome: everything else
        my ($first_gene) = split(',', $gene_random);
        $accessorygenes{$first_gene} = $orthogroup;
    }
}
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
180
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
my %hCol_Annotated = ();    # key: column index ; value: strain name (columns for which a GFF file is present)

# Keep only the columns (strains) that have an associated GFF file: a column
# qualifies when the first gene of its cell in the reference core-genome row
# appears in at least one of the GFF files.
my @list_column = defined $coregene_line ? split(/\t/, $coregene_line) : ();
warn "No core-genome row was found in the matrix; no column can be matched to a GFF file\n"
    unless @list_column;

for my $i (1 .. $#list_column) {
    # Accept both "," and ", " as gene separators, as the rest of the script
    # splits cells on ",".
    my ($premier_gene) = split(/,\s*/, $list_column[$i]);
    next unless defined $premier_gene && length $premier_gene;

    my $strain = $samples[$i];    # strain name of this column

    GFF:
    foreach my $gff (@list_gff) {
        # Search the file in Perl rather than through a shell `grep`: the gene
        # name and file path are no longer interpreted by the shell, and the
        # gene name is matched literally instead of as a regular expression.
        my $gff_fh;
        if (!open($gff_fh, '<', $gff)) {
            warn "Cannot open GFF file '$gff': $!\n";
            next GFF;
        }

        my $found = 0;
        while (my $gff_line = <$gff_fh>) {
            if (index($gff_line, $premier_gene) >= 0) {
                $found = 1;
                last;
            }
        }
        close($gff_fh);

        if ($found) {
            $hCol_Annotated{$i} = $strain;
            last GFF;    # one matching GFF file is enough for this column
        }
    }
}
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
201 # exit;
12
b36506d26a43 Uploaded
mgarnier
parents: 10
diff changeset
202 # foreach my $i (sort keys (%specificgenes2)){ # parcours de la table %hCount2 au niveau des catégories
b36506d26a43 Uploaded
mgarnier
parents: 10
diff changeset
203 # foreach my $gene (keys %{$specificgenes2{$i} }){ # parcours de la table %hCount2 au niveau des espèces
b36506d26a43 Uploaded
mgarnier
parents: 10
diff changeset
204 # print "$i\t$gene\t".$specificgenes2{$i}{$gene}."\n";
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
205 # }
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
206 # }
12
b36506d26a43 Uploaded
mgarnier
parents: 10
diff changeset
207 # exit;
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
208 # while (my ($k,$v) = each(%strain_specie)) {
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
209 # print "i=$k strain=$v\n";
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
210 # }
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
211 # exit;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
212 # foreach my $oups (keys (%coregenes)) {
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
213 # print "$oups\n";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
214 # }
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
215 # exit;
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
216
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
close(M);

my %Hash_Specific = ();    # key: orthogroup ; value: species whose column combination is exactly the orthogroup's presence combination

# Write, for every species, the orthogroups present in exactly the columns of
# that species and the orthogroups absent from exactly those columns.
open(my $out_fh, '>', $output)
    or die "Cannot open output file '$output' for writing: $!";

print {$out_fh} "$annotation\n";

# Species are written in sorted order so that the report is identical from
# one run to the next (plain hash order changes between runs).
foreach my $species (sort keys %hCombination) {
    my $combination    = $hCombination{$species};            # column combination of this species
    my $ortho_presents = $hCombination_prs{$combination};    # orthogroups present in exactly these columns
    my $ortho_absents  = $hCombination_abs{$combination};    # orthogroups absent from exactly these columns

    if ($ortho_presents) {
        print {$out_fh} "> $species - present\n";
        print {$out_fh} "$ortho_presents\n";

        # Remember to which species each of these orthogroups belongs.
        foreach my $ortho (split(/\n/, $ortho_presents)) {
            $Hash_Specific{$ortho} = $species;
        }
    }

    if ($ortho_absents) {
        print {$out_fh} "> $species - absent\n";
        print {$out_fh} "$ortho_absents\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the close is checked.
close($out_fh)
    or die "Error while writing output file '$output': $!";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
249
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
250 #//////////////////////////////////////////////////////////////////////////////////////////////////
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
251
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
252 ############################################### COG ###############################################
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
253
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# STEP 1: MATCH EACH COG FILE WITH A STRAIN NAME ------------------------------
my @files = split /,/, $annotation;    # the individual COG annotation files
my @list  = split /,/, $order;         # the strain names, in order

my %hCorrespondance;    # key: COG file ; value: strain name (both come from the $annotation and $order inputs)

# Walk both lists in parallel.
# NOTE(review): the strain index starts at 1, so $list[0] is never used;
# presumably the first entry of $order is not a strain name -- confirm against
# the caller.
my $l = 1;
$hCorrespondance{$_} = $list[ $l++ ] for @files;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
267
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
268 # #Affichage du hash
5
3d565ddc7b95 Uploaded
mgarnier
parents: 4
diff changeset
269 # foreach my $f (keys %hCorrespondance){
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
270 # print $f."=>".$hCorrespondance{$f}."\n"
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
271 # }
5
3d565ddc7b95 Uploaded
mgarnier
parents: 4
diff changeset
272 # exit;
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
273
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# STEP 2: MATCH EACH COG FILE WITH ITS SPECIES (RATHER THAN ITS STRAIN) -------
my %hCorresp_file_species;    # key: COG file ; value: species
my %species_names;            # key: species name ; value: 1

while (my ($cog_file, $strain) = each %hCorrespondance) {
    # Look the strain up in the strain -> species table loaded from the
    # species file.
    my $espece = $hSpecies{$strain};

    $species_names{$espece}           = 1;          # keep track of every species seen
    $hCorresp_file_species{$cog_file} = $espece;    # species this COG file belongs to
}
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
284 # while (my ($k,$v) = each(%hCorresp_file_species)) {
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
285 # print "file=$k sp=$v\n";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
286 # }
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
287 # exit;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
288
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# STEP 3: COUNT THE COG CATEGORIES --------------------------------------------
my %hCount2;    # key1: COG category ; key2: associated species ; value: count

# Counts for the core genome / specific genes / accessory genome.
# Each hash -> key: COG category ; value: count
my (%hCore_Count, %hSpecific_Count, %hAccessory_Count);

# Gene recorded per category.
# Each hash -> key: COG category ; value: gene
my (%hCore_Cat, %hAccessory_Cat, %hSpecific_Cat);

# Gene recorded per category and species.
# Each hash -> key1: COG category ; key2: species ; value: gene
my (%hCore_Cat_Esp, %hAccessory_Cat_Esp, %hSpecific_Cat_Esp);

my %Cog_of_gene;       # key: gene ; value: COG category
my %Specie_of_gene;    # key: gene ; value: strain
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
309
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# Read every COG annotation file (tab-separated: gene in column 1, COG
# categories from column 3 onwards) and tally the categories.
#
# Filled here:
#   %Cog_of_gene        gene => first COG category listed on its line
#   %hCount             (per file) category => number of occurrences
#   %hCore_Count / %hAccessory_Count / %hSpecific_Count
#                       category => occurrences among core / accessory /
#                       specific genes, accumulated over all files
#   %hCore_Cat / %hAccessory_Cat / %hSpecific_Cat
#                       category => last gene seen for that category
#   %hCount2            {category}{species} => "n1,n2,...," with one count
#                       appended per annotation file of that species
#   %h*_Cat_Esp         {category}{species} => gene
foreach my $file (@files) {

    # Species associated with this annotation file.
    my $esp = $hCorresp_file_species{$file};

    # Per-file tally: COG category => number of occurrences.
    my %hCount = ();

    # Three-argument open on a lexical handle; fail loudly instead of
    # silently producing empty counts when the file cannot be read.
    open(my $cog_fh, '<', $file)
        or die "Cannot open COG annotation file '$file': $!\n";

    while (my $line2 = <$cog_fh>) {
        $line2 =~ tr/\r\n//d;    # strip LF and CR (handles CRLF files)

        my @Genes = split(/\t/, $line2);
        next unless @Genes;      # blank line: nothing to record

        my $gene = $Genes[0];
        $Cog_of_gene{$gene} = $Genes[2];    # undef when the gene has no category

        # The gene class does not depend on the category: look it up once.
        my $is_core      = $coregenes{$gene};
        my $is_accessory = $accessorygenes{$gene};
        my $is_specific  = $specificgenes{$gene};

        # Columns 3..N hold the COG category (or categories) of the gene.
        foreach my $cat (@Genes[2 .. $#Genes]) {
            $hCount{$cat}++;

            if ($is_core) {
                $hCore_Count{$cat}++;
                $hCore_Cat{$cat} = $gene;
            }
            if ($is_accessory) {
                $hAccessory_Count{$cat}++;
                $hAccessory_Cat{$cat} = $gene;
            }
            if ($is_specific) {
                $hSpecific_Count{$cat}++;
                $hSpecific_Cat{$cat} = $gene;
            }
        }
    }
    close($cog_fh);

    # Append this file's count to the comma-separated list kept for the
    # (category, species) pair: one value per strain of the species.
    foreach my $cat (keys %hCount) {
        $hCount2{$cat}{$esp} .= "$hCount{$cat},";
    }

    # Record, for the current species, the gene currently stored for each
    # category of the core / accessory / specific sets.
    # NOTE(review): %hCore_Cat, %hAccessory_Cat and %hSpecific_Cat are not
    # reset between files, so categories seen in earlier files are also
    # attributed to this species -- confirm this is intended.
    foreach my $cat (keys %hCore_Cat) {
        $hCore_Cat_Esp{$cat}{$esp} = $hCore_Cat{$cat};
    }
    foreach my $cat (keys %hAccessory_Cat) {
        $hAccessory_Cat_Esp{$cat}{$esp} = $hAccessory_Cat{$cat};
    }
    foreach my $cat (keys %hSpecific_Cat) {
        $hSpecific_Cat_Esp{$cat}{$esp} = $hSpecific_Cat{$cat};
    }
}
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
371 # foreach my $category (sort keys (%hSpecific_Cat_Esp)) { # parcours au niveau de la 1ere clé
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
372
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
373 # foreach my $especeee (keys %{$hSpecific_Cat_Esp{$category} }) { # parcours au niveau de la 2e clé pour la $category donnée
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
374
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
375 # print "$category\t$especeee\t$hSpecific_Cat_Esp{$category}{$especeee}\n"; # on crée une sortie qui affiche en somme notre hash %hCount2
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
376 # }
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
377 # }
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
378 # exit;
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
379
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# STEP 4: WRITE THE OUTPUT FILES ---------------------------------------------
#
# $output4: one row per (COG category, species) pair found in %hCount2, with
# the category's occurrence count expressed as a percentage of the number of
# core, accessory and specific genes respectively.
open(my $out4_fh, '>', $output4)
    or die "Cannot write '$output4': $!\n";

print {$out4_fh} join("\t",
    "Species", "COG categories", "Core-genome",
    "Accessory genome", "Specific genes"), "\n";

# Denominators are loop-invariant: compute them once.
my $nb_core_genes      = scalar keys %coregenes;
my $nb_accessory_genes = scalar keys %accessorygenes;
my $nb_specific_genes  = scalar keys %specificgenes;

foreach my $category (sort keys %hCount2) {

    # The three percentages depend on the category only, not on the species.
    my $c = 0;
    if ($hCore_Count{$category} && $nb_core_genes) {
        $c = ($hCore_Count{$category} / $nb_core_genes) * 100;
    }

    my $acc = 0;
    if ($hAccessory_Count{$category} && $nb_accessory_genes) {
        $acc = ($hAccessory_Count{$category} / $nb_accessory_genes) * 100;
    }

    my $s = 0;
    if ($hSpecific_Count{$category} && $nb_specific_genes) {
        $s = ($hSpecific_Count{$category} / $nb_specific_genes) * 100;
    }

    # Species are sorted so that the output is reproducible between runs
    # (plain hash order is randomised by Perl).
    foreach my $especeee (sort keys %{ $hCount2{$category} }) {
        print {$out4_fh} join("\t", $especeee, $category, $c, $acc, $s), "\n";
    }
}

# Buffered write errors only surface at close time.
close($out4_fh) or die "Cannot close '$output4': $!\n";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
418
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# $output3: raw dump of %hCount2, one row per (COG category, species) pair.
# The third column is the comma-terminated list of per-file counts.
open(my $out3_fh, '>', $output3)
    or die "Cannot write '$output3': $!\n";

foreach my $category (sort keys %hCount2) {

    # Sorted so that the row order is reproducible between runs.
    foreach my $especeee (sort keys %{ $hCount2{$category} }) {
        print {$out3_fh} "$category\t$especeee\t$hCount2{$category}{$especeee}\n";
    }
}

# Buffered write errors only surface at close time.
close($out3_fh) or die "Cannot close '$output3': $!\n";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
429
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
430
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# $output2: matrix of COG categories (rows) by species (columns). Each cell is
# the mean of the per-file counts stored in %hCount2 for that pair, or 0 when
# the pair has no count.
# NOTE(review): a file in which the category never occurs contributes no
# value to the list, so the mean is taken over the files where the category
# was seen at least once -- confirm this is the intended statistic.
open(my $out2_fh, '>', $output2)
    or die "Cannot write '$output2': $!\n";

my @species_columns = sort keys %species_names;

# Header row.
print {$out2_fh} join("\t", "category", @species_columns), "\n";

foreach my $category (sort keys %hCount2) {
    my @means;

    foreach my $especes (@species_columns) {
        my $moyenne = 0;
        my $counts  = $hCount2{$category}{$especes};

        if ($counts) {
            # Several counts may be stored ("5,3,"): split them apart.
            my @liste = split(/,/, $counts);

            my $somme = 0;
            $somme += $_ for @liste;

            $moyenne = $somme / scalar(@liste) if @liste;
        }

        push @means, $moyenne;
    }

    print {$out2_fh} join("\t", $category, @means), "\n";
}

# Buffered write errors only surface at close time.
close($out2_fh) or die "Cannot close '$output2': $!\n";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
474
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
475 # foreach my $cat (keys (%hCore_Cat)){
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
476 # print OUT4 $c_gene."\t";
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
477 # }
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
478
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
479
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
480 #//////////////////////////////////////////////////////////////////////////////////////////////////
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
481
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
482 ############################################### GFF ###############################################
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
483
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
484
6
cf9da93e4145 Uploaded
mgarnier
parents: 5
diff changeset
485 # my @order_gff = split(',', $order_GFF); # liste de l'ordre des souches
0
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
# Declarations kept from the original script; $g and $o are no longer used
# by the loop below but stay declared in case code outside this section
# still refers to them.
my ($g, $o);

my %hgff_order    = ();    # GFF file => strain name (not filled in this section)
my %Gene_position = ();    # gene => "chromosome\tstart\tend"
my %Cat_genes     = ();    # gene => COG category for which it is stored in %hCore_Cat
my %Cat_genes2    = ();    # gene => COG category for which it is stored in %hSpecific_Cat

my %hash_of_genes = ();    # gene => 1 for every mRNA feature found in the GFF files

# Invert %hCore_Cat and %hSpecific_Cat (category => gene) once, instead of
# scanning every category for every GFF record. Keys are sorted so that the
# category retained for a gene stored under several categories is the same
# on every run.
my %core_cat_of_gene;
foreach my $cog (sort keys %hCore_Cat) {
    next unless defined $hCore_Cat{$cog};
    $core_cat_of_gene{ $hCore_Cat{$cog} } = $cog;
}
my %specific_cat_of_gene;
foreach my $cog_bis (sort keys %hSpecific_Cat) {
    next unless defined $hSpecific_Cat{$cog_bis};
    $specific_cat_of_gene{ $hSpecific_Cat{$cog_bis} } = $cog_bis;
}

foreach my $gff_file (@list_gff) {

    # Three-argument open on a lexical handle; fail loudly instead of
    # silently skipping an unreadable annotation file.
    open(my $gff_fh, '<', $gff_file)
        or die "Cannot open GFF file '$gff_file': $!\n";

    while (my $record = <$gff_fh>) {
        $record =~ tr/\r\n//d;    # otherwise the last column keeps its line ending

        # Columns used: 1 = sequence id, 3 = feature type, 4 = start,
        # 5 = end, 9 = attributes.
        my ($chr, undef, $type, $start, $end, undef, undef, undef, $attributes)
            = split(/\t/, $record);

        # Only mRNA features carry the gene identifiers used here.
        next unless defined $type && $type eq "mRNA";
        next unless defined $attributes;

        # The identifier runs up to the next ';' or to the end of the
        # attribute column, so an ID that is the last attribute is kept too.
        next unless $attributes =~ /ID=([^;]+)(?:;|\z)/;
        my $gene = $1;

        $hash_of_genes{$gene} = 1;

        if (exists $core_cat_of_gene{$gene}) {
            $Cat_genes{$gene} = $core_cat_of_gene{$gene};
        }
        if (exists $specific_cat_of_gene{$gene}) {
            $Cat_genes2{$gene} = $specific_cat_of_gene{$gene};
        }

        $Gene_position{$gene} = "$chr\t$start\t$end";
    }

    close($gff_fh);
}
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
538
4
db4e1e6850b0 Uploaded
mgarnier
parents: 2
diff changeset
# Number assigned to each COG category: "A" => 1 ... "Z" => 26, and
# "unknown" => 27.
my %Hash_Convert = do {
    my $rank = 0;
    map { $_ => ++$rank } ('A' .. 'Z', 'unknown');
};
db4e1e6850b0 Uploaded
mgarnier
parents: 2
diff changeset
540
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
# Core genome: write one table per annotated strain to
# Core/<strain>.<species>.txt, listing each core gene with its orthogroup,
# its position, its COG category and the number assigned to that category.
unless (-d "Core") {
    mkdir("Core") or die "Cannot create directory Core: $!\n";
}

foreach my $i (keys %coregenes2) {

    # Skip entries with no strain name in %hCol_Annotated (the original
    # comment says: the GFF file does not exist).
    my $strain_name = $hCol_Annotated{$i};
    next unless $strain_name;

    my $specie_name = $hSpecies{$strain_name};
    $specie_name = '' unless defined $specie_name;

    my $core_path = "Core/$strain_name.$specie_name.txt";
    open(my $out5_fh, '>', $core_path)
        or die "Cannot create file $core_path: $!\n";
    print {$out5_fh} "Orthogroup\tGene\tChromosome\tStart\tEnd\tCOG categories\tNumber assigned\n";

    # $coregenes2{$i} is a hash reference: gene => orthogroup.
    my $orthogroup_of = $coregenes2{$i};

    # Sorted so that the row order is reproducible between runs.
    foreach my $gene (sort keys %$orthogroup_of) {

        my $cat = "unknown";
        if ($Cog_of_gene{$gene}) {
            $cat = $Cog_of_gene{$gene};
        }

        my $orthogroup = defined $orthogroup_of->{$gene} ? $orthogroup_of->{$gene} : '';

        # A gene with no recorded position still gets three (empty) fields,
        # so every row keeps the seven columns announced by the header.
        my $position = defined $Gene_position{$gene} ? $Gene_position{$gene} : "\t\t";

        # Categories absent from %Hash_Convert get an empty number.
        my $cat_number = defined $Hash_Convert{$cat} ? $Hash_Convert{$cat} : '';

        print {$out5_fh} join("\t", $orthogroup, $gene, $position, $cat, $cat_number), "\n";
    }

    # Buffered write errors only surface at close time.
    close($out5_fh) or die "Cannot close $core_path: $!\n";
}
731fb6cb324b Uploaded
mgarnier
parents:
diff changeset
579
12
b36506d26a43 Uploaded
mgarnier
parents: 10
diff changeset
# Strain-specific genes: write one table per annotated strain to
# StrainSpecific/<strain>.<species>.txt, listing each specific gene with its
# orthogroup, its position, its COG category and the number assigned to that
# category.
unless (-d "StrainSpecific") {
    mkdir("StrainSpecific") or die "Cannot create directory StrainSpecific: $!\n";
}

foreach my $i (keys %specificgenes2) {

    # Skip entries with no strain name in %hCol_Annotated (the original
    # comment says: the GFF file does not exist).
    my $strain_name = $hCol_Annotated{$i};
    next unless $strain_name;

    my $specie_name = $hSpecies{$strain_name};
    $specie_name = '' unless defined $specie_name;

    my $specific_path = "StrainSpecific/$strain_name.$specie_name.txt";
    open(my $out7_fh, '>', $specific_path)
        or die "Cannot create file $specific_path: $!\n";
    print {$out7_fh} "Orthogroup\tGene\tChromosome\tStart\tEnd\tCOG categories\tNumber assigned\n";

    # $specificgenes2{$i} is a hash reference: gene => orthogroup.
    my $orthogroup_of = $specificgenes2{$i};

    # Sorted so that the row order is reproducible between runs.
    foreach my $gene (sort keys %$orthogroup_of) {

        my $cat = "unknown";
        if ($Cog_of_gene{$gene}) {
            $cat = $Cog_of_gene{$gene};
        }

        my $orthogroup = defined $orthogroup_of->{$gene} ? $orthogroup_of->{$gene} : '';

        # A gene with no recorded position still gets three (empty) fields,
        # so every row keeps the seven columns announced by the header.
        my $position = defined $Gene_position{$gene} ? $Gene_position{$gene} : "\t\t";

        # Categories absent from %Hash_Convert get an empty number.
        my $cat_number = defined $Hash_Convert{$cat} ? $Hash_Convert{$cat} : '';

        print {$out7_fh} join("\t", $orthogroup, $gene, $position, $cat, $cat_number), "\n";
    }

    # Buffered write errors only surface at close time.
    close($out7_fh) or die "Cannot close $specific_path: $!\n";
}
b36506d26a43 Uploaded
mgarnier
parents: 10
diff changeset
618
2
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
619
0428ce25da81 Uploaded
mgarnier
parents: 1
diff changeset
# Group-specific genes: write one table per annotated strain to
# GroupSpecific/<strain>.<species>.txt, keeping only the orthogroups that
# %Hash_Specific associates with the strain's own species.
unless (-d "GroupSpecific") {
    mkdir("GroupSpecific") or die "Cannot create directory GroupSpecific: $!\n";
}

foreach my $i (keys %Genes_of_OG) {

    # Skip entries with no strain name in %hCol_Annotated (the original
    # comment says: the GFF file does not exist).
    my $strain_name = $hCol_Annotated{$i};
    next unless $strain_name;

    my $specie_name = $hSpecies{$strain_name};
    $specie_name = '' unless defined $specie_name;

    my $group_path = "GroupSpecific/$strain_name.$specie_name.txt";
    open(my $out6_fh, '>', $group_path)
        or die "Cannot create file $group_path: $!\n";
    print {$out6_fh} "Orthogroup\tGene\tChromosome\tStart\tEnd\tCOG categories\tNumber assigned\n";

    # $Genes_of_OG{$i} is a hash reference: orthogroup => gene.
    my $gene_of = $Genes_of_OG{$i};

    # Sorted so that the row order is reproducible between runs.
    foreach my $orthogroup (sort keys %$gene_of) {

        # Keep only orthogroups flagged as specific to this strain's species.
        next unless $Hash_Specific{$orthogroup}
                 && $Hash_Specific{$orthogroup} eq $specie_name;

        my $gene = $gene_of->{$orthogroup};
        $gene = '' unless defined $gene;

        my $cat = "unknown";
        if ($Cog_of_gene{$gene}) {
            $cat = $Cog_of_gene{$gene};
        }

        # A gene with no recorded position still gets three (empty) fields,
        # so every row keeps the seven columns announced by the header.
        my $position = defined $Gene_position{$gene} ? $Gene_position{$gene} : "\t\t";

        # Categories absent from %Hash_Convert get an empty number.
        my $cat_number = defined $Hash_Convert{$cat} ? $Hash_Convert{$cat} : '';

        print {$out6_fh} join("\t", $orthogroup, $gene, $position, $cat, $cat_number), "\n";
    }

    # Buffered write errors only surface at close time.
    close($out6_fh) or die "Cannot close $group_path: $!\n";
}