Mercurial > repos > fgiacomoni > lipidmaps_textsearch
diff lib/lipidmaps.pm @ 0:e8bd49794291 draft
Init repository with last lipidmaps_textsearch master version
author | fgiacomoni |
---|---|
date | Tue, 11 Apr 2017 03:47:06 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/lipidmaps.pm Tue Apr 11 03:47:06 2017 -0400 @@ -0,0 +1,957 @@ +package lib::lipidmaps ; + +use strict; +use warnings; + +use Data::Dumper; +use Carp ; +use LWP::UserAgent ; + +use Exporter; +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + +$VERSION = "1.0"; +@ISA = qw(Exporter); +@EXPORT = (); +@EXPORT_OK = qw(set_total_carbons get_elts_from_lm_common_name); +%EXPORT_TAGS = ( ALL => [qw(set_total_carbons get_elts_from_lm_common_name)] ) ; + + +=head1 NAME + +My::operations - An example module + +=head1 SYNOPSIS + + use My::operations; + my $object = My::Module->new(); + print $object->as_string; + +=head1 DESCRIPTION + +This module clusters several more used maths functions like factorial... + +=head1 METHODS + +Methods are : + +=head2 METHOD new + + ## Description : new + ## Input : $self + ## Ouput : bless $self ; + ## Usage : new() ; + +=cut + +sub new { + ## Variables + my $self={}; + bless($self) ; + return $self ; +} +### END of SUB + +=head2 METHOD build_lm_mass_query + + ## Description : set the query from the lipdmaps WS + ## Input : $url, $delta, $cat, $cl, $subcl + ## Output : $query + ## Usage : my ( $query ) = build_lm_mass_query( $url, $delta, $cat, $cl, $subcl ) ; + +=cut +## START of SUB +sub build_lm_mass_query { + ## Retrieve Values + my $self = shift ; + my ( $url, $delta, $cat, $cl, $subcl ) = @_ ; + my $query = undef ; + ## build the REST query like : + #'http://www.lipidmaps.org/data/structure/LMSDSearch.php? + #Mode=ProcessTextSearch&OutputMode=File&OutputType=TSV&CoreClass=3&MainClass=303&SubClass=30301&ExactMass=1000.05&ExactMassOffSet=0.5' + + if ( defined $url ) { + $query = $$url ; + if ( defined $$cat ) { $query.= '&CoreClass='.$$cat ; } + if ( defined $$cl ) { $query .= '&MainClass='.$$cl ; } + if ( defined $$subcl ) { $query .= '&SubClass='.$$subcl ; } + if ( defined $delta ) { $query .= '&ExactMassOffSet='.$$delta ; } + ## and prepare the mass param : + $query .= '&ExactMass=' ; + } + else { + croak "Can't find any url to buil your query\n" ; + } + return(\$query) ; +} +## END of SUB + +=head2 METHOD get_lm_mass_query + + ## Description : get the builded query on LM WS + ## Input : $query, $mass + ## Output : $result + ## Usage : my ( $result ) = get_lm_mass_query( $query, $mass ) ; + +=cut +## START of SUB +sub get_lm_mass_query { + ## Retrieve Values + my $self = shift ; + my ( $query, $mass ) = @_ ; + + my $result = undef ; + my $rest_query = $$query.$$mass ; + +# print "QUERY_".$rest_query."_STOP\n" ; + my $browser = LWP::UserAgent->new; + $result = $browser->get($rest_query) ; + die "Can't GET the folowing mz query: $rest_query!" if ( ! defined $rest_query ) ; + + if ($result->is_error) { + croak "$result->status_line \n" ; + } +# else { +# print Dumper $result->content ; +# } + + return(\$result->content, \$rest_query) ; +} +## END of SUB + +=head2 METHOD get_lm_entry_object + + ## Description : build a list of lm_entry_objects from a raw result + ## Input : $result + ## Output : $entry_objects + ## Usage : my ( $entry_objects ) = get_lm_entry_object( $result ) ; + +=cut +## START of SUB +sub get_lm_entry_object { + ## Retrieve Values + my $self = shift ; + my ( $result, $q_mass ) = @_ ; + + my @entry_objects = () ; + my @sorted = () ; + my $nb_entries = 0 ; + my $qmass = $$q_mass ; + + if ( defined $result ) { + +# print "RESULTS: $$result\n" ; + + my @tmp = split (/\n|\r/, $$result) ; + my @entries = sort { $a cmp $b } @tmp; + $nb_entries = scalar(@entries) ; + foreach my $entry (@entries) { + my %entry = ( ID => undef, COMMON_NAME => undef, SYST_NAME => undef, FORMULA => undef, MASS => undef ) ; + + $entry{ID} = $self->get_lm_id(\$entry) ; + $entry{COMMON_NAME} = $self->get_lm_common_name(\$entry) ; + $entry{SYST_NAME} = $self->get_lm_systematic_name(\$entry) ; + $entry{FORMULA} = $self->get_lm_formula(\$entry) ; + $entry{MASS} = $self->get_lm_mass(\$entry) ; + + ## delta + $entry{QMASS} = $qmass ; + $entry{DELTA} = $self->delta_mass(\$qmass, $entry{MASS} ) ; + push (@entry_objects, \%entry) ; + } + } ## END IF + else { + croak "Can't manage a an empty result and produce a entry object\n" ; + } +# print Dumper @entry_objects ; + ## Sort By ID + if (scalar (@entry_objects) > 1 ) { ## for + @sorted = sort { $a->{ID} cmp $b->{ID} } @entry_objects ; + } + else { + @sorted = @entry_objects; + } +# print Dumper @sorted ; + return(\@sorted, \$nb_entries) ; +} +## END of SUB + +=head2 METHOD get_cluster_object + + ## Description : build a list of clusters objects from a entry_object (list of hash) using a list oh parsing rules decrived in CONF file + ## Input : $entries_result, $RULES + ## Output : $clusters_object + ## Usage : my ( $clusters_object ) = get_cluster_object( $entries_result, $RULES ) ; + +=cut +## START of SUB +sub get_cluster_object { + ## Retrieve Values + my $self = shift ; + my ( $entries_result, $RULES, $RECIPES ) = @_ ; + + my (@pre_clustering) = ( () ) ; + my $clusters_object = undef ; + + if ( ( defined $entries_result ) and ( defined $RULES )) { + + foreach my $entry (@$entries_result) { + my %cluster = ( CLUSTER_DELTA => undef, CLUSTER_NAME => undef, FORMULA => undef, ISOTOPIC_RATIO => undef, NB_ENTRIES_FOR_CLUSTER => 0, ENTRY_IDS => undef ) ; + + ## get features + my ($g1,$g2,$g3,$c1,$i1,$c2,$i2,$c3,$i3,$ox,$post) = $self->parse_lm_common_name($entry->{COMMON_NAME}, $RULES, $RECIPES) ; + + my ( $total_c ) = $self->set_total_carbons($c1, $c2, $c3) ; + my ( $total_i ) = $self->set_total_insaturations($i1, $i2, $i3) ; + my ( $group ) = $self->set_group($g1, $g2, $g3) ; + + ## set cluster object + $cluster{CLUSTER_NAME} = $self->set_cluster_name($group, $total_c, $total_i, \$ox, \$post, $entry->{COMMON_NAME} ) ; + $entry->{CLUSTER_NAME} = $cluster{CLUSTER_NAME} ; + $cluster{CLUSTER_DELTA} = $entry->{DELTA} ; + $cluster{FORMULA} = $entry->{FORMULA} ; + $cluster{ISOTOPIC_RATIO} = 'N/A' ; ## compute isotopic ratio + $cluster{NB_ENTRIES_FOR_CLUSTER} = 1 ; + $cluster{ENTRY_IDS} = $entry->{ID} ; + + push (@pre_clustering, \%cluster) ; + } + ## Pool cluster objects by cluster name + $clusters_object = $self->pool_cluster_objects(\@pre_clustering) ; + } + else { + croak "Can't manage a an empty entries result and produce a cluster object\n" ; + } + return($clusters_object) ; +} +## END of SUB + +=head2 METHOD get_lm_id + + ## Description : retrieve the following param in the formatted output + ## Input : $content + ## Output : $LM_id + ## Usage : my ( $LM_id ) = get_lm_id( $separator, $content ) ; + +=cut +## START of SUB +sub get_lm_id { + ## Retrieve Values + my $self = shift ; + my ( $content ) = @_ ; + my $LM_id = undef ; + my $entry = ${$content} ; + if ( $entry =~ /^LM[A-Z](.*)/ ) { + ##$1=LM_ID $2=COMMON_NAME $3=SYSTEMATIC_NAME $4=FORMULA $5=MASS $6=CATEGORY $7=MAIN_CLASS $8=SUB_CLASS + if ( $entry =~ /^(\w+)\t(.*)/ ) { $LM_id = $1 ; } + } + else { + carp "The submitted entry is not at the right format : $entry\n" ; + } + return(\$LM_id) ; +} +## END of SUB + +=head2 METHOD get_lm_common_name + + ## Description : retrieve the following param in the formatted output + ## Input : $content + ## Output : $common_name + ## Usage : my ( $common_name ) = get_lm_common_name( $separator, $content ) ; + +=cut +## START of SUB +sub get_lm_common_name { + ## Retrieve Values + my $self = shift ; + my ( $content ) = @_ ; + my $common_name = undef ; + my $entry = ${$content} ; + if ( $entry =~ /^LM[A-Z](.*)/ ) { + ##$1=LM_ID $2=COMMON_NAME $3=SYSTEMATIC_NAME $4=FORMULA $5=MASS $6=CATEGORY $7=MAIN_CLASS $8=SUB_CLASS + if ( $entry =~ /^(\w+)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)/ ) { $common_name = $2 ; } + } + else { + carp "The submitted entry is not at the right format : $entry\n" ; + } + return(\$common_name) ; +} +## END of SUB + +=head2 METHOD get_lm_systematic_name + + ## Description : retrieve the following param in the formatted output + ## Input : $separator, $content + ## Output : $common_name + ## Usage : my ( $common_name ) = get_lm_systematic_name( $separator, $content ) ; + +=cut +## START of SUB +sub get_lm_systematic_name { + ## Retrieve Values + my $self = shift ; + my ( $content ) = @_ ; + my $systematic_name = undef ; + my $entry = ${$content} ; + if ( $entry =~ /^LM[A-Z](.*)/ ) { + ##$1=LM_ID $2=COMMON_NAME $3=SYSTEMATIC_NAME $4=FORMULA $5=MASS $6=CATEGORY $7=MAIN_CLASS $8=SUB_CLASS + if ( $entry =~ /^(\w+)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)/ ) { $systematic_name = $3 ; } + } + else { + carp "The submitted entry is not at the right format : $entry\n" ; + } + return(\$systematic_name) ; +} +## END of SUB + +=head2 METHOD get_lm_formula + + ## Description : retrieve the following param $formula in the formatted output + ## Input : $content + ## Output : $formula + ## Usage : my ( $formula ) = get_lm_systematic_name( $separator, $content ) ; + +=cut +## START of SUB +sub get_lm_formula { + ## Retrieve Values + my $self = shift ; + my ( $content ) = @_ ; + my $formula = undef ; + my $entry = ${$content} ; + if ( $entry =~ /^LM[A-Z](.*)/ ) { + ##$1=LM_ID $2=COMMON_NAME $3=SYSTEMATIC_NAME $4=FORMULA $5=MASS $6=CATEGORY $7=MAIN_CLASS $8=SUB_CLASS + if ( $entry =~ /^(\w+)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)/ ) { $formula = $4 ; } + } + else { + carp "The submitted entry is not at the right format : $entry\n" ; + } + return(\$formula) ; +} +## END of SUB + +=head2 METHOD get_lm_mass + + ## Description : retrieve the following param $mass in the formatted output + ## Input : $content + ## Output : $mass + ## Usage : my ( $mass ) = get_lm_mass( $separator, $content ) ; + +=cut +## START of SUB +sub get_lm_mass { + ## Retrieve Values + my $self = shift ; + my ( $content ) = @_ ; + my $mass = undef ; + my $entry = ${$content} ; + if ( $entry =~ /^LM[A-Z](.*)/ ) { + ##$1=LM_ID $2=COMMON_NAME $3=SYSTEMATIC_NAME $4=FORMULA $5=MASS $6=CATEGORY $7=MAIN_CLASS $8=SUB_CLASS + if ( $entry =~ /^(\w+)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)\t(.*)/ ) { $mass = $5 ; } + } + else { + carp "The submitted entry is not at the right format : $entry\n" ; + } + return(\$mass) ; +} +## END of SUB + +=head2 METHOD delta_mass + + ## Description : compute a delta between the query mass and cpd mass + ## Input : $q_mass, $cpd_mass + ## Output : $delta_mass + ## Usage : my ( $delta_mass ) = delta_mass( $q_mass, $cpd_mass ) ; + +=cut +## START of SUB +sub delta_mass { + ## Retrieve Values + my $self = shift ; + my ( $q_mass, $cpd_mass ) = @_ ; + + my $delta_mass = 0 ; + + if ( ( defined $$q_mass ) and ( defined $$cpd_mass ) and ( $$q_mass > 0 ) and ( $$cpd_mass > 0 ) ) { + my $delta = $$cpd_mass - $$q_mass; + $delta_mass = sprintf("%.2f", $delta ); + } ## END IF + else { + croak "Provided masses are not defined or equal to zero\n" ; + } + + + return(\$delta_mass) ; +} +## END of SUB + +=head2 METHOD get_elts_from_lm_common_name + + ## Description : get group, numbers of carbons, insaturation and oxydations in LM common name and return a 'cluster' name. + ## Input : $lm_common_name, $RULES, $RECIPES + ## Output : $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post + ## Usage : my ( $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post ) = get_elts_from_lm_common_name( $lm_common_name, $RULES, $RECIPES ) ; + +=cut +## START of SUB +sub parse_lm_common_name { + ## Retrieve Values + my $self = shift ; + my ( $lm_common_name, $RULES, $RECIPES ) = @_ ; + my ( $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) ; + my $rule_nb = 0 ; + + my $common_name = $$lm_common_name ; + if (defined $common_name) { + if( $common_name =~ /\s/ ) { $common_name =~ s/ //g ; } ## del all spaces + my @matches = (undef) ; # init first position at undef to manage undef regex + my %matching_res = () ; + ## Goals : + ## - use each parsing rule on the common name + ## - extrat the 9 values containing in the common name (value can be undef) + ## - translate initialized $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post values by real ones (ex; 0 => PE, or 0-1-4 => PS-O-P) +# print "\n\nCOMMON_NAME=".$common_name."\n" ; + foreach my $rule (keys %{$RULES} ) { + $rule =~ m/^RULE(\d+)/ ; + $rule_nb = $1 ; + push ( @matches, ($common_name =~ m/$RULES->{$rule}/g) ) ; # catch all matches in the regex +# print "\nUsing - RULE -> $rule_nb\n" ; +# print Dumper @matches ; +# print "\n- - - - - - - - - - - - - \n" ; + my ($v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10, $v11 ) = split(/,/, $RECIPES->{'RECIPE'.$rule_nb}) ; +# print "($v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10, $v11 )\n" ; + ( $Gp1, $Gp2, $Gp3 ) = ( $matches[$v1], $matches[$v2], $matches[$v3] ) ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i ) = ( $matches[$v4], $matches[$v5], $matches[$v6], $matches[$v7], $matches[$v8], $matches[$v9] ) ; + ( $Ox, $Post ) = ( $matches[$v10], $matches[$v11] ) ; + +# if (scalar (@matches) > 1) { last ; } # get out of the loop + ## Add a method to manage several rules matching + $matching_res{$rule} = [$Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post] ; + ( $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) ; + ($v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10, $v11 ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) ; + @matches = (undef) ; + } + + ## foreach parsed rules - get the best + my $best_rule_is = undef ; + my ( $current_nb, $best_nb) = ( 0, 0 ) ; + foreach my $rule_nb (keys %matching_res) { + $current_nb = scalar grep { defined $_ } @{$matching_res{$rule_nb}}; + if ( $current_nb > $best_nb ) { $best_nb = $current_nb ; $best_rule_is = $rule_nb ; } + } + + if ( defined $best_rule_is ) { +# print "The best rule is : $best_rule_is\n" ; + my @tmp = @{$matching_res{$best_rule_is}} ; +# print Dumper @tmp ; + ( $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post ) = ( $tmp[0], $tmp[1], $tmp[2], $tmp[3], $tmp[4], $tmp[5], $tmp[6], $tmp[7], $tmp[8], $tmp[9], $tmp[10] ) ; +# print "$Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post\n" ; + } + } + else { + croak "No Common name to get in method get_elts_from_lm_common_name!\n" ; + } + return( $Gp1, $Gp2, $Gp3, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post) ; +} +## END of SUB + +=head2 METHOD set_total_carbons + + ## Description : compute total number of carbons found in entry + ## Input : $Ch1_C, $Ch2_C, $Ch3_C + ## Output : $total_C + ## Usage : my ( $total_C ) = set_total_carbons( $Ch1_C, $Ch2_C, $Ch3_C ) ; + +=cut +## START of SUB +sub set_total_carbons { + ## Retrieve Values + my $self = shift ; + my ( $Ch1_C, $Ch2_C, $Ch3_C ) = @_ ; + my $total_C = undef ; + + if ( ( defined $Ch1_C ) and ( defined $Ch2_C ) and ( defined $Ch3_C ) ) { $total_C = ( $Ch1_C + $Ch2_C + $Ch3_C ) ; } + elsif ( ( defined $Ch1_C ) and ( defined $Ch2_C ) ) { $total_C = ( $Ch1_C + $Ch2_C ) ; } + elsif ( ( defined $Ch1_C ) ) { $total_C = ( $Ch1_C ) ; } + else { ( $total_C ) = ( undef ) ; } + + return(\$total_C) ; +} +## END of SUB + +=head2 METHOD set_total_insaturations + + ## Description : compute total number of insaturations found in entry + ## Input : $Ch1_i, $Ch2_i, $Ch3_i + ## Output : $total_i + ## Usage : my ( $total_i ) = set_total_insaturations( $Ch1_i, $Ch2_i, $Ch3_i ) ; + +=cut +## START of SUB +sub set_total_insaturations { + ## Retrieve Values + my $self = shift ; + my ( $Ch1_i, $Ch2_i, $Ch3_i ) = @_ ; + my $total_i = undef ; + + if ( ( defined $Ch1_i ) and ( defined $Ch2_i ) and ( defined $Ch3_i ) ) { $total_i = ( $Ch1_i + $Ch2_i + $Ch3_i ) ; } + elsif ( ( defined $Ch1_i ) and ( defined $Ch2_i ) ) { $total_i = ( $Ch1_i + $Ch2_i ) ; } + elsif ( ( defined $Ch1_i ) ) { $total_i = ( $Ch1_i ) ; } + else { ( $total_i ) = ( undef ) ; } + + return(\$total_i) ; +} +## END of SUB + +=head2 METHOD set_group + + ## Description : set a complete group when multiple + ## Input : $Gp1, $Gp2, $Gp3 + ## Output : $group + ## Usage : my ( $group ) = set_group( $Gp1, $Gp2, $Gp3 ) ; + +=cut +## START of SUB +sub set_group { + ## Retrieve Values + my $self = shift ; + my ( $Gp1, $Gp2, $Gp3 ) = @_ ; + my $group = undef ; + + if ( ( defined $Gp1 ) and ( defined $Gp2 ) and ( defined $Gp3 ) ) { $group = $Gp1.'-'.$Gp2.'-'.$Gp3 ; } + elsif ( ( defined $Gp1 ) and ( defined $Gp2 ) ) { $group = $Gp1.'-'.$Gp2 ; } + elsif ( ( defined $Gp1 ) ) { $group = $Gp1 ; } + else { $group = undef ; } + + return(\$group) ; +} +## END of SUB + +=head2 METHOD set_cluster_name + + ## Description : set the cluster name by concat $group / $total_c / $total_i / $ox / $post or use $common_name if no elt exist. + ## Input : $group, $total_c, $total_i, $ox, $post, $common_name + ## Output : $cluster_name + ## Usage : my ( $cluster_name ) = set_cluster_name( $group, $total_c, $total_i, $ox, $post ) ; + +=cut +## START of SUB +sub set_cluster_name { + ## Retrieve Values + my $self = shift ; + my ( $group, $total_c, $total_i, $ox, $post, $common_name ) = @_ ; + my $cluster_name = undef ; + + if ( ( defined $$total_c ) and ( defined $$total_i ) and ( defined $$group ) ) { + $cluster_name = $$group.'_'.$$total_c.':'.$$total_i ; + ## OXIDATION AND REST + if ( defined $$ox ) { $cluster_name = $cluster_name.'('.$$ox.')' ; } + if ( defined $$post ) { $cluster_name = $cluster_name.$$post ; } + } + elsif ( ( defined $$group ) ) { $cluster_name = $$group ; } + else { $cluster_name = $$common_name ; } + + return(\$cluster_name) ; +} +## END of SUB + +=head2 METHOD pool_cluster_objects + + ## Description : pool all cluster objects in a same group by their cluster name (and formula) + ## Input : $cluster_objects + ## Output : $pool_objects + ## Usage : my ( $pool_objects ) = pool_cluster_objects( $cluster_objects ) ; + +=cut +## START of SUB +sub pool_cluster_objects { + ## Retrieve Values + my $self = shift ; + my ( $cluster_objects ) = @_ ; + + my @pools = () ; + my %sort = () ; + my @deltas = () ; + + # prerequis for the first iteration + my $ref_common_name = 'NONE' ; + + # sort entries by cluster with a key hash system + foreach my $object (@{$cluster_objects}) { + my %current = %{$object} ; + my $q_name = $current{CLUSTER_NAME} ; + + ## prepare data + if ( !defined $sort{$$q_name}{ENTRY_IDS} ) { + $sort{$$q_name}{ENTRY_IDS} = [] ; + $sort{$$q_name}{ENTRY_DELTAS} = [] ; + $sort{$$q_name}{NB_ENTRIES_FOR_CLUSTER} = 1 ; + } + else { + $sort{$$q_name}{NB_ENTRIES_FOR_CLUSTER} = ($sort{$$q_name}{NB_ENTRIES_FOR_CLUSTER}) + 1 ; + } + + ## pool + if ( $$q_name ne $ref_common_name ) { + $sort{$$q_name}{CLUSTER_NAME} = $q_name ; + push ( @{ $sort{$$q_name}{ENTRY_IDS} }, $current{ENTRY_IDS} ) ; + push ( @{ $sort{$$q_name}{ENTRY_DELTAS} }, $current{CLUSTER_DELTA} ) ; + } + else { + push ( @{ $sort{$$q_name}{ENTRY_IDS} }, $current{ENTRY_IDS} ) ; + push ( @{ $sort{$$q_name}{ENTRY_DELTAS} }, $current{CLUSTER_DELTA} ) ; + next ; + } + + ## set rest of features + $sort{$$q_name}{FORMULA} = $current{FORMULA} ; + $sort{$$q_name}{ISOTOPIC_RATIO} = $current{ISOTOPIC_RATIO} ; +# $sort{$$q_name}{CLUSTER_DELTA} = $current{CLUSTER_DELTA} ; + + $ref_common_name = $$q_name ; + } + + # and pool them + foreach ( keys %sort ) { + + ## sort IDS + my @tmp = @{ $sort{$_}{ENTRY_IDS} } ; + my @tmp_unref = () ; + my @sort_unref = () ; + my @sort_ref = () ; + + foreach (@tmp) { my $id = $$_ ; push (@tmp_unref, $id) ; } + @sort_unref = sort { $a cmp $b } @tmp_unref ; + foreach (@sort_unref) { my $id = $_ ; push (@sort_ref, \$id) ; } + $sort{$_}{ENTRY_IDS} = \@sort_ref ; + + ## sort delta and init the smallest for the cluster + my @tmp_deltas = @{ $sort{$_}{ENTRY_DELTAS} } ; + my @tmp_delta_sorted = () ; + @tmp_delta_sorted = sort { $a <=> $b } @tmp_deltas ; + $sort{$_}{CLUSTER_DELTA} = $tmp_delta_sorted[0] ; + + ## get all + push (@pools, $sort{$_}) ; + } + + ## sort by nb of entries in the cluster + my @pool_sorted = sort { $b->{NB_ENTRIES_FOR_CLUSTER} <=> $a->{NB_ENTRIES_FOR_CLUSTER} } @pools ; + + return(\@pool_sorted) ; +} +## END of SUB + +## Fonction : permet de retourner une somme de features pour un common name +## Input : $CommonName de type LM +## Ouput : $SimpleName +sub getSimpleNameFromCommonName { + ## Retrieve Values + my ( $CommonName ) = @_; + my $SimpleName = undef ; + + if (defined $CommonName) { + ## virer les espaces : + if($CommonName =~ /\s/) { $CommonName =~ s/ //g ; } + + ## Initialize all + + my ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef ) ; + + if ( $CommonName =~/^\-$/) { ## RULE des '-' only + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( 'UnNamed_IN_LM', undef, undef, undef, undef, undef ) ; + } + ## *** *** PG *** *** ## + elsif ($CommonName =~/^([A-Z|a-z|0-9|\-]*)$/) { ## RULE des XX-xx only -- Ex: PE-NMe2 ou PC + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, undef, undef, undef, undef, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)\/(\d+):(\d+)\)$/) { ## RULE des XX(16:1/16:1) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)\/(\d+):(\d+)\(([OHC]*)\)\)$/) { ## RULE des XX(16:1/16:1(OHC)) OXYDES!! + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, $6 ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\(([OHCKe0-9]*)\)\)$/) { ## RULE des XX(16:1(1Z,..)/16:1(1Z,..)(OHC)) OXYDES!! -- Ex: PE(16:0/22:6(54Z,7Z,10Z,12E,16Z,19Z)(14OH)) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, $6 ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX(16:1(1Z,..)/16:1(1Z,..)) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,5})\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX(16:1(1Z,...)) ou XX(16:1) ONLY + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, undef, undef, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\(([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX(X-16:1(1Z,..)/16:1(1Z,..)) avec (1Z,...) en option + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1.'-'.$2, $3, $4, $5, $6, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX(16:1(1Z,..)/X-16:1(1Z,..)) avec (1Z,...) en option + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1.'-'.$4, $2, $3, $5, $6, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\(([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX(X-16:1(1Z,..)/X-16:1(1Z,..)) avec (1Z,...) en option + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1.'-'.$2.'-'.$5, $3, $4, $6, $7, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]*-[A-Z|a-z|0-9]*)\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX-xx(16:1(1Z,..)/16:1(1Z,..)) -- Ex: PE-NMe(16:0/16:0) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]*-[A-Z|a-z|0-9]*)\(([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX-xx(X-16:1(1Z,..)/X-16:1(1Z,..)) -- Ex: PE-NMe(O-16:0/O-16:0) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1.'-'.$2.'-'.$5, $3, $4, $6, $7, undef ) ; + } + elsif ( $CommonName =~/^([a-z|A-Z|\-]*)\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX-xx(16:1(1Z,)/16:1(1Z,)/16:1(1Z,...)) -- Ex: PS-NAc(18:0/18:1(9Z)/16:0) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox ) = ( $1, $2, $3, $4, $5, $6, $7, undef ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)(\[[A-Z|a-z|0-9]*\])$/) { ## RULE des XX(16:1(1Z,..)/16:1(1Z,..))[Xx] -- Ex : PC(18:1(9Z)/0:0)[rac] + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox, $Post ) = ( $1, $2, $3, $4, $5, undef, $6 ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)\(([A-Z|a-z|0-9|\[\]]*)\)\)$/) { ## RULE des XX(16:1(1Z,..)/16:1(1Z,..))(OHC[S]) -- Ex: PE(16:0/20:4(5Z,8Z,10E,14Z)(12OH[S])) OXYDES!! + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, $6 ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z]{2,4})\(([A-Z]{1})-(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)\(([A-Z|a-z|0-9|\[\]]*)\)\)$/) { ## RULE des XX(X-16:1(1Z,..)/16:1(1Z,..))(OHC[S]) -- Ex: PE(P-18:0/20:4(6E,8Z,11Z,14Z)(5OH[S])) OXYDES!! + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1.'-'.$2, $3, $4, $5, $6, $7 ) ; + } + elsif ( $CommonName =~/^([A-Z|a-z|0-9|\[\]|',]*)\((\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/) { ## RULE des XX[3',5'](16:1(1Z,..)/16:1(1Z,..)) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, undef ) ; + } + ## *** *** *** *** SM *** *** *** *** ## + elsif ( $CommonName =~ /^([A-Za-z0-9\-]*)\([a-z](\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\(([COH0-9\-]*)\)\)$/ ) { ## RULE des xx-XX(16:1(1z..)/16:0(1z..)(OH)) -- Ex: PE-Cer(d16:2(4E,6E)/24:1(15Z)(2OH)) OXYDES !! + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, $6 ) ; + } + elsif ( $CommonName =~ /^([A-Za-z0-9\-]*)\([a-z]{0,1}(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/[a-z]{0,1}(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/ ) { ## RULE des xx-XX(16:1(1z..)/16:0(1z..)) -- Ex: GlcCer(d14:2(4E,6E)/18:1(9Z)) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, undef ) ; + } + elsif ( $CommonName =~ /^(M\(IP\)2C)\([a-z]{0,1}(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/[a-z]{0,1}(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\(([COH0-9\-]*)\)\)$/ ) { ## RULE des M(IP)2C(16:1(1z..)/16:0(1z..)) -- Ex: M(IP)2C(d18:0/18:0(2OH)) OXYDES !! + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, $6 ) ; + } + elsif ( $CommonName =~ /^(M\(IP\)2C)\([a-z]{0,1}(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\/[a-z]{0,1}(\d+):(\d+)[A-Z|a-z|0-9|,|\(\)]*\)$/ ) { ## RULE des M(IP)2C(16:1(1z..)/16:0(1z..)) -- Ex: M(IP)2C(t18:0/18:0) + ( $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ox ) = ( $1, $2, $3, $4, $5, undef ) ; + } + else { + print "-->Cpd not parsed: ".$CommonName."\n" ; + } + + ## Set SimpleName : + $SimpleName = &setSimpleName($CommonName, $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post) ; + } + else { + croak "No Common name to get in method getSimpleNameFromCommonName!\n" ; + } + + return($SimpleName) ; +} +### END of SUB + +## Fonction : permet de reconstruire un nom simple a partir de donnees +## Input : $CommonName, $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post +## Ouput : $SimpleName +sub setSimpleName { + ## Retrieve Values + + my ( $CommonName, $Group, $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i, $Ch3_C, $Ch3_i, $Ox, $Post ) = @_; + my ( $cTotal, $iTotal ) = ( undef, undef ) ; + my $SimpleName = undef ; + + ### COMON NAME TO SIMPLE NAME !! + if ( defined $Group ) { + + ## SOMME DES CARBONES ET INSATURATIONS + if ( ( defined $Ch1_C ) and ( defined $Ch1_i ) and ( defined $Ch2_C ) and ( defined $Ch2_i ) and ( defined $Ch3_C ) and ( defined $Ch3_i ) ) { + $cTotal = ( $Ch1_C + $Ch2_C + $Ch3_C ) ; + $iTotal = ( $Ch1_i + $Ch2_i + $Ch3_i ) ; + } + elsif ( ( defined $Ch1_C ) and ( defined $Ch1_i ) and ( defined $Ch2_C ) and ( defined $Ch2_i ) ) { + $cTotal = ( $Ch1_C + $Ch2_C ) ; + $iTotal = ( $Ch1_i + $Ch2_i ) ; + } + elsif ( ( defined $Ch1_C ) and ( defined $Ch1_i ) ) { + $cTotal = ( $Ch1_C ) ; + $iTotal = ( $Ch1_i ) ; + } + else { + ( $cTotal, $iTotal ) = ( undef, undef ) ; + } + ## Composition du NOM + if ( ( defined $cTotal ) and ( defined $iTotal ) ) { + $SimpleName = $Group.'_'.$cTotal.':'.$iTotal ; + + ## OXIDATION AND REST + if ( defined $Ox ) { $SimpleName = $SimpleName.'('.$Ox.')' ; } + if ( defined $Post ) { $SimpleName = $SimpleName.$Post ; } + } + else { $SimpleName = $Group ; } + } + else { $SimpleName = $CommonName ; } + + return($SimpleName) ; +} +### END of SUB + +## Fonction : permet de retourner une somme de features pour un common name +## Input : $CommonName de type LM +## Ouput : $Groupe, $Familly, $cTotal, $iTotal +sub getSimpleCommonName_Old { + ## Retrieve Values + my ( $CommonName ) = @_; + my ($Groupe, $Familly, $cTotal, $iTotal, $Reste, $Ox ) = ( undef, undef, undef, undef, undef, undef, undef ) ; + + if ( $CommonName =~/([A-Za-z|0-9|\-]*)\((.*)\)$/ ) { #" Attention Common name conteint parfois des espaces en debut" + ## retrieve $Familly and $Reste + ( $Familly, $Reste, $Groupe) = ( $1, $2, '' ) ; + my ( $Ch1_C, $Ch2_C, $Ch1_i, $Ch2_i ) = ( 0, 0, 0, 0 ) ; + + ## Gestion du "Groupe" et des carbones / insatures + if ( $Reste =~/^([A-Z|a-z])\-(\d+):(\d+)[A-Z0-9,\(\)]*\/(\d+):(\d+)[A-Z0-9,\(\)]*/ ) { ## rule (X|x-16:1/16:1) + $Groupe = $Familly.'-'.$1 ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $2, $3, $4, $5 ) ; + } + elsif ( $Reste =~/^([A-Z|a-z])\-(\d+):(\d+)[A-Z0-9,\(\)]*\/([A-Z|a-z])\-(\d+):(\d+)[A-Z0-9,\(\)]*/ ) { ## rule (X|x-16:1/X|x-16:1) + $Groupe = $Familly.'-'.$1.$4 ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $2, $3, $5, $6 ) ; + } + elsif ( $Reste =~/^(\d+):(\d+)[A-Z0-9,\(\)]*\/([A-Z|a-z])\-(\d+):(\d+)[A-Z0-9,\(\)]*/ ) { ## rule (16:1/X|x-16:1) + $Groupe = $Familly.'-'.$3 ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $1, $2, $4, $5 ) ; + } +# elsif ( $Reste =~/(\d+):(\d+)[A-Z0-9,\(\)]*\/(\d+):(\d+)\([A-Z0-9,\(\)]*$/ ) { ## rule (16:1/16:1) + elsif ( $Reste =~/(\d+):(\d+)\/(\d+):(\d+)$/ ) { ## rule (16:1/16:1) + $Groupe = $Familly ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $1, $2, $3, $4 ) ; + } + elsif ( $Reste =~/^([A-Z|a-z])(\d+):(\d+)[A-Z0-9,\(\)]*\/(\d+):(\d+)[A-Z0-9,\(\)]*/ ) { ## rule (X|x16:1/16:1) + $Groupe = $Familly.'-'.$1 ; ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $2, $3, $4, $5 ) ; + } + elsif ( $Reste =~/^(\d+):(\d+)[A-Z0-9,\(\)]*\/([A-Z|a-z])(\d+):(\d+)[A-Z0-9,\(\)]*/ ) { ## rule (16:1/X|x16:1) + $Groupe = $Familly.'-'.$3 ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $1, $2, $4, $5 ) ; + } + elsif ( $Reste =~/^([A-Z|a-z])(\d+):(\d+)[A-Z0-9,\(\)]*\/([A-Z|a-z])(\d+):(\d+)[A-Z0-9,\(\)]*/ ) { ## rule (X|x16:1/X|x16:1) + $Groupe = $Familly.'-'.$1.$4 ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $2, $3, $5, $6 ) ; + } + ## Cas des Oxydes ([320]) + elsif ( $Reste =~/^(\d+):(\d+)[A-Z0-9,\(\)]*\/(\d+):(\d+)[A-Z0-9,\(\)]*\(([A-Z|a-z|0-9|\[\]]*)\)/ ) { ## rule (16:1/16:1)(xOH[S]) + $Groupe = $Familly.'-'.$1 ; + + $Ox = $5 ; ## for Ox gpt + print "$Ox\n\n" ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( $1, $2, $3, $4 ) ; + } + else { + carp "This reste ($Reste) is unknown in actual parsing rules\n" ; + $Groupe = $CommonName ; + ( $Ch1_C, $Ch1_i, $Ch2_C, $Ch2_i ) = ( 0, 0, 0, 0 ) ; + } + + + + ## End of parsing "Gestion du "Groupe" et des carbones / insatures" + + ## Sum Carbones and insaturates + if ( ( defined $Ch1_C ) and ( defined $Ch1_i ) and ( defined $Ch2_C ) and ( defined $Ch2_i ) ) { + if ( ( $Ch1_C+$Ch2_C ) < ( $Ch1_i+$Ch2_i ) ) { + warn "Number of insatured are greater than carbon for $CommonName \n" ; + } + else { + $cTotal = $Ch1_C + $Ch2_C ; + $iTotal = $Ch1_i + $Ch2_i ; + } + } + else { + croak "Carbones or insatures number is not defined \n" ; + } + } # End IF + elsif ( $CommonName =~/^-$/ ) { ## cas du no common name dans la base (-) + $Groupe = 'UnNAMED_IN_LM' ; + $cTotal = 0 ; + $iTotal = 0 ; + } # End ELSIF + else { + print "Not yet parsing rules for this commonName $CommonName\n" ; + $Groupe = $CommonName ; + $cTotal = 0 ; + $iTotal = 0 ; + } + + return( $Groupe, $Familly, $cTotal, $iTotal, $Ox ) ; +} +### END of SUB + +## Fonction : permet de regrouper les composes ayant le meme commona name simplifie +## Input : +## Ouput : +sub clusteringCommonName { + ## Retrieve Values + my ( $refH_Features ) = @_; + my %Results = %{$refH_Features} ; + my %Clusters = () ; + +# print Dumper %Results ; + + foreach my $id ( keys %Results ) { + + if ( $id ne 'nbentries' ) { + + my $oldFormula = '' ; + my $Cpd = undef ; + + if (defined $Results{$id}{'simplename'}) { $Cpd = $Results{$id}{'simplename'} ; } +# else { print"+++COCO\n"; $Cpd = 'UNDEF' ; } ## ?? comprends pas l'impact sur la suite ! + + + if ( !defined $Clusters{$Cpd}{'list'} ) { $Clusters{$Cpd}{'list'} = [] ; } + + my $currentFormula = $Results{$id}{'formula'} ; + + if ( $currentFormula ne $oldFormula ) { + $Clusters{$Cpd}{'formula'} = $Results{$id}{'formula'} ; + push ( @{$Clusters{$Cpd}{'list'} }, $id ) ; + } + else { + push ( @{$Clusters{$Cpd}{'list'} }, $id ) ; + next ; + } + } + else { # Gere le cas ou la key du hash courant est le nombre d'entrees... + next ; + } + } + return(\%Clusters) ; +} +### END of SUB + + + + + +1 ; + + +__END__ + +=head1 SUPPORT + +You can find documentation for this module with the perldoc command. + + perldoc XXX.pm + +=head1 Exports + +=over 4 + +=item :ALL is ... + +=back + +=head1 AUTHOR + +Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. + +=head1 VERSION + +version 1 : xx / xx / 201x + +version 2 : ?? + +=cut \ No newline at end of file