diff lib/massbank_parser.pm @ 0:023c380900ef draft default tip

Init repository with last massbank_ws_searchspectrum master version
author fgiacomoni
date Wed, 19 Apr 2017 11:31:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/massbank_parser.pm	Wed Apr 19 11:31:58 2017 -0400
@@ -0,0 +1,992 @@
+package lib::massbank_parser ;
+
+use strict;
+use warnings ;
+use Exporter ;
+use Carp ;
+
+use File::Basename;
+
+use Data::Dumper ;
+
+use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
+
+our $VERSION = "1.0" ;
+our @ISA = qw(Exporter) ;
+our @EXPORT = qw( getChemNamesFromString getPeaksFromString ) ;
+our %EXPORT_TAGS = ( ALL => [qw( getChemNamesFromString getPeaksFromString )] ) ;
+
+=head1 NAME
+
+lib::massbank_parser - Parser for MassBank record files and MassBank record strings
+
+=head1 SYNOPSIS
+
+    use lib::massbank_parser ;
+    my $object = lib::massbank_parser->new();
+    my $names = $object->getChemNamesFromString( $record ) ;
+
+=head1 DESCRIPTION
+
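+This module extracts fields from MassBank records (instrument, mass
+spectrometry and chromatography conditions, peaks, names, links, ...),
+read either from a record file or from a record held in a string.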
+
+=head1 METHODS
+
+Methods are :
+
+=head2 METHOD new
+
+	## Description : new
+	## Input : $self
+	## Output : bless $self ;
+	## Usage : new() ;
+
+=cut
+
+sub new {
+    ## Constructor : builds an empty hash-based parser object.
+    ## Input : the invocant (class name or existing object); may be omitted
+    ##         when new() is called as a plain function.
+    ## Output : blessed hash reference
+    my $invocant = shift ;
+    ## Two-argument bless, so that a subclass inheriting new() gets an object
+    ## of its own class. Falls back to this package when no invocant is given.
+    my $class = ref($invocant) || $invocant || __PACKAGE__ ;
+    my $self = {} ;
+    bless( $self, $class ) ;
+    return $self ;
+}
+### END of SUB
+
+=head2 METHOD get_list_of_analysis_intrument_names
+
+	## Description : returns the list of unique names of the instruments used
+	## Input : $dir, $ms_files (a list of files)
+	## Output : $names
+	## Usage : my ( $names ) = get_list_of_analysis_intrument_names( $ms_files ) ;
+	
+=cut
+## START of SUB
+sub get_list_of_analysis_intrument_names {
+    ## Collects the distinct AC$INSTRUMENT values found in a set of MassBank
+    ## files, in order of first appearance.
+    ## Input : $dir (directory holding the files), $ms_files (array ref of file names)
+    ## Output : array ref of names (the text following "AC$INSTRUMENT:", untrimmed)
+    ## Dies (croak) when the directory or a file name is undef, or when a file does not exist.
+    my ( $self, $dir, $ms_files ) = @_ ;
+    require File::Spec ;
+    my %seen = () ;
+    my @names = () ;
+    foreach my $ms_file ( @{$ms_files} ) {
+        unless ( ( defined $dir ) and ( defined $ms_file ) ) {
+            croak "Can't work with a undef / none existing massbank file\n" ;
+        }
+        ## Join with the platform separator: a hard-coded backslash only
+        ## resolves on Windows.
+        my $file = File::Spec->catfile( $dir, $ms_file ) ;
+        unless ( -e $file ) {
+            croak "Can't work with a undef / none existing massbank file\n" ;
+        }
+        ## Three-argument open on a lexical handle: the file name is never
+        ## interpreted as a mode and no global handle is shared between calls.
+        open( my $ms_fh, '<', $file ) or die "Cant' read the file $file\n" ;
+        while ( my $field = <$ms_fh> ) {
+            chomp $field ;
+            next unless ( $field =~ /AC\$INSTRUMENT:(.*)/ ) ;
+            my $name = $1 ;
+            ## An already known instrument ends the scan of the current file.
+            last if ( $seen{$name} ) ;
+            $seen{$name} = 1 ;
+            push( @names, $name ) ;
+        }
+        close($ms_fh) ;
+    }
+    return ( \@names ) ;
+}
+## END of SUB
+
+=head2 METHOD get_analysis_instruments_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_analysis_instruments_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_analysis_instruments_data {
+    ## Reads the instrument name and type from a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : hash ref with keys 'name' (AC$INSTRUMENT) and 'type'
+    ##          (AC$INSTRUMENT_TYPE); an empty hash ref when neither field is found.
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my %features = ( 'name' => undef, 'type' => undef ) ;
+    my $found = 0 ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /AC\$INSTRUMENT: (.*)/ ) { $features{'name'} = $1 ; $found++ ; }
+        elsif ( $field =~ /AC\$INSTRUMENT_TYPE: (.*)/ ) { $features{'type'} = $1 ; $found++ ; }
+    }
+    close($ms_fh) ;
+    ## No field at all : return an empty object instead of undef values.
+    if ( $found == 0 ) { %features = () ; }
+    return ( \%features ) ;
+}
+## END of SUB
+     
+=head2 METHOD get_ms_methods_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_ms_methods_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_ms_methods_data {
+    ## Reads the AC$MASS_SPECTROMETRY subtags of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : hash ref keyed by the lower-cased subtag name (ion_mode, ms_type,
+    ##          collision_energy, ...); each value is the text following the
+    ##          subtag (untrimmed) or undef. An empty hash ref when no subtag is found.
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    ## Subtags in matching order; the first two are mandatory, the others optional.
+    my @subtags = qw(
+        ION_MODE MS_TYPE COLLISION_ENERGY COLLISION_GAS DESOLVATION_GAS_FLOW
+        DESOLVATION_TEMPERATURE IONIZATION_ENERGY LASER MATRIX MASS_ACCURACY
+        REAGENT_GAS SCANNING
+    ) ;
+    ## Records write "ION_MODE POSITIVE" / "MS_TYPE MS2" without a colon after
+    ## the subtag; the colon form is still accepted for these two.
+    my %colon_optional = ( 'ION_MODE' => 1, 'MS_TYPE' => 1 ) ;
+    my %features = map { ( lc($_) => undef ) } @subtags ;
+    my $found = 0 ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        foreach my $subtag (@subtags) {
+            my $colon = $colon_optional{$subtag} ? ':?' : '' ;
+            if ( $field =~ /AC\$MASS_SPECTROMETRY: ${subtag}${colon}(.*)/ ) {
+                $features{ lc($subtag) } = $1 ;
+                $found++ ;
+                last ; ## only the first matching subtag is used for a line
+            }
+        }
+    }
+    close($ms_fh) ;
+    ## No field at all : return an empty object instead of undef values.
+    if ( $found == 0 ) { %features = () ; }
+    return ( \%features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_solvents_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_solvents_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_solvents_data {
+    ## Lists the solvent entries (AC$CHROMATOGRAPHY: SOLVENT ...) of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : array ref of strings, each made of 'Solvent ' followed by the
+    ##          text found after the SOLVENT subtag (untrimmed)
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my @features = () ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /AC\$CHROMATOGRAPHY: SOLVENT(.*)/ ) { push( @features, 'Solvent ' . $1 ) ; }
+    }
+    close($ms_fh) ;
+    return ( \@features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_sample_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_sample_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_sample_data {
+    ## Reads the sample description (SP$SAMPLE) of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : hash ref with the key 'sample_type' holding the text found after
+    ##          "SP$SAMPLE" (untrimmed; the last such line wins); an empty hash
+    ##          ref when the field is absent.
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my %features = ( 'sample_type' => undef ) ;
+    my $found = 0 ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /SP\$SAMPLE(.*)/ ) { $features{'sample_type'} = $1 ; $found++ ; }
+    }
+    close($ms_fh) ;
+    ## No field at all : return an empty object instead of an undef value.
+    if ( $found == 0 ) { %features = () ; }
+    return ( \%features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_chromato_methods_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_chromato_methods_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_chromato_methods_data {
+    ## Reads the AC$CHROMATOGRAPHY subtags of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : hash ref keyed by the lower-cased subtag name (capillary_voltage,
+    ##          column_name, column_temperature, flow_gradient, flow_rate,
+    ##          retention_time); each value is the text following the subtag and
+    ##          one space, or undef. An empty hash ref when no subtag is found.
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my @subtags = qw( CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE FLOW_GRADIENT FLOW_RATE RETENTION_TIME ) ;
+    my %features = map { ( lc($_) => undef ) } @subtags ;
+    my $found = 0 ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        foreach my $subtag (@subtags) {
+            if ( $field =~ /AC\$CHROMATOGRAPHY: ${subtag} (.*)/ ) {
+                $features{ lc($subtag) } = $1 ;
+                $found++ ;
+                last ; ## only the first matching subtag is used for a line
+            }
+        }
+    }
+    close($ms_fh) ;
+    ## No field at all : return an empty object instead of undef values.
+    if ( $found == 0 ) { %features = () ; }
+    return ( \%features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_analytical_conditions_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank .. for massbank version < 2.0
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_analytical_conditions_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_analytical_conditions_data {
+    ## Reads the AC$ANALYTICAL_CONDITION subtags of a MassBank record file and
+    ## sorts them into chromatography fields and mass spectrometry fields.
+    ## Input : $ms_file (path of the record file)
+    ## Output : two hash refs, ( chromatography fields, mass spectrometry fields ),
+    ##          each keyed by the lower-cased subtag name with the text following
+    ##          the subtag and one space as value (or undef). A hash ref is empty
+    ##          when none of its subtags is found.
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my @chrom_subtags = qw( CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE FLOW_GRADIENT FLOW_RATE RETENTION_TIME ) ;
+    my @ms_subtags = qw(
+        ION_MODE MS_TYPE COLLISION_ENERGY COLLISION_GAS DESOLVATION_GAS_FLOW
+        DESOLVATION_TEMPERATURE IONIZATION_ENERGY LASER MATRIX MASS_ACCURACY
+        REAGENT_GAS SCANNING
+    ) ;
+    my %features_chrom = map { ( lc($_) => undef ) } @chrom_subtags ;
+    my %features_ms = map { ( lc($_) => undef ) } @ms_subtags ;
+    my ( $found_chrom, $found_ms ) = ( 0, 0 ) ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    LINE: while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        ## Chromatography subtags are tried first, then the MS ones. Every
+        ## value is captured after the separating space, COLUMN_TEMPERATURE included.
+        foreach my $subtag (@chrom_subtags) {
+            if ( $field =~ /AC\$ANALYTICAL_CONDITION: ${subtag} (.*)/ ) {
+                $features_chrom{ lc($subtag) } = $1 ;
+                $found_chrom++ ;
+                next LINE ;
+            }
+        }
+        foreach my $subtag (@ms_subtags) {
+            if ( $field =~ /AC\$ANALYTICAL_CONDITION: ${subtag} (.*)/ ) {
+                $features_ms{ lc($subtag) } = $1 ;
+                $found_ms++ ;
+                next LINE ;
+            }
+        }
+    }
+    close($ms_fh) ;
+    ## No field at all in a group : return an empty object for that group.
+    if ( $found_ms == 0 ) { %features_ms = () ; }
+    if ( $found_chrom == 0 ) { %features_chrom = () ; }
+    return ( \%features_chrom, \%features_ms ) ;
+}
+## END of SUB
+
+=head2 METHOD get_spectrums_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_spectrums_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_spectrums_data {
+    ## Reads the focused-ion fields and the peak count of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : hash ref with the keys 'ion_type', 'precursor_mz' and
+    ##          'precursor_type' (text following the MS$FOCUSED_ION subtag,
+    ##          untrimmed) and 'num_peaks' (text following "PK$NUM_PEAK: ");
+    ##          an empty hash ref when none of them is found.
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my %features = (
+        'ion_type' => undef,
+        'precursor_mz' => undef,
+        'precursor_type' => undef,
+        'num_peaks' => undef,
+    ) ;
+    my $found = 0 ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /MS\$FOCUSED_ION: ION_TYPE(.*)/ ) { $features{'ion_type'} = $1 ; $found++ ; }
+        elsif ( $field =~ /MS\$FOCUSED_ION: PRECURSOR_M\/Z(.*)/ ) { $features{'precursor_mz'} = $1 ; $found++ ; }
+        elsif ( $field =~ /MS\$FOCUSED_ION: PRECURSOR_TYPE(.*)/ ) { $features{'precursor_type'} = $1 ; $found++ ; }
+        elsif ( $field =~ /PK\$NUM_PEAK: (.*)/ ) { $features{'num_peaks'} = $1 ; $found++ ; }
+    }
+    close($ms_fh) ;
+    ## No field at all : return an empty object instead of undef values.
+    if ( $found == 0 ) { %features = () ; }
+    return ( \%features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_peaks_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_peaks_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_peaks_data {
+    ## Reads the peak list of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : array ref of hash refs with the keys 'mz', 'intensity' and
+    ##          'relative_intensity', one per peak line found after the
+    ##          "PK$PEAK: m/z int. rel.int." header
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    ## Accepted "m/z intensity relative_intensity" layouts, tried in this order :
+    ## integer/integer, decimal/decimal, decimal/integer, integer/decimal and
+    ## finally an intensity written with an exponent (e.g. 1.5e3 or 2.1E+05).
+    my @layouts = (
+        qr/\s+(\d+)\s+(\d+)\s+(\d+)/,
+        qr/\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)/,
+        qr/\s+(\d+\.\d+)\s+(\d+)\s+(\d+)/,
+        qr/\s+(\d+)\s+(\d+\.\d+)\s+(\d+)/,
+        qr/\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?[eE][+-]?\d+)\s+(\d+)/,
+    ) ;
+    my @features = () ;
+    my $in_peaks = 0 ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) { $in_peaks = 1 ; next ; }
+        next unless ( $in_peaks == 1 ) ; ## peak area not reached yet
+        foreach my $layout (@layouts) {
+            if ( my ( $mz, $intensity, $relative ) = ( $field =~ $layout ) ) {
+                ## Exponent notation is turned into a plain number.
+                if ( $intensity =~ /[eE]/ ) { $intensity = $intensity + 0 ; }
+                push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
+                last ;
+            }
+        }
+    }
+    close($ms_fh) ;
+    return ( \@features ) ;
+}
+## END of SUB
+
+=head2 METHOD getPeaksFromString
+
+	## Description : permet de recuperer la data peaks d'un record handler massbank
+	## Input : $record
+	## Output : $features
+	## Usage : my ( $features ) = getPeaksFromString( $record ) ;
+	
+=cut
+## START of SUB
+sub getPeaksFromString {
+    ## Reads the peak list of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : array ref of hash refs with the keys 'mz', 'intensity' and
+    ##          'relative_intensity', one per peak line found after the
+    ##          "PK$PEAK: m/z int. rel.int." header
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    unless ( defined $record ) {
+        croak "Can't work with a undef / none existing massbank handler\n" ;
+    }
+    ## Accepted "m/z intensity relative_intensity" layouts, tried in this order :
+    ## integer/integer, decimal/decimal, decimal/integer, integer/decimal and
+    ## finally an intensity written with an exponent (e.g. 1.5e3 or 2.1E+05).
+    my @layouts = (
+        qr/\s+(\d+)\s+(\d+)\s+(\d+)/,
+        qr/\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)/,
+        qr/\s+(\d+\.\d+)\s+(\d+)\s+(\d+)/,
+        qr/\s+(\d+)\s+(\d+\.\d+)\s+(\d+)/,
+        qr/\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?[eE][+-]?\d+)\s+(\d+)/,
+    ) ;
+    my @features = () ;
+    my $in_peaks = 0 ;
+    foreach my $field ( split( /\n/, $record ) ) {
+        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) { $in_peaks = 1 ; next ; }
+        next unless ( $in_peaks == 1 ) ; ## peak area not reached yet
+        foreach my $layout (@layouts) {
+            if ( my ( $mz, $intensity, $relative ) = ( $field =~ $layout ) ) {
+                ## Exponent notation means mantissa * 10**exponent : numifying
+                ## the captured text gives exactly that value.
+                if ( $intensity =~ /[eE]/ ) { $intensity = $intensity + 0 ; }
+                push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
+                last ;
+            }
+        }
+    }
+    return ( \@features ) ;
+}
+## END of SUB
+
+=head2 METHOD getIdFromString
+
+	## Description : get the accession id of a massbank record
+	## Input : $record
+	## Output : $id
+	## Usage : my ( $id ) = getIdFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getIdFromString {
+    ## Extracts the accession id of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "ACCESSION:" and its whitespace (the last
+    ##          such line wins), or undef when the record has no such line
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    croak "Can't work with a undef / none existing massbank handler\n" unless ( defined $record ) ;
+    my $accession = undef ;
+    for my $line ( split( /\n/, $record ) ) {
+        next unless ( $line =~ /ACCESSION:\s+(.+)/ ) ;
+        $accession = $1 ;
+    }
+    return ($accession) ;
+}
+### END of SUB
+
+
+
+=head2 METHOD getInstrumentTypeFromString
+
+	## Description : get the instrument type of massbank record
+	## Input : $record
+	## Output : $instrumentType
+	## Usage : my ( $instrumentType ) = getInstrumentTypeFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getInstrumentTypeFromString {
+    ## Extracts the instrument type of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "INSTRUMENT_TYPE:" and its whitespace (the
+    ##          last such line wins), or undef when the record has no such line
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    if ( !defined $record ) {
+        croak "Can't work with a undef / none existing massbank handler\n" ;
+    }
+    ## Collect every match, line by line, then keep the last one.
+    my @types = () ;
+    foreach my $line ( split( /\n/, $record ) ) {
+        if ( $line =~ /INSTRUMENT_TYPE:\s+(.+)/ ) { push( @types, $1 ) ; }
+    }
+    my $instrumentType = @types ? $types[-1] : undef ;
+    return ($instrumentType) ;
+}
+### END of SUB
+
+=head2 METHOD getFormulaFromString
+
+	## Description : get the elementar formula of massbank record
+	## Input : $record
+	## Output : $formula
+	## Usage : my ( $formula ) = getFormulaFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getFormulaFromString {
+    ## Extracts the elemental formula of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "CH$FORMULA:" and its whitespace (the last
+    ##          such line wins), or undef when the record has no such line
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    croak "Can't work with a undef / none existing massbank handler\n" if ( !defined $record ) ;
+    my $formula = undef ;
+    ## Walk the lines backwards : the first hit is the last line of the record
+    ## carrying the field.
+    foreach my $line ( reverse( split( /\n/, $record ) ) ) {
+        if ( $line =~ /CH\$FORMULA:\s+(.+)/ ) {
+            $formula = $1 ;
+            last ;
+        }
+    }
+    return ($formula) ;
+}
+### END of SUB
+
+=head2 METHOD getInchiFromString
+
+	## Description : get the IUPAC InCHi of massbank record
+	## Input : $record
+	## Output : $inchi
+	## Usage : my ( $inchi ) = getInchiFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getInchiFromString {
+    ## Extracts the CH$IUPAC field (InChI) of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "CH$IUPAC:" and its whitespace (the last
+    ##          such line wins), or undef when the record has no such line
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    unless ( defined $record ) {
+        croak "Can't work with a undef / none existing massbank handler\n" ;
+    }
+    my $inchi = undef ;
+    my @lines = split( /\n/, $record ) ;
+    for my $index ( 0 .. $#lines ) {
+        $inchi = $1 if ( $lines[$index] =~ /CH\$IUPAC:\s+(.+)/ ) ;
+    }
+    return ($inchi) ;
+}
+### END of SUB
+
+=head2 METHOD getExactMzFromString
+
+	## Description : get the exact mass of massbank record
+	## Input : $record
+	## Output : $exactMass
+	## Usage : my ( $exactMass ) = getExactMzFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getExactMzFromString {
+    ## Extracts the exact mass of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "CH$EXACT_MASS:" and its whitespace (the
+    ##          last such line wins), or undef when the record has no such line
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    defined $record
+        or croak "Can't work with a undef / none existing massbank handler\n" ;
+    my $exactMass = undef ;
+    foreach my $line ( split( /\n/, $record ) ) {
+        if ( $line =~ /CH\$EXACT_MASS:\s+(.+)/ ) { $exactMass = $1 ; }
+    }
+    return ($exactMass) ;
+}
+### END of SUB
+
+
+=head2 METHOD getPrecursorTypeFromString
+
+	## Description : get the precursor type of massbank record
+	## Input : $record
+	## Output : $precursorType
+	## Usage : my ( $precursorType ) = getPrecursorTypeFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getPrecursorTypeFromString {
+    ## Extracts the precursor type of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "PRECURSOR_TYPE" or "ION_TYPE" (untrimmed),
+    ##          whichever line comes first in the record; otherwise the last
+    ##          ';'-separated item of RECORD_TITLE with all whitespace removed;
+    ##          otherwise the string 'NA'
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    unless ( defined $record ) {
+        croak "Can't work with a undef / none existing massbank handler\n" ;
+    }
+    my ( $from_precursor, $from_ion, $from_title ) = ( undef, undef, undef ) ;
+    foreach my $line ( split( /\n/, $record ) ) {
+        if ( $line =~ /RECORD_TITLE:\s+(.+)/ ) {
+            my @title_items = split( /;/, $1 ) ;
+            $from_title = $title_items[-1] ;
+            $from_title =~ s/\s//g ;
+        }
+        ## The scan stops at the first PRECURSOR_TYPE or ION_TYPE line, so an
+        ## ION_TYPE line placed earlier in the record hides a later PRECURSOR_TYPE.
+        if ( $line =~ /PRECURSOR_TYPE(.+)/ ) {
+            $from_precursor = $1 ;
+            last ;
+        }
+        if ( $line =~ /ION_TYPE(.+)/ ) {
+            $from_ion = $1 ;
+            last ;
+        }
+    }
+    ## First defined candidate, by priority : precursor type, ion type, title.
+    my $precursorType = 'NA' ;
+    foreach my $candidate ( $from_precursor, $from_ion, $from_title ) {
+        if ( defined $candidate ) {
+            $precursorType = $candidate ;
+            last ;
+        }
+    }
+    return ($precursorType) ;
+}
+### END of SUB
+
+=head2 METHOD getMsTypeFromString
+
+	## Description : get the MS type of massbank record
+	## Input : $record
+	## Output : $msType
+	## Usage : my ( $msType ) = getMsTypeFromString ( $record ) ;
+	
+=cut
+## START of SUB
+sub getMsTypeFromString {
+    ## Extracts the MS type of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : the text following "AC$MASS_SPECTROMETRY: MS_TYPE" and its
+    ##          whitespace (the last such line wins), or undef when the record
+    ##          has no such line
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    croak "Can't work with a undef / none existing massbank handler\n" unless ( defined $record ) ;
+    my $msType = undef ;
+    my @lines = split( /\n/, $record ) ;
+    while ( @lines ) {
+        my $line = shift(@lines) ;
+        next unless ( $line =~ /AC\$MASS_SPECTROMETRY:\s+MS_TYPE\s+(.+)/ ) ;
+        $msType = $1 ;
+    }
+    return ($msType) ;
+}
+### END of SUB
+
+=head2 METHOD getChemNamesFromString
+
+	## Description : get the list of names of a massbank record
+	## Input : $record
+	## Output : $names
+	## Usage : my ( $names ) = getChemNamesFromString( $record ) ;
+	
+=cut
+## START of SUB
+sub getChemNamesFromString {
+    ## Lists the chemical names of a MassBank record held in a string.
+    ## Input : $record (the whole record, lines separated by "\n")
+    ## Output : array ref of the texts following "CH$NAME: ", in record order
+    ## Dies (croak) when $record is undef.
+    my ( $self, $record ) = @_ ;
+    unless ( defined $record ) {
+        croak "Can't work with a undef / none existing massbank record (string)\n" ;
+    }
+    ## One name per matching line; the other lines contribute nothing.
+    my @names = map { ( $_ =~ /CH\$NAME: (.*)/ ) ? ($1) : () } split( /\n/, $record ) ;
+    return ( \@names ) ;
+}
+## END of SUB
+
+
+
+
+
+=head2 METHOD getMassBankHandler
+
+	## Description : get a massbank handler from a file
+	## Input : $record
+	## Output : $massbankHandler
+	## Usage : my ( $massbankHandler ) = getMassBankHandler ( $record ) ;
+	
+=cut
+## START of SUB
+sub getMassBankHandler {
+    ## Placeholder : not implemented yet, always returns undef.
+    ## Input : $record (currently unused)
+    ## Output : undef
+    my ( $self, $record ) = @_ ;
+    ## TODO...
+    my $massbankHandler = undef ;
+    return ($massbankHandler) ;
+}
+### END of SUB
+
+=head2 METHOD get_annotations_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_annotations_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_annotations_data {
+    ## Lists the PK$ANNOTATION entries of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : array ref of the texts following "PK$ANNOTATION:" (untrimmed), in file order
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my @features = () ;
+    ## Three-argument open on a lexical handle: the file name is taken
+    ## literally (a two-argument open strips surrounding whitespace and reads
+    ## mode characters from it) and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /PK\$ANNOTATION:(.*)/ ) { push( @features, $1 ) ; }
+    }
+    close($ms_fh) ;
+    return ( \@features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_links_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_links_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_links_data {
+    ## Collects the external database links (CH$LINK) of a MassBank record file.
+    ## Input : $ms_file (path of the record file)
+    ## Output : hash ref with the keys 'CAS', 'KEGG' and 'PUBCHEM', each holding
+    ##          an array ref (possibly empty) of the identifiers found after
+    ##          "CH$LINK: CAS ", "CH$LINK: KEGG " and "CH$LINK: PUBCHEM CID "
+    ## Dies (croak) when $ms_file is undef or does not exist.
+    my ( $self, $ms_file ) = @_ ;
+    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
+        croak "Can't work with a undef / none existing massbank file\n" ;
+    }
+    my %features = ( 'CAS' => [], 'KEGG' => [], 'PUBCHEM' => [] ) ;
+    ## Three-argument open on a lexical handle: the file name is never
+    ## interpreted as a mode and no global handle is shared between calls.
+    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
+    while ( my $field = <$ms_fh> ) {
+        chomp $field ;
+        if ( $field =~ /CH\$LINK: CAS (.*)/ ) { push( @{ $features{'CAS'} }, $1 ) ; }
+        elsif ( $field =~ /CH\$LINK: KEGG (.*)/ ) { push( @{ $features{'KEGG'} }, $1 ) ; }
+        elsif ( $field =~ /CH\$LINK: PUBCHEM CID (.*)/ ) { push( @{ $features{'PUBCHEM'} }, $1 ) ; }
+        ## other link types are ignored
+    }
+    close($ms_fh) ;
+    return ( \%features ) ;
+}
+## END of SUB
+
+=head2 METHOD get_ms_record_links_data
+
+	## Description : permet de recuperer tous les champs d'un object massbank
+	## Input : $ms_file
+	## Output : $features
+	## Usage : my ( $features ) = get_ms_record_links_data( $ms_file ) ;
+	
+=cut
+## START of SUB
+sub get_ms_record_links_data {
+    ## Derives the record accession and its source database from a record file name.
+    ## Input : $ms_file (path or name of the record file, '.txt' suffix optional)
+    ## Output : hash ref with the keys 'accession' (trailing word of the base
+    ##          name, e.g. 0001-PS0002 => PS0002) and 'name' ('RESPECT',
+    ##          'MASSBANK' or undef); an empty hash ref when no accession can
+    ##          be read from the name
+    ## Warns (carp) when the accession prefix is not a known MassBank or ReSpect one.
+    my ( $self, $ms_file ) = @_ ;
+    ## Internal reference for MASSBANK and RESPECT : accession prefix => database
+    my %database_of = (
+        ( map { $_ => 'RESPECT' } qw( PS PT PM ) ),
+        ( map { $_ => 'MASSBANK' } qw(
+            TUE GLS AU MSJ ML FIO UF CO UO TT OUF MCH NU KNA MT CE KO KZ JEL
+            JP PR BML CA TY PB FU EA UT BSU WA
+        ) ),
+    ) ;
+    return ( {} ) unless ($ms_file) ;
+    my $filename = basename( "$ms_file", ".txt" ) ;
+    ## keep only the record id at the end of the file name
+    return ( {} ) unless ( $filename =~ /(\w+)$/ ) ;
+    my %db = ( 'accession' => $1, 'name' => undef ) ;
+    if ( $db{'accession'} =~ /(\D+)(\d+)/ ) {
+        my $key = $1 ;
+        if ( exists $database_of{$key} ) {
+            $db{'name'} = $database_of{$key} ;
+        }
+        else {
+            carp "The following key ($key) for $db{'accession'} has an unknown reference (not a Massbank or ReSpect source)\n" ;
+        }
+    }
+    return ( \%db ) ;
+}
+## END of SUB
+
+
+1 ;
+
+
+__END__
+
+=head1 SUPPORT
+
+You can find documentation for this module with the perldoc command.
+
+ perldoc lib::massbank_parser
+
+=head1 Exports
+
+=over 4
+
+=item :ALL is ...
+
+=back
+
+=head1 AUTHOR
+
+Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+
+=head1 VERSION
+
+version 1 : 25 / 06 / 2013
+
+version 2 : ??
+
+=cut
\ No newline at end of file