view lib/massbank_parser.pm @ 0:023c380900ef draft default tip

Init repository with last massbank_ws_searchspectrum master version
author fgiacomoni
date Wed, 19 Apr 2017 11:31:58 -0400
parents
children
line wrap: on
line source

package lib::massbank_parser ;

use strict;
use warnings ;
use Exporter ;
use Carp ;

use File::Basename;

use Data::Dumper ;

## Package variables used by Exporter. They are pre-declared with 'use vars'
## and declared again with 'our' just below.
use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);

our $VERSION = "1.0" ;
our @ISA = qw(Exporter) ;
## Both exported subs start with 'my $self = shift', so their first argument
## is consumed as the invocant : call them as methods on an object
## ( $obj->getPeaksFromString($record) ) or pass a placeholder first argument.
our @EXPORT = qw( getChemNamesFromString getPeaksFromString ) ;
## The :ALL tag lists the same two subs as the default export list.
our %EXPORT_TAGS = ( ALL => [qw( getChemNamesFromString getPeaksFromString )] ) ;

=head1 NAME

lib::massbank_parser - parser for MassBank record files and record strings

=head1 SYNOPSIS

    use lib::massbank_parser ;
    my $object = lib::massbank_parser->new();
    my $names = $object->getChemNamesFromString( $record ) ;

=head1 DESCRIPTION

This module extracts fields (instrument, mass spectrometry and
chromatography conditions, peaks, names, links, ...) from MassBank
records, read either from a record file or from a record string.

=head1 METHODS

Methods are :

=head2 METHOD new

	## Description : new
	## Input : $self
	## Output : bless $self ;
	## Usage : new() ;

=cut

sub new {
    ## Purpose : constructor, returns an empty blessed hash reference.
    ## Input   : invocant (class name or existing object), no other argument.
    ## Output  : the new object.
    ## The two-argument form of bless is used so that a subclass calling
    ## SubClass->new() gets an object of its own class. When the sub is
    ## called as a plain function (no invocant) it falls back to the
    ## current package, which is what the one-argument bless used to do.
    my $invocant = shift ;
    my $class = ref($invocant) || $invocant || __PACKAGE__ ;
    my $self = {} ;
    bless( $self, $class ) ;
    return $self ;
}
### END of SUB

=head2 METHOD get_list_of_analysis_intrument_names

	## Description : returns the list of unique instrument names (AC$INSTRUMENT) found in the given files
	## Input : $dir, $ms_files (a list of files)
	## Output : $names
	## Usage : my ( $names ) = get_list_of_analysis_intrument_names( $dir, $ms_files ) ;
	
=cut
## START of SUB
sub get_list_of_analysis_intrument_names {
    ## Purpose : collect the distinct AC$INSTRUMENT values found in a set of
    ##           MassBank record files stored in the same directory.
    ## Input   : $dir (directory), $ms_files (array ref of file names in $dir)
    ## Output  : array ref of instrument names, in order of first appearance.
    ##           Each value is everything following 'AC$INSTRUMENT:' (so it
    ##           keeps the leading space of the record line).
    ## Raises  : croak if a file is undefined or does not exist,
    ##           die if an existing file cannot be opened.
    my $self = shift ;
    my ( $dir, $ms_files ) = @_ ;

    ## File::Spec builds the path with the separator of the running OS
    ## (the previous hard-coded backslash only worked on Windows).
    require File::Spec ;

    my ( %seen, @names ) ;
    foreach my $ms_file ( @{$ms_files} ) {
        my $file = undef ;
        if ( ( defined $dir ) and ( defined $ms_file ) ) {
            $file = File::Spec->catfile( $dir, $ms_file ) ;
        }
        if ( ( defined $file ) and ( -e $file ) ) {
            ## three-argument open + lexical handle : the file name can no
            ## longer be interpreted as an open mode, and no global handle leaks
            open( my $ms_fh, '<', $file ) or die "Cant' read the file $file\n" ;
            while ( my $field = <$ms_fh> ) {
                chomp $field ;
                if ( $field =~ /AC\$INSTRUMENT:(.*)/ ) {
                    my $name = $1 ;
                    ## an already known instrument ends the scan of this file
                    if ( $seen{$name} ) { last ; }
                    $seen{$name} = 1 ;
                    push( @names, $name ) ;
                }
            }
            close($ms_fh) ;
        }
        else {
            croak "Can't work with a undef / none existing massbank file\n" ;
        }
    }
    return ( \@names ) ;
}
## END of SUB

=head2 METHOD get_analysis_instruments_data

	## Description : extracts the instrument name and type (AC$INSTRUMENT, AC$INSTRUMENT_TYPE) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_analysis_instruments_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_analysis_instruments_data {
    ## Purpose : read the instrument description of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : hash ref with keys 'name' (AC$INSTRUMENT) and 'type'
    ##           (AC$INSTRUMENT_TYPE); a missing field stays undef. The hash
    ##           is empty when neither field is present in the file.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control = 0 ;    # number of matched lines
    my %features = ( 'name' => undef, 'type' => undef ) ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        if ( $field =~ /AC\$INSTRUMENT: (.*)/ ) {
            $features{'name'} = $1 ;
            $control++ ;
        }
        elsif ( $field =~ /AC\$INSTRUMENT_TYPE: (.*)/ ) {
            $features{'type'} = $1 ;
            $control++ ;
        }
    }
    close($ms_fh) ;

    ## nothing found : return an empty hash instead of undef values
    if ( $control == 0 ) { %features = () ; }
    return ( \%features ) ;
}
## END of SUB
     
=head2 METHOD get_ms_methods_data

	## Description : extracts the AC$MASS_SPECTROMETRY fields (ion mode, ms type, collision energy, ...) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_ms_methods_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_ms_methods_data {
    ## Purpose : read the AC$MASS_SPECTROMETRY fields of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : hash ref keyed by ion_mode, ms_type, collision_energy,
    ##           collision_gas, desolvation_gas_flow, desolvation_temperature,
    ##           ionization_energy, laser, matrix, mass_accuracy, reagent_gas
    ##           and scanning. Each value is the text following the subtag
    ##           (leading separator included); a missing field stays undef.
    ##           The hash is empty when no field at all was found.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Ordered rules : [ pattern, key of the result hash ].
    ## ION_MODE and MS_TYPE (mandatory fields) used to require a ':' right
    ## after the subtag; the colon is now optional so that lines written as
    ## 'AC$MASS_SPECTROMETRY: ION_MODE POSITIVE' are recognised too. Lines
    ## with the colon give exactly the same capture as before.
    my @rules = (
        [ qr/AC\$MASS_SPECTROMETRY: ION_MODE:?(.*)/,              'ion_mode' ],                   # mandatory
        [ qr/AC\$MASS_SPECTROMETRY: MS_TYPE:?(.*)/,               'ms_type' ],                    # mandatory
        [ qr/AC\$MASS_SPECTROMETRY: COLLISION_ENERGY(.*)/,        'collision_energy' ],           # optional
        [ qr/AC\$MASS_SPECTROMETRY: COLLISION_GAS(.*)/,           'collision_gas' ],              # optional
        [ qr/AC\$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW(.*)/,    'desolvation_gas_flow' ],       # optional
        [ qr/AC\$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE(.*)/, 'desolvation_temperature' ],    # optional
        [ qr/AC\$MASS_SPECTROMETRY: IONIZATION_ENERGY(.*)/,       'ionization_energy' ],          # optional
        [ qr/AC\$MASS_SPECTROMETRY: LASER(.*)/,                   'laser' ],                      # optional
        [ qr/AC\$MASS_SPECTROMETRY: MATRIX(.*)/,                  'matrix' ],                     # optional
        [ qr/AC\$MASS_SPECTROMETRY: MASS_ACCURACY(.*)/,           'mass_accuracy' ],              # optional
        [ qr/AC\$MASS_SPECTROMETRY: REAGENT_GAS(.*)/,             'reagent_gas' ],                # optional
        [ qr/AC\$MASS_SPECTROMETRY: SCANNING(.*)/,                'scanning' ],                   # optional
    ) ;

    my $control = 0 ;    # number of matched lines
    my %features = map { ( $_->[1], undef ) } @rules ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $key ) = @{$rule} ;
            if ( $field =~ $pattern ) {
                $features{$key} = $1 ;
                $control++ ;
                last ;    # first matching rule wins, as in an if/elsif chain
            }
        }
    }
    close($ms_fh) ;

    ## empty the object when nothing was found
    if ( $control == 0 ) { %features = () ; }
    return ( \%features ) ;
}
## END of SUB

=head2 METHOD get_solvents_data

	## Description : extracts the solvent entries (AC$CHROMATOGRAPHY: SOLVENT) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_solvents_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_solvents_data {
    ## Purpose : list the solvents declared in a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : array ref with one string per 'AC$CHROMATOGRAPHY: SOLVENT'
    ##           line, built as 'Solvent ' followed by the rest of the line.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features = () ;
    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        if ( $field =~ /AC\$CHROMATOGRAPHY: SOLVENT(.*)/ ) {
            push( @features, 'Solvent ' . $1 ) ;
        }
    }
    close($ms_fh) ;

    return ( \@features ) ;
}
## END of SUB

=head2 METHOD get_sample_data

	## Description : extracts the sample description (SP$SAMPLE) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_sample_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_sample_data {
    ## Purpose : read the sample field of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : hash ref with the key 'sample_type' holding the text that
    ##           follows 'SP$SAMPLE' (separator included) on the last such
    ##           line; empty hash when the file has no SP$SAMPLE line.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control = 0 ;    # number of matched lines
    my %features = ( 'sample_type' => undef ) ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        if ( $field =~ /SP\$SAMPLE(.*)/ ) {
            $features{'sample_type'} = $1 ;
            $control++ ;
        }
    }
    close($ms_fh) ;

    if ( $control == 0 ) { %features = () ; }
    return ( \%features ) ;
}
## END of SUB

=head2 METHOD get_chromato_methods_data

	## Description : extracts the AC$CHROMATOGRAPHY fields (capillary voltage, column, flow, retention time) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_chromato_methods_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_chromato_methods_data {
    ## Purpose : read the AC$CHROMATOGRAPHY fields of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : hash ref keyed by capillary_voltage, column_name,
    ##           column_temperature, flow_gradient, flow_rate and
    ##           retention_time; a missing field stays undef. The hash is
    ##           empty when no field at all was found.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Subtags in matching order; the result key is the lower-cased subtag.
    my @subtags = qw( CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE FLOW_GRADIENT FLOW_RATE RETENTION_TIME ) ;

    my $control = 0 ;    # number of matched lines
    my %features = map { ( lc($_), undef ) } @subtags ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        foreach my $subtag (@subtags) {
            if ( $field =~ /AC\$CHROMATOGRAPHY: \Q$subtag\E (.*)/ ) {
                $features{ lc($subtag) } = $1 ;
                $control++ ;
                last ;    # first matching subtag wins
            }
        }
    }
    close($ms_fh) ;

    if ( $control == 0 ) { %features = () ; }
    return ( \%features ) ;
}
## END of SUB

=head2 METHOD get_analytical_conditions_data

	## Description : extracts the AC$ANALYTICAL_CONDITION fields (chromatography and mass spectrometry) from a massbank record file .. for massbank version < 2.0
	## Input : $ms_file
	## Output : $features_chrom, $features_ms
	## Usage : my ( $features_chrom, $features_ms ) = get_analytical_conditions_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_analytical_conditions_data {
    ## Purpose : read the AC$ANALYTICAL_CONDITION fields of a MassBank record
    ##           file and sort them into chromatography and mass spectrometry
    ##           features.
    ## Input   : $ms_file (path of the record file)
    ## Output  : list of two hash refs ( \%features_chrom, \%features_ms ).
    ##           Chromatography keys : capillary_voltage, column_name,
    ##           column_temperature, flow_gradient, flow_rate, retention_time.
    ##           MS keys : ion_mode, ms_type, collision_energy, collision_gas,
    ##           desolvation_gas_flow, desolvation_temperature,
    ##           ionization_energy, laser, matrix, mass_accuracy, reagent_gas,
    ##           scanning. A missing field stays undef; a hash is empty when
    ##           none of its fields was found.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Subtags in matching order; the result key is the lower-cased subtag.
    my @chrom_subtags = qw( CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE FLOW_GRADIENT FLOW_RATE RETENTION_TIME ) ;
    my @ms_subtags = qw( ION_MODE MS_TYPE COLLISION_ENERGY COLLISION_GAS DESOLVATION_GAS_FLOW DESOLVATION_TEMPERATURE
        IONIZATION_ENERGY LASER MATRIX MASS_ACCURACY REAGENT_GAS SCANNING ) ;

    my %features_chrom = map { ( lc($_), undef ) } @chrom_subtags ;
    my %features_ms    = map { ( lc($_), undef ) } @ms_subtags ;
    my ( $control_chrom, $control_ms ) = ( 0, 0 ) ;    # matched lines per group

    ## Each rule : [ subtag, target hash, counter of that group ].
    my @rules = (
        ( map { [ $_, \%features_chrom, \$control_chrom ] } @chrom_subtags ),
        ( map { [ $_, \%features_ms,    \$control_ms ] } @ms_subtags ),
    ) ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $subtag, $target, $counter ) = @{$rule} ;
            ## The separator space sits outside the capture for every subtag.
            ## COLUMN_TEMPERATURE used to keep it inside the captured value,
            ## unlike all the other fields.
            if ( $field =~ /AC\$ANALYTICAL_CONDITION: \Q$subtag\E (.*)/ ) {
                $target->{ lc($subtag) } = $1 ;
                ${$counter}++ ;
                last ;    # first matching subtag wins
            }
        }
    }
    close($ms_fh) ;

    if ( $control_ms == 0 )    { %features_ms    = () ; }
    if ( $control_chrom == 0 ) { %features_chrom = () ; }
    return ( \%features_chrom, \%features_ms ) ;
}
## END of SUB

=head2 METHOD get_spectrums_data

	## Description : extracts the focused ion fields (MS$FOCUSED_ION) and the number of peaks (PK$NUM_PEAK) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_spectrums_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_spectrums_data {
    ## Purpose : read the focused ion description and the peak count of a
    ##           MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : hash ref keyed by ion_type, precursor_mz, precursor_type
    ##           (text following the MS$FOCUSED_ION subtag, leading separator
    ##           included) and num_peaks (value of PK$NUM_PEAK). A missing
    ##           field stays undef; the hash is empty when nothing was found.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Ordered rules : [ pattern, key of the result hash ].
    my @rules = (
        [ qr{MS\$FOCUSED_ION: ION_TYPE(.*)},       'ion_type' ],
        [ qr{MS\$FOCUSED_ION: PRECURSOR_M/Z(.*)},  'precursor_mz' ],
        [ qr{MS\$FOCUSED_ION: PRECURSOR_TYPE(.*)}, 'precursor_type' ],
        [ qr{PK\$NUM_PEAK: (.*)},                  'num_peaks' ],
    ) ;

    my $control = 0 ;    # number of matched lines
    my %features = map { ( $_->[1], undef ) } @rules ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $key ) = @{$rule} ;
            if ( $field =~ $pattern ) {
                $features{$key} = $1 ;
                $control++ ;
                last ;    # first matching rule wins
            }
        }
    }
    close($ms_fh) ;

    if ( $control == 0 ) { %features = () ; }
    return ( \%features ) ;
}
## END of SUB

=head2 METHOD get_peaks_data

	## Description : extracts the peak list (mz, intensity, relative intensity) that follows the PK$PEAK line of a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_peaks_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_peaks_data {
    ## Purpose : read the peak list of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : array ref of hash refs { mz, intensity, relative_intensity },
    ##           one per peak line found after the
    ##           'PK$PEAK: m/z int. rel.int.' header, in file order.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Accepted layouts of a peak line, tried in this order.
    my @peak_patterns = (
        qr/\s+(\d+)\s+(\d+)\s+(\d+)/,              # mz = xxx     and int = xx
        qr/\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)/,    # mz = xxx.xxx and int = xx.xxx
        qr/\s+(\d+\.\d+)\s+(\d+)\s+(\d+)/,         # mz = xxx.xxx and int = xx
        qr/\s+(\d+)\s+(\d+\.\d+)\s+(\d+)/,         # mz = xxx     and int = xxxxx.xxx
    ) ;

    my @features = () ;
    my $in_peak_area = 0 ;    # set once the PK$PEAK header line has been read

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) {
            $in_peak_area = 1 ;
        }
        elsif ($in_peak_area) {
            foreach my $pattern (@peak_patterns) {
                if ( my ( $mz, $intensity, $relative ) = $field =~ $pattern ) {
                    push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
                    last ;    # one peak per line
                }
            }
        }
    }
    close($ms_fh) ;

    return ( \@features ) ;
}
## END of SUB

=head2 METHOD getPeaksFromString

	## Description : extracts the peak list (mz, intensity, relative intensity) from a massbank record given as a string
	## Input : $record
	## Output : $features
	## Usage : my ( $features ) = getPeaksFromString( $record ) ;
	
=cut
## START of SUB
sub getPeaksFromString {
    ## Purpose : read the peak list of a MassBank record given as a string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : array ref of hash refs { mz, intensity, relative_intensity },
    ##           one per peak line found after the
    ##           'PK$PEAK: m/z int. rel.int.' header, in record order.
    ## Raises  : croak if $record is undefined.
    my $self = shift ;
    my ( $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    ## Plain layouts of a peak line, tried in this order.
    my @peak_patterns = (
        qr/\s+(\d+)\s+(\d+)\s+(\d+)/,              # mz = xxx     and int = xx
        qr/\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)/,    # mz = xxx.xxx and int = xx.xxx
        qr/\s+(\d+\.\d+)\s+(\d+)\s+(\d+)/,         # mz = xxx.xxx and int = xx
        qr/\s+(\d+)\s+(\d+\.\d+)\s+(\d+)/,         # mz = xxx     and int = xxxxx.xxx
    ) ;

    my @features = () ;
    my $in_peak_area = 0 ;    # set once the PK$PEAK header line has been read

    FIELD: foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) {
            $in_peak_area = 1 ;
            next FIELD ;
        }
        next FIELD unless $in_peak_area ;

        foreach my $pattern (@peak_patterns) {
            if ( my ( $mz, $intensity, $relative ) = $field =~ $pattern ) {
                push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
                next FIELD ;
            }
        }

        ## intensity written with an exponent (x.xxxex) and mz = xxx.xxx.
        ## The value is mantissa * 10 ** exponent (it was computed as
        ## mantissa * 10 * exponent); signed and multi-digit exponents are
        ## accepted as well.
        if ( $field =~ /\s+(\d+\.\d+)\s+(\d+\.\d+)[eE]([+-]?\d+)\s+(\d+)/ ) {
            push( @features, { 'mz' => $1, 'intensity' => ( $2 * ( 10**$3 ) ), 'relative_intensity' => $4 } ) ;
        }
    }

    return ( \@features ) ;
}
## END of SUB

=head2 METHOD getIdFromString

	## Description : get the accession id of massbank record
	## Input : $record
	## Output : $id
	## Usage : my ( $id ) = getIdFromString ( $record ) ;
	
=cut
## START of SUB
sub getIdFromString {
    ## Purpose : return the accession id of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : text following 'ACCESSION:' on the last matching line,
    ##           undef when the record has no such line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my $accession ;
    for my $line ( split( /\n/, $record ) ) {
        next unless $line =~ /ACCESSION:\s+(.+)/ ;
        $accession = $1 ;    # no early exit : the last matching line wins
    }

    return ($accession) ;
}
### END of SUB



=head2 METHOD getInstrumentTypeFromString

	## Description : get the instrument type of massbank record
	## Input : $record
	## Output : $instrumentType
	## Usage : my ( $instrumentType ) = getInstrumentTypeFromString ( $record ) ;
	
=cut
## START of SUB
sub getInstrumentTypeFromString {
    ## Purpose : return the instrument type of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : text following 'INSTRUMENT_TYPE:' on the last matching line,
    ##           undef when the record has no such line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my $type ;
    for my $line ( split( /\n/, $record ) ) {
        next unless $line =~ /INSTRUMENT_TYPE:\s+(.+)/ ;
        $type = $1 ;    # no early exit : the last matching line wins
    }

    return ($type) ;
}
### END of SUB

=head2 METHOD getFormulaFromString

	## Description : get the elemental formula of massbank record
	## Input : $record
	## Output : $formula
	## Usage : my ( $formula ) = getFormulaFromString ( $record ) ;
	
=cut
## START of SUB
sub getFormulaFromString {
    ## Purpose : return the formula of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : text following 'CH$FORMULA:' on the last matching line,
    ##           undef when the record has no such line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my $formula ;
    for my $line ( split( /\n/, $record ) ) {
        next unless $line =~ /CH\$FORMULA:\s+(.+)/ ;
        $formula = $1 ;    # no early exit : the last matching line wins
    }

    return ($formula) ;
}
### END of SUB

=head2 METHOD getInchiFromString

	## Description : get the IUPAC InChI of massbank record
	## Input : $record
	## Output : $inchi
	## Usage : my ( $inchi ) = getInchiFromString ( $record ) ;
	
=cut
## START of SUB
sub getInchiFromString {
    ## Purpose : return the CH$IUPAC value of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : text following 'CH$IUPAC:' on the last matching line,
    ##           undef when the record has no such line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my $inchi ;
    for my $line ( split( /\n/, $record ) ) {
        next unless $line =~ /CH\$IUPAC:\s+(.+)/ ;
        $inchi = $1 ;    # no early exit : the last matching line wins
    }

    return ($inchi) ;
}
### END of SUB

=head2 METHOD getExactMzFromString

	## Description : get the exact mass of massbank record
	## Input : $record
	## Output : $exactMass
	## Usage : my ( $exactMass ) = getExactMzFromString ( $record ) ;
	
=cut
## START of SUB
sub getExactMzFromString {
    ## Purpose : return the exact mass of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : text following 'CH$EXACT_MASS:' on the last matching line,
    ##           undef when the record has no such line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my $exact_mass ;
    for my $line ( split( /\n/, $record ) ) {
        next unless $line =~ /CH\$EXACT_MASS:\s+(.+)/ ;
        $exact_mass = $1 ;    # no early exit : the last matching line wins
    }

    return ($exact_mass) ;
}
### END of SUB


=head2 METHOD getPrecursorTypeFromString

	## Description : get the precursor type of massbank record
	## Input : $record
	## Output : $precursorType
	## Usage : my ( $precursorType ) = getPrecursorTypeFromString ( $record ) ;
	
=cut
## START of SUB
sub getPrecursorTypeFromString {
    ## Purpose : return the precursor type of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : the first available value among
    ##             1. text following 'PRECURSOR_TYPE' (leading space kept),
    ##             2. text following 'ION_TYPE' (leading space kept),
    ##             3. last ';'-separated part of RECORD_TITLE, whitespace
    ##                removed,
    ##           or the string 'NA' when none of them is available.
    ##           Reading stops at the first PRECURSOR_TYPE or ION_TYPE line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my ( $from_precursor, $from_ion, $from_title ) ;
    LINE: for my $line ( split( /\n/, $record ) ) {
        ## the title is only a fallback : remember it and keep scanning
        if ( $line =~ /RECORD_TITLE:\s+(.+)/ ) {
            my @title_parts = split( /;/, $1 ) ;
            $from_title = $title_parts[-1] ;
            $from_title =~ s/\s//g ;
        }
        if ( $line =~ /PRECURSOR_TYPE(.+)/ ) {
            $from_precursor = $1 ;
            last LINE ;
        }
        if ( $line =~ /ION_TYPE(.+)/ ) {
            $from_ion = $1 ;
            last LINE ;
        }
    }

    ## manage undef precursor/ion type field, by decreasing priority
    return ($from_precursor) if defined $from_precursor ;
    return ($from_ion)       if defined $from_ion ;
    return ($from_title)     if defined $from_title ;
    return ('NA') ;
}
### END of SUB

=head2 METHOD getMsTypeFromString

	## Description : get the MS type of massbank record
	## Input : $record
	## Output : $msType
	## Usage : my ( $msType ) = getMsTypeFromString ( $record ) ;
	
=cut
## START of SUB
sub getMsTypeFromString {
    ## Purpose : return the MS type of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : text following 'AC$MASS_SPECTROMETRY: MS_TYPE' on the last
    ##           matching line, undef when the record has no such line.
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n" unless defined $record ;

    my $ms_type ;
    for my $line ( split( /\n/, $record ) ) {
        next unless $line =~ /AC\$MASS_SPECTROMETRY:\s+MS_TYPE\s+(.+)/ ;
        $ms_type = $1 ;    # no early exit : the last matching line wins
    }

    return ($ms_type) ;
}
### END of SUB

=head2 METHOD getChemNamesFromString

	## Description : get the list of names of a massbank record
	## Input : $record
	## Output : $names
	## Usage : my ( $names ) = getChemNamesFromString( $record ) ;
	
=cut
## START of SUB
sub getChemNamesFromString {
    ## Purpose : list the chemical names of a MassBank record string.
    ## Input   : $record (full text of the record, lines separated by \n)
    ## Output  : array ref with the text following 'CH$NAME: ' of every
    ##           matching line, in record order (empty when there is none).
    ## Raises  : croak if $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank record (string)\n" unless defined $record ;

    ## keep the captured name of each CH$NAME line, drop every other line
    my @names = map { ( $_ =~ /CH\$NAME: (.*)/ ) ? ($1) : () } split( /\n/, $record ) ;

    return ( \@names ) ;
}
## END of SUB





=head2 METHOD getMassBankHandler

	## Description : get a massbank handler from a file
	## Input : $record
	## Output : $massbankHandler
	## Usage : my ( $massbankHandler ) = getMassBankHandler ( $record ) ;
	
=cut
## START of SUB
sub getMassBankHandler {
    ## Purpose : placeholder, not implemented yet.
    ## Input   : $record (currently ignored)
    ## Output  : always undef.
    my ( $self, $record ) = @_ ;

    ## TODO...
    my $massbankHandler = undef ;

    return ($massbankHandler) ;
}
### END of SUB

=head2 METHOD get_annotations_data

	## Description : extracts the annotation lines (PK$ANNOTATION) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_annotations_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_annotations_data {
    ## Purpose : list the annotation header lines of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : array ref with the text following 'PK$ANNOTATION:' of every
    ##           matching line (leading space kept), in file order.
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features = () ;
    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        if ( $field =~ /PK\$ANNOTATION:(.*)/ ) {
            push( @features, $1 ) ;
        }
    }
    close($ms_fh) ;

    return ( \@features ) ;
}
## END of SUB

=head2 METHOD get_links_data

	## Description : extracts the external database links (CH$LINK: CAS, KEGG, PUBCHEM CID) from a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_links_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_links_data {
    ## Purpose : collect the external database links of a MassBank record file.
    ## Input   : $ms_file (path of the record file)
    ## Output  : hash ref with the keys 'CAS', 'KEGG' and 'PUBCHEM', each
    ##           holding an array ref of the identifiers found on the
    ##           'CH$LINK: CAS', 'CH$LINK: KEGG' and 'CH$LINK: PUBCHEM CID'
    ##           lines (empty array when there is none).
    ## Raises  : croak if the file is undefined or does not exist,
    ##           die if it cannot be opened.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Ordered rules : [ pattern, key of the result hash ]. Other link types
    ## are ignored.
    my @rules = (
        [ qr/CH\$LINK: CAS (.*)/,         'CAS' ],
        [ qr/CH\$LINK: KEGG (.*)/,        'KEGG' ],
        [ qr/CH\$LINK: PUBCHEM CID (.*)/, 'PUBCHEM' ],
    ) ;
    my %features = map { ( $_->[1], [] ) } @rules ;

    ## three-argument open + lexical handle (no mode injection through the
    ## file name, no shared global handle)
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $key ) = @{$rule} ;
            if ( $field =~ $pattern ) {
                push( @{ $features{$key} }, $1 ) ;
                last ;    # first matching rule wins
            }
        }
    }
    close($ms_fh) ;

    return ( \%features ) ;
}
## END of SUB

=head2 METHOD get_ms_record_links_data

	## Description : derives the record accession and its source database (MASSBANK or RESPECT) from the name of a massbank record file
	## Input : $ms_file
	## Output : $features
	## Usage : my ( $features ) = get_ms_record_links_data( $ms_file ) ;
	
=cut
## START of SUB
sub get_ms_record_links_data {
    ## Purpose : derive the accession of a record and its source database
    ##           from the name of the record file (the file is not read).
    ## Input   : $ms_file (path or name of the record file, '.txt' suffix)
    ## Output  : hash ref { accession, name }. 'accession' is the trailing
    ##           word of the file name without '.txt' (0001-PS0002 => PS0002);
    ##           'name' is 'RESPECT' or 'MASSBANK' according to the non-digit
    ##           prefix of the accession, undef (with a warning) when the
    ##           prefix is unknown. Empty hash when $ms_file is false or no
    ##           accession can be extracted.
    my ( $self, $ms_file ) = @_ ;

    ## Internal reference for MASSBANK and RESPECT : accession prefix => source.
    ## MASSBANK is listed last so that it would win on a shared prefix.
    my %source_of = (
        ( map { ( $_, 'RESPECT' ) } qw( PS PT PM ) ),
        ( map { ( $_, 'MASSBANK' ) } qw( TUE GLS AU MSJ ML FIO UF CO UO TT OUF MCH NU KNA MT CE KO KZ JEL JP PR BML CA TY PB FU EA UT BSU WA ) ),
    ) ;

    my %db = () ;
    return ( \%db ) unless $ms_file ;

    my $filename = basename( "$ms_file", ".txt" ) ;
    ## keep only record id (0001-PS0002 => PS0002 or BJ0045 => BJ0045)
    return ( \%db ) unless $filename =~ /(\w+)$/ ;

    %db = ( 'accession' => $1, 'name' => undef ) ;
    if ( $db{'accession'} =~ /(\D+)(\d+)/ ) {
        my $key = $1 ;
        if ( exists $source_of{$key} ) {
            $db{'name'} = $source_of{$key} ;
        }
        else {
            carp "The following key ($key) for $db{'accession'} has an unknown reference (not a Massbank or ReSpect source)\n" ;
        }
    }

    return ( \%db ) ;
}
## END of SUB


1 ;


__END__

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

 perldoc lib::massbank_parser

=head1 Exports

=over 4

=item :ALL is ...

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 25 / 06 / 2013

version 2 : ??

=cut