diff lib/msp.pm @ 0:e3d43b8c987b draft

Init repository with last tool-bank-golm-lib_search master version
author fgiacomoni
date Mon, 05 Dec 2016 08:32:04 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/msp.pm	Mon Dec 05 08:32:04 2016 -0500
@@ -0,0 +1,642 @@
+package lib::msp ;
+
+use strict;
+use warnings ;
+use Exporter ;
+use Carp ;
+
+use Data::Dumper ;
+use List::MoreUtils qw(uniq);
+
+use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
+
+our $VERSION = "1.0";
+our @ISA = qw(Exporter);
+our @EXPORT = qw( get_mzs get_intensities get_masses_from_string get_intensities_from_string keep_only_max_masses keep_only_max_intensities encode_spectrum_for_query sorting_descending_intensities round_num apply_relative_intensity remove_redundants);
+our %EXPORT_TAGS = ( ALL => [qw( get_mzs get_intensities get_masses_from_string get_intensities_from_string keep_only_max_masses keep_only_max_intensities encode_spectrum_for_query sorting_descending_intensities round_num apply_relative_intensity remove_redundants)] );
+
+=head1 NAME
+
+My::Module - An example module
+
+=head1 SYNOPSIS
+
+    use My::Module;
+    my $object = My::Module->new();
+    print $object->as_string;
+
+=head1 DESCRIPTION
+
+This module does not really exist, it
+was made for the sole purpose of
+demonstrating how POD works.
+
+=head1 METHODS
+
+Methods are :
+
+=head2 METHOD new
+
+	## Description : new
+	## Input : $self
+	## Ouput : bless $self ;
+	## Usage : new() ;
+
+=cut
+
+sub new {
+    ## Variables
+    my $self={};
+    bless($self) ;
+    return $self ;
+}
+### END of SUB
+
+
+
+=head2 METHOD get_mzs
+
+	## Description : parse msp file and get mzs
+	## Input : $msp_file, $mzRes, $maxIon
+	## Output : \@total_spectra_mzs 
+	## Usage : my ( $mzs ) = get_mzs( $msp_file , $mzRes, $maxIon) ;
+	## Structure of res: [ $arr_ref1 , $arr_ref2 ... $arr_refN ]
+=cut
+## START of SUB
+sub get_mzs {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $msp_file, $mzRes ) = @_ ;
+  
+  	my @ions = () ;
+  	my @temp_mzs = () ;
+  	my @uniq_masses ;
+  	my @mzs = ();
+  	my @total_spectra_mzs = ();
+  	my $mz ;
+  	my $i = 0 ;
+  	  	
+    open (MSP , "<" , $msp_file) or die $! ;
+    
+	{
+		local $/ = 'Name' ;
+	    my @infos = () ;
+	    # One line is : "Name -> Name" englobing a whole spectrum with all infos
+	    while(my $line = <MSP>) {
+	    	
+	    	chomp $line;
+	    	@infos = split (/\n/ , $line) ;
+	    	# Loop over all lines of a spectrum
+	    	for (my $i=0 ; $i<@infos ; $i++) {
+	    		# Detect spectrum lines only
+		    	if ($infos[$i] =~ /(\d+\.?\d*)\s+(\d+\.?\d*)\s*;\s*/) {
+		    		
+		    		@ions = split ( /;/ , $infos[$i] ) ;
+		    		# Retrieve mzs according to maxIons value
+		    		foreach my $ion (@ions) {
+		    			
+		    			if ($ion =~ /^\s*(\d+\.?\d*)\s+(\d+\.?\d*)$/) {
+		    				
+		    				$mz = $1 ;
+		    				# Truncate/round mzs depending on $mzRes wanted
+		    				if ($mzRes == 0) {
+		    					my $mz_rounded = sprintf("%.".$mzRes."f", $mz) ;
+		    					push (@temp_mzs , $mz_rounded) ;
+		    				}
+		    				# Check that $mzRes is not greater than the number of digits after comma
+		    				elsif ($mzRes > 0) {
+		    					if ($mz !~ /^\d+\.\d+$/) { croak "*********\n\nYou are trying to specify $mzRes significant decimals, but one or more masses in the input file are unitary masses.\nYou should try again with mzRes = 0\n\n\n"; }
+		    					elsif($mzRes > length(( $mz =~ /.+\.(.*)/)[0] )) {
+		    						$mz = sprintf("%.".$mzRes."f" , $mz) ;
+		    					}
+		    					my $mz_rounded = _round_num($mz,$mzRes) ;
+		    					push (@temp_mzs , $$mz_rounded) ;
+		    				}
+		    			}
+		    		}
+		    	}
+	    	}
+	    	if($line ne '') {
+		    	@{ $total_spectra_mzs[$i] } = @temp_mzs ;
+			    $i++ ;
+			    @temp_mzs = () ;
+	    	}  	
+	    }
+    }
+    #print Dumper \@total_spectra_mzs ;
+    close (MSP) ;
+    return(\@total_spectra_mzs) ;
+}
+## END of SUB
+
+
+
+
+=head2 METHOD get_intensities
+
+	## Description : parse msp file and get intensities
+	## Input : $msp_file, $maxIons
+	## Output : \@total_spectra_intensities 
+	## Usage : my ( $intensities ) = get_mzs( $msp_file, $maxIons ) ;
+	## Structure of res: [ $arr_ref1 , $arr_ref2 ... $arr_refN ]
+=cut
+## START of SUB
+sub get_intensities {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $msp_file ) = @_ ;
+  
+  	my @ions = () ;
+  	my @temp_intensities = () ;
+  	my @intensities = () ;
+  	my @total_spectra_intensities = (); 
+  	my $i = 0 ;
+  	
+    open (MSP , "<" , $msp_file) or die $! ;
+    
+    {
+		local $/ = 'Name' ;
+	    my @infos = () ;
+	    # Extract spectrum
+	    while(my $line = <MSP>) {
+	    	chomp $line;
+	    	@infos = split (/\n/ , $line) ;
+	    	#Detect spectrum
+	    	for (my $i=0 ; $i<@infos ; $i++) {
+		    	if ($infos[$i] =~ /(\d+\.?\d*)\s+(\d+\.?\d*)\s*;\s*?/) {
+		    		@ions = split ( /;/ , $infos[$i] ) ;
+		    		# Retrieve intensities
+		    		foreach my $ion (@ions) {
+		    			if ($ion =~ /^\s*(\d+\.?\d*)\s+(\d+\.?\d*)$/) {
+    						my $intensity = $2 ;
+    						push ( @temp_intensities , $intensity ) ;
+		    			}
+		    		}
+		    	}
+	    	}
+	    	if($line ne '') {
+		    	@{ $total_spectra_intensities[$i] } = @temp_intensities ;
+			    $i++ ;
+			    @temp_intensities = () ;	  
+	    	}  	
+	    }
+    }
+    close (MSP) ;
+    return(\@total_spectra_intensities) ;
+}
+## END of SUB
+
+
+=head2 METHOD get_masses_from_string
+
+	## Description : parse a spectrum string and get mzs and intensities
+	## Input : $spectrum_string, $mzRes
+	## Output : \@spectrum_intensities_mzs 
+	## Usage : my ( $spectrum_mzs ) = get_masses_from_string( $spectrum_string , $mzRes ) ;
+=cut
+## START of SUB
+sub get_masses_from_string {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $spectrum_string, $mzRes ) = @_ ;
+    
+    my @intensities = () ;
+    my @mzs = () ;
+        
+    if (defined $spectrum_string) {
+    	
+    	if ($spectrum_string ne '') {
+    		
+    		if ($spectrum_string =~ /\s*(\d+\.?\d*)\s+(\d+\.?\d*)\s*/ ) {
+    		
+	    		my @val = split (/\s+/ , $spectrum_string) ;
+	    		for (my $i=0 ; $i<@val ; $i++) {
+	    			if ($i%2 == 0) {
+	    				my $mz = $val[$i] ;
+	    				# Truncate/round mzs depending on $mzRes wanted
+	    				if ($mzRes == 0) {
+	    					$mz = int($mz) ;
+	    					push ( @mzs , $val[$i] ) ;
+	    				}
+	    				# Check that $mzRes is not greater than the number of digits after comma
+	    				elsif ($mzRes > 0) {
+	    					if($mzRes > length(( $mz =~ /.+\.(.*)/)[0] )) {
+	    						$mz = sprintf("%.".$mzRes."f" , $mz) ;
+	    					}
+	    					my $mz_rounded = _round_num($mz,$mzRes) ;
+	    					push ( @mzs , $$mz_rounded ) ;
+	    				}
+	    			}
+	    		}
+	    		return (\@mzs) ;
+    		}
+    		else { croak "Wrong format of the spectrum. See help\n" }
+    	}
+    	else { croak "Spectrum is empty, the service will stop\n" } ;
+    }
+    else { croak "Spectrum is not defined, service will stop\n" } ;
+}
+## END of SUB
+
+
+
+=head2 METHOD get_intensities_from_string
+
+	## Description : parse a spectrum string and get intensities
+	## Input : $spectrum_string
+	## Output : \@spectrum_intensities 
+	## Usage : my ( $spectrum_intensities ) = get_intensities_from_string( $spectrum_string ) ;
+=cut
+## START of SUB
+sub get_intensities_from_string {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $spectrum_string ) = @_ ;
+    
+    my @intensities = () ;
+    my @mzs = () ;
+        
+    if (defined $spectrum_string) {
+    	
+    	if ($spectrum_string ne '') {
+    		
+    		if ($spectrum_string =~ /\s*(\d+\.?\d*)\s+(\d+\.?\d*)\s*/ ) {
+    		
+	    		my @val = split (/\s+/ , $spectrum_string) ;
+	    		for (my $i=0 ; $i<@val ; $i++) {
+	    			if ($i%2 != 0) {
+	    				my $int = $val[$i] ;
+	    				push ( @intensities , $int ) ; 
+	    			}
+	    		}
+	    		return (\@intensities) ;
+    		}
+    		else { croak "Wrong format of the spectrum. See help\n" }
+    	}
+    	else { croak "Spectrum is empty, the service will stop\n" } ;
+    }
+    else { croak "Spectrum is not defined, service will stop\n" } ;
+}
+## END of SUB
+
+
+
+
+
+=head2 METHOD sorting_descending_intensities
+
+	## Description : sort mzs and intensities arrays by descending intensity values
+	## Input : $ref_mzs_res, $ref_ints_res
+	## Output : \@mzs_res, \@ints_res 
+	## Usage : my ( \@mzs_res, \@ints_res ) = sorting_descending_intensities( $ref_mzs_res, $ref_ints_res ) ;
+=cut
+## START of SUB
+sub sorting_descending_intensities {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $ref_mzs_res, $ref_ints_res ) = @_ ;
+    
+    my @mzs_res = () ;
+    my @ints_res = () ;
+    
+    if ( defined $ref_mzs_res && defined $ref_ints_res ) {
+    	if ( (scalar @$ref_mzs_res) != 0 && (scalar @$ref_ints_res) != 0 ) {
+		    
+		    @mzs_res = @$ref_mzs_res ;
+			@ints_res = @$ref_ints_res ;
+			
+			# Case when we have only one array of masses (input is a string of masses and not a file)
+		    if ( ref(@$ref_ints_res[0]) ne "ARRAY") {
+		    
+		    	my @sorted_indices = sort { $ints_res[$b] <=> $ints_res[$a] } 0..$#ints_res;
+				@$_ = @{$_}[@sorted_indices] for \(@mzs_res, @ints_res);
+				
+		    }
+			else {
+				## Sorting ions by decreasing intensity values
+				for (my $i=0 ; $i<@ints_res ; $i++) {
+					my @sorted_indices = sort { @{$ints_res[$i]}[$b] <=> @{$ints_res[$i]}[$a] } 0..$#{$ints_res[$i]};
+					@$_ = @{$_}[@sorted_indices] for \(@{$ints_res[$i]},@{$mzs_res[$i]});
+				}
+			}
+    	} 
+    	else { carp "Cannot sort intensities, mzs or intensities are empty" ; return (\@mzs_res, \@ints_res) ; } 
+    } 
+    else { carp "Cannot sort intensities, mzs or intensities are undef" ; return (\@mzs_res, \@ints_res) ; }
+    
+	return (\@mzs_res, \@ints_res) ;
+}
+## END of SUB
+
+
+
+
+=head2 METHOD keep_only_max_masses
+
+	## Description : keep only $maxIons masses 
+	## Input : $mzs_res_sorted, $maxIons
+	## Output : \@mzs
+	## Usage : my ( $mzs ) = keep_only_max_masses( $mzs_res_sorted, $ints_res_sorted, $maxIons ) ;
+=cut
+## START of SUB
+sub keep_only_max_masses {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $ref_mzs_res, $maxIons ) = @_ ;
+    
+    my @mzs = () ;
+    my @tot_mzs = () ;
+    
+    if ( ref(@$ref_mzs_res[0]) ne "ARRAY") {
+    	my $i = 0 ;
+    	while (scalar @tot_mzs < $maxIons && $i < @$ref_mzs_res){
+	    	push (@tot_mzs , $$ref_mzs_res[$i++]) ;
+    	}
+    }
+    else {
+	    for (my $i=0 ; $i<@$ref_mzs_res ; $i++) {
+		  	my $j = 0 ;
+		    while (scalar @mzs < $maxIons && $j < @$ref_mzs_res[$i]){
+		    	push (@mzs , $ref_mzs_res->[$i][$j++]) ;
+	    	}
+	    	push (@tot_mzs , \@mzs) ;
+	    }
+    }
+	return (\@tot_mzs) ;
+}
+## END of SUB
+
+
+
+
+=head2 METHOD keep_only_max_intensities
+
+	## Description : keep only $maxIons intensities 
+	## Input : $ints_res_sorted, $maxIons
+	## Output : \@ints
+	## Usage : my ( $ints ) = keep_only_max_intensities( $ints_res_sorted, $maxIons ) ;
+=cut
+## START of SUB
+sub keep_only_max_intensities {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $ref_ints_res, $maxIons ) = @_ ;
+    
+    my @ints = () ;
+    my @tot_ints = () ;
+	if ( ref(@$ref_ints_res[0]) ne "ARRAY") {
+    	my $i = 0 ;
+    	while (scalar @tot_ints < $maxIons && $i < @$ref_ints_res){
+	    	push (@tot_ints , $$ref_ints_res[$i++]) ;
+    	}
+    }
+    else {
+    	for (my $i=0 ; $i<@$ref_ints_res ; $i++) {
+	    	my $j = 0 ;
+	    	while (scalar @ints < $maxIons && $j < @$ref_ints_res[$i]){
+	    		push (@ints , $ref_ints_res->[$i][$j++]) ;
+	    	}
+	    	push (@tot_ints , \@ints) ;
+    	}
+    }    
+	return (\@tot_ints) ;
+}
+## END of SUB
+
+
+
+
+
+=head2 METHOD encode_spectrum_for_query
+
+	## Description : get mzs and intensities values and generate the spectra strings formatted for the WS query (html) 
+	## Input : $mzs, $intensities
+	## Output : \@encoded_spectra
+	## Usage : my ( $encoded_spectra ) = get_spectra( $mzs, $intensities ) ;
+	
+=cut
+## START of SUB
+sub encode_spectrum_for_query {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $mzs, $intensities ) = @_ ;
+    
+    my @encoded_spectra = () ;
+    my $spectrum = "" ;
+    my $k = 0 ;
+    
+    #print Dumper $mzs ;
+    
+    if ( defined $mzs && defined $intensities ) {
+    	if ( @$mzs && @$intensities ) {
+    		
+    		# Case when we have only one array of masses (input is a string of masses and not a file)
+		    if ( ref(@$mzs[0]) ne "ARRAY") {
+    			for (my $i=0 ; $i< @$mzs ; $i++) {
+    				$spectrum = $spectrum . @$mzs[$i] . " " . @$intensities[$i] . " ";
+    			}
+			    push ( @encoded_spectra , $spectrum ) ;
+		    }
+		    else {
+			    for (my $i=0 ; $i< @$mzs ; $i++) {
+			    	
+			    	for ( my $j=0 ; $j< @{ @$mzs[$i] } ; $j++ ) {
+			    		
+			    		$spectrum = $spectrum . $$mzs[$i][$j] . " " . $$intensities[$i][$j] . " ";
+			    	}
+			    	$encoded_spectra[$k] = $spectrum ;
+			    	$k++ ;
+			    	$spectrum = '' ;
+			    }
+		    }
+    	}
+    	else { carp "Cannot encode spectrum, mzs and intensities arrays are empty" ; return \@encoded_spectra ; }
+    }
+    else { carp "Cannot encode spectrum, mzs and intensities are undef" ; return \@encoded_spectra ; }
+    return \@encoded_spectra ;
+}
+## END of SUB
+
+
+=head2 METHOD round_num
+
+	## Description : round a number by the sended decimal
+	## Input : $number, $decimal
+	## Output : $round_num
+	## Usage : my ( $round_num ) = round_num( $number, $decimal ) ;
+	
+=cut
+## START of SUB 
+sub _round_num {
+    ## Retrieve Values
+    my ( $number, $decimal ) = @_ ;
+    my $round_num = 0 ;
+    
+	if ( ( defined $decimal ) and ( $decimal > 0 ) and ( defined $number ) and ( $number > 0 ) ) {
+        $round_num = sprintf("%.".$decimal."f", $number);	## a rounding is used : 5.3 -> 5 and 5.5 -> 6
+	}
+	else {
+		croak "Can't round any number : missing value or decimal\n" ;
+	}
+    
+    return(\$round_num) ;
+}
+## END of SUB
+
+
+
+=head2 METHOD apply_relative_intensity
+
+	## Description : transform absolute intensities into relative intensities
+	## Input : $intensities
+	## Output : \@intensities
+	## Usage : my ( $intensities ) = apply_relative_intensity( $intensities ) ;
+	
+=cut
+## START of SUB 
+sub apply_relative_intensity {
+	## Retrieve Values
+	my $self = shift ;
+    my ($intensities) = @_ ;
+    
+    my @intensities = @$intensities ;
+    my @relative_intensities ;
+    
+    foreach my $ints (@intensities) {
+    		my @relative_ints = map { ($_ * 100)/@$ints[0] } @$ints ;
+    		push (@relative_intensities , \@relative_ints) ;
+    }
+    return \@relative_intensities ;
+}
+## END of SUB
+
+
+
+=head2 METHOD remove_redundants
+
+	## Description : removes ions with redundant masses
+	## Input : $masses $intensities
+	## Output : \@intensities
+	## Usage : my ( $uniq_masses, $uniq_intensities ) = remove_redundants( $masses, $intensities ) ;
+	
+=cut
+## START of SUB 
+sub remove_redundants {
+	## Retrieve Values
+	my $self = shift ;
+    my ($masses, $intensities) = @_ ;
+    
+    my %uniq = () ;
+    my @uniq_intensities = () ;
+    
+    ## Create hash with key = mass and value = intensity
+    for (my $i=0 ; $i<@$masses ; $i++) {
+    	$uniq{ @$masses[$i] } = @$intensities[$i] ;
+    }
+    
+    ## Remove redundant masses
+    my @uniq_masses = uniq(@$masses) ;
+    
+    ## Keep intensities corresponding to uniq masses
+	foreach my $mass (@uniq_masses) {
+	    push (@uniq_intensities , $uniq{ $mass }) ;
+	}
+	
+	return (\@uniq_masses , \@uniq_intensities) ;
+	
+}  
+## END of SUB
+
+
+#********************************************************************************************************
+#	FONCTION DU SEUIL POUR LE BRUIT, A DECOMMENTER SI FINALEMENT CE N'EST PAS GERE DANS LA BRIQUE MetaMS
+#********************************************************************************************************
+
+
+=head2 METHOD keep_ions_above_threshold
+
+	## Description : keep only ions which intensities are above the threshold
+	## Input : $mzs_res_sorted, $ints_res_sorted, $noiseThreshold
+	## Output : $mzs_res_noise_threshold, $ints_res_noise_threshold
+	## Usage : my ( $mzs_res_noise_threshold, $ints_res_noise_threshold ) = keep_ions_above_threshold( $mzs_res_sorted, $ints_res_sorted, $noiseThreshold ) ;
+	
+=cut
+## START of SUB 
+#sub keep_ions_above_threshold {
+#	## Retrieve Values
+#	my $self = shift ;
+#    my ($mzs_res_sorted, $ints_res_sorted, $noiseThreshold) = @_ ;
+#    
+#    my (@mzs_res_noise_threshold, @ints_res_noise_threshold) = ( (),() ) ;
+#    my (@mzs_res_noise_threshold_temp, @ints_res_noise_threshold_temp) = ( (),() ) ;
+#    my $i = 0 ;
+#    my $j = 0 ;
+#    # Case when we have only one array of masses (input is a string of masses and not a file)
+#    if ( ref(@$mzs_res_sorted[0]) ne "ARRAY") {
+#    	
+#		while( @$ints_res_sorted[$i] > $noiseThreshold && $i < scalar @$mzs_res_sorted) {
+#    	
+#	    	push ( @mzs_res_noise_threshold , @$mzs_res_sorted[$i] ) ;
+#	    	push ( @ints_res_noise_threshold , @$ints_res_sorted[$i] ) ;
+#	    	$i++ ;
+#	    }
+#	}
+#    else {
+#    	while( $i < @$ints_res_sorted ) {
+#    		
+#    		while( $$ints_res_sorted[$i][$j] > $noiseThreshold && $j < scalar @$ints_res_sorted[$i]) {
+#    			
+#    			push ( @mzs_res_noise_threshold_temp , $$mzs_res_sorted[$i][$j] ) ;
+#		    	push ( @ints_res_noise_threshold_temp , $$ints_res_sorted[$i][$j] ) ;
+#		    	$j++ ;
+#    		}		
+#    		push ( @mzs_res_noise_threshold , \@mzs_res_noise_threshold_temp ) ;
+#		    push ( @ints_res_noise_threshold , \@ints_res_noise_threshold_temp ) ;
+#    		$i++ ;
+#    	}
+#	}
+#	
+#	return (\@mzs_res_noise_threshold, \@ints_res_noise_threshold) ;
+#}  
+## END of SUB
+
+
+#********************************************************************************************************
+#********************************************************************************************************
+#********************************************************************************************************
+
+
+1 ;
+
+
+__END__
+
+=head1 SUPPORT
+
+You can find documentation for this module with the perldoc command.
+
+ perldoc csv.pm
+
+=head1 Exports
+
+=over 4
+
+=item :ALL is get_spectra
+
+=back
+
+=head1 AUTHOR
+
+Gabriel Cretin E<lt>gabriel.cretin@clermont.inra.frE<gt>
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+
+=head1 VERSION
+
+version 1 : 03 / 06 / 2016
+
+version 2 : 24 / 06 / 2016
+
+=cut
\ No newline at end of file