view golm_ws_lib_search.pl @ 0:e3d43b8c987b draft

Init repository with last tool-bank-golm-lib_search master version
author fgiacomoni
date Mon, 05 Dec 2016 08:32:04 -0500
parents
children 11779b6402bc
line wrap: on
line source

#!perl

## script  : XXX.pl
#=============================================================================
#                              Included modules and versions
#=============================================================================
## Perl modules
use strict ;
use warnings ;
use Carp qw (cluck croak carp) ;

use Data::Dumper ;
use Getopt::Long ;
use FindBin ; ## Allows you to locate the directory of original perl script

## Specific Perl Modules (PFEM)
use lib $FindBin::Bin ;
my $binPath = $FindBin::Bin ;
use JSON ;

## Dedicate Perl Modules PFEM
use lib::golm_ws_api qw( :ALL ) ;
use lib::msp qw( :ALL ) ;
use lib::output qw( :ALL ) ;
use lib::conf qw( :ALL ) ;

## Initialized values
my ($OptHelp,$ri,$riWindow,$gcColumn,$inputFile,$inputMasses) = (undef,undef,undef,undef,undef,undef) ;
my ($maxHits,$mzRes,$maxIons,$threshold,$relative,$noise_threshold) = (undef,undef,undef,undef,undef,undef) ;
my ($JaccardDistanceThreshold,$s12GowerLegendreDistanceThreshold) = (undef,undef,undef,undef,undef) ;
my ($DotproductDistanceThreshold,$HammingDistanceThreshold,$EuclideanDistanceThreshold) = (undef,undef,undef) ;
my ($excel_file,$html_file,$html_template,$json_file,$csv_file) = (undef,undef,undef,undef,undef) ;
my (@hits, @ojson) = ( () , () ) ;
my $encoded_spectra ;

## if you put no arguments, function help is started
if (!@ARGV){ &help ; } 

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
&GetOptions ( 	
				"help|h"     => \$OptHelp,       # HELP
				"inputFile:s"		=> \$inputFile,
				"inputMasses:s"		=> \$inputMasses,
				"ri:i"		=> \$ri,
				"riWindow:i"		=> \$riWindow,
				"gcColumn:s"		=> \$gcColumn,
				"maxHits:i"		=> \$maxHits,
				"mzRes:i"		=> \$mzRes,
				"maxIons:i"		=> \$maxIons,
				#"noiseThreshold:f" => \$noise_threshold,
				"JaccardDistanceThreshold:f"		=> \$JaccardDistanceThreshold,
				"s12GowerLegendreDistanceThreshold:f"		=> \$s12GowerLegendreDistanceThreshold,
				"DotproductDistanceThreshold:f"		=> \$DotproductDistanceThreshold,
				"HammingDistanceThreshold:f"		=> \$HammingDistanceThreshold,
				"EuclideanDistanceThreshold:f"		=> \$EuclideanDistanceThreshold,
				"relative:s"			=> \$relative,
				"output_xls:s"			=> \$excel_file,
				"output_html:s"		=> \$html_file,
				"output_json:s"		=> \$json_file,
				"output_tabular:s"		=> \$csv_file,
            ) ;
            
            die "maxHits must be >= 0\n" unless ($maxHits >= 0) ;
            die "mzRes must be >= 0 \n" unless ($mzRes >= 0) ;
            die "maxIons must be >= 0\n" unless ($maxIons >= 0) ;
            #die "noiseThreshold must be > 0\n" unless ($noise_threshold > 0) ;
         
## if you put the option -help or -h function help is started         
if(defined($OptHelp)){ &help ; }

if( (!defined ($inputFile)) and (!defined($inputMasses) )){ warn "The input data is not defined (File or mass/intensity list AS string)\n" ; &help ; }

#=============================================================================
#                                MAIN SCRIPT
#=============================================================================

## Create module objects ###

my $oapi = lib::golm_ws_api->new() ;
my $omsp = lib::msp->new() ;
my $o_output = lib::output->new() ;
my $oConf = lib::conf->new() ;


## -------------- Conf file ------------------------ :
my ( $CONF ) = ( undef ) ;
foreach my $conf ( <$binPath/*.cfg> ) {
	$CONF = $oConf->as_conf($conf) ;
}

## -------------- HTML template file ------------------------ :
$html_template = <$binPath/golm_out.tmpl> ;
$CONF->{'HTML_TEMPLATE'} = $html_template ;


## -------------- Retrieve values from conf file ------------------------ :
my $ws_url = $CONF->{'WS_URL'} ;
my $ws_proxy = $CONF->{'WS_PROXY'} ;
my $default_ri = $CONF->{'RI'} ;
my $default_ri_window = $CONF->{'RI_WINDOW'} ;
my $default_gc_column = $CONF->{'GC_COLUMN'} ;
my $default_entries = $CONF->{'DEFAULT_ENTRIES'} ;
my $analyte_ref = $CONF->{'ANALYTE_REF'} ;
my $metabolite_ref = $CONF->{'METABOLITE_REF'} ;
my $spectrum_ref = $CONF->{'SPECTRUM_REF'} ;

############# -------------- Test the Golm web service -------------- ############# :

$oapi->test_query_golm($ws_url, $ws_proxy) ;

############# -------------- Parse the .msp file -------------- ############# :


my $ref_mzs_res ;
my $ref_ints_res ;

## Case when masses are entered manually
if (defined $inputMasses && !defined $inputFile) { 
	
	## Retrieve masses from msp file
	$ref_mzs_res = $omsp->get_masses_from_string($inputMasses, $mzRes) ;
	
	## Retrieve intensities from msp file
	$ref_ints_res = $omsp->get_intensities_from_string($inputMasses) ;
	
	## Sorting intensities
	my ($mzs_res_sorted, $ints_res_sorted) = $omsp->sorting_descending_intensities($ref_mzs_res, $ref_ints_res) ;
	
	#************************
	# Noise threshold: uncomment if it is not managed in MetaMS 
	#************************
	
	## Apply noise threshold
	#my ($mzs_res_noise_threshold, $ints_res_noise_threshold) = $omsp->keep_ions_above_threshold($mzs_res_sorted, $ints_res_sorted) ;
	
	#************************
	
	## Keep a limited number of ions according to $maxIons
	if($maxIons > 0){
		
		## To uncomment if "Apply noise threshold is used"
		#$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_noise_threshold, $maxIons ) ;
		#$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_noise_threshold, $maxIons ) ;
		
		## To uncomment if "Apply noise threshold is used"
		$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_sorted, $maxIons ) ;
		$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_sorted, $maxIons ) ;
	}
	
	## Remove redundant masses
	my ($uniq_masses , $uniq_intensities) = $omsp->remove_redundants($ref_mzs_res, $ref_ints_res) ;
	
	## Relative intensity
	my $relative_ints_res = undef ;
	if ($relative eq "true") {
		my @relative_ints = map { ($_ * 100)/@$ints_res_sorted[0] } @$ints_res_sorted ;
		$relative_ints_res = \@relative_ints ;
	}
	
	## Encode spectra
	if (defined $relative_ints_res) {
		$encoded_spectra = $omsp->encode_spectrum_for_query($mzs_res_sorted, $relative_ints_res) ;
	}
	else { $encoded_spectra = $omsp->encode_spectrum_for_query($mzs_res_sorted, $ints_res_sorted) ; }
	
}
## Case with the msp file
elsif (defined $inputFile and -e $inputFile and !defined $inputMasses and defined $mzRes and defined $maxIons and defined $maxHits) {

	unless (-f $inputFile)  { croak "$inputFile is not a file" ; }
	unless (-s $inputFile)  { croak "$inputFile is empty" ; }
	
	## Get masses and their intensities
	$ref_mzs_res = $omsp->get_mzs($inputFile, $mzRes) ;
	$ref_ints_res = $omsp->get_intensities($inputFile, $maxIons) ;
	
	## Sorting intensities
	my ($mzs_res_sorted, $ints_res_sorted) = $omsp->sorting_descending_intensities($ref_mzs_res, $ref_ints_res) ;
	
	#************************
	# Noise threshold: uncomment if it is not managed in MetaMS 
	#************************
	
	## Apply noise threshold if exists
	#my ($mzs_res_noise_threshold, $ints_res_noise_threshold) = $omsp->keep_ions_above_threshold($mzs_res_sorted, $ints_res_sorted) ;
	
	#************************
	
	
	## Keep only $maxIons ions
	if($maxIons > 0){
		
		## To uncomment if "Apply noise threshold is used"
		#$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_noise_threshold, $maxIons ) ;
		#$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_noise_threshold, $maxIons ) ;
		
		## To uncomment if "Apply noise threshold is used"
		$mzs_res_sorted = $omsp->keep_only_max_masses( $mzs_res_sorted, $maxIons ) ;
		$ints_res_sorted = $omsp->keep_only_max_intensities( $ints_res_sorted, $maxIons ) ;
	}
	
	## Remove redundant masses
	my ($uniq_masses , $uniq_intensities) = (undef,undef) ;
	my @uniq_total_masses = () ;
	my @uniq_total_intensities = () ;
	
	for (my $i=0 ; $i<@$mzs_res_sorted && $i<@$ints_res_sorted ; $i++) {
	
		($uniq_masses , $uniq_intensities) = $omsp->remove_redundants(@$mzs_res_sorted[$i], @$ints_res_sorted[$i]) ;
		push (@uniq_total_masses , $uniq_masses) ;
		push (@uniq_total_intensities, $uniq_intensities) ;
	}
	
	## Relative intensity
	my $relative_ints_res = undef ;
	if ($relative eq "true") {
		$relative_ints_res = $omsp->apply_relative_intensity(\@uniq_total_intensities) ;
	}
	
	## Encode spectra
	if (defined $relative_ints_res) {
		$encoded_spectra = $omsp->encode_spectrum_for_query(\@uniq_total_masses, $relative_ints_res) ;
	}
	else { $encoded_spectra = $omsp->encode_spectrum_for_query(\@uniq_total_masses, \@uniq_total_intensities) ; }

}
elsif (!defined $maxHits or !defined $maxIons or !defined $mzRes) { croak "Parameters mzRes or maxIons or maxHits are undefined\n"; } 
elsif (!-f $inputFile) 										  	  { croak "$inputFile does not exist" ; }

############# -------------- Send queries to Golm -------------- ############# :

my $limited_hits ;
foreach my $spectrum (@$encoded_spectra){
	($limited_hits) = $oapi->LibrarySearch ($ri, $riWindow, $gcColumn, $spectrum, $maxHits, $JaccardDistanceThreshold,
																										  $s12GowerLegendreDistanceThreshold,
																										  $DotproductDistanceThreshold,
																										  $HammingDistanceThreshold,
																										  $EuclideanDistanceThreshold,
																										  $ws_url, $ws_proxy,
																										  $default_ri, $default_ri_window, $default_gc_column) ;
	push (@hits , $limited_hits) ;
}
			

############# -------------- Build outputs -------------- ############# :
	
my $jsons_obj = $o_output->build_json_res_object(\@hits) ;
#$o_output->write_json_skel(\$json_file, $jsons_obj) ;

# Build the ajax data source for html view
#my $ajax = $o_output->write_ajax_data_source($jsons_obj) ;


my $tbody_entries = $o_output->add_entries_to_tbody_object($jsons_obj,$analyte_ref,$metabolite_ref,$spectrum_ref) ;
$o_output->write_html_body($jsons_obj, $tbody_entries, $html_file, $html_template, $default_entries, $jsons_obj) ;
$o_output->excel_like_output($excel_file, $jsons_obj) ;
$o_output->write_csv($csv_file , $jsons_obj) ;


#====================================================================================
# Help subroutine called with -h option
# number of arguments : 0
# Argument(s)        :
# Return           : 1
#====================================================================================
sub help {
	print STDERR "
golm_ws_lib_search.pl

# golm_ws_lib_search.pl is a script to use SOAP Golm webservice and send specific queries about spectra searches. 
# Input : a list of masses (m/z) and their intensities.
# Authors : Gabriel Cretin / Franck Giacomoni / Yann Guitton 
# Emails : franck.giacomoni\@clermont.inra.fr
#		   gabriel.cretin\@clermont.inra.fr
#		   yann.guitton\@oniris-nantes.fr
# Version : 1.2
# Created : 03/06/2016
# Updated : 28/11/2016
USAGE :		 
		golm_ws_lib_search.pl -help OR
		
		golm_ws_lib_search.pl 
			-spectraFile [.msp file]	
			-spectraMasses [masses + intensities of an ion: 'mz1 int1 mz2 int2 mzx intx...']
			-ri [Rentention Index: float or integer]
			-riWindow [Retention Index Window: 1500 or the value of your choice]
			-gcColumn [AlkaneRetentionIndexGcColumnComposition: 'VAR5' or 'MDN35' or 'None']
			-maxHits [Maximum hits per queried spectra: integer >= 1 (100 for all of them)]
			-mzRes [Number of digits after the decimal point for m/z values: integer (0 if none)]
			-maxIons [Number of m/z per spectra you want to keep for the queries, default 0 for all detected ions]
			-noiseThreshold [Ions having intensity values less than this value are ignored]
			-JaccardDistanceThreshold...............[
			-s12GowerLegendreDistanceThreshold......[  Threshold for each score. Hits with greater scores are ignored: 0 (perfect match) < threshlold <= 1 (mismatch) ]
			-DotproductDistanceThreshold............[
			-EuclideanDistanceThreshold.............[
			-HammingDistanceThreshold[Threshold for hamming score. Hits with greater scores are ignored: 0 - perfect match to higher values indicating a mismatch]
			-relative [Transforms absolute intensities in the msp file into relative intensities: (intensity * 100)/ max(intensitiess), otherwise, leave them absolute: true or false]
			-output_xls [name of the xls file in output: string]
			-output_html [name of the html file in output: string]
			-output_json [name of the json file in output: string]
			-output_tabular [name of the csv file in output: string]
				
";
	exit(1);
}

## END of script

__END__

=head1 NAME

 golm_ws_lib_search.pl -- script to send GC-MS spectra queries to Golm Metabolome Database (GMD)

=head1 USAGE

 golm_ws_lib_search.pl -help OR
		
		golm_ws_lib_search.pl 
			-spectraFile [.msp file]	
			-spectraMasses [masses + intensities of an ion: 'mz1 int1 mz2 int2 mzx intx...']
			-ri [Rentention Index: float or integer]
			-riWindow [Retention Index Window: 1500 or the value of your choice]
			-gcColumn [AlkaneRetentionIndexGcColumnComposition: 'VAR5' or 'MDN35' or 'None']
			-maxHits [Maximum hits per queried spectra: integer >= 1 (100 for all of them)]
			-mzRes [Number of digits after the decimal point for m/z values: integer (0 if none)]
			-maxIons [Number of m/z per spectra you want to keep for the queries, default 0 for all detected ions]
			-noiseThreshold [Ions having intensity values less than this value are ignored]
			-JaccardDistanceThreshold...............[
			-s12GowerLegendreDistanceThreshold......[  Threshold for each score. Hits with greater scores are ignored: 0 (perfect match) < threshlold <= 1 (mismatch) ]
			-DotproductDistanceThreshold............[
			-EuclideanDistanceThreshold.............[
			-HammingDistanceThreshold[Threshold for hamming score. Hits with greater scores are ignored: 0 - perfect match to higher values indicating a mismatch]
			-relative [Transforms absolute intensities in the msp file into relative intensities: (intensity * 100)/ max(intensitiess), otherwise, leave them absolute: true or false]
			-output_xls [name of the xls file in output: string]
			-output_html [name of the html file in output: string]
			-output_json [name of the json file in output: string]
			-output_tabular [name of the csv file in output: string]

=head1 SYNOPSIS

This script sends GC-MS EI spectra from an msp file given in argument to Golm Database, and presents results on a web interface.

=head1 DESCRIPTION

This main program is a ...

=over 4

=item B<function01>

=item B<function02>

=back

=head1 AUTHOR

Gabriel Cretin	E<lt>gabriel.cretin@clermont.inra.frE<gt>
Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1.0 : 03 / 06 / 2016

version 1.1 : 24 / 06 / 2016

version 1.2 : 28 / 11 / 2016

=cut