diff wsdl_lipidmaps.pl @ 0:e8bd49794291 draft

Init repository with last lipidmaps_textsearch master version
author fgiacomoni
date Tue, 11 Apr 2017 03:47:06 -0400
parents
children 1276908e8fc4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wsdl_lipidmaps.pl	Tue Apr 11 03:47:06 2017 -0400
@@ -0,0 +1,394 @@
+#! perl
+
+use strict ;
+no strict "refs" ;
+use warnings ;
+use Carp qw (cluck croak carp confess) ;
+use Exporter ;
+use diagnostics ;
+
+use Data::Dumper ;
+use POSIX ;
+use XML::Twig;
+use Getopt::Long ;
+use Time::HiRes;
+
+
+## Permet de localisez le repertoire du script perl d'origine
+use FindBin;
+## permet de definir la localisation des .pm et .conf
+use lib $FindBin::Bin ;
+#my $libPath = $FindBin::Bin."/lib";
+my $binPath = $FindBin::Bin ;
+
+## dedicated lib
+
+use lib::lipidmaps ;
+use lib::parser ;
+use lib::writer ;
+# more inra lib
+use lib::conf  qw( :ALL ) ;
+use lib::csv  qw( :ALL ) ;
+use lib::operations  qw( :ALL ) ;
+
+## Initialized values
+#
+my $version = '1.2';
+my ( $help, $input_file, $line_header, $col_mass, $col_rt, $decimal, $round_type, $delta, $mode ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef ) ; 
+my ( $list_oxidation, $list_neutral_loss ) = ( undef, undef ) ; 
+my ( $col_classif_id, $selected_cat, $selected_cl, $selected_subcl ) = ( undef, undef, undef, undef ) ; 
+my ( $output_csv_file, $output_html_file  ) = ( undef, undef ) ;
+
+## Verbose levels (1 OR 3)
+my $verbose = 3 ; 
+
+
+&GetOptions ( 	"help|h"     		=> \$help,       		# HELP
+				"input|i:s"			=> \$input_file,		# path for input file (CSV format) -- Mandatory
+				"lineheader:i"		=> \$line_header, 		## header presence in tabular file
+				"colmass:i"			=> \$col_mass,			# Input file Column containing Masses for query -- Mandatory
+#				"colrt:i"			=> \$col_rt,			# Input file Column containing Retention time
+				"decimal:i"			=> \$decimal	,		# Significante decimal on mass -- Mandatory
+				"listoxidation:s"	=> \$list_oxidation,	## option : liste des atomes a gerer sur les masses experimentales
+				"listneutralloss:s"	=> \$list_neutral_loss,	## option : liste des atomes a gerer sur les masses experimentales
+				"round:s" 			=> \$round_type,		# Type of truncation -- Mandatory
+				"delta:f" 			=> \$delta,				# delta of mass -- Mandatory
+				"cat:s" 			=> \$selected_cat,		# Number corresponding to the main category in LIPIDMAPS -- Optional
+				"class:s"			=> \$selected_cl,		# Number corresponding to the main classe in LIPIDMAPS -- Optional
+				"subclass:s"		=> \$selected_subcl,	# Number corresponding to the sub class in LIPIDMAPS -- Optional
+				"output:s"			=> \$output_csv_file,	# File+Path for the results (CVS) -- Mandatory
+				"view:s"			=> \$output_html_file,	# File+Path for the view results (HTML) -- Mandatory
+				"colclassif:i"		=> \$col_classif_id,	# Input file Column containing LM classes ID for query -- Optional
+				"mode:s"			=> \$mode,				# mode of the initial data
+            ) ;
+
+#=============================================================================
+#                                EXCEPTIONS
+#=============================================================================
+$help and &help ;
+
+## --------------- Global parameters ---------------- :
+my $nb_pages_for_html_out = 1 ;
+
+## Conf file
+my ( $CONF, %RULES, %RECIPES, %TRANSFO ) = ( undef, (), (), ()  ) ;
+foreach my $conf ( <$binPath/*.conf> ) {
+	my $oConf = lib::conf::new() ;
+	$CONF = $oConf->as_conf($conf) ;
+}
+
+## -------------- HTML template file ------------------------ :
+foreach my $html_template ( <$binPath/*.tmpl> ) { $CONF->{'HTML_TEMPLATE'} = $html_template ; }
+
+## work with it :
+## get RULES lists :
+foreach (keys (%$CONF)) {
+	if( $_ =~/^RULE/ ) { $RULES{$_} = $CONF->{$_} ; 	} ## rules for clustering
+	elsif( $_ =~/^RECIPE/ ) { $RECIPES{$_} = $CONF->{$_} ; 	} ## fields retrieved with each rule
+	elsif( $_ =~/^ANNOT/ ) { $TRANSFO{$_} = $CONF->{$_} ; 	} ## Transformation annotation in output files
+}
+
+## Init var
+my ( $init_csv_rows, $init_mzs, $init_rts, $classif_ids, $round_init_mzs ) = ( undef, undef, undef, undef, undef ) ;
+my ( @ox_or_loss_names, @ox_or_loss_values, @transfo_init_mzs, @transfo_annotations, @transfo_init_mz_queries, @transfo_init_mz_results, @entries_results, @clusters_results, @entries_total_nb ) = ( (), (), (), (), (), (), (), (), ()  ) ;
+my ( $ox_names, $ox_values, $loss_names, $loss_values ) = ( [], [], [], [] ) ;
+my ( $is_header, $tbody_object) = (undef, undef) ;
+
+
+print "-----------**********START of MAIN LIPIDMAPS -- version $version *********-------------\n" if ($verbose == 3);
+
+#### --------------------------------- 01 :: Prepare all and Parsing steps on inputs -------------------------------------
+
+## Open CVS FILE / Extract and transform Masses  
+if ( ( defined $input_file ) and ( -e $input_file ) ) {
+	print "\n[INFO] Open input file and get values...\n" if ($verbose == 3);
+	## parse all csv for later : output csv build
+	my $ocsv_input  = lib::csv->new() ;
+	my $csv = $ocsv_input->get_csv_object( "\t" ) ;
+	$init_csv_rows = $ocsv_input->parse_csv_object($csv, \$input_file) ;
+	
+	
+	if ( ( defined $line_header ) and ( $line_header > 0 ) ) { $is_header = 'yes' ;	}
+	
+	## parse masses
+	if ( defined $col_mass ) {
+		print "[INFO] Get masses from input file $input_file ...\n" if ($verbose == 3);
+#		print "[INFO] Get RT from input file $input_file ...\n" if ($verbose == 3);
+		my $ocsv = lib::csv->new() ;
+		my $csv = $ocsv->get_csv_object( "\t" ) ;
+		$init_mzs = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_mass, $is_header, $line_header ) ; ## retrieve mz values on csv
+#		$init_rts = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_rt, $is_header, $line_header ) ; ## retrieve rt values on csv
+	}
+	
+	## Adjust the mz to the instrument mode (POS/NEG)
+	if ( ( defined $mode ) and ( ($mode eq 'POS') or ($mode eq 'NEG') ) ) {
+		print "\t [INFO] Apply mass mode transforming (POS to NEU or NEG to NEU) ...\n" if ($verbose == 3);
+		my @mode_init_mzs = () ;
+		my $omode = lib::operations::new() ;
+		foreach my $mz (@$init_mzs) {
+			push (@mode_init_mzs, ${$omode->manage_mode(\$mode, \1, \0.0005486, \1.007825, \$mz)} ) ;
+		}
+		
+		if ( (scalar @$init_mzs) == (scalar @mode_init_mzs) ) {
+			$init_mzs = \@mode_init_mzs ;
+		}
+		else {
+			carp "[ERROR] The mode managing process failed and init mzs have been corrompted\n"
+		}
+	}
+	else {
+		print "\t [INFO] Apply no mass mode transforming\n" if ($verbose == 3);
+	}
+	
+	## round masses
+	if ( ( defined $round_type ) and ( defined $decimal ) ) {
+		print "\t [INFO] Apply mass rounding ...\n" if ($verbose == 3);
+		my $oround = lib::operations::new() ;
+		if 		( $round_type eq 'truncation' ) { 	$round_init_mzs = $oround->truncate_nums( $init_mzs, $decimal ) ; 			}
+		elsif 	( $round_type eq 'round' ) {		$round_init_mzs = $oround->round_nums( $init_mzs, $decimal ) ;  			}
+		else {										croak "The selected option for data transformation is unknown !\n" ; 	}
+	}
+	## parse classif ids -- optionnal
+	if ( defined $col_classif_id ) {
+		print "\t [INFO] Get LM classification IDS from input file $input_file ...\n" if ($verbose == 3);
+		my $ocsv = lib::csv::new() ;
+		my $csv = $ocsv->get_csv_object( "\t" ) ;
+		$classif_ids = $ocsv->get_value_from_csv( $csv, $input_file, $col_classif_id, $is_header, $line_header ) ;
+	}
+	
+	
+	
+	## Uses N mz and theirs entries per page (see config file).
+	# how many pages you need with your input mz list?
+#	$nb_pages_for_html_out = ceil( scalar(@{$init_mzs} ) / $CONF->{HTML_ENTRIES_PER_PAGE} )  ;
+#	print "[INFO] Your analysis will generate $nb_pages_for_html_out pages of results...\n" if ($verbose == 3);
+	
+}
+else {
+	print "[ERROR] Can't find any input file $input_file\n" if ($verbose == 3);
+	croak "Can't find any input file $input_file\n" ;
+}
+
+#### ------------------- 02 :: optionnal work on masses with neutral loss and/or oxydation == modif : -------------------
+
+# get and merge ox and neutral loss envt :
+my $oparser = lib::parser::new() ;
+if ( ( defined $list_oxidation ) and ( defined $CONF )  ) 		{ ( $ox_names, $ox_values ) = $oparser->get_oxidation_ref( $CONF, $list_oxidation ) ; }
+if ( @{$ox_values} ) 	{ 	push( @ox_or_loss_values, @{$ox_values} ) ;  push( @ox_or_loss_names, @{$ox_names} ) ; }
+if ( ( defined $list_neutral_loss ) and ( defined $CONF )  ) 	{ ( $loss_names, $loss_values ) = $oparser->get_neutral_loss_ref( $CONF, $list_neutral_loss ) ; }
+if ( @{$loss_values} ) 	{ 	push( @ox_or_loss_values, @{$loss_values} ) ;  push( @ox_or_loss_names, @{$loss_names} ) ; }
+
+# prepare a list of masses indpt of modif (ox/neutral loss) presence.
+my $init_mz_index = 0 ;
+my $i = 0 ;
+
+foreach my $init_mz (@{$round_init_mzs}) {
+	
+	my @transfo_values_list = () ;
+	my @transfo_name_list = () ;
+	my $init_annot = 'Init_MZ' ;
+	
+	push ( @transfo_values_list, \$init_mz ) ; ## the submitted init mass
+		## work on values
+	if ( @ox_or_loss_values ) {
+		my $oround = lib::operations::new() ;
+		my $round_transfo_mzs = $oround->round_nums( \@ox_or_loss_values, $decimal ) ; ## We choose to around the number.
+		foreach my $transfo_mz ( @{$round_transfo_mzs} ) {
+			my $osub = lib::operations::new() ;
+			my $transfo_init_mz = $osub->subtract_num( $init_mz, $transfo_mz ) ;
+			push ( @transfo_values_list, $transfo_init_mz ) ;
+		}
+	}
+	
+	## work on annotation for output
+	push ( @transfo_name_list, \$init_annot) ; ## init annot
+	if ( @ox_or_loss_names ) {
+		foreach my $ox_or_loss_name (@ox_or_loss_names) {
+			if ( $TRANSFO{'ANNOT_'.$ox_or_loss_name} ) { 
+				my $transfo = $TRANSFO{'ANNOT_'.$ox_or_loss_name} ;
+				push ( @transfo_name_list, \$transfo ) ; }
+		}
+	}
+	
+	## push final arrays
+	push ( @transfo_init_mzs, \@transfo_values_list ) ;
+	push ( @transfo_annotations, \@transfo_name_list ) ;
+	
+	## foreach transfo mass (round and/or modif)
+	
+	my ( @queries, @query_results, @query_result_entries, @query_result_entry_nbs, @query_result_clusters ) = ( (), (), (), (), () ) ;
+	
+	foreach my $transfo_mz ( @{$transfo_init_mzs[$init_mz_index]} ) {
+		print "[INFO] Prepare the $i.th query with the mz: $$transfo_mz... \n" if ($verbose == 3);
+		## LM recommandation : If you write a script to automate calls to LMSD, 
+		# please be kind and do not hit our server more often than once per 20 seconds.
+		# We may have to kill scripts that hit our server more frequently.
+		Time::HiRes::sleep(0.1); #.1 seconds 
+		my ( $cat, $cl, $subcl ) = ( undef, undef, undef ) ;
+	#	if ( $i >= ( scalar( @transfos_values )-1 ) ) { $i = 0 ; } ## manage the modif for each masses.
+			
+		## get the classif level :
+		if ( defined $classif_ids ) {
+
+			if ( $classif_ids->[$i] ) {
+				my $olevel = lib::parser::new() ;
+				$cat = $olevel->set_category( $classif_ids->[$i] ) ;
+				$cl = $olevel->set_class( $classif_ids->[$i] ) ;
+				$subcl = $olevel->set_subclass( $classif_ids->[$i] ) ;
+#				( $cat, $cl, $subcl ) = ( $$cat, $$cl, $$subcl ) ;
+			}
+			else { croak "This information is not available in your parsing ids\n" ; }
+		}
+		else {
+			if ( ( defined $selected_subcl) or ( defined $selected_cl ) or ( defined $selected_cat ) ) {
+				if ( ( $selected_cat !~ /^NA/ ) ) { ( $cat ) = ( \$selected_cat ) ; }
+				if ( ( $selected_cl !~ /^NA(.*)/ ) ) { ( $cl ) = ( \$selected_cl ) ; }
+				if ( ( $selected_subcl !~ /^NA(.*)/ ) ) { ( $subcl ) = ( \$selected_subcl ) ; }
+			}
+			else { croak "No selected category or classification ids list\n" ; }
+		}
+		
+		## buid and get http query :
+		my $oquery = lib::lipidmaps::new() ;
+		my $ref_http_query = $oquery->build_lm_mass_query( \$CONF->{'SEARCH_URL'}, \$delta, $cat, $cl, $subcl ) ; ## build the query for LM WS, return a list of http, get method
+		print "\t[INFO] Exec $$ref_http_query \n" if ($verbose == 3);
+		
+		## set entries clusters
+		my ( $http_result_mz, $http_query_mz ) = $oquery->get_lm_mass_query($ref_http_query, $transfo_mz) ; ## execute the query, return a list of non-splited lm_entries.
+		my ( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( undef, undef, undef ) ;
+		if ( (defined $http_result_mz) and ( $$http_result_mz ne '' ) ) { # avoid empty LM results
+			( $mz_entries_results, $mz_entries_nb ) = $oquery->get_lm_entry_object($http_result_mz, $transfo_mz) ; ## get all features of each entry and return a list of features keept in a hash
+			$mz_clusters_results = $oquery->get_cluster_object($mz_entries_results, \%RULES, \%RECIPES) ; ## clustering all entries and return a list of clusters keept in a hash
+			print "\t[INFO] The query return $$mz_entries_nb entries\n" if ($verbose == 3);
+		}
+		else { # manage empty LM results
+			( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( [], \0, [] ) ;
+			print "\t[INFO] The query return none entry with LM\n" if ($verbose == 3);
+		}	
+		
+		push( @queries, $http_query_mz ) ;
+		push( @query_results, $http_result_mz ) ;
+		push( @query_result_entries, $mz_entries_results ) ;
+		push( @query_result_entry_nbs, $mz_entries_nb ) ;
+		push( @query_result_clusters, $mz_clusters_results ) ;
+		
+	} ## end foreach transfo_mz
+	
+	$i++ ; # implem the mz rank
+	
+	push( @transfo_init_mz_queries, \@queries ) ;
+	push( @transfo_init_mz_results, \@query_results ) ;
+	push( @entries_results, \@query_result_entries ) ;
+	push( @entries_total_nb, \@query_result_entry_nbs ) ;
+	push( @clusters_results, \@query_result_clusters ) ;
+	
+	$init_mz_index++ ;	
+} ## end foreach init_mz
+
+
+#### -------------------------------- 05 :: Writes LM results --------------------------------------------
+
+# prepare data and write html output :
+if ( defined $output_html_file) {
+	## Adjust html output with only mz with records
+	my ($nb_pages, $total_entries) = (0, 0) ;
+	foreach (@entries_total_nb) {
+		foreach my $nb ( @{$_} ) { $total_entries += $$nb ; }
+		if ($total_entries > 0) { $nb_pages++ ; }
+		$total_entries = 0 ;
+	}
+
+	$nb_pages_for_html_out = ceil( $nb_pages / $CONF->{HTML_ENTRIES_PER_PAGE} )  ;
+	print "[INFO] write HTML output file containing $nb_pages_for_html_out pages\n" if ($verbose == 3);
+	
+	my $ohtml = lib::writer->new() ;
+	$tbody_object = $ohtml->set_html_tbody_object( $nb_pages_for_html_out ) ;
+	$tbody_object = $ohtml->add_mz_to_tbody_object( $tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $init_mzs, \@entries_total_nb) ;
+	$tbody_object = $ohtml->add_transformation_to_tbody_object( \@transfo_init_mzs, \@transfo_annotations, $tbody_object ) ;
+	$tbody_object = $ohtml->add_cluster_to_tbody_object( \@transfo_init_mzs, \@clusters_results, $tbody_object ) ;
+	$tbody_object = $ohtml->add_entry_to_tbody_object( \@transfo_init_mzs, \@clusters_results, \@entries_results, $tbody_object ) ;
+	
+	$tbody_object = $ohtml->sort_tbody_object($tbody_object) ;
+	
+	my $output_html = $ohtml->write_html_skel(\$output_html_file, $tbody_object, $nb_pages_for_html_out, $CONF->{'HTML_TEMPLATE'}, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ;
+}
+
+
+#write csv ouput : add 'lipidmaps' column to input file
+my $lm_matrix = undef ;
+my $ocsv = lib::writer->new() ;
+if ( defined $is_header ) { $lm_matrix = $ocsv->set_lm_matrix_object('LIPIDMAPS(score::name::mz::formula::adduct::id)', $init_mzs, \@transfo_annotations, \@clusters_results ) ;	}
+else { $lm_matrix = $ocsv->set_lm_matrix_object( undef, $init_mzs, \@transfo_annotations, \@clusters_results ) ;	}
+
+
+$lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($init_csv_rows, $lm_matrix) ;
+$ocsv->write_csv_skel(\$output_csv_file, $lm_matrix) ;
+print "[INFO] write CSV output file\n" if ($verbose == 3);
+
+print "-----------**********END of MAIN LIPIDMAPS -- version $version *********-------------\n" if ($verbose == 3);
+
+#print "-----------**********RETURNS*********-------------\n" ;
+#print "\n----- Init Input Data in CSV -----\n" ;
+#print Dumper $init_csv_rows ;
+#print "\n---- Init masses parsed ...\n" ;
+#print Dumper $init_mzs ;
+#print "\n---- Init rts parsed ...\n" ;
+#print Dumper $init_rts ;
+#print "\n---- Init masses arounded ...\n" ;
+#print Dumper $round_init_mzs ;
+#print "\n---- Ox ...\n" ;
+#print Dumper $ox_names ;
+#print Dumper $ox_values ;
+#print "\n---- Neutral loss ...\n" ;
+#print Dumper $loss_names ;
+#print Dumper $loss_values ;
+#print "\n---- Applied transformations ('\@ox_or_loss_values') ...\n" ;
+#print Dumper @ox_or_loss_values ;
+#print "\n---- Masses modif ('\@transfo_init_mzs') ...\n" ;
+#print Dumper @transfo_init_mzs ;
+#print "\n---- Transfo annotation ('\@transfo_annotations') ...\n" ;
+#print Dumper @transfo_annotations ;
+#print "\n---- Queries ('\@transfo_init_mz_queries')...\n" ;
+#print Dumper @transfo_init_mz_queries ;
+#print "\n---- WS Results ('@transfo_init_mz_results')...\n" ;
+#print Dumper @transfo_init_mz_results ;
+#print "\n---- Entries results ('\@entries_results')...\n" ;
+#print Dumper @entries_results ;
+#print "\n---- Entries results numbers ('\@entries_total_nb')...\n" ;
+#print Dumper @entries_total_nb ;
+#print "\n---- Clusters results ('\@clusters_results')...\n" ;
+#print Dumper @clusters_results ;
+#print "\n---- Data model filed...\n" ;
+#print "...with csv->\n" ;
+#print Dumper $lm_matrix ;
+#print "...with html->\n" ;
+#print Dumper $tbody_object ;
+
+
+#====================================================================================
+# Help subroutine called with -h option
+# number of arguments : 0
+# Argument(s)        :
+# Return           : 1
+#====================================================================================
+sub help {
+    print STDERR "
+    	
+	# wsdl_lipidmaps
+	# Input : 
+	# Author : Franck GIACOMONI and Marion LANDI
+	# Email : fgiacomoni\@clermont.inra.fr
+	# Version : $version
+	# Created : 16/07/2012
+	# Updated: 09/06/2016 - REST implem
+	USAGE :
+	        wsdl_lipidmaps.pl -help
+	        wsdl_lipidmaps.pl 
+	        	-input \$file_input -colmass \$col_mass -colrt \$col_rt -decimal \$decimal -round \$round_type -delta \$tolerance
+	        	-output \$output_result -view \$output_view
+	        	-cat -class -subclass OR -colclassif
+				-listneutralloss \$neutral_loss -listoxidation \$oxidation [optionnal]
+	";
+}
+
+