diff wsdl_hmdb.pl @ 15:beb9619263b0 draft

Master branch Updating - - Fxx
author fgiacomoni
date Wed, 16 Jan 2019 10:18:18 -0500
parents 625fa968d99a
children 8b2fbd449ffd
line wrap: on
line diff
--- a/wsdl_hmdb.pl	Thu Jan 10 09:10:14 2019 -0500
+++ b/wsdl_hmdb.pl	Wed Jan 16 10:18:18 2019 -0500
@@ -28,6 +28,8 @@
 my ( $masses_file, $col_id, $col_mass, $header_choice, $nbline_header ) = ( undef, undef, undef, undef, undef ) ;
 my $max_query = undef ;
 my ( $delta, $molecular_species, $out_tab, $out_html, $out_xls ) = ( undef, undef, undef, undef, undef ) ;
+my $advancedFeatures = 0 ; ## 0 (default) : first level results only / > 0 : metabocards are also fetched for each found metabolite
+my $VERBOSE = ( 3 ) ; ## verbosity level : progress messages are printed when > 1
 
 
 #=============================================================================
@@ -46,6 +48,8 @@
 				"output_tabular:s"	=> \$out_tab,			## option : path to the ouput (tabular : input+results )
 				"output_html|v:s"	=> \$out_html,			## option : path to the results view (output2)
 				"output_xlsx:s"		=> \$out_xls,			## option : path to the xls-like format output
+				"advancedFeatures:s"=> \$advancedFeatures,	## option : set to 1 to get advanced options or 0 to get first level only.
+				"verbose:i"			=> \$VERBOSE,			## option : verbosity level of the tool (progress messages are printed when > 1)
             ) ;
 
 #=============================================================================
@@ -57,6 +61,7 @@
 #                                MAIN SCRIPT
 #=============================================================================
 
+print "* * * The hmdb client program is launched: * * *\n" if ($VERBOSE>1) ;
 
 ## -------------- Conf file ------------------------ :
 my ( $CONF ) = ( undef ) ;
@@ -79,6 +84,8 @@
 my $metabocard_features = undef ;
 my $search_condition = "Search params : Molecular specie = $molecular_species / delta (mass-to-charge ratio) = $delta" ;
 
+print "\t with $search_condition\n" if ($VERBOSE>1) ;
+
 ## --------------- retrieve input data -------------- :
 
 ## manage only one mass
@@ -107,52 +114,74 @@
 	&help ;
 }
 #print Dumper $masses ;
+
 ## ---------------- launch queries -------------------- :
 
 if ( ( defined $delta ) and ( $delta > 0 ) and ( defined $molecular_species ) and ( $molecular_species ne '' ) ) {
 	## prepare masses list and execute query
 	my $oHmdb = lib::hmdb::new() ;
 	my $hmdb_pages = undef ;
+	my $status = undef ;
 	my $hmdb_ids = undef ;
 	
 	$results = [] ; # prepare arrays ref
 	my $submasses = $oHmdb->extract_sub_mz_lists($masses, $CONF->{HMDB_LIMITS} ) ;
 	
+	print "\t and ".scalar(@$masses)." masses are submitted as ".scalar(@$submasses)." queries to HMDB \n\n" if ($VERBOSE>1) ;
+	
 	## get the hmdb server status by a test query - continuous queries or kill script.
-	my $status = $oHmdb->test_matches_from_hmdb_ua() ;
+	$status = $oHmdb->test_matches_from_hmdb_ua() ;
 	$oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution
-
+	
+	my $cluster = 1 ;
 	foreach my $mzs ( @{$submasses} ) {
 #		print Dumper $mzs ;
 		my $result = undef ;
+		
 		my ( $hmdb_masses, $nb_masses_to_submit ) = $oHmdb->prepare_multi_masses_query($mzs) ;
-		$hmdb_pages = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
+		## progress message : cluster number zero-padded on 4 digits, no newline (the reply status printed below completes the line)
+		print "\t Submission of m/z cluster ".sprintf('%04d', $cluster) if ($VERBOSE>1) ;
 		
-#		print Dumper $hmdb_pages ;
+		($hmdb_pages, $status) = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
+		print "...HMDB reply results with status: $status\n" if ($VERBOSE>1) ;
 		
-		sleep(2) ;
+#		print Dumper $hmdb_pages ; ## debug only : dumps the raw HMDB reply of every query on STDOUT
+		
+		sleep(1) ;
 		
 		($result) = $oHmdb->parse_hmdb_csv_results($hmdb_pages, $mzs, $max_query) ; ## hash format result
 		## This previous step return results with cutoff on the number of entries returned ! 
 		$results = [ @$results, @$result ] ;
 		
-		sleep(2) ;
+#		sleep(1) ;
+		$cluster ++ ;
 	}
 	
-	## foreach metabolite get its own metabocard
-	$hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
-#	$hmdb_ids->{'HMDB03125'} = 1 ,
-	$metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying
-	
-	## Map metabocards with results (add supplementary data)
+	## Add more information of each found metabolite (1 for extra or 0 by default)
+	if ($advancedFeatures > 0) {
+		
+		## foreach metabolite get its own metabocard
+		$hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
+		## count the ids whatever the returned container is (hash ref or array ref) - NOTE(review): the commented line below suggests a hash ref, to confirm in lib::hmdb
+		my $nb_ids = ( ref($hmdb_ids) eq 'HASH' ) ? scalar(keys %{$hmdb_ids}) : ( ref($hmdb_ids) eq 'ARRAY' ) ? scalar(@{$hmdb_ids}) : 0 ;
+		print "\tComplementary annotation: asking for $nb_ids metabocards\n" if ($VERBOSE>1) ;
+	#	$hmdb_ids->{'HMDB03125'} = 1 ,
+		$metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying
+	#	print Dumper $results ;
+	#	print Dumper $hmdb_ids ;
+	#	print Dumper $metabocard_features ;
+		## Map metabocards with results (add supplementary data)
+		if ( ( defined $results ) and ( defined $metabocard_features ) ) {
+			$results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
+		}
+	}
+	else {
+		print "\tNo complementary annotation asked\n" if ($VERBOSE>1) ;
+		## Fill with msg not asked advanced annotation
+		$results = $oHmdb->map_suppl_data_on_hmdb_results($results, undef) ;
+	}
 	
-#	print Dumper $results ;
-#	print Dumper $hmdb_ids ;
-#	print Dumper $metabocard_features ;
 
-	if ( ( defined $results ) and ( defined $metabocard_features ) ) {
-		$results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
-	}
 
 	## Uses N mz and theirs entries per page (see config file).
 	# how many pages you need with your input mz list?
@@ -165,6 +194,7 @@
 } ## end ELSE
 
 ## -------------- Produce HTML/CSV output ------------------ :
+print "\n\tProducing html and tabular outputs\n" if ($VERBOSE>1) ;
 
 if ( ( defined $out_html ) and ( defined $results ) ) {
 	my $oHtml = lib::hmdb::new() ;
@@ -209,6 +239,7 @@
 	$ocsv->write_csv_one_mass($masses, $ids, $results, $out_xls) ;
 }
 
+print "\n* * * The hmdb client program ended * * *\n" if ($VERBOSE>1) ;
 
 #====================================================================================
 # Help subroutine called with -h option