changeset 15:beb9619263b0 draft

Master branch Updating - - Fxx
author fgiacomoni
date Wed, 16 Jan 2019 10:18:18 -0500
parents 625fa968d99a
children 8b2fbd449ffd
files conf_hmdb.cfg lib/hmdb.pm wsdl_hmdb.pl wsdl_hmdb.xml
diffstat 4 files changed, 108 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/conf_hmdb.cfg	Thu Jan 10 09:10:14 2019 -0500
+++ b/conf_hmdb.cfg	Wed Jan 16 10:18:18 2019 -0500
@@ -5,9 +5,9 @@
 #
 ## ARGVT
 # Hmdb limits per job (max is 700)
-HMDB_LIMITS=50
+HMDB_LIMITS=5
 ## Maximum query filtered (default is 20 - min : 1 / max 50)
-HMDB_MAX_QUERY=20
+HMDB_MAX_QUERY=5
 ## Galaxy url for HTML JS and CSS path 
 JS_GALAXY_PATH=https://cdn.rawgit.com/fgiacomoni/galaxy_utils/master/scripts
 CSS_GALAXY_PATH=https://cdn.rawgit.com/fgiacomoni/galaxy_utils/master/style
--- a/lib/hmdb.pm	Thu Jan 10 09:10:14 2019 -0500
+++ b/lib/hmdb.pm	Wed Jan 16 10:18:18 2019 -0500
@@ -203,7 +203,7 @@
     			croak "Internal Server Error $$status..." ;
     		}
     	}
-    	if  ( $$status == 200 ) { print "The HMDB server returns that your request was fulfilled" ; }
+    	if  ( $$status == 200 ) { print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ; }
     }
     
     return (1) ;
@@ -237,17 +237,19 @@
 	$req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');
 #	print Dumper $req ; 
 	my $res = $ua->request($req);
+	my $status_line = undef ;
 #	print $res->as_string;
 	if ($res->is_success) {
 	     @page = split ( /\n/, $res->decoded_content ) ;
+	     $status_line = 'OK' ;
 	 } else {
-	 	my $status_line = $res->status_line ;
+	 	$status_line = $res->status_line ;
 	 	($status_line) = ($status_line =~ /(\d+)/);
-	 	croak "HMDB service none available !! Status of the HMDB server is : $status_line\n" ;
+	 	warn "\tHMDB service issue !! the server returned a $status_line HTTP error\n" ;
 	 }
 	
 	
-	return (\@page) ;
+	return (\@page, $status_line) ;
 }
 ## END of SUB
 
@@ -279,12 +281,13 @@
     	## NEW HMDB format V4.0 - dec2017
 		if ($line !~ /query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,delta\(ppm\)/) {
     		
-    		if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/  ) {
+    		if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a|\s+]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/  ) {
     			my @entry = ("$1","$2","$3","$4","$5","$6","$7","$8","$9","$10") ;
 
     			if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
     			    		
 	    		$features{ENTRY_ENTRY_ID} = $entry[1] ;
+	    		$features{ENTRY_ENTRY_NAME} = $entry[2] ;
 	    		$features{ENTRY_FORMULA} = $entry[4] ;
 	    		$features{ENTRY_CPD_MZ} = $entry[5] ;
 	    		$features{ENTRY_ADDUCT} = $entry[6] ;
@@ -295,9 +298,17 @@
 	    		my %temp = %features ;
 	    		push (@{$result_by_entry{$entry[0]} }, \%temp) ;
     		}
-    		else {
-    			warn "The parsed line ($line) does not match your pattern\n " ;
-    		}
+#    		elsif ($line =~ /(\d+\.\d+)/) { #
+#    			## 288.082286511284,HMDB0002255,R-Methylmalonyl-CoA, C01213 ,C25H40N7O19P3S,867.131252359,M-3H,-,288.036475,159
+#    			## 283.108004472276,"Bicyclo_3,1,1heptane-2,3-diol,2,6,6_trimethyl","2,3-Pinanediol",n/a,C10H18O2,170.13067982,M+TFA-H,-,283.116266,29
+#    			## 174.034120330029,HMDB0011723,2-Methylhippuric acid,	 C01586,C10H11NO3,193.073893223,M-H20-H,-,174.055503,123
+#    			## 193.139160745841,HMDB0012109,"7-[(1R,2R,3R,5S)-3,5-Dihydroxy-2-[(1E,3S)-3-hydroxyoct-1-en-1-yl]cyclopentyl]-5,6-dihydroxyheptanoic acid", C06475,C20H36O7,388.246103506,M-2H,-,193.115776,121
+#    			## 214.018826827064,HMDB0011723,2-Methylhippuric acid,	 C01586,C10H11NO3,193.073893223,M+Na-2H,-,214.048559,139
+#    		}
+#    		else {
+#    			
+#    			warn "The parsed line ($line) does not match your pattern\n " ;
+#    		}
     	}
     	else {
     		next ;
@@ -597,7 +608,11 @@
     				}
     			}
     			else {
-    				warn "This HMDB id doesn't match any collected ids\n" ;
+#    				if ($features) {
+#    					warn "This HMDB id doesn't match any collected ids\n" ;	
+#    				}
+    				$entries->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ;
+    				$entries->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ;
     			}
     		}
     	}
@@ -899,6 +914,8 @@
     my $self = shift ;
     my ( $header, $init_mzs, $entries ) = @_ ;
     
+#    print Dumper $entries ;
+    
     my @hmdb_matrix = () ;
     
     if ( defined $header ) {
@@ -934,12 +951,27 @@
     				
 	    		push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ;
 	    		##
-	    		my $hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME}  ;
-	    		my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID}  ;
-	    		my $hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA}  ;
-	    		my $hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ}  ;
-	    		my $hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT}  ;
-	    		my $hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA}  ;
+	    		
+	    		my ($hmdb_name, $hmdb_id, $hmdb_formula, $hmdb_cpd_mz, $hmdb_adduct, $hmdb_delta) = (undef, undef, undef, undef, undef, undef) ;
+	    		
+	    		if ($entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} )	{	$hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ; 	}
+	    		else 															{	$hmdb_name = 'UNKNOWN' ; }
+	    		
+	    		if ($entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} )	{	$hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ; 	}
+	    		else 															{	$hmdb_id = 0 ; }
+	    		
+	    		if ($entries->[$index_mz][$index_entries]{ENTRY_FORMULA} )	{	$hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ; 	}
+	    		else 															{	$hmdb_formula = 'N/A'  ; }
+	    		
+	    		if ($entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} )	{	$hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ; 	}
+	    		else 															{	$hmdb_cpd_mz = 'N/A' ; }
+	    		
+	    		if ($entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} )	{	$hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ; 	}
+	    		else 															{	$hmdb_adduct = 'N/A'  ; }
+	    		
+	    		if ($entries->[$index_mz][$index_entries]{ENTRY_DELTA} )	{	$hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ; 	}
+	    		else 															{	$hmdb_delta = 0 ; }
+	    		
 		    	
 		    	## METLIN data display model
 		   		## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=VAR1::VAR2::VAR3::VAR4|...
--- a/wsdl_hmdb.pl	Thu Jan 10 09:10:14 2019 -0500
+++ b/wsdl_hmdb.pl	Wed Jan 16 10:18:18 2019 -0500
@@ -28,6 +28,8 @@
 my ( $masses_file, $col_id, $col_mass, $header_choice, $nbline_header ) = ( undef, undef, undef, undef, undef ) ;
 my $max_query = undef ;
 my ( $delta, $molecular_species, $out_tab, $out_html, $out_xls ) = ( undef, undef, undef, undef, undef ) ;
+my $advancedFeatures = 0 ;
+my $VERBOSE = ( 3 ) ;
 
 
 #=============================================================================
@@ -46,6 +48,8 @@
 				"output_tabular:s"	=> \$out_tab,			## option : path to the ouput (tabular : input+results )
 				"output_html|v:s"	=> \$out_html,			## option : path to the results view (output2)
 				"output_xlsx:s"		=> \$out_xls,			## option : path to the xls-like format output
+				"advancedFeatures:s"=> \$advancedFeatures,	## option : set to 1 to get advanced options or 0 to get first level only.
+				"verbose:i"			=> \$VERBOSE,			## VERBOSE Of the tool
             ) ;
 
 #=============================================================================
@@ -57,6 +61,7 @@
 #                                MAIN SCRIPT
 #=============================================================================
 
+print "* * * The hmdb client program is launched: * * *\n" if ($VERBOSE>1) ;
 
 ## -------------- Conf file ------------------------ :
 my ( $CONF ) = ( undef ) ;
@@ -79,6 +84,8 @@
 my $metabocard_features = undef ;
 my $search_condition = "Search params : Molecular specie = $molecular_species / delta (mass-to-charge ratio) = $delta" ;
 
+print "\t with $search_condition\n" if ($VERBOSE>1) ;
+
 ## --------------- retrieve input data -------------- :
 
 ## manage only one mass
@@ -107,52 +114,74 @@
 	&help ;
 }
 #print Dumper $masses ;
+
 ## ---------------- launch queries -------------------- :
 
 if ( ( defined $delta ) and ( $delta > 0 ) and ( defined $molecular_species ) and ( $molecular_species ne '' ) ) {
 	## prepare masses list and execute query
 	my $oHmdb = lib::hmdb::new() ;
 	my $hmdb_pages = undef ;
+	my $status = undef ;
 	my $hmdb_ids = undef ;
 	
 	$results = [] ; # prepare arrays ref
 	my $submasses = $oHmdb->extract_sub_mz_lists($masses, $CONF->{HMDB_LIMITS} ) ;
 	
+	print "\t and ".scalar(@$masses)." masses are submitted as ".scalar(@$submasses)." queries to HMDB \n\n" if ($VERBOSE>1) ;
+	
 	## get the hmdb server status by a test query - continuous queries or kill script.
-	my $status = $oHmdb->test_matches_from_hmdb_ua() ;
+	$status = $oHmdb->test_matches_from_hmdb_ua() ;
 	$oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution
-
+	
+	my $cluster = 1 ;
 	foreach my $mzs ( @{$submasses} ) {
 #		print Dumper $mzs ;
 		my $result = undef ;
+		
 		my ( $hmdb_masses, $nb_masses_to_submit ) = $oHmdb->prepare_multi_masses_query($mzs) ;
-		$hmdb_pages = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
+		
+		print "\t Submission of m/z cluster ".sprintf '%04s',$cluster."" if ($VERBOSE>1) ;
 		
-#		print Dumper $hmdb_pages ;
+		($hmdb_pages, $status) = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
+		print "...HMDB reply results with status: $status\n" if ($VERBOSE>1) ;
 		
-		sleep(2) ;
+		print Dumper $hmdb_pages ;
+		
+		sleep(1) ;
 		
 		($result) = $oHmdb->parse_hmdb_csv_results($hmdb_pages, $mzs, $max_query) ; ## hash format result
 		## This previous step return results with cutoff on the number of entries returned ! 
 		$results = [ @$results, @$result ] ;
 		
-		sleep(2) ;
+#		sleep(1) ;
+		$cluster ++ ;
 	}
 	
-	## foreach metabolite get its own metabocard
-	$hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
-#	$hmdb_ids->{'HMDB03125'} = 1 ,
-	$metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying
-	
-	## Map metabocards with results (add supplementary data)
+	## Add more information to each metabolite found (advancedFeatures: 1 for extra annotation, 0 by default)
+	if ($advancedFeatures > 0) {
+		
+		## foreach metabolite get its own metabocard
+		$hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
+		
+		print "\tComplementary annotation: asking for ".scalar(@$hmdb_ids)."metabocards\n" if ($VERBOSE>1) ;
+		
+	#	$hmdb_ids->{'HMDB03125'} = 1 ,
+		$metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying
+	#	print Dumper $results ;
+	#	print Dumper $hmdb_ids ;
+	#	print Dumper $metabocard_features ;
+		## Map metabocards with results (add supplementary data)
+		if ( ( defined $results ) and ( defined $metabocard_features ) ) {
+			$results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
+		}
+	}
+	else {
+		print "\tNo complementary annotation asked\n" if ($VERBOSE>1) ;
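+		## Advanced annotation was not requested: run the mapping without metabocard features (presumably fills the supplementary fields with placeholder values)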
+		$results = $oHmdb->map_suppl_data_on_hmdb_results($results, undef) ;
+	}
 	
-#	print Dumper $results ;
-#	print Dumper $hmdb_ids ;
-#	print Dumper $metabocard_features ;
 
-	if ( ( defined $results ) and ( defined $metabocard_features ) ) {
-		$results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
-	}
 
 	## Uses N mz and theirs entries per page (see config file).
 	# how many pages you need with your input mz list?
@@ -165,6 +194,7 @@
 } ## end ELSE
 
 ## -------------- Produce HTML/CSV output ------------------ :
+print "\n\tProducing html and tabular outputs\n" if ($VERBOSE>1) ;
 
 if ( ( defined $out_html ) and ( defined $results ) ) {
 	my $oHtml = lib::hmdb::new() ;
@@ -209,6 +239,7 @@
 	$ocsv->write_csv_one_mass($masses, $ids, $results, $out_xls) ;
 }
 
+print "\n* * * The hmdb client program ended * * *\n" if ($VERBOSE>1) ;
 
 #====================================================================================
 # Help subroutine called with -h option
--- a/wsdl_hmdb.xml	Thu Jan 10 09:10:14 2019 -0500
+++ b/wsdl_hmdb.xml	Wed Jan 16 10:18:18 2019 -0500
@@ -34,6 +34,8 @@
             -output_tabular "$variableMetadata2" -output_html "$HmdbOutWeb2" -output_xls "$HmdbOutXlsx2"
         #end if
         -delta "$delta" -mode "$mode" -maxquery "$maxquery"
+        -advancedFeatures "$advancedFeatures"
+        -verbose "$verbose"
         
   ]]></command>
   <inputs>
@@ -68,6 +70,14 @@
       <option value="negative">Negatif Mode</option>
       <option value="neutral" selected="True">Neutral Mass</option>
     </param>
+    <param name="advancedFeatures" label="Performing advanced annotation " type="select" display="radio" help="Getting more information from HMDB (Inchi, logP, ...) but increase time of tool execution">
+      <option value="1">Advanced mode</option>
+      <option value="0" selected="True">Default mode</option>
+    </param>
+    <param name="verbose" type="select" label="Verbose level" display="radio" help="">
+        <option value="1" selected="true">Low</option>
+        <option value="3" >High</option>
+    </param>
   </inputs>
   
   <outputs>