# HG changeset patch
# User fgiacomoni
# Date 1547651898 18000
# Node ID beb9619263b01439bc725d24d194281064a84de0
# Parent 625fa968d99a99ccea3756f3a0f147d261e95efb
Master branch Updating - - Fxx
diff -r 625fa968d99a -r beb9619263b0 conf_hmdb.cfg
--- a/conf_hmdb.cfg Thu Jan 10 09:10:14 2019 -0500
+++ b/conf_hmdb.cfg Wed Jan 16 10:18:18 2019 -0500
@@ -5,9 +5,9 @@
#
## ARGVT
# Hmdb limits per job (max is 700)
-HMDB_LIMITS=50
+HMDB_LIMITS=5
## Maximum query filtered (default is 20 - min : 1 / max 50)
-HMDB_MAX_QUERY=20
+HMDB_MAX_QUERY=5
## Galaxy url for HTML JS and CSS path
JS_GALAXY_PATH=https://cdn.rawgit.com/fgiacomoni/galaxy_utils/master/scripts
CSS_GALAXY_PATH=https://cdn.rawgit.com/fgiacomoni/galaxy_utils/master/style
diff -r 625fa968d99a -r beb9619263b0 lib/hmdb.pm
--- a/lib/hmdb.pm Thu Jan 10 09:10:14 2019 -0500
+++ b/lib/hmdb.pm Wed Jan 16 10:18:18 2019 -0500
@@ -203,7 +203,7 @@
croak "Internal Server Error $$status..." ;
}
}
- if ( $$status == 200 ) { print "The HMDB server returns that your request was fulfilled" ; }
+ if ( $$status == 200 ) { print "\tThe HMDB server returns that your request (connection test) was fulfilled\n" ; }
}
return (1) ;
@@ -237,17 +237,19 @@
$req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');
# print Dumper $req ;
my $res = $ua->request($req);
+ my $status_line = undef ;
# print $res->as_string;
if ($res->is_success) {
@page = split ( /\n/, $res->decoded_content ) ;
+ $status_line = 'OK' ;
} else {
- my $status_line = $res->status_line ;
+ $status_line = $res->status_line ;
($status_line) = ($status_line =~ /(\d+)/);
- croak "HMDB service none available !! Status of the HMDB server is : $status_line\n" ;
+ warn "\tHMDB service issue !! the server returned a $status_line HTTP error\n" ;
}
- return (\@page) ;
+ return (\@page, $status_line) ;
}
## END of SUB
@@ -279,12 +281,13 @@
## NEW HMDB format V4.0 - dec2017
if ($line !~ /query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,delta\(ppm\)/) {
- if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) {
+ if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w\/\s]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) {
my @entry = ("$1","$2","$3","$4","$5","$6","$7","$8","$9","$10") ;
if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
$features{ENTRY_ENTRY_ID} = $entry[1] ;
+ $features{ENTRY_ENTRY_NAME} = $entry[2] ;
$features{ENTRY_FORMULA} = $entry[4] ;
$features{ENTRY_CPD_MZ} = $entry[5] ;
$features{ENTRY_ADDUCT} = $entry[6] ;
@@ -295,9 +298,17 @@
my %temp = %features ;
push (@{$result_by_entry{$entry[0]} }, \%temp) ;
}
- else {
- warn "The parsed line ($line) does not match your pattern\n " ;
- }
+# elsif ($line =~ /(\d+\.\d+)/) { #
+# ## 288.082286511284,HMDB0002255,R-Methylmalonyl-CoA, C01213 ,C25H40N7O19P3S,867.131252359,M-3H,-,288.036475,159
+# ## 283.108004472276,"Bicyclo_3,1,1heptane-2,3-diol,2,6,6_trimethyl","2,3-Pinanediol",n/a,C10H18O2,170.13067982,M+TFA-H,-,283.116266,29
+# ## 174.034120330029,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M-H20-H,-,174.055503,123
+# ## 193.139160745841,HMDB0012109,"7-[(1R,2R,3R,5S)-3,5-Dihydroxy-2-[(1E,3S)-3-hydroxyoct-1-en-1-yl]cyclopentyl]-5,6-dihydroxyheptanoic acid", C06475,C20H36O7,388.246103506,M-2H,-,193.115776,121
+# ## 214.018826827064,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M+Na-2H,-,214.048559,139
+# }
+# else {
+#
+# warn "The parsed line ($line) does not match your pattern\n " ;
+# }
}
else {
next ;
@@ -597,7 +608,11 @@
}
}
else {
- warn "This HMDB id doesn't match any collected ids\n" ;
+# if ($features) {
+# warn "This HMDB id doesn't match any collected ids\n" ;
+# }
+ $entries->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ;
+ $entries->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ;
}
}
}
@@ -899,6 +914,8 @@
my $self = shift ;
my ( $header, $init_mzs, $entries ) = @_ ;
+# print Dumper $entries ;
+
my @hmdb_matrix = () ;
if ( defined $header ) {
@@ -934,12 +951,27 @@
push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ;
##
- my $hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ;
- my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ;
- my $hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ;
- my $hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ;
- my $hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ;
- my $hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ;
+
+ my ($hmdb_name, $hmdb_id, $hmdb_formula, $hmdb_cpd_mz, $hmdb_adduct, $hmdb_delta) = (undef, undef, undef, undef, undef, undef) ;
+
+ if ($entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ) { $hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ; }
+ else { $hmdb_name = 'UNKNOWN' ; }
+
+ if ($entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) { $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ; }
+ else { $hmdb_id = 0 ; }
+
+ if ($entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ) { $hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ; }
+ else { $hmdb_formula = 'N/A' ; }
+
+ if ($entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ) { $hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ; }
+ else { $hmdb_cpd_mz = 'N/A' ; }
+
+ if ($entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ) { $hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ; }
+ else { $hmdb_adduct = 'N/A' ; }
+
+ if ($entries->[$index_mz][$index_entries]{ENTRY_DELTA} ) { $hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ; }
+ else { $hmdb_delta = 0 ; }
+
## METLIN data display model
## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=VAR1::VAR2::VAR3::VAR4|...
diff -r 625fa968d99a -r beb9619263b0 wsdl_hmdb.pl
--- a/wsdl_hmdb.pl Thu Jan 10 09:10:14 2019 -0500
+++ b/wsdl_hmdb.pl Wed Jan 16 10:18:18 2019 -0500
@@ -28,6 +28,8 @@
my ( $masses_file, $col_id, $col_mass, $header_choice, $nbline_header ) = ( undef, undef, undef, undef, undef ) ;
my $max_query = undef ;
my ( $delta, $molecular_species, $out_tab, $out_html, $out_xls ) = ( undef, undef, undef, undef, undef ) ;
+my $advancedFeatures = 0 ;
+my $VERBOSE = ( 3 ) ;
#=============================================================================
@@ -46,6 +48,8 @@
"output_tabular:s" => \$out_tab, ## option : path to the ouput (tabular : input+results )
"output_html|v:s" => \$out_html, ## option : path to the results view (output2)
"output_xlsx:s" => \$out_xls, ## option : path to the xls-like format output
+ "advancedFeatures:i"=> \$advancedFeatures, ## option : set to 1 to get advanced options or 0 to get first level only.
+ "verbose:i" => \$VERBOSE, ## VERBOSE Of the tool
) ;
#=============================================================================
@@ -57,6 +61,7 @@
# MAIN SCRIPT
#=============================================================================
+print "* * * The hmdb client program is launched: * * *\n" if ($VERBOSE>1) ;
## -------------- Conf file ------------------------ :
my ( $CONF ) = ( undef ) ;
@@ -79,6 +84,8 @@
my $metabocard_features = undef ;
my $search_condition = "Search params : Molecular specie = $molecular_species / delta (mass-to-charge ratio) = $delta" ;
+print "\t with $search_condition\n" if ($VERBOSE>1) ;
+
## --------------- retrieve input data -------------- :
## manage only one mass
@@ -107,52 +114,74 @@
&help ;
}
#print Dumper $masses ;
+
## ---------------- launch queries -------------------- :
if ( ( defined $delta ) and ( $delta > 0 ) and ( defined $molecular_species ) and ( $molecular_species ne '' ) ) {
## prepare masses list and execute query
my $oHmdb = lib::hmdb::new() ;
my $hmdb_pages = undef ;
+ my $status = undef ;
my $hmdb_ids = undef ;
$results = [] ; # prepare arrays ref
my $submasses = $oHmdb->extract_sub_mz_lists($masses, $CONF->{HMDB_LIMITS} ) ;
+ print "\t and ".scalar(@$masses)." masses are submitted as ".scalar(@$submasses)." queries to HMDB \n\n" if ($VERBOSE>1) ;
+
## get the hmdb server status by a test query - continuous queries or kill script.
- my $status = $oHmdb->test_matches_from_hmdb_ua() ;
+ $status = $oHmdb->test_matches_from_hmdb_ua() ;
$oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution
-
+
+ my $cluster = 1 ;
foreach my $mzs ( @{$submasses} ) {
# print Dumper $mzs ;
my $result = undef ;
+
my ( $hmdb_masses, $nb_masses_to_submit ) = $oHmdb->prepare_multi_masses_query($mzs) ;
- $hmdb_pages = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
+
+ print "\t Submission of m/z cluster ".sprintf('%04s', $cluster) if ($VERBOSE>1) ;
-# print Dumper $hmdb_pages ;
+ ($hmdb_pages, $status) = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ;
+ print "...HMDB reply results with status: $status\n" if ($VERBOSE>1) ;
- sleep(2) ;
+# print Dumper $hmdb_pages ;
+
+ sleep(1) ;
($result) = $oHmdb->parse_hmdb_csv_results($hmdb_pages, $mzs, $max_query) ; ## hash format result
## This previous step return results with cutoff on the number of entries returned !
$results = [ @$results, @$result ] ;
- sleep(2) ;
+# sleep(1) ;
+ $cluster ++ ;
}
- ## foreach metabolite get its own metabocard
- $hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
-# $hmdb_ids->{'HMDB03125'} = 1 ,
- $metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying
-
- ## Map metabocards with results (add supplementary data)
+ ## Add more information of each found metabolite (1 for extra or 0 by default)
+ if ($advancedFeatures > 0) {
+
+ ## foreach metabolite get its own metabocard
+ $hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ;
+
+ print "\tComplementary annotation: asking for ".scalar(keys %$hmdb_ids)." metabocards\n" if ($VERBOSE>1) ;
+
+ # $hmdb_ids->{'HMDB03125'} = 1 ,
+ $metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying
+ # print Dumper $results ;
+ # print Dumper $hmdb_ids ;
+ # print Dumper $metabocard_features ;
+ ## Map metabocards with results (add supplementary data)
+ if ( ( defined $results ) and ( defined $metabocard_features ) ) {
+ $results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
+ }
+ }
+ else {
+ print "\tNo complementary annotation asked\n" if ($VERBOSE>1) ;
+ ## Fill with msg not asked advanced annotation
+ $results = $oHmdb->map_suppl_data_on_hmdb_results($results, undef) ;
+ }
-# print Dumper $results ;
-# print Dumper $hmdb_ids ;
-# print Dumper $metabocard_features ;
- if ( ( defined $results ) and ( defined $metabocard_features ) ) {
- $results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ;
- }
## Uses N mz and theirs entries per page (see config file).
# how many pages you need with your input mz list?
@@ -165,6 +194,7 @@
} ## end ELSE
## -------------- Produce HTML/CSV output ------------------ :
+print "\n\tProducing html and tabular outputs\n" if ($VERBOSE>1) ;
if ( ( defined $out_html ) and ( defined $results ) ) {
my $oHtml = lib::hmdb::new() ;
@@ -209,6 +239,7 @@
$ocsv->write_csv_one_mass($masses, $ids, $results, $out_xls) ;
}
+print "\n* * * The hmdb client program ended * * *\n" if ($VERBOSE>1) ;
#====================================================================================
# Help subroutine called with -h option
diff -r 625fa968d99a -r beb9619263b0 wsdl_hmdb.xml
--- a/wsdl_hmdb.xml Thu Jan 10 09:10:14 2019 -0500
+++ b/wsdl_hmdb.xml Wed Jan 16 10:18:18 2019 -0500
@@ -34,6 +34,8 @@
-output_tabular "$variableMetadata2" -output_html "$HmdbOutWeb2" -output_xls "$HmdbOutXlsx2"
#end if
-delta "$delta" -mode "$mode" -maxquery "$maxquery"
+ -advancedFeatures "$advancedFeatures"
+ -verbose "$verbose"
]]>
@@ -68,6 +70,14 @@
+
+
+
+
+
+
+
+