# HG changeset patch # User fgiacomoni # Date 1547651898 18000 # Node ID beb9619263b01439bc725d24d194281064a84de0 # Parent 625fa968d99a99ccea3756f3a0f147d261e95efb Master branch Updating - - Fxx diff -r 625fa968d99a -r beb9619263b0 conf_hmdb.cfg --- a/conf_hmdb.cfg Thu Jan 10 09:10:14 2019 -0500 +++ b/conf_hmdb.cfg Wed Jan 16 10:18:18 2019 -0500 @@ -5,9 +5,9 @@ # ## ARGVT # Hmdb limits per job (max is 700) -HMDB_LIMITS=50 +HMDB_LIMITS=5 ## Maximum query filtered (default is 20 - min : 1 / max 50) -HMDB_MAX_QUERY=20 +HMDB_MAX_QUERY=5 ## Galaxy url for HTML JS and CSS path JS_GALAXY_PATH=https://cdn.rawgit.com/fgiacomoni/galaxy_utils/master/scripts CSS_GALAXY_PATH=https://cdn.rawgit.com/fgiacomoni/galaxy_utils/master/style diff -r 625fa968d99a -r beb9619263b0 lib/hmdb.pm --- a/lib/hmdb.pm Thu Jan 10 09:10:14 2019 -0500 +++ b/lib/hmdb.pm Wed Jan 16 10:18:18 2019 -0500 @@ -203,7 +203,7 @@ croak "Internal Server Error $$status..." ; } } - if ( $$status == 200 ) { print "The HMDB server returns that your request was fulfilled" ; } + if ( $$status == 200 ) { print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ; } } return (1) ; @@ -237,17 +237,19 @@ $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV'); # print Dumper $req ; my $res = $ua->request($req); + my $status_line = undef ; # print $res->as_string; if ($res->is_success) { @page = split ( /\n/, $res->decoded_content ) ; + $status_line = 'OK' ; } else { - my $status_line = $res->status_line ; + $status_line = $res->status_line ; ($status_line) = ($status_line =~ /(\d+)/); - croak "HMDB service none available !! Status of the HMDB server is : $status_line\n" ; + warn "\tHMDB service issue !! the server returned a $status_line HTTP error\n" ; } - return (\@page) ; + return (\@page, $status_line) ; } ## END of SUB @@ -279,12 +281,13 @@ ## NEW HMDB format V4.0 - dec2017 if ($line !~ /query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,delta\(ppm\)/) { - if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) { + if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a|\s+]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) { my @entry = ("$1","$2","$3","$4","$5","$6","$7","$8","$9","$10") ; if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; } $features{ENTRY_ENTRY_ID} = $entry[1] ; + $features{ENTRY_ENTRY_NAME} = $entry[2] ; $features{ENTRY_FORMULA} = $entry[4] ; $features{ENTRY_CPD_MZ} = $entry[5] ; $features{ENTRY_ADDUCT} = $entry[6] ; @@ -295,9 +298,17 @@ my %temp = %features ; push (@{$result_by_entry{$entry[0]} }, \%temp) ; } - else { - warn "The parsed line ($line) does not match your pattern\n " ; - } +# elsif ($line =~ /(\d+\.\d+)/) { # +# ## 288.082286511284,HMDB0002255,R-Methylmalonyl-CoA, C01213 ,C25H40N7O19P3S,867.131252359,M-3H,-,288.036475,159 +# ## 283.108004472276,"Bicyclo_3,1,1heptane-2,3-diol,2,6,6_trimethyl","2,3-Pinanediol",n/a,C10H18O2,170.13067982,M+TFA-H,-,283.116266,29 +# ## 174.034120330029,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M-H20-H,-,174.055503,123 +# ## 193.139160745841,HMDB0012109,"7-[(1R,2R,3R,5S)-3,5-Dihydroxy-2-[(1E,3S)-3-hydroxyoct-1-en-1-yl]cyclopentyl]-5,6-dihydroxyheptanoic acid", C06475,C20H36O7,388.246103506,M-2H,-,193.115776,121 +# ## 214.018826827064,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M+Na-2H,-,214.048559,139 +# } +# else { +# +# warn "The parsed line ($line) does not match your pattern\n " ; +# } } else { next ; @@ -597,7 +608,11 @@ } } else { - warn "This HMDB id doesn't match any collected ids\n" ; +# if ($features) { +# warn "This HMDB id doesn't match any collected ids\n" ; +# } + $entries->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ; + $entries->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ; } } } @@ -899,6 +914,8 @@ my $self = shift ; my ( $header, $init_mzs, $entries ) = @_ ; +# print Dumper $entries ; + my @hmdb_matrix = () ; if ( defined $header ) { @@ -934,12 +951,27 @@ push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ; ## - my $hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ; - my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ; - my $hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ; - my $hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ; - my $hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ; - my $hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ; + + my ($hmdb_name, $hmdb_id, $hmdb_formula, $hmdb_cpd_mz, $hmdb_adduct, $hmdb_delta) = (undef, undef, undef, undef, undef, undef) ; + + if ($entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ) { $hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ; } + else { $hmdb_name = 'UNKNOWN' ; } + + if ($entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) { $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ; } + else { $hmdb_id = 0 ; } + + if ($entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ) { $hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ; } + else { $hmdb_formula = 'N/A' ; } + + if ($entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ) { $hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ; } + else { $hmdb_cpd_mz = 'N/A' ; } + + if ($entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ) { $hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ; } + else { $hmdb_adduct = 'N/A' ; } + + if ($entries->[$index_mz][$index_entries]{ENTRY_DELTA} ) { $hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ; } + else { $hmdb_delta = 0 ; } + ## METLIN data display model ## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=VAR1::VAR2::VAR3::VAR4|... diff -r 625fa968d99a -r beb9619263b0 wsdl_hmdb.pl --- a/wsdl_hmdb.pl Thu Jan 10 09:10:14 2019 -0500 +++ b/wsdl_hmdb.pl Wed Jan 16 10:18:18 2019 -0500 @@ -28,6 +28,8 @@ my ( $masses_file, $col_id, $col_mass, $header_choice, $nbline_header ) = ( undef, undef, undef, undef, undef ) ; my $max_query = undef ; my ( $delta, $molecular_species, $out_tab, $out_html, $out_xls ) = ( undef, undef, undef, undef, undef ) ; +my $advancedFeatures = 0 ; +my $VERBOSE = ( 3 ) ; #============================================================================= @@ -46,6 +48,8 @@ "output_tabular:s" => \$out_tab, ## option : path to the ouput (tabular : input+results ) "output_html|v:s" => \$out_html, ## option : path to the results view (output2) "output_xlsx:s" => \$out_xls, ## option : path to the xls-like format output + "advancedFeatures:s"=> \$advancedFeatures, ## option : set to 1 to get advanced options or 0 to get first level only. + "verbose:i" => \$VERBOSE, ## VERBOSE Of the tool ) ; #============================================================================= @@ -57,6 +61,7 @@ # MAIN SCRIPT #============================================================================= +print "* * * The hmdb client program is launched: * * *\n" if ($VERBOSE>1) ; ## -------------- Conf file ------------------------ : my ( $CONF ) = ( undef ) ; @@ -79,6 +84,8 @@ my $metabocard_features = undef ; my $search_condition = "Search params : Molecular specie = $molecular_species / delta (mass-to-charge ratio) = $delta" ; +print "\t with $search_condition\n" if ($VERBOSE>1) ; + ## --------------- retrieve input data -------------- : ## manage only one mass @@ -107,52 +114,74 @@ &help ; } #print Dumper $masses ; + ## ---------------- launch queries -------------------- : if ( ( defined $delta ) and ( $delta > 0 ) and ( defined $molecular_species ) and ( $molecular_species ne '' ) ) { ## prepare masses list and execute query my $oHmdb = lib::hmdb::new() ; my $hmdb_pages = undef ; + my $status = undef ; my $hmdb_ids = undef ; $results = [] ; # prepare arrays ref my $submasses = $oHmdb->extract_sub_mz_lists($masses, $CONF->{HMDB_LIMITS} ) ; + print "\t and ".scalar(@$masses)." masses are submitted as ".scalar(@$submasses)." queries to HMDB \n\n" if ($VERBOSE>1) ; + ## get the hmdb server status by a test query - continuous queries or kill script. - my $status = $oHmdb->test_matches_from_hmdb_ua() ; + $status = $oHmdb->test_matches_from_hmdb_ua() ; $oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution - + + my $cluster = 1 ; foreach my $mzs ( @{$submasses} ) { # print Dumper $mzs ; my $result = undef ; + my ( $hmdb_masses, $nb_masses_to_submit ) = $oHmdb->prepare_multi_masses_query($mzs) ; - $hmdb_pages = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ; + + print "\t Submission of m/z cluster ".sprintf '%04s',$cluster."" if ($VERBOSE>1) ; -# print Dumper $hmdb_pages ; + ($hmdb_pages, $status) = $oHmdb->get_matches_from_hmdb_ua($hmdb_masses, $delta, $molecular_species) ; + print "...HMDB reply results with status: $status\n" if ($VERBOSE>1) ; - sleep(2) ; + print Dumper $hmdb_pages ; + + sleep(1) ; ($result) = $oHmdb->parse_hmdb_csv_results($hmdb_pages, $mzs, $max_query) ; ## hash format result ## This previous step return results with cutoff on the number of entries returned ! $results = [ @$results, @$result ] ; - sleep(2) ; +# sleep(1) ; + $cluster ++ ; } - ## foreach metabolite get its own metabocard - $hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ; -# $hmdb_ids->{'HMDB03125'} = 1 , - $metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying - - ## Map metabocards with results (add supplementary data) + ## Add more information of each found metabolite (1 for extra or 0 by default) + if ($advancedFeatures > 0) { + + ## foreach metabolite get its own metabocard + $hmdb_ids = $oHmdb->get_unik_ids_from_results($results) ; + + print "\tComplementary annotation: asking for ".scalar(@$hmdb_ids)."metabocards\n" if ($VERBOSE>1) ; + + # $hmdb_ids->{'HMDB03125'} = 1 , + $metabocard_features = $oHmdb->get_hmdb_metabocard_from_id($hmdb_ids, $CONF->{'HMDB_METABOCARD_URL'}) ; ## Try to multithread the querying + # print Dumper $results ; + # print Dumper $hmdb_ids ; + # print Dumper $metabocard_features ; + ## Map metabocards with results (add supplementary data) + if ( ( defined $results ) and ( defined $metabocard_features ) ) { + $results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ; + } + } + else { + print "\tNo complementary annotation asked\n" if ($VERBOSE>1) ; + ## Fill with msg not asked advanced annotation + $results = $oHmdb->map_suppl_data_on_hmdb_results($results, undef) ; + } -# print Dumper $results ; -# print Dumper $hmdb_ids ; -# print Dumper $metabocard_features ; - if ( ( defined $results ) and ( defined $metabocard_features ) ) { - $results = $oHmdb->map_suppl_data_on_hmdb_results($results, $metabocard_features) ; - } ## Uses N mz and theirs entries per page (see config file). # how many pages you need with your input mz list? @@ -165,6 +194,7 @@ } ## end ELSE ## -------------- Produce HTML/CSV output ------------------ : +print "\n\tProducing html and tabular outputs\n" if ($VERBOSE>1) ; if ( ( defined $out_html ) and ( defined $results ) ) { my $oHtml = lib::hmdb::new() ; @@ -209,6 +239,7 @@ $ocsv->write_csv_one_mass($masses, $ids, $results, $out_xls) ; } +print "\n* * * The hmdb client program ended * * *\n" if ($VERBOSE>1) ; #==================================================================================== # Help subroutine called with -h option diff -r 625fa968d99a -r beb9619263b0 wsdl_hmdb.xml --- a/wsdl_hmdb.xml Thu Jan 10 09:10:14 2019 -0500 +++ b/wsdl_hmdb.xml Wed Jan 16 10:18:18 2019 -0500 @@ -34,6 +34,8 @@ -output_tabular "$variableMetadata2" -output_html "$HmdbOutWeb2" -output_xls "$HmdbOutXlsx2" #end if -delta "$delta" -mode "$mode" -maxquery "$maxquery" + -advancedFeatures "$advancedFeatures" + -verbose "$verbose" ]]> @@ -68,6 +70,14 @@ + + + + + + + +