Mercurial > repos > fgiacomoni > hmdb_ms_search
comparison lib/hmdb.pm @ 21:63ba1cb240b7 draft
Prod branch Updating - - Fxx
author | fgiacomoni |
---|---|
date | Thu, 14 Feb 2019 08:36:52 -0500 |
parents | b5a1d5e43685 |
children | 453fbe98925a |
comparison
equal
deleted
inserted
replaced
20:b5a1d5e43685 | 21:63ba1cb240b7 |
---|---|
142 } | 142 } |
143 return($hmdb_masses, $nb_masses) ; | 143 return($hmdb_masses, $nb_masses) ; |
144 } | 144 } |
145 ## END of SUB | 145 ## END of SUB |
146 | 146 |
147 | |
148 =head2 METHOD prepareAdductListFormat | |
149 | |
150 ## Description : prepare a adduct list well formatted for https queries | |
151 ## Input : $adductString | |
152 ## Output : $formattedAdductString | |
153 ## Usage : my ( $formattedAdductString ) = prepareAdductListFormat ( $adductString ) ; | |
154 | |
155 =cut | |
156 ## START of SUB | |
157 sub prepareAdductListFormat { | |
158 ## Retrieve Values | |
159 my $self = shift ; | |
160 my ( $adductString ) = @_; | |
161 my ( $formattedAdductString, $nbAdducts ) = ( undef, 0 ) ; | |
162 | |
163 ## Formatting is converting [+] in %2B, [-] as - and [,] in converted space as 'M%2BH%202M%2BH' for 'M+H,2M+2H' | |
164 # print "\t$adductString ..." ; | |
165 | |
166 if (defined $adductString) { | |
167 | |
168 ## counting selected adducts | |
169 $nbAdducts = scalar( my @adducts = ( split (/,/, $adductString) ) ) ; | |
170 | |
171 ## Converting string into http post format | |
172 $adductString =~ s/\+/%2B/g ; | |
173 $adductString =~ s/,/%20/g ; | |
174 $formattedAdductString = $adductString ; | |
175 } | |
176 else { | |
177 warn "\t[WARN]the adduct type is not defined...It will set to 'Unknown'\n" ; | |
178 } | |
179 | |
180 # print "->$formattedAdductString\n" ; | |
181 | |
182 return ($formattedAdductString, $nbAdducts) ; | |
183 } | |
184 ### END of SUB | |
185 | |
147 =head2 METHOD test_matches_from_hmdb_ua DEPRECATED | 186 =head2 METHOD test_matches_from_hmdb_ua DEPRECATED |
148 | 187 |
149 ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra. | 188 ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra. |
150 ## Input : none | 189 ## Input : none |
151 ## Output : $status_line | 190 ## Output : $status_line |
230 # Fix a limit at 3 tries... | 269 # Fix a limit at 3 tries... |
231 if ($top < 4) { | 270 if ($top < 4) { |
232 print "\tTesting HMDB server connexion ($top time(s) )...\n" ; | 271 print "\tTesting HMDB server connexion ($top time(s) )...\n" ; |
233 $mech->post( | 272 $mech->post( |
234 "http://specdb.wishartlab.com/ms/search.csv", | 273 "http://specdb.wishartlab.com/ms/search.csv", |
235 Content => 'utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV' | 274 Content => 'utf8=TRUE&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV' |
236 ); | 275 ); |
237 | 276 |
238 # print Dumper $mech ; | 277 # print Dumper $mech ; |
239 $statusPostLine = $mech->status() ; | 278 $statusPostLine = $mech->status() ; |
240 } | 279 } |
241 else { | 280 else { |
242 last ; | 281 last ; |
243 } | 282 } |
244 $top++ ; | 283 $top++ ; |
245 }## End While | 284 }## End While |
246 | |
247 return (\$statusPostLine) ; | 285 return (\$statusPostLine) ; |
248 } | 286 } |
249 ## END of SUB | 287 ## END of SUB |
250 | 288 |
251 | 289 |
276 else { | 314 else { |
277 ## None supported http code error ## | 315 ## None supported http code error ## |
278 croak "Internal Server Error $$status..." ; | 316 croak "Internal Server Error $$status..." ; |
279 } | 317 } |
280 } | 318 } |
281 if ( $$status == 200 ) { print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ; } | 319 if ( $$status == 200 ) { |
320 print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ; | |
321 print "\tAll searches should be sent successfully to HMDB...(Set verbose to \"High\" for more information!)\n" ; | |
322 } | |
282 } | 323 } |
283 | 324 |
284 return (1) ; | 325 return (1) ; |
285 } | 326 } |
286 ## END of SUB | 327 ## END of SUB |
340 ## END of SUB | 381 ## END of SUB |
341 | 382 |
342 =head2 METHOD getMatchesFromHmdbWithUA | 383 =head2 METHOD getMatchesFromHmdbWithUA |
343 | 384 |
344 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) | 385 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) |
345 ## Input : $mass, $delta, $mode | 386 ## Input : $mass, $delta, $mode, adducts |
346 ## Output : $results | 387 ## Output : $results |
347 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ; | 388 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ; |
348 | 389 |
349 =cut | 390 =cut |
350 ## START of SUB | 391 ## START of SUB |
351 sub getMatchesFromHmdbWithUA { | 392 sub getMatchesFromHmdbWithUA { |
352 ## Retrieve Values | 393 ## Retrieve Values |
353 my $self = shift ; | 394 my $self = shift ; |
354 my ( $masses, $delta, $mode ) = @_ ; | 395 my ( $masses, $delta, $mode, $adducts ) = @_ ; |
355 | 396 |
356 my @page = () ; | 397 my @page = () ; |
357 | 398 |
358 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and | 399 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and |
359 | 400 |
379 $mech->add_header('Connection', 'keep-alive'); | 420 $mech->add_header('Connection', 'keep-alive'); |
380 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); | 421 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); |
381 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); | 422 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); |
382 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); | 423 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); |
383 } | 424 } |
384 | 425 ## adduct format is adduct_type=M%2BH%202M%2BH |
426 | |
427 if ( (!defined $adducts) or ( $adducts eq '') ) { | |
428 $adducts = 'Unknown' ; | |
429 } | |
430 | |
385 my $res = $mech->post( | 431 my $res = $mech->post( |
386 "http://specdb.wishartlab.com/ms/search.csv", | 432 "http://specdb.wishartlab.com/ms/search.csv", |
387 Content => 'utf8=TRUE&mode=' | 433 Content => 'utf8=TRUE&mode=' |
388 .$mode.'&query_masses=' | 434 .$mode.'&adduct_type=' |
435 .$adducts.'&query_masses=' | |
389 .$masses.'&tolerance=' | 436 .$masses.'&tolerance=' |
390 .$delta.'&database=HMDB&commit=Download Results As CSV' | 437 .$delta.'&database=HMDB&commit=Download Results As CSV' |
391 ); | 438 ); |
392 | 439 |
393 if ($mech->success) { | 440 if ($mech->success) { |
471 ## manage per query_mzs (keep query masses order by array) | 518 ## manage per query_mzs (keep query masses order by array) |
472 my @results = () ; | 519 my @results = () ; |
473 foreach (@{$masses}) { | 520 foreach (@{$masses}) { |
474 if ($result_by_entry{$_}) { | 521 if ($result_by_entry{$_}) { |
475 | 522 |
476 ## cut all entries > $max_query | 523 ## cut all entries > $max_query - all entries were already sorted...by hmdb |
477 my @temp_entries = @{$result_by_entry{$_}} ; | 524 my @temp_entries = @{$result_by_entry{$_}} ; |
478 my @temp_cut = () ; | 525 my @temp_cut = () ; |
479 my $current_query = 0 ; | 526 my $current_query = 0 ; |
480 foreach (@temp_entries) { | 527 foreach (@temp_entries) { |
481 $current_query ++ ; | 528 $current_query ++ ; |
485 else { | 532 else { |
486 push (@temp_cut, $_) ; | 533 push (@temp_cut, $_) ; |
487 } | 534 } |
488 } | 535 } |
489 push (@results, \@temp_cut) ; | 536 push (@results, \@temp_cut) ; |
490 # push (@results, $result_by_entry{$_}) ; | |
491 } | 537 } |
492 else {push (@results, [] ) ;} ; | 538 else { push (@results, [] ) ; } ; |
493 | 539 |
494 } | 540 } |
495 | |
496 return(\@results) ; | 541 return(\@results) ; |
497 } | 542 } |
498 ## END of SUB | 543 ## END of SUB |
499 | 544 |
500 =head2 METHOD parse_hmdb_page_results | 545 =head2 METHOD parse_hmdb_page_results |
579 | 624 |
580 =head2 METHOD get_unik_ids_from_results | 625 =head2 METHOD get_unik_ids_from_results |
581 | 626 |
582 ## Description : get all unik ids from the hmdb result object | 627 ## Description : get all unik ids from the hmdb result object |
583 ## Input : $results | 628 ## Input : $results |
584 ## Output : $ids | 629 ## Output : $ids, $idsNumber |
585 ## Usage : my ( $ids ) = get_unik_ids_from_results ( $results ) ; | 630 ## Usage : my ( $ids ) = get_unik_ids_from_results ( $results ) ; |
586 | 631 |
587 =cut | 632 =cut |
588 ## START of SUB | 633 ## START of SUB |
589 sub get_unik_ids_from_results { | 634 sub get_unik_ids_from_results { |
599 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) { | 644 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) { |
600 $ids{$entries->{'ENTRY_ENTRY_ID'}} = 1 ; | 645 $ids{$entries->{'ENTRY_ENTRY_ID'}} = 1 ; |
601 } | 646 } |
602 } | 647 } |
603 } | 648 } |
604 | 649 my $idsNumber = keys %ids ; |
605 return (\%ids) ; | 650 return (\%ids, $idsNumber) ; |
606 } | 651 } |
607 ### END of SUB | 652 ### END of SUB |
608 | 653 |
609 | 654 |
610 | 655 |
634 | 679 |
635 | 680 |
636 if( (defined $ids) and ($ids > 0 ) ) { | 681 if( (defined $ids) and ($ids > 0 ) ) { |
637 | 682 |
638 foreach my $id (keys %{$ids}) { | 683 foreach my $id (keys %{$ids}) { |
639 | 684 print "$id...\n" ; |
640 # print "\n============== > $id **********************\n " ; | |
641 my $twig = undef ; | 685 my $twig = undef ; |
642 | 686 |
643 if (defined $hmdb_url) { | 687 if (defined $hmdb_url) { |
644 $query = $hmdb_url.$id.'.xml' ; | 688 $query = $hmdb_url.$id.'.xml' ; |
645 | 689 |
646 ## test the header if exists | 690 ## test the header if exists |
647 my $response = head($query) ; | 691 my $response = head($query) ; |
648 | 692 |
649 if (!defined $response) { | 693 if (!defined $response) { |
694 $metabocard_features{$id}{'STATUS'} = 'NOT_EXISTING' ; | |
650 $metabocard_features{$id}{'metabolite_name'} = undef ; | 695 $metabocard_features{$id}{'metabolite_name'} = undef ; |
651 $metabocard_features{$id}{'metabolite_inchi'} = undef ; | 696 $metabocard_features{$id}{'metabolite_inchi'} = undef ; |
652 $metabocard_features{$id}{'metabolite_logp'} = undef ; | 697 $metabocard_features{$id}{'metabolite_logp'} = undef ; |
653 ## Need to be improve to manage http 404 or other response diff than 200 | 698 ## Need to be improve to manage http 404 or other response diff than 200 |
654 } | 699 } ## IF error |
655 elsif ($response->is_success) { | 700 elsif ( $response->is_success ) { |
656 | 701 |
657 $twig = XML::Twig->nparse_ppe( | 702 $twig = XML::Twig->nparse_ppe( |
658 | 703 |
659 twig_handlers => { | 704 twig_handlers => { |
660 # metabolite name | 705 # metabolite name |
661 'metabolite/name' => sub { $metabocard_features{$id}{'metabolite_name'} = $_ -> text_only ; } , | 706 'metabolite/name' => sub { $metabocard_features{$id}{'metabolite_name'} = $_ -> text_only ; $metabocard_features{$id}{'STATUS'} = 'EXISTING' ; } , |
662 # metabolite inchi | 707 # metabolite inchi |
663 'metabolite/inchi' => sub { $metabocard_features{$id}{'metabolite_inchi'} = $_ -> text_only ; } , | 708 'metabolite/inchi' => sub { $metabocard_features{$id}{'metabolite_inchi'} = $_ -> text_only ; $metabocard_features{$id}{'STATUS'} = 'EXISTING' ;} , |
664 ## metabolite logP | 709 ## metabolite logP |
665 'metabolite/predicted_properties/property' => sub { | 710 'metabolite/predicted_properties/property' => sub { |
666 | 711 |
667 my ($kind, $source, $value ) = ( undef, undef, undef ) ; | 712 my ($kind, $source, $value ) = ( undef, undef, undef ) ; |
668 | 713 |
673 elsif ( $field->name eq 'value') { $value = $field->text ; } | 718 elsif ( $field->name eq 'value') { $value = $field->text ; } |
674 | 719 |
675 if (defined $source ) { | 720 if (defined $source ) { |
676 if ( ( $kind eq 'logp' ) and ( $source eq 'ALOGPS' ) ) { | 721 if ( ( $kind eq 'logp' ) and ( $source eq 'ALOGPS' ) ) { |
677 $metabocard_features{$id}{'metabolite_logp'} = $value ; | 722 $metabocard_features{$id}{'metabolite_logp'} = $value ; |
723 $metabocard_features{$id}{'STATUS'} = 'EXISTING' ; | |
678 } | 724 } |
679 ($kind, $source, $value ) = ( undef, undef, undef ) ; | 725 ($kind, $source, $value ) = ( undef, undef, undef ) ; |
680 } | 726 } |
681 } | 727 } |
682 } | 728 } |
687 ); | 733 ); |
688 | 734 |
689 # $twig->print; | 735 # $twig->print; |
690 $twig->purge ; | 736 $twig->purge ; |
691 | 737 |
692 if (!$@) { | 738 # if (!$@) { |
693 | 739 # |
694 } | 740 # } |
695 else { | 741 # else { |
696 warn $@ ; | 742 # warn $@ ; |
697 } | 743 # } |
698 } | 744 } ## ELSIF success |
699 } | 745 } # END if defined URL |
700 else { | 746 else { |
701 warn "The hmdb metabocard url is not defined\n" ; | 747 warn "\tThe hmdb metabocard url is not defined\n" ; |
702 last; | 748 last; |
703 } | 749 } |
704 } | 750 } |
705 } | 751 } ## End IF defined ids |
706 else { | 752 else { |
707 warn "The HMDB ids list from HMDB is empty - No metabocard found\n" ; | 753 warn "The HMDB ids list from HMDB is empty - No metabocard found\n" ; |
708 } | 754 } |
709 | 755 |
710 # print Dumper %metabocard_features ; | 756 # print Dumper %metabocard_features ; |
713 ### END of SUB | 759 ### END of SUB |
714 | 760 |
715 | 761 |
716 =head2 METHOD map_suppl_data_on_hmdb_results | 762 =head2 METHOD map_suppl_data_on_hmdb_results |
717 | 763 |
718 ## Description : map supplementary data with already collected results with hmdb search | 764 ## Description : map supplementary data with already collected results with hmdb search - delete the entry if hmdb card doesn't exist... |
719 ## Input : $results, $features | 765 ## Input : $results, $features |
720 ## Output : $results | 766 ## Output : $results |
721 ## Usage : my ( $results ) = map_suppl_data_on_hmdb_results ( $results, $features ) ; | 767 ## Usage : my ( $results ) = map_suppl_data_on_hmdb_results ( $results, $features ) ; |
722 | 768 |
723 =cut | 769 =cut |
724 ## START of SUB | 770 ## START of SUB |
725 sub map_suppl_data_on_hmdb_results { | 771 sub map_suppl_data_on_hmdb_results { |
726 ## Retrieve Values | 772 ## Retrieve Values |
727 my $self = shift ; | 773 my $self = shift ; |
728 my ( $results, $features ) = @_; | 774 my ( $results, $features ) = @_; |
729 my ( @more_results ) = ( () ) ; | 775 my ( @moreResults ) = ( () ) ; |
730 | 776 |
731 @more_results = @{$results} ; ## Dump array ref to map | 777 foreach my $result (@{$results}) { |
732 | 778 |
733 foreach my $result (@more_results) { | 779 my @newResult = () ; |
734 | 780 |
735 foreach my $entries (@{$result}) { | 781 foreach my $entry (@{$result}) { |
736 | 782 |
737 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) { | 783 if ( ($entry->{'ENTRY_ENTRY_ID'}) and ($entry->{'ENTRY_ENTRY_ID'} ne '' ) ) { |
738 ## check that we have a ID for mapping | 784 |
739 my $current_id = $entries->{'ENTRY_ENTRY_ID'} ; | 785 my $current_id = $entry->{'ENTRY_ENTRY_ID'} ; |
740 if ($features->{"$current_id"}) { | 786 my $newCompletedEntry = $entry ; |
787 | |
788 ## If the id exists in feature hash and its status is not NOT_EXISTING | |
789 if ( ($features->{"$current_id"} ) and ( $features->{"$current_id"}{STATUS} eq 'EXISTING' ) ) { | |
790 | |
741 ## Metabolite NAME | 791 ## Metabolite NAME |
742 if (defined $features->{"$current_id"}{'metabolite_name'} ) { | 792 if (defined $features->{"$current_id"}{'metabolite_name'} ) { |
743 $entries->{'ENTRY_ENTRY_NAME'} = $features->{"$current_id"}{'metabolite_name'} | 793 $newCompletedEntry->{'ENTRY_ENTRY_NAME'} = $features->{"$current_id"}{'metabolite_name'} |
744 } | 794 } |
745 else { | 795 else { |
746 $entries->{'ENTRY_ENTRY_NAME'} = 'UNKNOWN' ; | 796 $newCompletedEntry->{'ENTRY_ENTRY_NAME'} = 'UNKNOWN' ; |
747 } | 797 } |
748 ## Metabolite INCHI | 798 ## Metabolite INCHI |
749 if (defined $features->{"$current_id"}{'metabolite_inchi'} ) { | 799 if (defined $features->{"$current_id"}{'metabolite_inchi'} ) { |
750 $entries->{'ENTRY_ENTRY_INCHI'} = $features->{"$current_id"}{'metabolite_inchi'} | 800 $newCompletedEntry->{'ENTRY_ENTRY_INCHI'} = $features->{"$current_id"}{'metabolite_inchi'} |
751 } | 801 } |
752 else { | 802 else { |
753 $entries->{'ENTRY_ENTRY_INCHI'} = 'NA' ; | 803 $newCompletedEntry->{'ENTRY_ENTRY_INCHI'} = 'NA' ; |
754 } | 804 } |
755 ## Metabolite LOGP | 805 ## Metabolite LOGP |
756 if (defined $features->{"$current_id"}{'metabolite_logp'} ) { | 806 if (defined $features->{"$current_id"}{'metabolite_logp'} ) { |
757 $entries->{'ENTRY_ENTRY_LOGP'} = $features->{"$current_id"}{'metabolite_logp'} | 807 $newCompletedEntry->{'ENTRY_ENTRY_LOGP'} = $features->{"$current_id"}{'metabolite_logp'} |
758 } | 808 } |
759 else { | 809 else { |
760 $entries->{'ENTRY_ENTRY_LOGP'} = 'NA' ; | 810 $newCompletedEntry->{'ENTRY_ENTRY_LOGP'} = 'NA' ; |
761 } | 811 } |
812 push (@newResult, $newCompletedEntry) ; | |
762 } | 813 } |
814 elsif ( ($features->{"$current_id"} ) and ( $features->{"$current_id"}{STATUS} eq 'NOT_EXISTING' ) ) { | |
815 $newCompletedEntry = undef ; | |
816 next ; | |
817 | |
818 } | |
819 ## In cas no features are given | |
763 else { | 820 else { |
764 # if ($features) { | 821 $newCompletedEntry->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ; |
765 # warn "This HMDB id doesn't match any collected ids\n" ; | 822 $newCompletedEntry->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ; |
766 # } | 823 push (@newResult, $newCompletedEntry) ; |
767 $entries->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ; | |
768 $entries->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ; | |
769 } | 824 } |
770 } | 825 } |
771 } | 826 } ## END FOREACH ENTRIES |
772 } | 827 |
773 | 828 push (@moreResults, \@newResult) ; |
774 return (\@more_results) ; | 829 |
830 } ## END FOREACH RESULT | |
831 | |
832 return (\@moreResults) ; | |
775 } | 833 } |
776 ### END of SUB | 834 ### END of SUB |
777 | 835 |
778 | 836 |
779 =head2 METHOD set_html_tbody_object | 837 =head2 METHOD set_html_tbody_object |