comparison lib/hmdb.pm @ 21:63ba1cb240b7 draft

Prod branch Updating - - Fxx
author fgiacomoni
date Thu, 14 Feb 2019 08:36:52 -0500
parents b5a1d5e43685
children 453fbe98925a
comparison
equal deleted inserted replaced
20:b5a1d5e43685 21:63ba1cb240b7
142 } 142 }
143 return($hmdb_masses, $nb_masses) ; 143 return($hmdb_masses, $nb_masses) ;
144 } 144 }
145 ## END of SUB 145 ## END of SUB
146 146
147
148 =head2 METHOD prepareAdductListFormat
149
150 ## Description : prepare a adduct list well formatted for https queries
151 ## Input : $adductString
152 ## Output : $formattedAdductString
153 ## Usage : my ( $formattedAdductString ) = prepareAdductListFormat ( $adductString ) ;
154
155 =cut
156 ## START of SUB
157 sub prepareAdductListFormat {
158 ## Retrieve Values
159 my $self = shift ;
160 my ( $adductString ) = @_;
161 my ( $formattedAdductString, $nbAdducts ) = ( undef, 0 ) ;
162
163 ## Formatting is converting [+] in %2B, [-] as - and [,] in converted space as 'M%2BH%202M%2BH' for 'M+H,2M+2H'
164 # print "\t$adductString ..." ;
165
166 if (defined $adductString) {
167
168 ## counting selected adducts
169 $nbAdducts = scalar( my @adducts = ( split (/,/, $adductString) ) ) ;
170
171 ## Converting string into http post format
172 $adductString =~ s/\+/%2B/g ;
173 $adductString =~ s/,/%20/g ;
174 $formattedAdductString = $adductString ;
175 }
176 else {
177 warn "\t[WARN]the adduct type is not defined...It will set to 'Unknown'\n" ;
178 }
179
180 # print "->$formattedAdductString\n" ;
181
182 return ($formattedAdductString, $nbAdducts) ;
183 }
184 ### END of SUB
185
147 =head2 METHOD test_matches_from_hmdb_ua DEPRECATED 186 =head2 METHOD test_matches_from_hmdb_ua DEPRECATED
148 187
149 ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra. 188 ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra.
150 ## Input : none 189 ## Input : none
151 ## Output : $status_line 190 ## Output : $status_line
230 # Fix a limit at 3 tries... 269 # Fix a limit at 3 tries...
231 if ($top < 4) { 270 if ($top < 4) {
232 print "\tTesting HMDB server connexion ($top time(s) )...\n" ; 271 print "\tTesting HMDB server connexion ($top time(s) )...\n" ;
233 $mech->post( 272 $mech->post(
234 "http://specdb.wishartlab.com/ms/search.csv", 273 "http://specdb.wishartlab.com/ms/search.csv",
235 Content => 'utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV' 274 Content => 'utf8=TRUE&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV'
236 ); 275 );
237 276
238 # print Dumper $mech ; 277 # print Dumper $mech ;
239 $statusPostLine = $mech->status() ; 278 $statusPostLine = $mech->status() ;
240 } 279 }
241 else { 280 else {
242 last ; 281 last ;
243 } 282 }
244 $top++ ; 283 $top++ ;
245 }## End While 284 }## End While
246
247 return (\$statusPostLine) ; 285 return (\$statusPostLine) ;
248 } 286 }
249 ## END of SUB 287 ## END of SUB
250 288
251 289
276 else { 314 else {
277 ## None supported http code error ## 315 ## None supported http code error ##
278 croak "Internal Server Error $$status..." ; 316 croak "Internal Server Error $$status..." ;
279 } 317 }
280 } 318 }
281 if ( $$status == 200 ) { print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ; } 319 if ( $$status == 200 ) {
320 print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ;
321 print "\tAll searches should be sent successfully to HMDB...(Set verbose to \"High\" for more information!)\n" ;
322 }
282 } 323 }
283 324
284 return (1) ; 325 return (1) ;
285 } 326 }
286 ## END of SUB 327 ## END of SUB
340 ## END of SUB 381 ## END of SUB
341 382
342 =head2 METHOD getMatchesFromHmdbWithUA 383 =head2 METHOD getMatchesFromHmdbWithUA
343 384
344 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) 385 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
345 ## Input : $mass, $delta, $mode 386 ## Input : $mass, $delta, $mode, adducts
346 ## Output : $results 387 ## Output : $results
347 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ; 388 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ;
348 389
349 =cut 390 =cut
350 ## START of SUB 391 ## START of SUB
351 sub getMatchesFromHmdbWithUA { 392 sub getMatchesFromHmdbWithUA {
352 ## Retrieve Values 393 ## Retrieve Values
353 my $self = shift ; 394 my $self = shift ;
354 my ( $masses, $delta, $mode ) = @_ ; 395 my ( $masses, $delta, $mode, $adducts ) = @_ ;
355 396
356 my @page = () ; 397 my @page = () ;
357 398
358 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and 399 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
359 400
379 $mech->add_header('Connection', 'keep-alive'); 420 $mech->add_header('Connection', 'keep-alive');
380 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); 421 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
381 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); 422 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search');
382 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); 423 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
383 } 424 }
384 425 ## adduct format is adduct_type=M%2BH%202M%2BH
426
427 if ( (!defined $adducts) or ( $adducts eq '') ) {
428 $adducts = 'Unknown' ;
429 }
430
385 my $res = $mech->post( 431 my $res = $mech->post(
386 "http://specdb.wishartlab.com/ms/search.csv", 432 "http://specdb.wishartlab.com/ms/search.csv",
387 Content => 'utf8=TRUE&mode=' 433 Content => 'utf8=TRUE&mode='
388 .$mode.'&query_masses=' 434 .$mode.'&adduct_type='
435 .$adducts.'&query_masses='
389 .$masses.'&tolerance=' 436 .$masses.'&tolerance='
390 .$delta.'&database=HMDB&commit=Download Results As CSV' 437 .$delta.'&database=HMDB&commit=Download Results As CSV'
391 ); 438 );
392 439
393 if ($mech->success) { 440 if ($mech->success) {
471 ## manage per query_mzs (keep query masses order by array) 518 ## manage per query_mzs (keep query masses order by array)
472 my @results = () ; 519 my @results = () ;
473 foreach (@{$masses}) { 520 foreach (@{$masses}) {
474 if ($result_by_entry{$_}) { 521 if ($result_by_entry{$_}) {
475 522
476 ## cut all entries > $max_query 523 ## cut all entries > $max_query - all entries were already sorted...by hmdb
477 my @temp_entries = @{$result_by_entry{$_}} ; 524 my @temp_entries = @{$result_by_entry{$_}} ;
478 my @temp_cut = () ; 525 my @temp_cut = () ;
479 my $current_query = 0 ; 526 my $current_query = 0 ;
480 foreach (@temp_entries) { 527 foreach (@temp_entries) {
481 $current_query ++ ; 528 $current_query ++ ;
485 else { 532 else {
486 push (@temp_cut, $_) ; 533 push (@temp_cut, $_) ;
487 } 534 }
488 } 535 }
489 push (@results, \@temp_cut) ; 536 push (@results, \@temp_cut) ;
490 # push (@results, $result_by_entry{$_}) ;
491 } 537 }
492 else {push (@results, [] ) ;} ; 538 else { push (@results, [] ) ; } ;
493 539
494 } 540 }
495
496 return(\@results) ; 541 return(\@results) ;
497 } 542 }
498 ## END of SUB 543 ## END of SUB
499 544
500 =head2 METHOD parse_hmdb_page_results 545 =head2 METHOD parse_hmdb_page_results
579 624
580 =head2 METHOD get_unik_ids_from_results 625 =head2 METHOD get_unik_ids_from_results
581 626
582 ## Description : get all unik ids from the hmdb result object 627 ## Description : get all unik ids from the hmdb result object
583 ## Input : $results 628 ## Input : $results
584 ## Output : $ids 629 ## Output : $ids, $idsNumber
585 ## Usage : my ( $ids ) = get_unik_ids_from_results ( $results ) ; 630 ## Usage : my ( $ids ) = get_unik_ids_from_results ( $results ) ;
586 631
587 =cut 632 =cut
588 ## START of SUB 633 ## START of SUB
589 sub get_unik_ids_from_results { 634 sub get_unik_ids_from_results {
599 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) { 644 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) {
600 $ids{$entries->{'ENTRY_ENTRY_ID'}} = 1 ; 645 $ids{$entries->{'ENTRY_ENTRY_ID'}} = 1 ;
601 } 646 }
602 } 647 }
603 } 648 }
604 649 my $idsNumber = keys %ids ;
605 return (\%ids) ; 650 return (\%ids, $idsNumber) ;
606 } 651 }
607 ### END of SUB 652 ### END of SUB
608 653
609 654
610 655
634 679
635 680
636 if( (defined $ids) and ($ids > 0 ) ) { 681 if( (defined $ids) and ($ids > 0 ) ) {
637 682
638 foreach my $id (keys %{$ids}) { 683 foreach my $id (keys %{$ids}) {
639 684 print "$id...\n" ;
640 # print "\n============== > $id **********************\n " ;
641 my $twig = undef ; 685 my $twig = undef ;
642 686
643 if (defined $hmdb_url) { 687 if (defined $hmdb_url) {
644 $query = $hmdb_url.$id.'.xml' ; 688 $query = $hmdb_url.$id.'.xml' ;
645 689
646 ## test the header if exists 690 ## test the header if exists
647 my $response = head($query) ; 691 my $response = head($query) ;
648 692
649 if (!defined $response) { 693 if (!defined $response) {
694 $metabocard_features{$id}{'STATUS'} = 'NOT_EXISTING' ;
650 $metabocard_features{$id}{'metabolite_name'} = undef ; 695 $metabocard_features{$id}{'metabolite_name'} = undef ;
651 $metabocard_features{$id}{'metabolite_inchi'} = undef ; 696 $metabocard_features{$id}{'metabolite_inchi'} = undef ;
652 $metabocard_features{$id}{'metabolite_logp'} = undef ; 697 $metabocard_features{$id}{'metabolite_logp'} = undef ;
653 ## Need to be improve to manage http 404 or other response diff than 200 698 ## Need to be improve to manage http 404 or other response diff than 200
654 } 699 } ## IF error
655 elsif ($response->is_success) { 700 elsif ( $response->is_success ) {
656 701
657 $twig = XML::Twig->nparse_ppe( 702 $twig = XML::Twig->nparse_ppe(
658 703
659 twig_handlers => { 704 twig_handlers => {
660 # metabolite name 705 # metabolite name
661 'metabolite/name' => sub { $metabocard_features{$id}{'metabolite_name'} = $_ -> text_only ; } , 706 'metabolite/name' => sub { $metabocard_features{$id}{'metabolite_name'} = $_ -> text_only ; $metabocard_features{$id}{'STATUS'} = 'EXISTING' ; } ,
662 # metabolite inchi 707 # metabolite inchi
663 'metabolite/inchi' => sub { $metabocard_features{$id}{'metabolite_inchi'} = $_ -> text_only ; } , 708 'metabolite/inchi' => sub { $metabocard_features{$id}{'metabolite_inchi'} = $_ -> text_only ; $metabocard_features{$id}{'STATUS'} = 'EXISTING' ;} ,
664 ## metabolite logP 709 ## metabolite logP
665 'metabolite/predicted_properties/property' => sub { 710 'metabolite/predicted_properties/property' => sub {
666 711
667 my ($kind, $source, $value ) = ( undef, undef, undef ) ; 712 my ($kind, $source, $value ) = ( undef, undef, undef ) ;
668 713
673 elsif ( $field->name eq 'value') { $value = $field->text ; } 718 elsif ( $field->name eq 'value') { $value = $field->text ; }
674 719
675 if (defined $source ) { 720 if (defined $source ) {
676 if ( ( $kind eq 'logp' ) and ( $source eq 'ALOGPS' ) ) { 721 if ( ( $kind eq 'logp' ) and ( $source eq 'ALOGPS' ) ) {
677 $metabocard_features{$id}{'metabolite_logp'} = $value ; 722 $metabocard_features{$id}{'metabolite_logp'} = $value ;
723 $metabocard_features{$id}{'STATUS'} = 'EXISTING' ;
678 } 724 }
679 ($kind, $source, $value ) = ( undef, undef, undef ) ; 725 ($kind, $source, $value ) = ( undef, undef, undef ) ;
680 } 726 }
681 } 727 }
682 } 728 }
687 ); 733 );
688 734
689 # $twig->print; 735 # $twig->print;
690 $twig->purge ; 736 $twig->purge ;
691 737
692 if (!$@) { 738 # if (!$@) {
693 739 #
694 } 740 # }
695 else { 741 # else {
696 warn $@ ; 742 # warn $@ ;
697 } 743 # }
698 } 744 } ## ELSIF success
699 } 745 } # END if defined URL
700 else { 746 else {
701 warn "The hmdb metabocard url is not defined\n" ; 747 warn "\tThe hmdb metabocard url is not defined\n" ;
702 last; 748 last;
703 } 749 }
704 } 750 }
705 } 751 } ## End IF defined ids
706 else { 752 else {
707 warn "The HMDB ids list from HMDB is empty - No metabocard found\n" ; 753 warn "The HMDB ids list from HMDB is empty - No metabocard found\n" ;
708 } 754 }
709 755
710 # print Dumper %metabocard_features ; 756 # print Dumper %metabocard_features ;
713 ### END of SUB 759 ### END of SUB
714 760
715 761
716 =head2 METHOD map_suppl_data_on_hmdb_results 762 =head2 METHOD map_suppl_data_on_hmdb_results
717 763
718 ## Description : map supplementary data with already collected results with hmdb search 764 ## Description : map supplementary data with already collected results with hmdb search - delete the entry if hmdb card doesn't exist...
719 ## Input : $results, $features 765 ## Input : $results, $features
720 ## Output : $results 766 ## Output : $results
721 ## Usage : my ( $results ) = map_suppl_data_on_hmdb_results ( $results, $features ) ; 767 ## Usage : my ( $results ) = map_suppl_data_on_hmdb_results ( $results, $features ) ;
722 768
723 =cut 769 =cut
724 ## START of SUB 770 ## START of SUB
725 sub map_suppl_data_on_hmdb_results { 771 sub map_suppl_data_on_hmdb_results {
726 ## Retrieve Values 772 ## Retrieve Values
727 my $self = shift ; 773 my $self = shift ;
728 my ( $results, $features ) = @_; 774 my ( $results, $features ) = @_;
729 my ( @more_results ) = ( () ) ; 775 my ( @moreResults ) = ( () ) ;
730 776
731 @more_results = @{$results} ; ## Dump array ref to map 777 foreach my $result (@{$results}) {
732 778
733 foreach my $result (@more_results) { 779 my @newResult = () ;
734 780
735 foreach my $entries (@{$result}) { 781 foreach my $entry (@{$result}) {
736 782
737 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) { 783 if ( ($entry->{'ENTRY_ENTRY_ID'}) and ($entry->{'ENTRY_ENTRY_ID'} ne '' ) ) {
738 ## check that we have a ID for mapping 784
739 my $current_id = $entries->{'ENTRY_ENTRY_ID'} ; 785 my $current_id = $entry->{'ENTRY_ENTRY_ID'} ;
740 if ($features->{"$current_id"}) { 786 my $newCompletedEntry = $entry ;
787
788 ## If the id exists in feature hash and its status is not NOT_EXISTING
789 if ( ($features->{"$current_id"} ) and ( $features->{"$current_id"}{STATUS} eq 'EXISTING' ) ) {
790
741 ## Metabolite NAME 791 ## Metabolite NAME
742 if (defined $features->{"$current_id"}{'metabolite_name'} ) { 792 if (defined $features->{"$current_id"}{'metabolite_name'} ) {
743 $entries->{'ENTRY_ENTRY_NAME'} = $features->{"$current_id"}{'metabolite_name'} 793 $newCompletedEntry->{'ENTRY_ENTRY_NAME'} = $features->{"$current_id"}{'metabolite_name'}
744 } 794 }
745 else { 795 else {
746 $entries->{'ENTRY_ENTRY_NAME'} = 'UNKNOWN' ; 796 $newCompletedEntry->{'ENTRY_ENTRY_NAME'} = 'UNKNOWN' ;
747 } 797 }
748 ## Metabolite INCHI 798 ## Metabolite INCHI
749 if (defined $features->{"$current_id"}{'metabolite_inchi'} ) { 799 if (defined $features->{"$current_id"}{'metabolite_inchi'} ) {
750 $entries->{'ENTRY_ENTRY_INCHI'} = $features->{"$current_id"}{'metabolite_inchi'} 800 $newCompletedEntry->{'ENTRY_ENTRY_INCHI'} = $features->{"$current_id"}{'metabolite_inchi'}
751 } 801 }
752 else { 802 else {
753 $entries->{'ENTRY_ENTRY_INCHI'} = 'NA' ; 803 $newCompletedEntry->{'ENTRY_ENTRY_INCHI'} = 'NA' ;
754 } 804 }
755 ## Metabolite LOGP 805 ## Metabolite LOGP
756 if (defined $features->{"$current_id"}{'metabolite_logp'} ) { 806 if (defined $features->{"$current_id"}{'metabolite_logp'} ) {
757 $entries->{'ENTRY_ENTRY_LOGP'} = $features->{"$current_id"}{'metabolite_logp'} 807 $newCompletedEntry->{'ENTRY_ENTRY_LOGP'} = $features->{"$current_id"}{'metabolite_logp'}
758 } 808 }
759 else { 809 else {
760 $entries->{'ENTRY_ENTRY_LOGP'} = 'NA' ; 810 $newCompletedEntry->{'ENTRY_ENTRY_LOGP'} = 'NA' ;
761 } 811 }
812 push (@newResult, $newCompletedEntry) ;
762 } 813 }
814 elsif ( ($features->{"$current_id"} ) and ( $features->{"$current_id"}{STATUS} eq 'NOT_EXISTING' ) ) {
815 $newCompletedEntry = undef ;
816 next ;
817
818 }
819 ## In cas no features are given
763 else { 820 else {
764 # if ($features) { 821 $newCompletedEntry->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ;
765 # warn "This HMDB id doesn't match any collected ids\n" ; 822 $newCompletedEntry->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ;
766 # } 823 push (@newResult, $newCompletedEntry) ;
767 $entries->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ;
768 $entries->{'ENTRY_ENTRY_LOGP'} = 'NONEDATA' ;
769 } 824 }
770 } 825 }
771 } 826 } ## END FOREACH ENTRIES
772 } 827
773 828 push (@moreResults, \@newResult) ;
774 return (\@more_results) ; 829
830 } ## END FOREACH RESULT
831
832 return (\@moreResults) ;
775 } 833 }
776 ### END of SUB 834 ### END of SUB
777 835
778 836
779 =head2 METHOD set_html_tbody_object 837 =head2 METHOD set_html_tbody_object