Mercurial > repos > fgiacomoni > hmdb_ms_search
diff lib/hmdb.pm @ 20:b5a1d5e43685 draft
Master branch Updating - - Fxx
author | fgiacomoni |
---|---|
date | Wed, 23 Jan 2019 07:49:34 -0500 |
parents | 49f87ddb2c78 |
children | 63ba1cb240b7 |
line wrap: on
line diff
--- a/lib/hmdb.pm Wed Jan 23 03:35:34 2019 -0500 +++ b/lib/hmdb.pm Wed Jan 23 07:49:34 2019 -0500 @@ -7,9 +7,10 @@ use LWP::Simple; use LWP::UserAgent; +use WWW::Mechanize qw(); use URI::URL; use SOAP::Lite; -use Encode; +use Encode qw(encode_utf8); use HTML::Template ; use XML::Twig ; use Text::CSV ; @@ -143,9 +144,9 @@ } ## END of SUB -=head2 METHOD test_matches_from_hmdb_ua +=head2 METHOD test_matches_from_hmdb_ua DEPRECATED - ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. + ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra. ## Input : none ## Output : $status_line ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ; @@ -159,24 +160,97 @@ my @page = () ; my $ua = new LWP::UserAgent; - $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); + $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); + + my $url = 'http://specdb.wishartlab.com/ms/search.csv'; + my $header = ['Connection' => 'keep-alive', 'Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8']; - my $req = HTTP::Request->new( - POST => 'http://specdb.wishartlab.com/ms/search.csv'); + my $req = HTTP::Request->new('POST', $url, $header); +# print Dumper $req ; - $req->content_type('application/x-www-form-urlencoded'); + ## Query format for HMDB 4.0 + #"utf8=✓&authenticity_token=K1Ys7oyMKmVNm9n8p0jiTxBlh4G4OO0cqKZYnQKDCw0pM6zmm/CiBxv+/cXhuRsVFV98LLeAMJRN5dCyhIWlAA==&query_masses=175.01 238.19 420.16 780.32 956.25 1100.45&ms_search_ion_mode=positive&adduct_type[]=Unknown&tolerance=0.05&tolerance_units=Da&commit=Search" + $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV'); - +# print Dumper $req ; my $res = $ua->request($req); -# print $res->as_string; + + print $res->as_string; my $status_line = $res->status_line ; ($status_line) = ($status_line =~ /(\d+)/); - - + return (\$status_line) ; } ## END of SUB +=head2 METHOD testMatchesFromHmdbWithUA + + ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. + ## Input : none + ## Output : $status_line + ## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ; + +=cut +## START of SUB +sub testMatchesFromHmdbWithUA { + ## Retrieve Values + my $self = shift ; + + my @page = () ; + #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and + + my $mech = WWW::Mechanize->new( + agent => 'wonderbot for W4M 1.01', + autocheck => 1, + ); + + my $statusGetLine = 0 ; + my $statusPostLine = 0 ; + + #receiving cookies and authentication token (CFRS) + my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search"); + $statusGetLine = $mech->status() ; + + if ($statusGetLine == 200 ) { + die 'no CSRF_REQUEST_TOKEN_VALUE in page found' + unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; + my $csrftoken = $1; +# print "\nTOKEN: $csrftoken\n" ; + $mech->add_header("X-CSRFToken", $csrftoken); + $mech->add_header('Connection', 'keep-alive'); + $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); + $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); + $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); + } + + ## POST test + my $top = 1 ; + while ($statusPostLine != 200 ) { + + # Fix a limit at 3 tries... + if ($top < 4) { + print "\tTesting HMDB server connexion ($top time(s) )...\n" ; + $mech->post( + "http://specdb.wishartlab.com/ms/search.csv", + Content => 'utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV' + ); + + # print Dumper $mech ; + $statusPostLine = $mech->status() ; + } + else { + last ; + } + $top++ ; + }## End While + + return (\$statusPostLine) ; +} +## END of SUB + + + + =head2 METHOD check_state_from_hmdb_ua ## Description : check the thhp status of hmdb and kill correctly the script if necessary. @@ -196,8 +270,9 @@ } else { unless ( $$status == 200 ) { - if ( $$status == 504 ) { croak "Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server" ; } - if ( $$status == 500 ) { croak "Internal Server Error: The HMDB server returns an unexpected internal server error" ; } + if ( $$status == 502 ) { croak "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ; } + if ( $$status == 504 ) { croak "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ; } + if ( $$status == 500 ) { croak "Internal Server Error (500): The HMDB server returns an unexpected internal server error. The Hmdb tool is stopped with error." ; } else { ## None supported http code error ## croak "Internal Server Error $$status..." ; @@ -210,9 +285,9 @@ } ## END of SUB -=head2 METHOD get_matches_from_hmdb_ua +=head2 METHOD get_matches_from_hmdb_ua DEPRECATED - ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) + ## Description : [DEPRECATED]HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) ## Input : $mass, $delta, $mode ## Output : $results ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ; @@ -227,18 +302,29 @@ my @page = () ; my $ua = LWP::UserAgent->new( keep_alive => 10 ); - $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); + $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); $ua->timeout(2400) ; + + # Cookies +# my $cookie = new HTTP::Cookies( ignore_discard => 1 ); +# $ua->cookie_jar( $cookie ); - my $req = HTTP::Request->new( - POST => 'http://specdb.wishartlab.com/ms/search.csv'); +# my $req = HTTP::Request->new( +# POST => 'http://specdb.wishartlab.com/ms/search.csv'); + + my $url = 'http://specdb.wishartlab.com/ms/search.csv'; + my $header = ['Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8']; + my $data = {mode => $mode, query_masses => $masses, 'tolerance' => $delta, database => 'HMDB', commit => 'Download Results As CSV'}; + my $encoded_data = encode_utf8($data); + + my $req = HTTP::Request->new('POST', $url, $header, $encoded_data); - $req->content_type('application/x-www-form-urlencoded'); +# $req->content_type('application/x-www-form-urlencoded'); $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV'); # print Dumper $req ; my $res = $ua->request($req); my $status_line = undef ; -# print $res->as_string; + print $res->as_string; if ($res->is_success) { @page = split ( /\n/, $res->decoded_content ) ; $status_line = 'OK' ; @@ -253,6 +339,73 @@ } ## END of SUB +=head2 METHOD getMatchesFromHmdbWithUA + + ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) + ## Input : $mass, $delta, $mode + ## Output : $results + ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ; + +=cut +## START of SUB +sub getMatchesFromHmdbWithUA { + ## Retrieve Values + my $self = shift ; + my ( $masses, $delta, $mode ) = @_ ; + + my @page = () ; + + #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and + + my $mech = WWW::Mechanize->new( + agent => 'wonderbot for W4M 1.01', + autocheck => 1, + timeout => 2400, + ); + + my $statusGetLine = 0 ; + my $statusPostLine = 0 ; + + #receiving cookies and authentication token (CFRS) + my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search"); + $statusGetLine = $mech->status() ; + + if ($statusGetLine == 200 ) { + die 'no CSRF_REQUEST_TOKEN_VALUE in page found' + unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; + my $csrftoken = $1; +# print "\nTOKEN: $csrftoken\n" ; + $mech->add_header("X-CSRFToken", $csrftoken); + $mech->add_header('Connection', 'keep-alive'); + $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); + $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); + $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); + } + + my $res = $mech->post( + "http://specdb.wishartlab.com/ms/search.csv", + Content => 'utf8=TRUE&mode=' + .$mode.'&query_masses=' + .$masses.'&tolerance=' + .$delta.'&database=HMDB&commit=Download Results As CSV' + ); + + if ($mech->success) { + @page = split ( /\n/, $res->decoded_content ) ; + $statusPostLine = 'OK' ; + } + else { + $statusPostLine = $mech->status() ; + warn "\t[HMDB service issue !! the server returned a $statusPostLine HTTP error]" ; + } + +# print Dumper $res->decoded_content ; + + return (\@page, $statusPostLine) ; +} +## END of SUB + + =head2 METHOD parse_hmdb_csv_results ## Description : parse the csv results and get data