diff lib/hmdb.pm @ 20:b5a1d5e43685 draft

Master branch Updating - - Fxx
author fgiacomoni
date Wed, 23 Jan 2019 07:49:34 -0500
parents 49f87ddb2c78
children 63ba1cb240b7
line wrap: on
line diff
--- a/lib/hmdb.pm	Wed Jan 23 03:35:34 2019 -0500
+++ b/lib/hmdb.pm	Wed Jan 23 07:49:34 2019 -0500
@@ -7,9 +7,10 @@
 
 use LWP::Simple;
 use LWP::UserAgent;
+use WWW::Mechanize qw();
 use URI::URL;
 use SOAP::Lite;
-use Encode;
+use Encode qw(encode_utf8);
 use HTML::Template ;
 use XML::Twig ;
 use Text::CSV ;
@@ -143,9 +144,9 @@
 }
 ## END of SUB
 
-=head2 METHOD test_matches_from_hmdb_ua
+=head2 METHOD test_matches_from_hmdb_ua DEPRECATED
 
-	## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra.
+	## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra.
 	## Input : none
 	## Output : $status_line
 	## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ;
@@ -159,24 +160,97 @@
     my @page = () ;
 
 	my $ua = new LWP::UserAgent;
-	$ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
+	$ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0");
+	    
+	my $url = 'http://specdb.wishartlab.com/ms/search.csv';
+	my $header = ['Connection' => 'keep-alive', 'Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'];
 	 
-	my $req = HTTP::Request->new(
-	    POST => 'http://specdb.wishartlab.com/ms/search.csv');
+	my $req = HTTP::Request->new('POST', $url, $header);
+#	print Dumper $req ;
 	
-	$req->content_type('application/x-www-form-urlencoded');
+	## Query format for HMDB 4.0
+	#"utf8=✓&authenticity_token=K1Ys7oyMKmVNm9n8p0jiTxBlh4G4OO0cqKZYnQKDCw0pM6zmm/CiBxv+/cXhuRsVFV98LLeAMJRN5dCyhIWlAA==&query_masses=175.01 238.19 420.16 780.32 956.25 1100.45&ms_search_ion_mode=positive&adduct_type[]=Unknown&tolerance=0.05&tolerance_units=Da&commit=Search"
+	
 	$req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV');
-	 
+#	print Dumper $req ;
 	my $res = $ua->request($req);
-#	print $res->as_string;
+	
+	print $res->as_string;
 	my $status_line = $res->status_line ;
 	($status_line) = ($status_line =~ /(\d+)/);
-	
-	
+
 	return (\$status_line) ;
 }
 ## END of SUB
 
+=head2 METHOD testMatchesFromHmdbWithUA
+
+	## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra.
+	## Input : none
+	## Output : $status_line
+	## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ;
+	
+=cut
+## START of SUB
+sub testMatchesFromHmdbWithUA {
+	## Retrieve Values
+    my $self = shift ;
+    
+    my @page = () ;
+    #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
+
+	my $mech = WWW::Mechanize->new(
+		agent => 'wonderbot for W4M 1.01',
+		autocheck => 1,	
+	);
+	
+	my $statusGetLine = 0 ;
+	my $statusPostLine = 0 ;
+	
+	#receiving cookies and authentication token (CFRS)
+	my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search");
+	$statusGetLine = $mech->status() ;
+	
+	if ($statusGetLine == 200 ) {
+		die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
+		unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
+		my $csrftoken = $1;
+#		print "\nTOKEN: $csrftoken\n" ;
+		$mech->add_header("X-CSRFToken", $csrftoken);
+		$mech->add_header('Connection', 'keep-alive');
+		$mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
+		$mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search');
+		$mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
+	}
+	
+	## POST test
+	my $top = 1 ;
+	while ($statusPostLine != 200 ) {
+		
+		# Fix a limit at 3 tries...
+		if ($top < 4) {
+			print "\tTesting HMDB server connexion ($top time(s) )...\n" ;
+			$mech->post(
+				"http://specdb.wishartlab.com/ms/search.csv",
+		    	Content => 'utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV'
+			);
+			
+		#	print Dumper $mech ;
+			$statusPostLine = $mech->status() ;
+		}
+		else {
+			last ;
+		}
+		$top++ ;
+	}## End While
+	
+	return (\$statusPostLine) ;
+}
+## END of SUB
+
+
+
+
 =head2 METHOD check_state_from_hmdb_ua
 
 	## Description : check the thhp status of hmdb and kill correctly the script if necessary.
@@ -196,8 +270,9 @@
     }
     else {
     	unless ( $$status == 200 ) {
-    		if  ( $$status == 504 ) { croak "Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server" ; }
-    		if  ( $$status == 500 ) { croak "Internal Server Error: The HMDB server returns an unexpected internal server error" ; }
+    		if  ( $$status == 502 ) { croak "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ; }
+    		if  ( $$status == 504 ) { croak "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ; }
+    		if  ( $$status == 500 ) { croak "Internal Server Error (500): The HMDB server returns an unexpected internal server error. The Hmdb tool is stopped with error." ; }
     		else {
     			## None supported http code error ##
     			croak "Internal Server Error $$status..." ;
@@ -210,9 +285,9 @@
 }
 ## END of SUB
 
-=head2 METHOD get_matches_from_hmdb_ua
+=head2 METHOD get_matches_from_hmdb_ua DEPRECATED
 
-	## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
+	## Description : [DEPRECATED]HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
 	## Input : $mass, $delta, $mode
 	## Output : $results
 	## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;
@@ -227,18 +302,29 @@
     my @page = () ;
 
 	my $ua = LWP::UserAgent->new( keep_alive => 10 );
-	$ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
+	$ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0");
 	$ua->timeout(2400) ;
+	
+	# Cookies
+#	my $cookie = new HTTP::Cookies( ignore_discard => 1 );
+#	$ua->cookie_jar( $cookie );
 	 
-	my $req = HTTP::Request->new(
-	    POST => 'http://specdb.wishartlab.com/ms/search.csv');
+#	my $req = HTTP::Request->new(
+#	    POST => 'http://specdb.wishartlab.com/ms/search.csv');
+
+	my $url = 'http://specdb.wishartlab.com/ms/search.csv';
+	my $header = ['Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'];
+	my $data = {mode => $mode, query_masses => $masses, 'tolerance' => $delta, database => 'HMDB', commit => 'Download Results As CSV'};
+	my $encoded_data = encode_utf8($data);
+	 
+	my $req = HTTP::Request->new('POST', $url, $header, $encoded_data);
 	
-	$req->content_type('application/x-www-form-urlencoded');
+#	$req->content_type('application/x-www-form-urlencoded');
 	$req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');
 #	print Dumper $req ; 
 	my $res = $ua->request($req);
 	my $status_line = undef ;
-#	print $res->as_string;
+	print $res->as_string;
 	if ($res->is_success) {
 	     @page = split ( /\n/, $res->decoded_content ) ;
 	     $status_line = 'OK' ;
@@ -253,6 +339,73 @@
 }
 ## END of SUB
 
+=head2 METHOD getMatchesFromHmdbWithUA
+
+	## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
+	## Input : $mass, $delta, $mode
+	## Output : $results
+	## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ;
+	
+=cut
+## START of SUB
+sub getMatchesFromHmdbWithUA {
+	## Retrieve Values
+    my $self = shift ;
+    my ( $masses, $delta, $mode ) = @_ ;
+    
+    my @page = () ;
+    
+    #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
+
+	my $mech = WWW::Mechanize->new(
+		agent => 'wonderbot for W4M 1.01',
+		autocheck => 1,
+		timeout => 2400,
+	);
+	
+	my $statusGetLine = 0 ;
+	my $statusPostLine = 0 ;
+	
+	#receiving cookies and authentication token (CFRS)
+	my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search");
+	$statusGetLine = $mech->status() ;
+	
+	if ($statusGetLine == 200 ) {
+		die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
+		unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
+		my $csrftoken = $1;
+#		print "\nTOKEN: $csrftoken\n" ;
+		$mech->add_header("X-CSRFToken", $csrftoken);
+		$mech->add_header('Connection', 'keep-alive');
+		$mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
+		$mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search');
+		$mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
+	}
+	
+	my $res = $mech->post(
+		"http://specdb.wishartlab.com/ms/search.csv",
+    	Content => 'utf8=TRUE&mode='
+    		.$mode.'&query_masses='
+    		.$masses.'&tolerance='
+    		.$delta.'&database=HMDB&commit=Download Results As CSV'
+	);
+	
+	if ($mech->success) {
+		@page = split ( /\n/, $res->decoded_content ) ;
+		$statusPostLine = 'OK' ;
+	}
+	else {
+		$statusPostLine = $mech->status() ;
+		warn "\t[HMDB service issue !! the server returned a $statusPostLine HTTP error]" ;
+	}
+	
+#	print Dumper $res->decoded_content ;
+
+	return (\@page, $statusPostLine) ;
+}
+## END of SUB
+
+
 =head2 METHOD parse_hmdb_csv_results
 
 	## Description : parse the csv results and get data