diff downloader_bank_hmdb.pl @ 0:7c9269bded0e draft

Init repository for [downloader_bank_hmdb]
author fgiacomoni
date Tue, 14 Jan 2020 05:21:23 -0500
parents
children be504ccbc41c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/downloader_bank_hmdb.pl	Tue Jan 14 05:21:23 2020 -0500
@@ -0,0 +1,321 @@
+#!perl
+
+## script  : downloader_bank_hmdb.pl
+#=============================================================================
+#                              Included modules and versions
+#=============================================================================
+## Perl modules
+use strict ;
+use warnings ;
+use Carp qw (cluck croak carp) ;
+
+use Data::Dumper ;
+use Getopt::Long ;
+use File::Basename ;
+use FindBin ; ## Allows you to locate the directory of original perl script
+
+## Specific Perl Modules (PFEM)
+use lib $FindBin::Bin.'/lib' ;
+my $binPath = $FindBin::Bin ;
+
+## Dedicate Perl Modules (Home made...)
+use hmdb_api qw( :ALL ) ;
+use utils qw( :ALL ) ;
+use conf qw( :ALL ) ;
+use csv qw( :ALL ) ;
+
+## Initialized values
+my $OptHelp ;
+my $VERBOSE = 3 ;
+my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ;
+
+#=============================================================================
+#                                Manage EXCEPTIONS
+#=============================================================================
+## Parse the command line: -help is declared as an alias of -h (it is the documented flag) and
+## a parsing failure (unknown option, bad value type) stops the script on the usage message.
+GetOptions ( 	"help|h"     	=> \$OptHelp,       # HELP
+				"bank|b:s"	=> \$bankName,		# bank name to get the right url
+				"format|f:s"=> \$format,		# output format
+				"outTab:s"	=> \$outTab,		# tabular output
+				"outJson:s"	=> \$outJson,		# Json output
+				"verbose:i"	=> \$VERBOSE,		# level of script verbose [should be 1 or 3]
+            ) or help() ;
+         
+## if you put the option -help or -h function help is started
+if ( defined($OptHelp) ){ help() ; }
+
+#=============================================================================
+#                                MAIN SCRIPT
+#=============================================================================
+
+
+# get conf informations: each *.cfg found next to the script is parsed, the last one wins
+my ( $CONF ) = ( undef ) ;
+foreach my $conf ( <$binPath/*.cfg> ) {
+	$CONF = conf::new()->as_conf($conf) ;
+}
+croak "[ERROR] no usable configuration (*.cfg) loaded from $binPath -- database download stopped" if ( ( defined $bankName ) and ( !defined $CONF ) ) ;
+
+# MAIN SCRIPT : resolve the bank settings from conf, download and unpack the bank when it is
+# not usable on disk yet, then export it as tabular (ref file in the bank dir + user copy).
+if ( (defined $bankName) ) {
+
+	my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ;
+	my ( $bankOutputTabularFile, $bankOutputJsonFile ) = (undef, undef) ;
+	
+	print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ;
+	
+	# get information from conf
+	if ($CONF->{$bankName.'_URL'}) {
+		$bankUrl = $CONF->{$bankName.'_URL'}  ;
+		# get version of the http resource
+		my $oUtils = utils->new() ;
+		($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ;
+		# an undefined version is rejected below (croak): do not print it here
+		print "\tThe current version is: $bankVersion\n" if ( ( $VERBOSE > 1 ) and ( defined $bankVersion ) ) ;
+	}
+	else {
+		croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ;
+	}
+	
+	# the archive format of the http resource is mandatory (ZIP and GZ are unpacked below)
+	if ($CONF->{$bankName.'_HTTP_FORMAT'}) {
+		$bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'}  ;
+	}
+	else {
+		croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ;
+	}
+	
+	print "** Manage bank environment\n" if $VERBOSE > 1 ;
+	my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ;
+	if ( !-d $bankDir ) {
+		print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ;
+		mkdir($bankDir) or croak "[ERROR] the local bank dir ($bankDir) cannot be created: $! -- database download stopped" ;
+	}
+	
+	print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ;
+	
+	# manage if needed to download or not the bank (get or not the last version)
+	my ($bankFileExist, $bankFilePath, $bankFileDir) = (undef, undef, undef) ;
+	
+	if ( ( defined $bankUrl ) and ( defined $bankVersion ) ) {
+		# Init the bank dir and the bank file name
+		$bankFileDir = $bankDir.'/'.$bankName.'_'.$bankVersion ;
+		$bankFilePath = $bankFileDir.'/'.$bankName.'_'.$bankVersion.'.'.lc($bankSuffixe) ;
+		# For final files (tabular/json)
+		$bankOutputTabularFile = $bankFileDir.'/'.$bankName.'_'.$bankVersion.'.tabular' ;
+		$bankOutputJsonFile = $bankFileDir.'/'.$bankName.'_'.$bankVersion.'.json' ;
+		
+		print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ;
+		print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ;
+	}
+	else {
+		croak "the given bank url ($bankUrl) and the detected version ar undef -- database download stopped" ;
+	}
+	
+	my $bankFile = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ;
+	
+	# The bank is considered as present only if its versioned dir holds the tabular ref file or
+	# the unpacked bank file: a dir left empty by a failed download has to trigger a new download.
+	$bankFileExist = ( ( -d $bankFileDir ) and ( ( -e $bankOutputTabularFile ) or ( -e $bankFile ) ) ) ? 'TRUE' : 'FALSE' ;
+	
+	# dowload the bank if does not exist !
+	if ( $bankFileExist eq 'FALSE' ) {
+		print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ;
+		if (!-d $bankFileDir) {
+			print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ;
+			mkdir $bankFileDir ;
+		}
+		
+		# the -d test below also catches a failed mkdir
+		if (-d $bankFileDir) {
+			print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ;
+			print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ;
+			my $oDownloader = utils->new() ;
+			my ($fileZip) = $oDownloader->getHttpFile($bankUrl, $bankFilePath) ;
+			print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ;
+			
+			my $oUnzip = utils->new() ;
+			# the archive type is compared case-insensitively (the archive name itself uses lc)
+			if ( uc($bankSuffixe) eq 'ZIP' ) {
+				$oUnzip->unzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
+				$oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
+			}
+			elsif ( uc($bankSuffixe) eq 'GZ' ) {
+				$oUnzip->gunzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
+				$oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
+			}
+			else {
+				print "\t/!\\ Unknown archive format ($bankSuffixe): the downloaded file is not unpacked /!\\\n" if $VERBOSE > 1 ;
+			}
+		}
+		else {
+			croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ;
+		}
+	}
+	else {
+		print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ;
+		print "\t/!\\ ...in $bankFileDir /!\\\n" if $VERBOSE > 1 ;
+	}
+	
+	## the final tabular file already exists - only a copy for the user history/session is needed
+	if ( ( -e $bankOutputTabularFile ) and ( defined $outTab ) ) {
+		my $ocsv = csv->new( ) ;
+		my $csv = $ocsv->get_csv_object("\t") ;
+		my ($refEntries, $status) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ;
+		$ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ;
+		print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ;
+	}
+	## the final tabular file exists but no user file was asked (-outTab) - nothing to copy
+	elsif ( -e $bankOutputTabularFile ) {
+		print "\t/!\\ the ref file $bankOutputTabularFile exists but no tabular output is defined: no copy done /!\\\n" if $VERBOSE > 1 ;
+	}
+	## the final tabular file does not exists - need to be created from xml
+	else {
+		## Build a HASH with all metabolites from downloaded xml
+		my ($handler, $metabolites, $nbEntries) = (undef, undef, undef) ;
+		# an unset DB format is handled as an unknown one (no comparison with undef)
+		my $dbFormat = ( defined $CONF->{$bankName.'_DB_FORMAT'} ) ? $CONF->{$bankName.'_DB_FORMAT'} : '' ;
+		
+		if (-e $bankFile) {
+			my $oHandler = hmdb_api->new ;
+			# in case the download file is in XML or Metabocard or SDF...
+			if ( $dbFormat eq 'XML' ) {
+				($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ;
+				print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ;
+			}
+			elsif ( $dbFormat eq 'CARD' ) {
+				($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ;
+				($metabolites) = $oHandler->cowmetdb_hash($handler) ;
+				# NOTE(review): $nbEntries is dereferenced here (not in the XML branch) - presumably a scalar ref, to confirm
+				print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ;
+			}
+			else {
+				#TODO...
+			}
+		}
+		else {
+			print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ;
+		}
+		
+		## Write outputs !
+		print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ;
+		
+		# nothing can be written without metabolites or format (the bank file is kept on disk)
+		croak "No metabolites are extracted from the $bankName bank file\n" if ( !defined $metabolites ) ;
+		croak "No output format is defined (-format) for the $bankName bank file\n" if ( !defined $format ) ;
+		
+		## Generation of M+H and M-H masses
+		my $ometmz = hmdb_api->new ;
+		my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($dbFormat, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ;
+		
+		## tabular output
+		if ( ( $format eq 'tabular') and (defined $outTab) ) {
+			print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ;
+			# sort metabolites
+			my $omet = hmdb_api->new ;
+			my $sortedMetabolites = undef ;
+			
+			if ( $dbFormat eq 'XML' ) {
+				$sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ;
+			}
+			elsif ( $dbFormat eq 'CARD' ) {
+				$sortedMetabolites= $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ;
+			}
+			
+			my $ocsv = csv->new( ) ;
+			my $csv = $ocsv->get_csv_object("\t") ;
+			# create the ref file for ./bank repo
+			$ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ;
+			# generate also a copy for user history
+			$ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ;
+		}
+		elsif ( defined $outJson ) {
+			#TODO... the json export is not implemented: say it instead of silently writing nothing
+			print "\t/!\\ The json output is not implemented yet: no file is written /!\\\n" if $VERBOSE > 1 ;
+		}
+		## Clean local envt
+		unlink $bankFile if (-e $bankFile) ;
+	}
+} ## END IF defined $bankName
+else {
+	# help() prints the usage and exits, so the reason is reported before calling it
+	carp "No bank name and format are defined - Please set one" ;
+	help() ;
+}
+
+print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ;
+### END of main script
+
+
+
+
+
+#====================================================================================
+# Help subroutine called with the -h option or when no bank name is given
+# number of arguments : 0
+# Argument(s)        : none
+# Return           : never returns - prints the usage on STDERR then exits with status 1
+#====================================================================================
+sub help {
+	print STDERR "
+downloader_bank_hmdb.pl
+
+# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
+# Input : N/A
+# Author : Franck Giacomoni
+# Email : fgiacomoni\@inra.fr
+# Version : 1.0
+# Created : 21/11/2018
+USAGE :		 
+		downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]
+		
+		";
+	exit(1);
+}
+
+## END of script - F Giacomoni 
+
+__END__
+
+=head1 NAME
+
+ downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source.
+
+=head1 USAGE
+
+ downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json] -outTab [tabular file name]
+ or downloader_bank_hmdb.pl -h
+
+=head1 SYNOPSIS
+
+This script exports a specific tissue/matrix bank from the HMDB source.
+
+=head1 DESCRIPTION
+
+This main program is a Galaxy tool (W4M) that exports a specific tissue/matrix bank from the HMDB source in a tabular format.
+
+=over 4
+
+=item B<function01>
+
+=item B<function02>
+
+=back
+
+=head1 AUTHOR
+
+Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+
+=head1 VERSION
+
+version 1 : 21 / 11 / 2018
+
+version 2 : ??
+
+=cut
\ No newline at end of file