view hr2_manager.pl @ 3:78afd7f439f3 draft default tip

master branch Updating with tag :CI_COMMIT_TAG - - Fxx
author fgiacomoni
date Wed, 15 Feb 2023 15:57:49 +0000
parents 23970530a518
children
line wrap: on
line source

#!perl

## script  : hr2_manager.pl
## VERSIONS :
#	- 01/03/2019 : Fix P issue, permit more flexible atom settings and add requirements for conda auto managing.
#	- 09/12/2022 : Fix "Additional atomes not taken into account" - Ticket issue #7 
#	- 15/02/2023 : New feature - optional management of the max value allowed for each atom

#=============================================================================
#                              Included modules and versions
#=============================================================================
## Perl modules
use strict ;
use warnings ;
use Carp qw (cluck croak carp) ;

use Data::Dumper ;
use Getopt::Long ;
use POSIX ;
use FindBin ; ## Allows you to locate the directory of original perl script

## Dedicate Perl Modules (Home made...)
use lib $FindBin::Bin ;
my $binPath = $FindBin::Bin ;
use lib::hr qw( :ALL ) ;
## PFEM Perl Modules
use lib::conf  qw( :ALL ) ;
use lib::csv  qw( :ALL ) ;

## Initialized values
use vars qw(%parametre);
## Holders for the command-line options : all of them start undefined, except $verbose.
my $help ;

## Input given as a file of masses
my ( $input_file, $line_header, $col_id, $col_mass ) ;

## Input given as a mass on the command line
my $mass ;

## Search parameters
my ( $tolerance, $mode, $charge, $has_golden_rules, $atomes, $atomes_basic ) ;

## Optional maximum value per atom
my (
	$C13_max_value, $C12_max_value, $H_max_value,  $N_max_value,
	$O_max_value,   $P_max_value,   $S_max_value,  $F_max_value,
	$Cl_max_value,  $K_max_value,   $Br_max_value, $Na_max_value,
) ;
my $customAtomValues ;

## Output files
my ( $output_csv, $output_html ) ;

## Default verbose level
my $verbose = 2 ;

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
## Holders of the optional per-atom maximum values, keyed by atom label.
## Each entry is turned into a "<atom>_max_value:i" switch below.
my %max_value_holder_for = (
	'C12' => \$C12_max_value,
	'C13' => \$C13_max_value,
	'H'   => \$H_max_value,
	'N'   => \$N_max_value,
	'O'   => \$O_max_value,
	'P'   => \$P_max_value,
	'S'   => \$S_max_value,
	'F'   => \$F_max_value,
	'Cl'  => \$Cl_max_value,
	'K'   => \$K_max_value,
	'Br'  => \$Br_max_value,
	'Na'  => \$Na_max_value,
) ;
my @max_value_switches = map { ( $_.'_max_value:i' => $max_value_holder_for{$_} ) } sort keys %max_value_holder_for ;

## Bind every command-line switch to its holder variable.
GetOptions(
	## help
	'h'              => \$help,
	## input as a file of masses
	'input:s'        => \$input_file,
	'colId:i'        => \$col_id,
	'nbHeader:i'     => \$line_header,
	'colmass:i'      => \$col_mass,
	## input as a mass
	'masse:s'        => \$mass,
	## search parameters
	'tolerance:f'    => \$tolerance,
	'mode:s'         => \$mode,
	'charge:i'       => \$charge,
	'regleOr:s'      => \$has_golden_rules,
	'atomes_basic:s' => \$atomes_basic, # [basic atoms like CNOHP]
	'atomes_sup:s'   => \$atomes,
	#'customAtomValues:s' => \$customAtomValues,
	## optional maximum value per atom
	@max_value_switches,
	## outputs and verbosity
	'output1:s'      => \$output_csv,
	'outputView:s'   => \$output_html,
	'verbose:i'      => \$verbose,
) ;
         
#=============================================================================
#                                EXCEPTIONS
#=============================================================================
## -h given : print the usage (the help subroutine ends the script)
help() if $help ;

#=============================================================================
#                                MAIN SCRIPT
#=============================================================================

#print "CUSTOM C12: $C12_max_value\n" if (defined $C12_max_value) ;
#print "CUSTOM H: $H_max_value\n" if (defined $H_max_value) ;
#print "CUSTOM N: $N_max_value\n" if (defined $N_max_value) ;
#print "CUSTOM O: $O_max_value\n" if (defined $O_max_value) ;
#print "CUSTOM P: $P_max_value\n" if (defined $P_max_value) ;
#print "CUSTOM S: $S_max_value\n" if (defined $S_max_value) ;
#print "CUSTOM F: $F_max_value\n" if (defined $F_max_value) ;
#print "CUSTOM Cl: $Cl_max_value\n" if (defined $Cl_max_value) ;
#print "CUSTOM K: $K_max_value\n" if (defined $K_max_value) ;
#print "CUSTOM Br: $Br_max_value\n" if (defined $Br_max_value) ;
#print "CUSTOM Na: $Na_max_value\n" if (defined $Na_max_value) ;
#print "CUSTOM C13: $C13_max_value\n" if (defined $C13_max_value) ;

## -------------- Conf file and verbose ------------------------ :
## $CONF receives the settings parsed from the *.cfg file(s) found next to this script
## (verbose level is 3 for debug).
my ( $CONF ) = ( undef ) ;
my $time_start = time ; ## start of the run, used for the elapsed time printed at the end of the script

## Every *.cfg file found is parsed in turn : when several exist, the last one parsed is kept.
foreach my $conf_file ( <$binPath/*.cfg> ) {
	my $oConf = lib::conf->new() ;
	$CONF = $oConf->as_conf($conf_file) ;
}
## Every later step reads its settings from $CONF (tolerance, HR2 exe, html template, ...) :
## stop here with an explicit message rather than failing later on an unrelated error.
if ( !defined $CONF ) {
	croak "No configuration file (*.cfg) found or loaded from $binPath -- end of script\n" ;
}
## --------------- Global parameters ---------------- :
## Holders filled step by step by the rest of the script.
my ( $ids, $masses, $hr_cmds, $results, $parsed_results ) ;
my $complete_rows ;
my ( $hr_atoms_list, $hr_atoms_and_ranges, $set_tol ) ;

## Check and manage params
my $ohr = lib::hr->new() ;

## set tolerance : manage_tolerance receives both the command-line value and the conf value
$set_tol = $ohr->manage_tolerance( \$tolerance, \$CONF->{'tolerance'} ) ;

## check HR exe envt : an undefined answer stops the script
my $hr_check = $ohr->check_hr_exe( \$CONF->{'HR2_EXE'}, \$CONF->{'HR2_VERSION'} ) ;
defined $hr_check
	or croak "No hr exe available (wrong path) or wrong version will be used  -- end of script\n" ;

## manage atoms and their ranges
#$hr_atoms_list = $ohr->manage_atoms(\$atomes, \$CONF->{'DEFAULT_ATOMS'}) ; ## DEPRECATED

# manage atoms and their ranges with a hash structure
# the optional per-atom maximum values are passed in a fixed order, after the conf
my $AtomsFullConf = $ohr->manageAtomsConf(
	$CONF,
	$C12_max_value, $C13_max_value,
	$H_max_value,   $N_max_value,  $O_max_value,  $P_max_value,
	$S_max_value,   $F_max_value,  $Cl_max_value, $K_max_value,
	$Br_max_value,  $Na_max_value
) ;

# atoms selected on the command line (basic and supplementary)
my $atomsCurrentConf = $ohr->manage_atoms_and_ranges( $AtomsFullConf, $atomes_basic, $atomes ) ;

## Parsing input file with masses/ids or unik mass :
## a usable -masse value is checked first, then the -input file.
my $has_single_mass = ( defined($mass) && $mass ne "" && $mass > 0 ) ;

if ( $has_single_mass ) {
	## manage only one mass
	$ids    = [ 'mass_01' ] ;
	$masses = [ $mass ] ;
}
elsif ( defined($input_file) && $input_file ne "" && -e $input_file ) {
	## manage csv file containing list of masses

	## parse all csv for later : output csv build
	my $full_parser = lib::csv->new() ;
	my $full_csv    = $full_parser->get_csv_object( "\t" ) ;
	$complete_rows  = $full_parser->parse_csv_object( $full_csv, \$input_file ) ;

	## parse csv ids and masses
	my $column_parser = lib::csv->new() ;
	my $column_csv    = $column_parser->get_csv_object( "\t" ) ;
	## a header is declared only when -nbHeader is given and greater than 0
	my $is_header     = ( defined($line_header) && $line_header > 0 ) ? 'yes' : 'no' ;

	## retrieve mz values on csv
	$masses = $column_parser->get_value_from_csv_multi_header( $column_csv, $input_file, $col_mass, $is_header, $line_header ) ;
	## retrieve ids values on csv
	$ids    = $column_parser->get_value_from_csv_multi_header( $column_csv, $input_file, $col_id, $is_header, $line_header ) ;
}
else {
	## neither a mass nor an existing file : nothing to work on
	croak "Can't work with HR2 : missing input file or mass (list of masses, ids)\n" ;
}

## check using golden rules
## An explicit "no" clears the option. The comparison is case-insensitive because the usage
## message documents the lowercase form (-regleOr [yes or no]), and the defined() guard avoids
## an "uninitialized value" warning when -regleOr is not given at all.
if ( ( defined $has_golden_rules ) and ( uc($has_golden_rules) eq 'NO' ) ) { $has_golden_rules = undef ; }

## ---------------- launch queries -------------------- :

## prepare cmd : one command line is queued in $hr_cmds for each input mass
for my $current_mz ( @{$masses} ) {
	## computes mass
	my $mode_handler = lib::hr->new() ;
	my ( $exact_mass ) = $mode_handler->manage_mode(
		\$mode, \$charge, \$CONF->{'electron'}, \$CONF->{'proton'}, \$current_mz
	) ;
	if ( $verbose == 3 ) { print "Current MZ ${$exact_mass} send to HR\n" ; }

	## build exe line
	my $cmd_builder = lib::hr->new() ;
	my $hr_cmd = $cmd_builder->config_hr_exe(
		\$CONF->{'HR2_EXE'}, \$tolerance, $exact_mass, \$has_golden_rules, \$atomsCurrentConf
	) ;
	if ( $verbose == 3 ) { print "\t${$hr_cmd}\n" ; }

	## config_hr_exe hands back a reference : the command string itself is stored
	push @{$hr_cmds}, ${$hr_cmd} ;
}

## MultiThreading execution of Hr :
## $threads drives the threaded calls. The worker routines are looked up by name with can(),
## so a missing routine simply leaves the corresponding result undefined.
my $threads = lib::hr->new() ;
my $hr_object = lib::hr->new() ;
my $hr_exe_method = $hr_object->can('hr_exe') ;
## nothing to launch when no command was prepared (no mass read from the inputs)
if ( ( $hr_exe_method ) and ( defined $hr_cmds ) ) {
	$results = $threads->threading_hr_exe( $hr_exe_method, $hr_cmds ) ;
}

## MultiThreading parsing of Hr outputs :
my $hrres_object = lib::hr->new() ;
## the parser is tested and fetched on the same object
my $hr_parser_method = $hrres_object->can('hr_out_parser') ;
if ( ( $hr_parser_method ) and ( defined $results ) ) {
	$parsed_results = $threads->threading_hr_exe( $hr_parser_method, $results ) ;
}

## -------------- Produce HTML/CSV output ------------------ :
## Summary of the search settings given to the HTML writer. -mode and -charge are optional on the
## command line : an undefined value is written as an empty string instead of raising a warning.
my $search_condition = 'Mode used: '.( defined $mode ? $mode : '' )
	.' / Charge: +'.( defined $charge ? $charge : '' )
	.' / Mass tolerance: '.$$set_tol
	.' / Composition: '.$atomsCurrentConf ;

## Uses N mz and theirs entries per page (see config file).
## The value is used as a divisor : refuse a missing or null value with an explicit message.
my $entries_per_page = $CONF->{HTML_ENTRIES_PER_PAGE} ;
if ( ( !defined $entries_per_page ) or !( $entries_per_page > 0 ) ) {
	croak "Can't create a HTML output for HR2 : HTML_ENTRIES_PER_PAGE is missing or null in the conf file\n" ;
}
# how many pages you need with your input mz list?
my $nb_pages_for_html_out = ceil( scalar(@{$masses} ) / $entries_per_page )  ;

if ( ( defined $output_html ) and ( defined $parsed_results ) ) {
	my $oHtml = lib::hr->new() ;
	## the tbody object is built in three steps : empty pages, then mz, then their entries
	my ($tbody_object) = $oHtml->set_html_tbody_object( $nb_pages_for_html_out, $entries_per_page ) ;
	($tbody_object) = $oHtml->add_mz_to_tbody_object($tbody_object, $entries_per_page, $masses, $ids, $parsed_results ) ;
	($tbody_object) = $oHtml->add_entries_to_tbody_object($tbody_object, $parsed_results) ;
	
	#print Dumper $tbody_object ;
	
	## the html template is read from the directory of this script
	my $html_file = $binPath.'/'.$CONF->{'HTML_TEMPLATE'} ;
	my $output = $oHtml->write_html_skel(\$output_html, $tbody_object, $nb_pages_for_html_out, $search_condition, $html_file, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ;
	
} ## END IF
else {
	## the HTML output is mandatory : the script stops when it cannot be produced
	croak "Can't create a HTML output for HR2 : no result found or your output file is not defined\n" ;
}

## Tabular output : optional, written only when -output1 is given and results were parsed.
if ( ( defined $output_csv ) and ( defined $parsed_results ) ) {
	# produce a csv based on METLIN format
	my $ocsv = lib::hr->new() ;
	## same priority as the input parsing step : a usable -masse value wins over -input,
	## so the file branch is never taken without the rows of the input file
	my $is_single_mass = ( ( defined $mass ) and ( $mass ne "" ) and ( $mass > 0 ) ) ? 1 : 0 ;
	
	if ( $is_single_mass ) {
		$ocsv->write_csv_one_mass($masses, $ids, $parsed_results, $output_csv) ;
	}
	elsif (defined $input_file) {
		## -nbHeader not given is handled like 0, as the input parsing step does (no header)
		my $nb_header = ( defined $line_header ) ? $line_header : 0 ;
		my $hr_matrix = undef ;
		if    ( $nb_header == 1 ) { $hr_matrix = $ocsv->set_hr_matrix_object('hr2', $masses, $parsed_results ) ; }
		elsif ( $nb_header == 0 ) { $hr_matrix = $ocsv->set_hr_matrix_object(undef, $masses, $parsed_results ) ; }
		else {
			## other values keep the previous behaviour (no hr matrix is built) but are now reported
			carp "nbHeader = $nb_header is not handled for the tabular output (0 or 1 expected)\n" ;
		}
		
		$hr_matrix = $ocsv->add_hr_matrix_to_input_matrix($complete_rows, $hr_matrix) ;
		$ocsv->write_csv_skel(\$output_csv, $hr_matrix) ;
	}
} ## END IF
else {
#	croak "Can't create a tabular output for HR2 : no result found or your output file is not defined\n" ;
}



### VERBOSE OUTPUTs
## Debug level only (-verbose 3) : dump the main data structures, in the order they were built.
if ( $verbose == 3 ) {
	## each entry is a title followed by the value to dump
	my @debug_sections = (
		[ "\n\n-- Conf file contains :\n",       $CONF ],
		[ "-- Atoms input list :\n",             $atomes_basic ],
		[ "-- Suppl. atoms input list :\n",      $atomes ],
		[ "-- HR envt ready  :\n",               $hr_check ],
		[ "-- Atoms and ranges :\n",             $atomsCurrentConf ],
		[ "-- Tolerance :\n",                    $set_tol ],
		[ "-- Complete input file :\n",          $complete_rows ],
		[ "-- Inputs initiales masses :\n",      $masses ],
		[ "-- Inputs initiales ids :\n",         $ids ],
		[ "-- Hr_Cmds :\n",                      $hr_cmds ],
#		[ "-- Hr_Results :\n",                   $results ],
#		[ "-- Hr_parsed Results :\n",            $parsed_results ],
	) ;
	
	foreach my $section ( @debug_sections ) {
		my ( $title, $content ) = @{$section} ;
		print $title ;
		print Dumper $content ;
	}
	
	## only the number of raw results is reported, not their content
	print "-- Hr_Results return  : ".scalar( @{$results} )."\n" ;
}

## Elapsed time, in seconds, since $time_start was taken
my $time_end = time ;
my $seconds  = $time_end - $time_start ;
print "\n------  Time used in threaded mode by 6 : ".$seconds." seconds --------\n\n" ;






#====================================================================================
# Help subroutine called with -h option
# number of arguments : 0
# Argument(s)        :
# Return           : never returns - prints the usage on STDERR then exits with status 1
#====================================================================================
sub help {
	## The switches are listed as they are declared in the GetOptions call of this script :
	## atoms are given with -atomes_basic / -atomes_sup (there is no -atome switch).
	print STDERR "
hr2_manager.pl

# hr2_manager is a script to elucide chemical formula by their accurate masses. The HiRes program is integrate in this package
# Input : a accurate mass or a file of masses
# Authors : Lain Pavot, Marion Landi and Franck Giacomoni
# Email : franck.giacomoni\@inrae.fr
# Version : 1.1.2
# Created : 01/12/2011
# Last Update : 06/12/2022
USAGE :		 
		hr2_manager.pl -h or
		hr2_manager.pl -input [csv file of masses] -colId [int] -colmass [int] -nbHeader [int] -tolerance [float] -mode [positive, neutral or negative] -charge [int] -regleOr [yes or no] -atomes_basic [C, N, O, P, H ] -atomes_sup [S, F, Cl, K, B, A, 1 ] -output1 [csv file] -outputView [html file]
		hr2_manager.pl -masse [double] -tolerance [float] -mode [positive, neutral or negative] -charge [int] -regleOr [yes or no] -atomes_basic [C, N, O, P, H ] -atomes_sup [S, F, Cl, K, B, A, 1 ] -output1 [csv file] -outputView [html file]
OPTIONAL :
		-C12_max_value [int] -C13_max_value [int] -H_max_value [int] -N_max_value [int] -O_max_value [int] -P_max_value [int] -S_max_value [int] -F_max_value [int] -Cl_max_value [int] -K_max_value [int] -Br_max_value [int] -Na_max_value [int] -verbose [int]
	" ;
	## the exit status is kept at 1, as before
	exit(1);
}

## END of script - F Giacomoni 

__END__

=head1 NAME

 hr2_manager.pl -- script for launch / manage hr2 binary

=head1 USAGE

 hr2_manager.pl -h or
 hr2_manager.pl -input [csv file of masses] -colId [int] -colmass [int] -nbHeader [int] -tolerance [float] -mode [positive, neutral or negative] -charge [int] -regleOr [yes or no] -atomes_basic [C, N, O, P, H ] -atomes_sup [S, F, Cl, K, B, A, 1 ] -output1 [csv file] -outputView [html file]
 hr2_manager.pl -masse [double] -tolerance [float] -mode [positive, neutral or negative] -charge [int] -regleOr [yes or no] -atomes_basic [C, N, O, P, H ] -atomes_sup [S, F, Cl, K, B, A, 1 ] -output1 [csv file] -outputView [html file]

=head1 SYNOPSIS

This script manages the hr2 binary, which elucidates raw chemical formulas from exact masses.

=head1 DESCRIPTION

This main program is a module to elucidate chemical formula with HiRes program. Source is available on Fiehn lab web.

=over 4

=item B<function01>

=item B<function02>

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@inrae.frE<gt>
Marion Landi E<lt>marion.landi@inrae.frE<gt>
Lain Pavot E<lt>lain.pavot@inrae.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 18/07/2012

version 2 : 02/10/2013

version 3 : 20/02/2014

version 4 : 01/03/2019

version 5 : 09/12/2022

version 6 : 15/02/2023

=cut