Mercurial > repos > fgiacomoni > lipidmaps_textsearch

#! perl

use strict ;
no strict "refs" ;
use warnings ;
use Carp qw (cluck croak carp confess) ;
use Exporter ;
use diagnostics ;

use Data::Dumper ;
use POSIX ;
use XML::Twig;
use Getopt::Long ;
use Time::HiRes;


## Locate the directory that contains this Perl script
use FindBin;
## Defines where the project .pm and .conf files are located
use lib $FindBin::Bin ;
#my $libPath = $FindBin::Bin."/lib";
my $binPath = $FindBin::Bin ;

## dedicated lib

use lib::lipidmaps ;
use lib::parser ;
use lib::writer ;
# more inra lib
use lib::conf  qw( :ALL ) ;
use lib::csv  qw( :ALL ) ;
use lib::operations  qw( :ALL ) ;

## Initialized values
#
#my $version = '1.2';
my $version = '1.2.1'; # add a output to summaries results.

## Command-line parameters: each one stays undefined until GetOptions binds a value to it.
my ( $help, $input_file, $line_header, $col_mass, $col_rt, $decimal, $round_type, $delta, $mode ) ;
my ( $list_oxidation, $list_neutral_loss ) ;
my ( $col_classif_id, $selected_cat, $selected_cl, $selected_subcl ) ;
my ( $output_csv_file, $output_html_file, $output_summary_file ) ;

## Verbose levels (1 OR 3)
my $verbose = 3 ;

## Getopt::Long specification: option name => variable receiving the value.
## NOTE: $col_rt is declared but bound to no option (the "colrt:i" entry is disabled).
my @option_spec = (
	"help|h"			=> \$help,					# print the usage
	"input|i:s"			=> \$input_file,			# path of the input file (tabular format) -- Mandatory
	"lineheader:i"		=> \$line_header,			# header presence in the tabular file
	"colmass:i"			=> \$col_mass,				# input file column holding the masses to query -- Mandatory
	"decimal:i"			=> \$decimal,				# number of significant decimals on masses -- Mandatory
	"listoxidation:s"	=> \$list_oxidation,		# optional: list of atoms to handle on the experimental masses
	"listneutralloss:s"	=> \$list_neutral_loss,		# optional: list of atoms to handle on the experimental masses
	"round:s"			=> \$round_type,			# type of truncation -- Mandatory
	"delta:f"			=> \$delta,					# mass delta -- Mandatory
	"cat:s"				=> \$selected_cat,			# number of the main category in LIPIDMAPS -- Optional
	"class:s"			=> \$selected_cl,			# number of the main class in LIPIDMAPS -- Optional
	"subclass:s"		=> \$selected_subcl,		# number of the sub class in LIPIDMAPS -- Optional
	"output:s"			=> \$output_csv_file,		# file + path of the results (CSV) -- Mandatory
	"view:s"			=> \$output_html_file,		# file + path of the results view (HTML) -- Mandatory
	"summary:s"			=> \$output_summary_file,	# file + path of the results summary (CSV) -- Optional
	"colclassif:i"		=> \$col_classif_id,		# input file column holding LM classification ids -- Optional
	"mode:s"			=> \$mode,					# acquisition mode of the initial data
) ;

GetOptions( @option_spec ) ;

#=============================================================================
#                                EXCEPTIONS
#=============================================================================
## -help only prints the usage: stop here instead of running the analysis
## (which would otherwise fail on the missing mandatory arguments).
if ( $help ) { help() ; exit(0) ; }

## --------------- Global parameters ---------------- :
my $nb_pages_for_html_out = 1 ;

## Conf file
## Every *.conf file found next to the script is parsed; when several exist the last one wins.
my ( $CONF, %RULES, %RECIPES, %TRANSFO ) ;
foreach my $conf ( <$binPath/*.conf> ) {
	my $oConf = lib::conf::new() ;
	$CONF = $oConf->as_conf($conf) ;
}

## Fail early: without a parsed configuration $CONF would be silently autovivified as an
## empty hash below and the analysis would run with no search url, rules or recipes.
if ( !defined $CONF ) {
	croak "[ERROR] No configuration file (*.conf) could be loaded from $binPath\n" ;
}

## -------------- HTML template file ------------------------ :
## The template found next to the script is registered in the configuration (last one wins).
foreach my $html_template ( <$binPath/*.tmpl> ) { $CONF->{'HTML_TEMPLATE'} = $html_template ; }

## Dispatch the configuration entries by key prefix :
foreach my $conf_key ( keys %{$CONF} ) {
	if    ( $conf_key =~/^RULE/ )   { $RULES{$conf_key}   = $CONF->{$conf_key} ; } ## rules for clustering
	elsif ( $conf_key =~/^RECIPE/ ) { $RECIPES{$conf_key} = $CONF->{$conf_key} ; } ## fields retrieved with each rule
	elsif ( $conf_key =~/^ANNOT/ )  { $TRANSFO{$conf_key} = $CONF->{$conf_key} ; } ## transformation annotations used in output files
}

## Working variables shared by the parsing, query and writing steps.
## Values parsed from the input file (undefined until step 01 fills them) :
my ( $init_csv_rows, $init_mzs, $init_rts, $classif_ids, $round_init_mzs ) ;

## Modification (oxidation / neutral loss) names and values, merged :
my ( @ox_or_loss_names, @ox_or_loss_values ) ;

## One element per input mass, filled by the query loop :
my ( @transfo_init_mzs, @transfo_annotations, @transfo_init_mz_queries, @transfo_init_mz_results ) ;
my ( @entries_results, @clusters_results, @entries_total_nb ) ;

## Modification references, empty lists by default :
my $ox_names    = [] ;
my $ox_values   = [] ;
my $loss_names  = [] ;
my $loss_values = [] ;

## Header flag and result model used by the writers :
my ( $is_header, $tbody_object ) ;


print "-----------**********START of MAIN LIPIDMAPS -- version $version *********-------------\n" if ($verbose == 3);

#### --------------------------------- 01 :: Prepare all and Parsing steps on inputs -------------------------------------

## Open the tabular input file, then extract and transform the masses.
## Fills $init_csv_rows, $is_header, $init_mzs, $round_init_mzs and (optionally) $classif_ids.
if ( ( defined $input_file ) and ( -e $input_file ) ) {
	print "\n[INFO] Open input file and get values...\n" if ($verbose == 3);
	## keep every parsed row of the input: they are reused later to build the CSV output
	my $ocsv_input  = lib::csv->new() ;
	my $csv = $ocsv_input->get_csv_object( "\t" ) ;
	$init_csv_rows = $ocsv_input->parse_csv_object($csv, \$input_file) ;

	## a strictly positive -lineheader value flags the presence of header line(s)
	if ( ( defined $line_header ) and ( $line_header > 0 ) ) { $is_header = 'yes' ;	}

	## parse masses
	## NOTE(review): without -colmass, $init_mzs stays undefined and no mass is queried -- confirm this is intended
	if ( defined $col_mass ) {
		print "[INFO] Get masses from input file $input_file ...\n" if ($verbose == 3);
#		print "[INFO] Get RT from input file $input_file ...\n" if ($verbose == 3);
		my $ocsv = lib::csv->new() ;
		my $csv = $ocsv->get_csv_object( "\t" ) ;
		$init_mzs = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_mass, $is_header, $line_header ) ; ## retrieve mz values on csv
#		$init_rts = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_rt, $is_header, $line_header ) ; ## retrieve rt values on csv
	}

	## Adjust the mz to the instrument mode (POS/NEG)
	if ( ( defined $mode ) and ( ($mode eq 'POS') or ($mode eq 'NEG') ) ) {
		print "\t [INFO] Apply mass mode transforming (POS to NEU or NEG to NEU) ...\n" if ($verbose == 3);
		my @mode_init_mzs = () ;
		my $omode = lib::operations::new() ;
		foreach my $mz (@$init_mzs) {
			## NOTE(review): the constants are presumably the charge, the electron mass and the
			## hydrogen mass -- verify against lib::operations::manage_mode
			push (@mode_init_mzs, ${$omode->manage_mode(\$mode, \1, \0.0005486, \1.007825, \$mz)} ) ;
		}

		## the transformed list replaces the initial one only when no mass was lost
		if ( (scalar @$init_mzs) == (scalar @mode_init_mzs) ) {
			$init_mzs = \@mode_init_mzs ;
		}
		else {
			carp "[ERROR] The mode managing process failed and init mzs have been corrompted\n"
		}
	}
	else {
		print "\t [INFO] Apply no mass mode transforming\n" if ($verbose == 3);
	}

	## round masses
	## NOTE(review): $round_init_mzs drives the query loop; without -round and -decimal it stays
	## undefined and no query is sent -- confirm this is intended
	if ( ( defined $round_type ) and ( defined $decimal ) ) {
		print "\t [INFO] Apply mass rounding ...\n" if ($verbose == 3);
		my $oround = lib::operations::new() ;
		if 		( $round_type eq 'truncation' ) { 	$round_init_mzs = $oround->truncate_nums( $init_mzs, $decimal ) ; 			}
		elsif 	( $round_type eq 'round' ) {		$round_init_mzs = $oround->round_nums( $init_mzs, $decimal ) ;  			}
		else {										croak "The selected option for data transformation is unknown !\n" ; 	}
	}
	## parse classif ids -- optional
	if ( defined $col_classif_id ) {
		print "\t [INFO] Get LM classification IDS from input file $input_file ...\n" if ($verbose == 3);
		my $ocsv = lib::csv::new() ;
		my $csv = $ocsv->get_csv_object( "\t" ) ;
		$classif_ids = $ocsv->get_value_from_csv( $csv, $input_file, $col_classif_id, $is_header, $line_header ) ;
	}


	## Uses N mz and theirs entries per page (see config file).
	# how many pages you need with your input mz list?
#	$nb_pages_for_html_out = ceil( scalar(@{$init_mzs} ) / $CONF->{HTML_ENTRIES_PER_PAGE} )  ;
#	print "[INFO] Your analysis will generate $nb_pages_for_html_out pages of results...\n" if ($verbose == 3);

}
else {
	## $input_file is undefined when -input was not given: use a printable label so that the
	## error message is complete and raises no "uninitialized value" warning.
	my $input_label = ( defined $input_file ) ? $input_file : '(no -input option given)' ;
	print "[ERROR] Can't find any input file $input_label\n" if ($verbose == 3);
	croak "Can't find any input file $input_label\n" ;
}

#### ------------------- 02 :: optionnal work on masses with neutral loss and/or oxydation == modif : -------------------

# get and merge ox and neutral loss envt :
my $oparser = lib::parser::new() ;

## Both lookups read the parsed configuration: nothing is requested without it.
if ( defined $CONF ) {
	if ( defined $list_oxidation ) {
		( $ox_names, $ox_values ) = $oparser->get_oxidation_ref( $CONF, $list_oxidation ) ;
	}
	if ( defined $list_neutral_loss ) {
		( $loss_names, $loss_values ) = $oparser->get_neutral_loss_ref( $CONF, $list_neutral_loss ) ;
	}
}

## Merge names and values in the same order: oxidations first, then neutral losses.
foreach my $modif ( [ $ox_names, $ox_values ], [ $loss_names, $loss_values ] ) {
	my ( $modif_names, $modif_values ) = @{$modif} ;
	next unless ( @{$modif_values} ) ;
	push( @ox_or_loss_values, @{$modif_values} ) ;
	push( @ox_or_loss_names, @{$modif_names} ) ;
}

# prepare a list of masses indpt of modif (ox/neutral loss) presence.
## For each rounded input mass: build the list of masses to query (the mass itself, then one
## mass per oxidation / neutral loss), send one LIPIDMAPS query per mass and store the results.
my $init_mz_index = 0 ; ## rank of the current input mass in @transfo_init_mzs
my $i = 0 ;             ## rank of the current input mass in $classif_ids

foreach my $init_mz (@{$round_init_mzs}) {

	my @transfo_values_list = () ;
	my @transfo_name_list = () ;
	my $init_annot = 'Init_MZ' ;

	push ( @transfo_values_list, \$init_mz ) ; ## the submitted init mass
	## work on values : one transformed mass per modification
	if ( @ox_or_loss_values ) {
		my $oround = lib::operations::new() ;
		my $round_transfo_mzs = $oround->round_nums( \@ox_or_loss_values, $decimal ) ; ## modification masses are rounded (not truncated)
		foreach my $transfo_mz ( @{$round_transfo_mzs} ) {
			my $osub = lib::operations::new() ;
			my $transfo_init_mz = $osub->subtract_num( $init_mz, $transfo_mz ) ;
			push ( @transfo_values_list, $transfo_init_mz ) ;
		}
	}

	## work on annotation for output
	push ( @transfo_name_list, \$init_annot) ; ## init annot
	foreach my $ox_or_loss_name (@ox_or_loss_names) {
		## One annotation is pushed per modification, even when the configuration has no
		## ANNOT_<name> entry (the raw name is used instead): skipping it would shift every
		## following annotation relative to its mass in @transfo_values_list.
		my $transfo = $TRANSFO{'ANNOT_'.$ox_or_loss_name} ? $TRANSFO{'ANNOT_'.$ox_or_loss_name} : $ox_or_loss_name ;
		push ( @transfo_name_list, \$transfo ) ;
	}

	## push final arrays
	push ( @transfo_init_mzs, \@transfo_values_list ) ;
	push ( @transfo_annotations, \@transfo_name_list ) ;

	## foreach transfo mass (round and/or modif)

	my ( @queries, @query_results, @query_result_entries, @query_result_entry_nbs, @query_result_clusters ) ;

	foreach my $transfo_mz ( @{$transfo_init_mzs[$init_mz_index]} ) {
		print "[INFO] Prepare the $i.th query with the mz: $$transfo_mz... \n" if ($verbose == 3);
		## LM recommandation : If you write a script to automate calls to LMSD,
		# please be kind and do not hit our server more often than once per 20 seconds.
		# We may have to kill scripts that hit our server more frequently.
		# NOTE(review): the pause below is 0.1 second, far below the quoted recommendation.
		Time::HiRes::sleep(0.1); #.1 seconds
		my ( $cat, $cl, $subcl ) = ( undef, undef, undef ) ;

		## get the classif level :
		if ( defined $classif_ids ) {
			## classification taken from the input file, one id per input mass
			if ( $classif_ids->[$i] ) {
				my $olevel = lib::parser::new() ;
				$cat = $olevel->set_category( $classif_ids->[$i] ) ;
				$cl = $olevel->set_class( $classif_ids->[$i] ) ;
				$subcl = $olevel->set_subclass( $classif_ids->[$i] ) ;
			}
			else { croak "This information is not available in your parsing ids\n" ; }
		}
		else {
			## classification taken from the command line, at least one level is required
			if ( ( defined $selected_subcl) or ( defined $selected_cl ) or ( defined $selected_cat ) ) {
				## A level that is missing or starts with 'NA' means "no filter": it stays
				## undefined. The defined() guards avoid matching against undef and passing a
				## reference to an undefined value to the query builder.
				if ( ( defined $selected_cat ) and ( $selected_cat !~ /^NA/ ) ) { $cat = \$selected_cat ; }
				if ( ( defined $selected_cl ) and ( $selected_cl !~ /^NA/ ) ) { $cl = \$selected_cl ; }
				if ( ( defined $selected_subcl ) and ( $selected_subcl !~ /^NA/ ) ) { $subcl = \$selected_subcl ; }
			}
			else { croak "No selected category or classification ids list\n" ; }
		}

		## buid and get http query :
		my $oquery = lib::lipidmaps::new() ;
		my $ref_http_query = $oquery->build_lm_mass_query( \$CONF->{'SEARCH_URL'}, \$delta, $cat, $cl, $subcl ) ; ## build the query for LM WS, return a list of http, get method
		print "\t[INFO] Exec $$ref_http_query \n" if ($verbose == 3);

		## set entries clusters
		my ( $http_result_mz, $http_query_mz ) = $oquery->get_lm_mass_query($ref_http_query, $transfo_mz) ; ## execute the query, return a list of non-splited lm_entries.
		my ( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( undef, undef, undef ) ;
		if ( (defined $http_result_mz) and ( $$http_result_mz ne '' ) ) { # avoid empty LM results
			( $mz_entries_results, $mz_entries_nb ) = $oquery->get_lm_entry_object($http_result_mz, $transfo_mz) ; ## get all features of each entry and return a list of features keept in a hash
			$mz_clusters_results = $oquery->get_cluster_object($mz_entries_results, \%RULES, \%RECIPES) ; ## clustering all entries and return a list of clusters keept in a hash
			print "\t[INFO] The query return $$mz_entries_nb entries\n" if ($verbose == 3);
		}
		else { # manage empty LM results : empty lists and a zero count keep the result arrays aligned
			( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( [], \0, [] ) ;
			print "\t[INFO] The query return none entry with LM\n" if ($verbose == 3);
		}

		push( @queries, $http_query_mz ) ;
		push( @query_results, $http_result_mz ) ;
		push( @query_result_entries, $mz_entries_results ) ;
		push( @query_result_entry_nbs, $mz_entries_nb ) ;
		push( @query_result_clusters, $mz_clusters_results ) ;

	} ## end foreach transfo_mz

	$i++ ; # implem the mz rank

	## one element per input mass in every result array
	push( @transfo_init_mz_queries, \@queries ) ;
	push( @transfo_init_mz_results, \@query_results ) ;
	push( @entries_results, \@query_result_entries ) ;
	push( @entries_total_nb, \@query_result_entry_nbs ) ;
	push( @clusters_results, \@query_result_clusters ) ;

	$init_mz_index++ ;
} ## end foreach init_mz


#### -------------------------------- 05 :: Writes LM results --------------------------------------------

# prepare data and write html output :
## The tbody object is the result model read by both the HTML view and the summary: it is
## built as soon as one of them is requested, so that -summary also works without -view.
if ( ( defined $output_html_file ) or ( defined $output_summary_file ) ) {
	## Adjust html output with only mz with records
	my $nb_mz_with_records = 0 ;
	foreach my $entry_nbs (@entries_total_nb) {
		my $total_entries = 0 ;
		foreach my $nb ( @{$entry_nbs} ) { $total_entries += $$nb ; }
		if ($total_entries > 0) { $nb_mz_with_records++ ; }
	}

	$nb_pages_for_html_out = ceil( $nb_mz_with_records / $CONF->{HTML_ENTRIES_PER_PAGE} )  ;
	if ( defined $output_html_file ) {
		print "[INFO] write HTML output file containing $nb_pages_for_html_out pages\n" if ($verbose == 3);
	}

	my $ohtml = lib::writer->new() ;
	$tbody_object = $ohtml->set_html_tbody_object( $nb_pages_for_html_out ) ;
	$tbody_object = $ohtml->add_mz_to_tbody_object( $tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $init_mzs, \@entries_total_nb) ;
	$tbody_object = $ohtml->add_transformation_to_tbody_object( \@transfo_init_mzs, \@transfo_annotations, $tbody_object ) ;
	$tbody_object = $ohtml->add_cluster_to_tbody_object( \@transfo_init_mzs, \@clusters_results, $tbody_object ) ;
	$tbody_object = $ohtml->add_entry_to_tbody_object( \@transfo_init_mzs, \@clusters_results, \@entries_results, $tbody_object ) ;

	$tbody_object = $ohtml->sort_tbody_object($tbody_object) ;

	## the HTML file itself is only written when -view was given
	if ( defined $output_html_file ) {
		my $output_html = $ohtml->write_html_skel(\$output_html_file, $tbody_object, $nb_pages_for_html_out, $CONF->{'HTML_TEMPLATE'}, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ;
	}
}


# write csv output : add a 'lipidmaps' column to the input file
my $lm_matrix = undef ;
my $ocsv = lib::writer->new() ;
## the new column gets a header only when the input file has one
my $lm_column_header = ( defined $is_header ) ? 'LIPIDMAPS(score::name::mz::formula::adduct::id)' : undef ;
$lm_matrix = $ocsv->set_lm_matrix_object( $lm_column_header, $init_mzs, \@transfo_annotations, \@clusters_results ) ;


$lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($init_csv_rows, $lm_matrix) ;
$ocsv->write_csv_skel(\$output_csv_file, $lm_matrix) ;
print "[INFO] write CSV output file\n" if ($verbose == 3);

# write the optional summary, converted from the tbody object built above
my $lm_sum_matrix = undef ;
if (defined $output_summary_file) {
	$lm_sum_matrix = $ocsv->convert_tbody_to_globalmatrix(undef, $tbody_object) ;
	$ocsv->write_csv_skel(\$output_summary_file, $lm_sum_matrix) ;
	print "[INFO] write CSV SUMMARY output file\n" if ($verbose == 3);
}

print "-----------**********END of MAIN LIPIDMAPS -- version $version *********-------------\n" if ($verbose == 3);

#print "-----------**********RETURNS*********-------------\n" ;
#print "\n----- Init Input Data in CSV -----\n" ;
#print Dumper $init_csv_rows ;
#print "\n---- Init masses parsed ...\n" ;
#print Dumper $init_mzs ;
#print "\n---- Init rts parsed ...\n" ;
#print Dumper $init_rts ;
#print "\n---- Init masses arounded ...\n" ;
#print Dumper $round_init_mzs ;
#print "\n---- Ox ...\n" ;
#print Dumper $ox_names ;
#print Dumper $ox_values ;
#print "\n---- Neutral loss ...\n" ;
#print Dumper $loss_names ;
#print Dumper $loss_values ;
#print "\n---- Applied transformations ('\@ox_or_loss_values') ...\n" ;
#print Dumper @ox_or_loss_values ;
#print "\n---- Masses modif ('\@transfo_init_mzs') ...\n" ;
#print Dumper @transfo_init_mzs ;
#print "\n---- Transfo annotation ('\@transfo_annotations') ...\n" ;
#print Dumper @transfo_annotations ;
#print "\n---- Queries ('\@transfo_init_mz_queries')...\n" ;
#print Dumper @transfo_init_mz_queries ;
#print "\n---- WS Results ('@transfo_init_mz_results')...\n" ;
#print Dumper @transfo_init_mz_results ;
#print "\n---- Entries results ('\@entries_results')...\n" ;
#print Dumper @entries_results ;
#print "\n---- Entries results numbers ('\@entries_total_nb')...\n" ;
#print Dumper @entries_total_nb ;
#print "\n---- Clusters results ('\@clusters_results')...\n" ;
#print Dumper @clusters_results ;
#print "\n---- Data model filed...\n" ;
#print "...with csv->\n" ;
#print Dumper $lm_matrix ;
#print "...with html->\n" ;
#print Dumper $tbody_object ;
#print "...with SUMMARY csv->\n" ;
#print Dumper $lm_sum_matrix ;


#====================================================================================
# Help subroutine called with -h option
# Prints the script description and usage on STDERR; the text embeds the
# script-level $version. It does not stop the program: the caller decides.
# number of arguments : 0
# Argument(s)        :
# Return           : 1
#====================================================================================
sub help {
    ## The usage only lists options accepted by GetOptions: -colrt is not one of them,
    ## -lineheader, -mode and -summary are.
    print STDERR "

	# wsdl_lipidmaps
	# Input :
	# Author : Franck GIACOMONI and Marion LANDI
	# Email : fgiacomoni\@clermont.inra.fr
	# Version : $version
	# Created : 16/07/2012
	# Updated: 09/06/2016 - REST implem
	USAGE :
	        wsdl_lipidmaps.pl -help
	        wsdl_lipidmaps.pl
	        	-input \$file_input -lineheader \$nb_header_lines -colmass \$col_mass -decimal \$decimal -round \$round_type (truncation|round) -delta \$tolerance
	        	-output \$output_result -view \$output_view
	        	-cat -class -subclass OR -colclassif
	        	-mode \$mode (POS|NEG) -summary \$output_summary -listneutralloss \$neutral_loss -listoxidation \$oxidation [optionnal]
	";
    return 1 ;
}
author	fgiacomoni
date	Wed, 03 Oct 2018 05:47:14 -0400
parents	1276908e8fc4
children