Mercurial > repos > fgiacomoni > bank_inhouse

package lib::bih ;

use strict;
use warnings ;
use Exporter ;
use Carp ;
use Math::BigFloat;

use LWP::Simple;
use LWP::UserAgent;
use URI::URL;
use SOAP::Lite;
use Encode;
use HTML::Template ;
#use Net::SSL ;
use Data::Dumper ;
#use REST::Client;
use JSON;

use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);

our $VERSION = "1.0";
our @ISA = qw(Exporter);
our @EXPORT = qw( db_pforest_get_clean_range map_pfjson_bankobject prepare_multi_masses_query );
our %EXPORT_TAGS = ( ALL => [qw( db_pforest_get_clean_range map_pfjson_bankobject prepare_multi_masses_query  )] );

=head1 NAME

My::Module - An example module

=head1 SYNOPSIS

    use My::Module;
    my $object = My::Module->new();
    print $object->as_string;

=head1 DESCRIPTION

This module does not really exist, it
was made for the sole purpose of
demonstrating how POD works.

=head1 METHODS

Methods are :

=head2 METHOD new

	## Description : new
	## Input : $self
	## Ouput : bless $self ;
	## Usage : new() ;

=cut

sub new {
    ## Variables
    my $self={};
    bless($self) ;
    return $self ;
}
### END of SUB

=head2 METHOD check_interval

	## Description : checks that the value is in the interval
	## Input : $value, $min, $max
	## Output : $message
	## Usage : $message= check_interval($value, $min, $max) ;

=cut
## START of SUB
sub check_interval {
	## Retrieve Values
    my $self = shift ;
    my ( $value, $min, $max ) = @_ ;

    my ( $message ) = undef ;
    if ( $min !~ m/^-?\d+\.?\d*$/ ) {
    	$message="the minimum '".$min."' isn't a valid number!";
    }
    elsif ( $max !~ m/^-?\d+\.?\d*$/ ) {
    	$message="the maximum '".$max."' isn't a valid number!";
    }
    elsif ( $value !~ m/^-?\d+\.?\d*$/ ) {
    	$message="'".$value."' isn't a valid number!";
    }
    elsif ( $value < $min ) {
    	$message="'".$value."' is below the minimum!";
    }
    elsif ( $value > $max ) {
    	$message="'".$value."' is greater than the maximum!";
    }
    else {
    	$message="OK" ;
    }
    return($message) ;
}
## END of SUB

=head2 METHOD format_manual_list_values

	## Description : extract a list of values and built identifiers
	## Input : $value, $sep
	## Output : \@values, \@ids
	## Usage : ($masses, $ids)= format_manual_list_values($mass, $sep) ;

=cut
## START of SUB
sub format_manual_list_values {
	## Retrieve Values
    my $self = shift ;
    my ( $value, $sep ) = @_ ;

    my ( @values, @ids ) = ( (), () ) ;

    if ( ( defined $value ) and ( $value ne "" ) and ( defined $sep ) ) {
    	@values = split($sep, $value);
		my $nb = 1+int(log($#values+1)/log(10));
		my $sf = '%0'.$nb.'s';
		for (my $i=1 ; $i<=$#values+1 ; $i++){
			my $id = sprintf($sf, $i) ;
			push (@ids,"value_".$id );
		}
    }
    else {
    	croak "No value list found \n" ;
    }
    return(\@values, \@ids) ;
}
## END of SUB

=head2 METHOD parse_bank_interest

	## Description : parse csv object and return a two-dimensional array in a hash by grouping information according to the interest value as key.
	## Input : $csv, \$file, $col_interest
	## Output : \%bank_interest, \$head
	## Usage : my ( $bank_interest, $head ) = parse_bank_interest ( $csv, $file, $col_interest ) ;

=cut
## START of SUB
sub parse_bank_interest {
	## Retrieve Values
    my $self = shift ;
    my ( $csv, $file, $col_interest ) = @_ ;

    my $bank_interest = () ;
    my $oBih = new() ;

	open my $fh, "<:encoding(utf8)", $$file or die "Can't open csv file $$file: $!";

	my $head = $csv->getline( $fh );
	my $nb_line = 1 ;	## for error messages

	while ( my $row = $csv->getline( $fh ) ) {
	    $nb_line++ ;
		if ($#$head != $#$row) {	croak "Not the same number of columns over the file of the interest bank! See the line: $nb_line in your input bank file!\n" ;	}
		## it would be more general to do the following masse check out this function
		my ($MZmessage) = $oBih->check_interval($$row[$col_interest], 0, 10000) ;
		if ($MZmessage ne 'OK') {	$col_interest++;	croak "There is at least one row (See the line : $nb_line) where in the column $col_interest : $MZmessage\n" ;	} #/!\ col_interest++ to print to user a not-table (@) value.

	    push (@{$bank_interest->{$$row[$col_interest]}}, $row ) ;
	}
#	print Dumper $bank_interest; exit;
	$csv->eof or $csv->error_diag();
	close $fh;
    return($bank_interest, $head) ;
}
## END of SUB

=head2 METHOD mz_delta_conversion

	## Description : returns the minimum and maximum mass according to the delta
	## Input : \$mass, \$delta_type, \$mz_delta
	## Output : \$min, \$max
	## Usage : ($min, $max)= mz_delta_conversion($mass, $delta_type, $mz_delta) ;

=cut
## START of SUB
sub mz_delta_conversion {
	## Retrieve Values
    my $self = shift ;
    my ( $mass, $delta_type, $mz_delta ) = @_ ;
    my ( $min, $max ) = ( undef, undef ) ;

	if ($$delta_type eq 'ppm'){
		$min = $$mass - ($$mz_delta * 10**-6 * $$mass);
		$max = $$mass + ($$mz_delta * 10**-6 * $$mass) + 0.0000000001; ## it's to included the maximum value in the search
	}
	elsif ($$delta_type eq 'Da'){
		$min = $$mass - $$mz_delta;
		$max = $$mass + $$mz_delta + 0.0000000001; ## it's to included the maximum value in the search
	}
	else {	croak "The masses delta type '$$delta_type' isn't a valid type !\n" ;	}

    return(\$min, \$max) ;
}
## END of SUB

=head2 METHOD dichotomi_search

	## Description : returns the index of the position or the interval value
	##               does not work if there are duplicates in the table
	## Input : \@tab, \$search
	## Output : \$index
	## Usage : ($index)= dichotomi_search($mass, $sep) ;

=cut
## START of SUB
sub dichotomi_search {
	## Retrieve Values
    my $self = shift ;
    my ( $tab, $search ) = @_ ;
    my ($sup, $inf, $demi) = (scalar(@{$tab})-1, 0, 0);

    while(1) {
		$demi = int(($sup + $inf)/2);
		if($sup < $inf)  {
			if($inf==0){	$demi=-1;	}	elsif($sup==scalar(@{$tab})-1){	$demi=scalar(@{$tab});	} ## to distinguish items off limits
			last;
		}
		elsif ( ($$search == $$tab[$demi]) ) { last; }
		elsif ( ($$search > $$tab[$demi-1]) && ($$search < @$tab[$demi]) ) { last; }
		elsif($$search < $$tab[$demi]) { $sup = $demi - 1; next; }
		else { $inf = $demi + 1; next; }
	}

    return(\$demi) ;
}
## END of SUB

=head2 METHOD extract_sub_mz_lists

	## Description : extract a couples of sublist from a long mz list (more than $HMDB_LIMITS)
	## Input : $HMDB_LIMITS, $masses
	## Output : $sublists
	## Usage : my ( $sublists ) = extract_sub_mz_lists( $HMDB_LIMITS, $masses ) ;

=cut
## START of SUB
sub extract_sub_mz_lists {
	## Retrieve Values
    my $self = shift ;
    my ( $masses, $HMDB_LIMITS ) = @_ ;

    my ( @sublists, @sublist ) = ( (), () ) ;
    my $nb_mz = 0 ;
    my $nb_total_mzs = scalar(@{$masses}) ;

    for ( my $current_pos = 0 ; $current_pos < $nb_total_mzs ; $current_pos++ ) {

    	if ( $nb_mz < $HMDB_LIMITS ) {
    		if ( $masses->[$current_pos] ) { 	push (@sublist, $masses->[$current_pos]) ; $nb_mz++ ;	} # build sub list
    	}
    	elsif ( $nb_mz == $HMDB_LIMITS ) {
    		my @tmp = @sublist ; push (@sublists, \@tmp) ; @sublist = () ;	$nb_mz = 0 ;
    		$current_pos-- ;
    	}
    	if ($current_pos == $nb_total_mzs-1) { 	my @tmp = @sublist ; push (@sublists, \@tmp) ; }
	}
    return(\@sublists) ;
}
## END of SUB

=head2 METHOD prepare_multi_masses_query

	## Description : permet de generer une liste de masses au format d'interrogation de hmdb
	## Input : $masses
	## Output : $hmdb_masses
	## Usage : my ( $hmdb_masses ) = prepare_multi_masses_query( $masses ) ;

=cut
## START of SUB
sub prepare_multi_masses_query {
	## Retrieve Values
    my $self = shift ;
    my ( $masses ) = @_ ;

    my $hmdb_masses = undef ;
    my $sep = '%0D%0A' ; ## retour chariot encode
    my ($nb_masses, $i) = (0, 0) ;

    if ( defined $masses ) {
    	my @masses = @{$masses} ;
    	my $nb_masses = scalar ( @masses ) ;
    	if ( $nb_masses == 0 ) { croak "Your mass list is empty \n" ; }
    	elsif ( $nb_masses >= 150 ) { croak "Your mass list is too long : HMDB allows maximum 150 query masses per request \n" ; } ## Del it --- temporary patch

	    foreach my $mass (@masses) {

	    	if ($i < $nb_masses) {
	    		$hmdb_masses .= $mass.$sep ;
	    	}
	    	elsif ( $i == $nb_masses ) {
	    		$hmdb_masses .= $mass ;
	    	}
	    	else {
	    		last ;
	    	}
	    	$i ++ ;
	    }
    }
    else {
    	croak "No mass list found \n" ;
    }
    return($hmdb_masses, $nb_masses) ;
}
## END of SUB

=head2 METHOD get_matches_from_hmdb

	## Description : permet de requeter sur hmdb avec une masse, un delta de masse sur la banque de metabolites hmdb
	## Input : $mass, $delta, $mode
	## Output : $results
	## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;

=cut
## START of SUB
sub get_matches_from_hmdb {
	## Retrieve Values
    my $self = shift ;
    my ( $mass, $delta, $mode ) = @_ ;

    my @pages = () ;
    my $page = undef ;

    if ( (defined $mass) and (defined $delta) and (defined $mode) ) {
    	my $url = 'http://www.hmdb.ca/spectra/spectra/ms/search?utf8=TRUE&query_masses='.$mass.'&tolerance='.$delta.'&mode='.$mode.'&commit=Search' ;

#    	print $url."\n" ;

	    my $oUrl = url($url);
	    $page = get($oUrl);
#	    print $page."\n" ;

	    ## manage output
	    if ( ( !defined $page ) or ( $page eq "" ) ) {	die "Problem to connect to HMDB, page empty or undefined.\n" ; }
	    else { 					@pages = split(/\n/, $page); 	    }
    }
    return(\@pages) ;
}
## END of SUB

=head2 METHOD get_matches_from_hmdb_ua

	## Description : permet de requeter via un user agent sur hmdb avec une masse, un delta de masse sur la banque de metabolites hmdb
	## Input : $mass, $delta, $mode
	## Output : $results
	## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;

=cut
## START of SUB
sub get_matches_from_hmdb_ua {
	## Retrieve Values
    my $self = shift ;
    my ( $masses, $delta, $mode ) = @_ ;

    my @page = () ;

	my $ua = new LWP::UserAgent;
	$ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");

	my $req = HTTP::Request->new(
	    POST => 'http://specdb.wishartlab.com/ms/search.csv');

	$req->content_type('application/x-www-form-urlencoded');
	$req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');

	my $res = $ua->request($req);
#	print $res->as_string;
	@page = split ( /\n/, $res->decoded_content ) ;

	return (\@page) ;
}
## END of SUB


=head2 METHOD parse_hmdb_csv_results

	## Description : parse the csv results and get data
	## Input : $csv
	## Output : $results
	## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ;

=cut
## START of SUB
sub parse_hmdb_csv_results {
	## Retrieve Values
    my $self = shift ;
    my ( $csv, $masses ) = @_ ;

    my $test = 0 ;
    my ($query_mass,$compound_id,$formula,$compound_mass,$adduct,$adduct_type,$adduct_mass,$delta) = (0, undef, undef, undef, undef, undef, undef, undef) ;

    my %result_by_entry = () ;
    my %features = () ;

    foreach my $line (@{$csv}) {

    	if ($line !~ /query_mass,compound_id,formula,compound_mass,adduct,adduct_type,adduct_mass,delta/) {
    		my @entry = split(/,/, $line) ;

    		if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }

    		$features{ENTRY_ENTRY_ID} = $entry[1] ;
    		$features{ENTRY_FORMULA} = $entry[2] ;
    		$features{ENTRY_CPD_MZ} = $entry[3] ;
    		$features{ENTRY_ADDUCT} = $entry[4] ;
    		$features{ENTRY_ADDUCT_TYPE} = $entry[5] ;
    		$features{ENTRY_ADDUCT_MZ} = $entry[6] ;
    		$features{ENTRY_DELTA} = $entry[7] ;

    		my %temp = %features ;

    		push (@{$result_by_entry{$entry[0]} }, \%temp) ;
    	}
    	else {
    		next ;
    	}
    } ## end foreach

    ## manage per query_mzs (keep query masses order by array)
    my @results = () ;
    foreach (@{$masses}) {
    	if ($result_by_entry{$_}) { push (@results, $result_by_entry{$_}) ; }
    	else {push (@results, [] ) ;} ;
    }
    return(\@results) ;
}
## END of SUB

=head2 METHOD parse_hmdb_page_results

	## Description : permet de parser le contenu des resultats hmdb
	## Input : $page
	## Output : $results
	## Usage : my ( $results ) = parse_hmdb_page_result( $pages ) ;

=cut
## START of SUB
sub parse_hmdb_page_results {
	## Retrieve Values
    my $self = shift ;
    my ( $page ) = @_ ;

    my @results = () ;
    my ($catch_table, $catch_name) = (0, 0) ;
    my ($name, $adduct, $adduct_mw, $cpd_mw, $delta) = (undef, undef, undef, undef, undef) ;

    if ( defined $page ) {

    	my @page = @{$page} ;
    	my $ID = undef ;
    	my @result_by_mz = () ;
    	my %result_by_entry = () ;

		foreach my $line (@page)   {

			#Section de la page contenant les resultat
			if( $line =~/<table>/ ) { $catch_table = 1 ; }

			## Si il existe un resultat :
			if($catch_table == 1) {

			    #Id de la molecule, et creation du lien
			    if( $line =~ /<a href=\"\/metabolites\/(\w+)\" (.*)>/ )  {
			    	$ID = $1 ;
			    	$catch_name = 0 ;
			    	next ;
			    }
			    #Nom de la molecule ONLY!!
			    if ( $catch_name == 0 ) {

			    	if( $line =~ /<td>(.+)<\/td>/ ) {

			    		if ( !defined $name ) {
			    			$name = $1 ;
			    			$result_by_entry{'ENTRY_ENTRY_ID'} = $ID ;
					    	$result_by_entry{'ENTRY_NAME'} = $name ;
					    	next ;
			    		}
			    		if ( !defined $adduct ) { $adduct = $1 ;  $result_by_entry{'ENTRY_ADDUCT'} = $adduct ; next ; }
			    		if ( !defined $adduct_mw ) {  $adduct_mw = $1 ; $result_by_entry{'ENTRY_ADDUCT_MZ'} = $adduct_mw ; next ; 	}
			    		if ( !defined $cpd_mw ) { $cpd_mw = $1 ; $result_by_entry{'ENTRY_CPD_MZ'} = $cpd_mw ; next ; 	}
			    		if ( !defined $delta ) {
			    			$delta = $1 ;
			    			$result_by_entry{'ENTRY_DELTA'} = $delta ;
			    			$catch_name = 1 ;
			    			my %tmp = %result_by_entry ;
			    			push (@result_by_mz, \%tmp) ;
			    			%result_by_entry = () ;
			    			( $name, $cpd_mw, $delta, $adduct, $adduct_mw ) = ( undef, undef, undef, undef, undef ) ;
			    			next ;
			    		}
				    }
			    }
			}
			#Fin de la section contenant les resultats
			if( $line =~ /<\/table>/ ) {
				$catch_table = 0 ;
				my @Tmp = @result_by_mz ;
				push(@results, \@Tmp) ;
				@result_by_mz = () ;
			}
	    }
    }
    return(\@results) ;
}
## END of SUB

=head2 METHOD set_html_tbody_object

	## Description : initializes and build the tbody object (perl array) need to html template
	## Input : $nb_pages, $nb_items_per_page
	## Output : $tbody_object
	## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ;

=cut
## START of SUB
sub set_html_tbody_object {
	my $self = shift ;
    my ( $nb_pages, $nb_items_per_page ) = @_ ;

	my ( @tbody_object ) = ( ) ;

	for ( my $i = 1 ; $i <= $nb_pages ; $i++ ) {

	    my %pages = (
	    	# tbody feature
	    	PAGE_NB => $i,
	    	MASSES => [], ## end MASSES
	    ) ; ## end TBODY N
	    push (@tbody_object, \%pages) ;
	}
    return(\@tbody_object) ;
}
## END of SUB

=head2 METHOD add_mz_to_tbody_object

	## Description : initializes and build the mz object (perl array) need to html template
	## Input : $tbody_object, $nb_items_per_page, $mz_list
	## Output : $tbody_object
	## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list ) ;

=cut
## START of SUB
sub add_mz_to_tbody_object {
	my $self = shift ;
    my ( $tbody_object, $nb_items_per_page, $mz_list, $ids_list ) = @_ ;

	my ( $current_page, $mz_index ) = ( 0, 0 ) ;

	foreach my $page ( @{$tbody_object} ) {

		my @colors = ('white', 'green') ;
		my ( $current_index, , $icolor ) = ( 0, 0 ) ;

		for ( my $i = 1 ; $i <= $nb_items_per_page ; $i++ ) {
			#
			if ( $current_index > $nb_items_per_page ) { ## manage exact mz per html page
				$current_index = 0 ;
				last ; ##
			}
			else {
				$current_index++ ;
				if ( $icolor > 1 ) { $icolor = 0 ; }

				if ( exists $mz_list->[$mz_index]  ) {

					my %mz = (
						# mass feature
						MASSES_ID_QUERY => $ids_list->[$mz_index],
						MASSES_MZ_QUERY => $mz_list->[$mz_index],
						MZ_COLOR => $colors[$icolor],
						MASSES_NB => $mz_index+1,
						ENTRIES => [] ,
					) ;
					push ( @{ $tbody_object->[$current_page]{MASSES} }, \%mz ) ;
					# Html attr for mass
					$icolor++ ;
				}
			}
			$mz_index++ ;
		} ## foreach mz

		$current_page++ ;
	}
    return($tbody_object) ;
}
## END of SUB

=head2 METHOD add_entries_to_tbody_object

	## Description : initializes and build the mz object (perl array) need to html template
	## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries
	## Output : $tbody_object
	## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ;

=cut
## START of SUB
sub add_entries_to_tbody_object {
	## Retrieve Values
    my $self = shift ;
    my ( $tbody_object, $nb_items_per_page, $mz_list, $entries ) = @_ ;

    my $index_page = 0 ;
    my $index_mz_continous = 0 ;

    foreach my $page (@{$tbody_object}) {

    	my $index_mz = 0 ;

    	foreach my $mz (@{ $tbody_object->[$index_page]{MASSES} }) {

    		my $index_entry = 0 ;

    		my @anti_redondant = ('N/A') ;
    		my $check_rebond = 0 ;

    		foreach my $entry (@{ $entries->[$index_mz_continous] }) {

    			## dispo anti doublons des entries
    			foreach my $rebond (@anti_redondant) {
    				if ( $rebond eq $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) {	$check_rebond = 1 ; last ; }
    			}

    			if ( $check_rebond == 0 ) {

    				 push ( @anti_redondant, $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) ;

    				my %entry = (
		    			ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR},
		   				ENTRY_ENTRY_ID => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID},
		   				ENTRY_ENTRY_ID2 => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID},
						ENTRY_FORMULA => $entries->[$index_mz_continous][$index_entry]{ENTRY_FORMULA},
						ENTRY_CPD_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_CPD_MZ},
						ENTRY_ADDUCT => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT},
						ENTRY_ADDUCT_TYPE => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_TYPE},
						ENTRY_ADDUCT_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_MZ},
						ENTRY_DELTA => $entries->[$index_mz_continous][$index_entry]{ENTRY_DELTA},
		    		) ;

	    			push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ;
    			}
    			$check_rebond = 0 ; ## reinit double control
    			$index_entry++ ;
    		}
    		$index_mz ++ ;
    		$index_mz_continous ++ ;
    	}
    	$index_page++ ;
    }
    return($tbody_object) ;
}
## END of SUB

=head2 METHOD write_html_skel

	## Description : prepare and write the html output file
	## Input : $html_file_name, $html_object, $html_template
	## Output : $html_file_name
	## Usage : my ( $html_file_name ) = write_html_skel( $html_file_name, $html_object ) ;

=cut
## START of SUB
sub write_html_skel {
	## Retrieve Values
    my $self = shift ;
    my ( $html_file_name,  $html_object, $pages , $search_condition, $html_template, $js_path, $css_path ) = @_ ;

    my $html_file = $$html_file_name ;

    if ( defined $html_file ) {
		open ( HTML, ">$html_file" ) or die "Can't create the output file $html_file " ;

		if (-e $html_template) {
			my $ohtml = HTML::Template->new(filename => $html_template);
			$ohtml->param(  JS_GALAXY_PATH => $js_path, CSS_GALAXY_PATH => $css_path  ) ;
			$ohtml->param(  CONDITIONS => $search_condition  ) ;
			$ohtml->param(  PAGES_NB => $pages  ) ;
			$ohtml->param(  PAGES => $html_object  ) ;
			print HTML $ohtml->output ;
		}
		else {
			croak "Can't fill any html output : No template available ($html_template)\n" ;
		}

		close (HTML) ;
    }
    else {
    	croak "No output file name available to write HTML file\n" ;
    }
    return(\$html_file) ;
}
## END of SUB

=head2 METHOD set_lm_matrix_object

	## Description : build the bih_row under its ref form
	## Input : $header, $init_mzs, $col_mzdb, $results, $rts, $col_rtdb, $bank_head, $sep
	## Output : $hmdb_matrix
	## Usage : my ( $hmdb_matrix ) = set_lm_matrix_object( $header, $init_mzs, $col_mzdb, $results, $rts, $col_rtdb, $bank_head, $sep ) ;

=cut
## START of SUB
sub set_lm_matrix_object {
	## Retrieve Values
    my $self = shift ;
    my ( $header, $init_mzs, $col_mzdb, $results, $rts, $col_rtdb, $bank_head, $sep ) = @_ ;

    my @bih_matrix = () ;

    if ( defined $header ) {
    	my @headers = () ;
    	push @headers, $header ;
    	push @bih_matrix, \@headers ;
    }

    my $index_mz = 0 ;
    $col_mzdb -= 1;     $col_rtdb -= 1; ## conversion in array number

    foreach my $mz ( @{$init_mzs} ) {

    	my $index_entries = 0 ;
    	my @clusters = () ;
    	my $cluster_col = undef ;

    	foreach my $entry (@{ $results->[$index_mz] }) {

    		my $format_float = Math::BigFloat->new($entry->[$col_mzdb]);	## requires "use Math::BigFloat;"
			my $delta_mz = abs( $format_float-$mz );	## management problem on small float
			# manage final pipe
			if ($index_entries == 0){	$cluster_col .= $delta_mz.$sep.$entry->[$col_mzdb].$sep ;	} ## Managing multiple results transition pipes "|"
			else{	$cluster_col .= '|'.$delta_mz.$sep.$entry->[$col_mzdb].$sep ;	}

			if  ( ( defined $rts ) and ( $rts ne "" ) ) {
				my $rt = $rts->[$index_mz] ;
				my $format_float_rt = Math::BigFloat->new($entry->[$col_rtdb]);	## requires "use Math::BigFloat;"
				my $delta_rt = abs( $format_float_rt-$rt );	## management problem on small float
				$cluster_col .= $delta_rt.$sep.$entry->[$col_rtdb].$sep;
			}
    		for (my $i=0; $i<=$#$entry; $i++){
    			if($i == $#$entry){	$cluster_col .= $entry->[$i];	} ## Managing multiple results transition "#"
    			else {	$cluster_col .= $entry->[$i].$sep;	}
    		}
    		$index_entries++ ;
    	}
    	if ( !defined $cluster_col ) { $cluster_col = 'No_result_found_in_bank_inhouse' ; }

	    ## $cluster_col like METLIN data display model but the "::" have been modified (#) for ease of Excel reading
	   	## entry1=VAR1::VAR2::VAR3::VAR4|entry2=VAR1::VAR2::VAR3::VAR4|...
    	push (@clusters, $cluster_col) ;
    	push (@bih_matrix, \@clusters) ;
    	$index_mz++ ;
    }
    return(\@bih_matrix) ;
}
## END of SUB

=head2 METHOD add_lm_matrix_to_input_matrix

	## Description : build a full matrix (input + lm column)
	## Input : $input_matrix_object, $lm_matrix_object, $nb_header
	## Output : $output_matrix_object
	## Usage : my ( $output_matrix_object ) = add_lm_matrix_to_input_matrix( $input_matrix_object, $lm_matrix_object, $nb_header ) ;

=cut
## START of SUB
sub add_lm_matrix_to_input_matrix {
	## Retrieve Values
    my $self = shift ;
    my ( $input_matrix_object, $lm_matrix_object, $nb_header ) = @_ ;

    my @output_matrix_object = () ;
    my $index_row = 0 ;
    my $line = 0 ;

    foreach my $row ( @{$input_matrix_object} ) {
    	my @init_row = @{$row} ;
    	$line++;

    	if ( ( defined $nb_header ) and ( $line <= $nb_header) ) {
    		push (@output_matrix_object, \@init_row) ;
    		next ;
    	}

    	if ( $lm_matrix_object->[$index_row] ) {
    		my $dim = scalar(@{$lm_matrix_object->[$index_row]}) ;

    		if ($dim > 1) { warn "the add method can't manage more than one column\n" ;}
    		if (defined $lm_matrix_object->[$index_row][$dim-1]) {
    			my $lm_col =  $lm_matrix_object->[$index_row][$dim-1] ;
   		 		push (@init_row, $lm_col) ;
    		}

	    	$index_row++ ;
    	}
    	push (@output_matrix_object, \@init_row) ;
    }
    return(\@output_matrix_object) ;
}
## END of SUB

=head2 METHOD write_csv_skel

	## Description : prepare and write csv output file
	## Input : $csv_file, $rows
	## Output : $csv_file
	## Usage : my ( $csv_file ) = write_csv_skel( $csv_file, $rows ) ;

=cut
## START of SUB
sub write_csv_skel {
	## Retrieve Values
    my $self = shift ;
    my ( $csv_file, $rows ) = @_ ;

    my $ocsv = lib::csv::new() ;
	my $csv = $ocsv->get_csv_object("\t") ;
	$ocsv->write_csv_from_arrays($csv, $$csv_file, $rows) ;

    return($csv_file) ;
}
## END of SUB


=head2 METHOD write_full_excel_like

	## Description : allows to print a tsv file
	## Input : $input_matrix_object, $sep, $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $nb_header, $bank_head, $bank_name
	## Output : N/A
	## Usage : write_full_excel_like( $input_matrix_object, $sep, $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $nb_header, $bank_head, $bank_name ) ;

=cut
## START of SUB
sub write_full_excel_like {
	## Retrieve Values
    my $self = shift ;
    my ( $input_matrix_object, $sep, $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $nb_header, $bank_head, $bank_name ) = @_ ;

    open(CSV, '>:utf8', $file) or die "Cant' create the file $file\n" ;

    my $line = 0 ;
    my $index_mz = 0 ;
    $col_mzdb -= 1;     $col_rtdb -= 1; ## conversion in array number

    foreach my $row ( @{$input_matrix_object} ) {
    	my $join_row = join($sep, @$row);

    	if ( defined $nb_header ){
    		$line++;
    		if ( $line < $nb_header ) {	print CSV $join_row."\n";	next ;	}
    		elsif ( $line == $nb_header ){
    			my $head = join("_".$bank_name.$sep, @$bank_head);
    			print CSV $join_row.$sep.$head."_".$bank_name."\n";
    			next ;
    		}
    	}

    	my $mass = $masses->[$index_mz] ;
    	my $results4mass = $results->[$index_mz];

    	if ( ref($results4mass) eq 'ARRAY' and defined $results4mass and $results4mass ne [] and $#$results4mass>=0) { ## an requested id has a result in the list of array $results.
    		foreach my $entry (@{$results->[$index_mz]}) {
    			print CSV $join_row."\t";
    			my $format_float = Math::BigFloat->new($entry->[$col_mzdb]);	## requires "use Math::BigFloat;"
				my $delta_mass = abs( $format_float-$mass );	## management problem on small float
				print CSV $delta_mass."\t".$entry->[$col_mzdb];

				if  ( ( defined $rts ) and ( $rts ne "" ) ) {
	    			my $rt = $rts->[$index_mz] ;
	    			my $format_float_rt = Math::BigFloat->new($entry->[$col_rtdb]);	## requires "use Math::BigFloat;"
					my $delta_rt = abs( $format_float_rt-$rt );	## management problem on small float
	    			print CSV "\t".$rt."\t".$delta_rt."\t".$entry->[$col_rtdb];
				}
    			for (my $i=0; $i<=$#$entry; $i++){	print CSV "\t".$entry->[$i];	}
    			print CSV "\n";
    		}
    	}
    	else {
    		print CSV $join_row."\tno results found";
    		for (my $i=0; $i<$#$bank_head; $i++){	print CSV "\t";	}
    		print CSV "\n";
    	}
    	$index_mz++ ;
    }
   	close(CSV) ;
    return() ;
}
## END of SUB


=head2 METHOD write_excel_like_mass

	## Description : allows to print a tsv file if retention time is required
	## Input : $masses, $ids, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $bank_head
	## Output : N/A
	## Usage : write_excel_like_mass( $masses, $ids, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $bank_head ) ;
=cut
## START of SUB
sub write_excel_like_mass {
	## Retrieve Values
    my $self = shift ;
    my ( $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $out_head ) = @_ ;

    open(CSV, '>:utf8', $file) or die "Cant' create the file $file\n" ;

    my $index_mz = 0 ;	my @bank_head = @$out_head;
    $col_mzdb -= 1; ## conversion in array number
    if  ( ( defined $rts ) and ( $rts ne "" ) ) {
	    splice (@bank_head, 2, 0, "RT_Submit");
	    $col_rtdb -= 1; ## conversion in array number
    }
    my $head = join("\t", @bank_head);
    print CSV "MASS_Submit\t".$head."\n" ;

    foreach my $mass (@{$masses}) {
    	my $results4mass = $results->[$index_mz];

    	if ( ref($results4mass) eq 'ARRAY' and defined $results4mass and $results4mass ne [] and $#$results4mass>=0) { ## an requested id has a result in the list of array $results.
    		foreach my $entry (@{$results->[$index_mz]}) {
    			print CSV $mass."\t";
    			my $format_float = Math::BigFloat->new($entry->[$col_mzdb]);	## requires "use Math::BigFloat;"
				my $delta_mass = abs( $format_float-$mass );	## management problem on small float
				print CSV $delta_mass."\t".$entry->[$col_mzdb];

				if  ( ( defined $rts ) and ( $rts ne "" ) ) {
	    			my $rt = $rts->[$index_mz] ;
	    			my $format_float_rt = Math::BigFloat->new($entry->[$col_rtdb]);	## requires "use Math::BigFloat;"
					my $delta_rt = abs( $format_float_rt-$rt );	## management problem on small float
	    			print CSV "\t".$rt."\t".$delta_rt."\t".$entry->[$col_rtdb];
				}
    			for (my $i=0; $i<=$#$entry; $i++){	print CSV "\t".$entry->[$i];	}
    			print CSV "\n";
    		}
    	}
    	else {
    		print CSV $mass."\tno results found";
    		for (my $i=0; $i<$#bank_head; $i++){	print CSV "\t";	}
    		print CSV "\n";
    	}
    	$index_mz++ ;
    }
   	close(CSV) ;
    return() ;
}
## END of SUB


=head2 METHOD db_pforest_get_clean_range

	## Description : get a clean range of mass from PeakForest and REST
	## Input : $ws_host, $query, $max
	## Output : $json
	## Usage : $json = db_pforest_get_clean_range( $ws_host, $query, $max ) ;
=cut
## START of SUB


#sub db_pforest_get_clean_range {
#    my $self = shift;
#    my ( $ws_host, $query, $min, $max, $mode) = @_;
#    my $json = undef ;
#	# init
##	my $ws_url = "https://rest.peakforest.org/search/compounds/monoisotopicmass/59.048/0.02";
#
#	$ENV{HTTPS_VERSION} = 3;
#	#$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0 ;
#
#	my $headers = {Accept => 'application/json', Authorization => 'Basic '};
#	my $client = REST::Client->new({
#         host    => $ws_host,
##         cert    => '/path/to/ssl.crt',
##         key     => '/path/to/ssl.key',
##         ca      => '/path/to/ca.file',
#         timeout => 100,
#     });
#    my $complete_query = $query.'/'.$min.'/'.$max ;
#
#    if (defined $mode) {
#    	$complete_query = $complete_query.'?mode='.$mode ;
#    }
#
#
#    print $complete_query."\n" ;
#    $client->GET($complete_query , $headers);
#	$json = from_json ($client->responseContent()) ;
#
#	return ($json) ;
#}

=head2 METHOD map_pfjson_bankobject

	## Description : map PForest json with the original BiH Bank object
	## Input : $json
	## Output : $complete_bank, $bank_head
	## Usage : ($complete_bank, $bank_heads) = map_pfjson_bankobject( json ) ;
=cut
## START of SUB
sub map_pfjson_bankobject {
	my $self = shift;
    my ( $json ) = @_;

    my ( %complete_bank ) = () ;
    my ( @bank_head ) = ('id', 'mz')  ;

    foreach my $cpd (@$json) {
    	$complete_bank{$cpd->{'mz'}} = [] ;
    	my @tmp = @{$cpd->{'cpds'}} ;

    	push ( @tmp, $cpd->{'mz'} ) ;
    	push ( @{ $complete_bank{$cpd->{'mz'} } }, \@tmp ) ;

    }
    return (\%complete_bank, \@bank_head) ;
}
## END of SUB

1 ;


__END__

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

 perldoc bih.pm

=head1 Exports

=over 4

=item :ALL is prepare_multi_masses_query

=back

=head1 AUTHOR

Marion Landi E<lt>marion.landi@clermont.inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 19 / 11 / 2014

=cut
author	fgiacomoni
date	Thu, 04 Oct 2018 10:37:14 -0400
parents
children	52798007c6b2